ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 11 | // |
| 12 | // Implements helper functions and classes for intelligibility enhancement. |
| 13 | // |
| 14 | |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 15 | #include "webrtc/modules/audio_processing/intelligibility/intelligibility_utils.h" |
| 16 | |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 17 | #include <math.h> |
ekmeyerson | 3ab2f14 | 2015-07-23 12:15:24 -0700 | [diff] [blame] | 18 | #include <stdlib.h> |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 19 | #include <string.h> |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 20 | #include <algorithm> |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 21 | |
| 22 | using std::complex; |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 23 | using std::min; |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 24 | |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 25 | namespace webrtc { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 26 | |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 27 | namespace intelligibility { |
| 28 | |
| 29 | float UpdateFactor(float target, float current, float limit) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 30 | float delta = fabsf(target - current); |
| 31 | float sign = copysign(1.0f, target - current); |
| 32 | return current + sign * fminf(delta, limit); |
| 33 | } |
| 34 | |
ekmeyerson | 3ab2f14 | 2015-07-23 12:15:24 -0700 | [diff] [blame] | 35 | float AddDitherIfZero(float value) { |
| 36 | return value == 0.f ? std::rand() * 0.01f / RAND_MAX : value; |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 37 | } |
| 38 | |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 39 | complex<float> zerofudge(complex<float> c) { |
ekmeyerson | 3ab2f14 | 2015-07-23 12:15:24 -0700 | [diff] [blame] | 40 | return complex<float>(AddDitherIfZero(c.real()), AddDitherIfZero(c.imag())); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 41 | } |
| 42 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 43 | complex<float> NewMean(complex<float> mean, complex<float> data, size_t count) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 44 | return mean + (data - mean) / static_cast<float>(count); |
| 45 | } |
| 46 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 47 | void AddToMean(complex<float> data, size_t count, complex<float>* mean) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 48 | (*mean) = NewMean(*mean, data, count); |
| 49 | } |
| 50 | |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 51 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 52 | static const size_t kWindowBlockSize = 10; |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 53 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 54 | VarianceArray::VarianceArray(size_t num_freqs, |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 55 | StepType type, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 56 | size_t window_size, |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 57 | float decay) |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 58 | : running_mean_(new complex<float>[num_freqs]()), |
| 59 | running_mean_sq_(new complex<float>[num_freqs]()), |
| 60 | sub_running_mean_(new complex<float>[num_freqs]()), |
| 61 | sub_running_mean_sq_(new complex<float>[num_freqs]()), |
| 62 | variance_(new float[num_freqs]()), |
| 63 | conj_sum_(new float[num_freqs]()), |
| 64 | num_freqs_(num_freqs), |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 65 | window_size_(window_size), |
| 66 | decay_(decay), |
| 67 | history_cursor_(0), |
| 68 | count_(0), |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 69 | array_mean_(0.0f), |
| 70 | buffer_full_(false) { |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 71 | history_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]()); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 72 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 73 | history_[i].reset(new complex<float>[window_size_]()); |
| 74 | } |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 75 | subhistory_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]()); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 76 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 77 | subhistory_[i].reset(new complex<float>[window_size_]()); |
| 78 | } |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 79 | subhistory_sq_.reset(new rtc::scoped_ptr<complex<float>[]>[num_freqs_]()); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 80 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 81 | subhistory_sq_[i].reset(new complex<float>[window_size_]()); |
| 82 | } |
| 83 | switch (type) { |
| 84 | case kStepInfinite: |
| 85 | step_func_ = &VarianceArray::InfiniteStep; |
| 86 | break; |
| 87 | case kStepDecaying: |
| 88 | step_func_ = &VarianceArray::DecayStep; |
| 89 | break; |
| 90 | case kStepWindowed: |
| 91 | step_func_ = &VarianceArray::WindowedStep; |
| 92 | break; |
| 93 | case kStepBlocked: |
| 94 | step_func_ = &VarianceArray::BlockedStep; |
| 95 | break; |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 96 | case kStepBlockBasedMovingAverage: |
| 97 | step_func_ = &VarianceArray::BlockBasedMovingAverage; |
| 98 | break; |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 99 | } |
| 100 | } |
| 101 | |
| 102 | // Compute the variance with Welford's algorithm, adding some fudge to |
| 103 | // the input in case of all-zeroes. |
| 104 | void VarianceArray::InfiniteStep(const complex<float>* data, bool skip_fudge) { |
| 105 | array_mean_ = 0.0f; |
| 106 | ++count_; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 107 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 108 | complex<float> sample = data[i]; |
| 109 | if (!skip_fudge) { |
| 110 | sample = zerofudge(sample); |
| 111 | } |
| 112 | if (count_ == 1) { |
| 113 | running_mean_[i] = sample; |
| 114 | variance_[i] = 0.0f; |
| 115 | } else { |
| 116 | float old_sum = conj_sum_[i]; |
| 117 | complex<float> old_mean = running_mean_[i]; |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 118 | running_mean_[i] = |
| 119 | old_mean + (sample - old_mean) / static_cast<float>(count_); |
| 120 | conj_sum_[i] = |
| 121 | (old_sum + std::conj(sample - old_mean) * (sample - running_mean_[i])) |
| 122 | .real(); |
| 123 | variance_[i] = |
ekmeyerson | 3ab2f14 | 2015-07-23 12:15:24 -0700 | [diff] [blame] | 124 | conj_sum_[i] / (count_ - 1); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 125 | } |
| 126 | array_mean_ += (variance_[i] - array_mean_) / (i + 1); |
| 127 | } |
| 128 | } |
| 129 | |
| 130 | // Compute the variance from the beginning, with exponential decaying of the |
| 131 | // series data. |
| 132 | void VarianceArray::DecayStep(const complex<float>* data, bool /*dummy*/) { |
| 133 | array_mean_ = 0.0f; |
| 134 | ++count_; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 135 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 136 | complex<float> sample = data[i]; |
| 137 | sample = zerofudge(sample); |
| 138 | |
| 139 | if (count_ == 1) { |
| 140 | running_mean_[i] = sample; |
| 141 | running_mean_sq_[i] = sample * std::conj(sample); |
| 142 | variance_[i] = 0.0f; |
| 143 | } else { |
| 144 | complex<float> prev = running_mean_[i]; |
| 145 | complex<float> prev2 = running_mean_sq_[i]; |
| 146 | running_mean_[i] = decay_ * prev + (1.0f - decay_) * sample; |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 147 | running_mean_sq_[i] = |
| 148 | decay_ * prev2 + (1.0f - decay_) * sample * std::conj(sample); |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 149 | variance_[i] = (running_mean_sq_[i] - |
| 150 | running_mean_[i] * std::conj(running_mean_[i])).real(); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 151 | } |
| 152 | |
| 153 | array_mean_ += (variance_[i] - array_mean_) / (i + 1); |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | // Windowed variance computation. On each step, the variances for the |
| 158 | // window are recomputed from scratch, using Welford's algorithm. |
| 159 | void VarianceArray::WindowedStep(const complex<float>* data, bool /*dummy*/) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 160 | size_t num = min(count_ + 1, window_size_); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 161 | array_mean_ = 0.0f; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 162 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 163 | complex<float> mean; |
| 164 | float conj_sum = 0.0f; |
| 165 | |
| 166 | history_[i][history_cursor_] = data[i]; |
| 167 | |
| 168 | mean = history_[i][history_cursor_]; |
| 169 | variance_[i] = 0.0f; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 170 | for (size_t j = 1; j < num; ++j) { |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 171 | complex<float> sample = |
| 172 | zerofudge(history_[i][(history_cursor_ + j) % window_size_]); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 173 | sample = history_[i][(history_cursor_ + j) % window_size_]; |
| 174 | float old_sum = conj_sum; |
| 175 | complex<float> old_mean = mean; |
| 176 | |
| 177 | mean = old_mean + (sample - old_mean) / static_cast<float>(j + 1); |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 178 | conj_sum = |
| 179 | (old_sum + std::conj(sample - old_mean) * (sample - mean)).real(); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 180 | variance_[i] = conj_sum / (j); |
| 181 | } |
| 182 | array_mean_ += (variance_[i] - array_mean_) / (i + 1); |
| 183 | } |
| 184 | history_cursor_ = (history_cursor_ + 1) % window_size_; |
| 185 | ++count_; |
| 186 | } |
| 187 | |
| 188 | // Variance with a window of blocks. Within each block, the variances are |
| 189 | // recomputed from scratch at every stp, using |Var(X) = E(X^2) - E^2(X)|. |
| 190 | // Once a block is filled with kWindowBlockSize samples, it is added to the |
| 191 | // history window and a new block is started. The variances for the window |
| 192 | // are recomputed from scratch at each of these transitions. |
| 193 | void VarianceArray::BlockedStep(const complex<float>* data, bool /*dummy*/) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 194 | size_t blocks = min(window_size_, history_cursor_ + 1); |
| 195 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 196 | AddToMean(data[i], count_ + 1, &sub_running_mean_[i]); |
| 197 | AddToMean(data[i] * std::conj(data[i]), count_ + 1, |
| 198 | &sub_running_mean_sq_[i]); |
| 199 | subhistory_[i][history_cursor_ % window_size_] = sub_running_mean_[i]; |
| 200 | subhistory_sq_[i][history_cursor_ % window_size_] = sub_running_mean_sq_[i]; |
| 201 | |
ekm | db4fecf | 2015-06-22 17:49:08 -0700 | [diff] [blame] | 202 | variance_[i] = |
| 203 | (NewMean(running_mean_sq_[i], sub_running_mean_sq_[i], blocks) - |
| 204 | NewMean(running_mean_[i], sub_running_mean_[i], blocks) * |
| 205 | std::conj(NewMean(running_mean_[i], sub_running_mean_[i], blocks))) |
| 206 | .real(); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 207 | if (count_ == kWindowBlockSize - 1) { |
| 208 | sub_running_mean_[i] = complex<float>(0.0f, 0.0f); |
| 209 | sub_running_mean_sq_[i] = complex<float>(0.0f, 0.0f); |
| 210 | running_mean_[i] = complex<float>(0.0f, 0.0f); |
| 211 | running_mean_sq_[i] = complex<float>(0.0f, 0.0f); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 212 | for (size_t j = 0; j < min(window_size_, history_cursor_); ++j) { |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 213 | AddToMean(subhistory_[i][j], j + 1, &running_mean_[i]); |
| 214 | AddToMean(subhistory_sq_[i][j], j + 1, &running_mean_sq_[i]); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 215 | } |
| 216 | ++history_cursor_; |
| 217 | } |
| 218 | } |
| 219 | ++count_; |
| 220 | if (count_ == kWindowBlockSize) { |
| 221 | count_ = 0; |
| 222 | } |
| 223 | } |
| 224 | |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 225 | // Recomputes variances for each window from scratch based on previous window. |
| 226 | void VarianceArray::BlockBasedMovingAverage(const std::complex<float>* data, |
| 227 | bool /*dummy*/) { |
| 228 | // TODO(ekmeyerson) To mitigate potential divergence, add counter so that |
| 229 | // after every so often sums are computed scratch by summing over all |
| 230 | // elements instead of subtracting oldest and adding newest. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 231 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 232 | sub_running_mean_[i] += data[i]; |
| 233 | sub_running_mean_sq_[i] += data[i] * std::conj(data[i]); |
| 234 | } |
| 235 | ++count_; |
| 236 | |
| 237 | // TODO(ekmeyerson) Make kWindowBlockSize nonconstant to allow |
| 238 | // experimentation with different block size,window size pairs. |
| 239 | if (count_ >= kWindowBlockSize) { |
| 240 | count_ = 0; |
| 241 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 242 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 35b72fb | 2015-07-10 14:11:52 -0700 | [diff] [blame] | 243 | running_mean_[i] -= subhistory_[i][history_cursor_]; |
| 244 | running_mean_sq_[i] -= subhistory_sq_[i][history_cursor_]; |
| 245 | |
| 246 | float scale = 1.f / kWindowBlockSize; |
| 247 | subhistory_[i][history_cursor_] = sub_running_mean_[i] * scale; |
| 248 | subhistory_sq_[i][history_cursor_] = sub_running_mean_sq_[i] * scale; |
| 249 | |
| 250 | sub_running_mean_[i] = std::complex<float>(0.0f, 0.0f); |
| 251 | sub_running_mean_sq_[i] = std::complex<float>(0.0f, 0.0f); |
| 252 | |
| 253 | running_mean_[i] += subhistory_[i][history_cursor_]; |
| 254 | running_mean_sq_[i] += subhistory_sq_[i][history_cursor_]; |
| 255 | |
| 256 | scale = 1.f / (buffer_full_ ? window_size_ : history_cursor_ + 1); |
| 257 | variance_[i] = std::real(running_mean_sq_[i] * scale - |
| 258 | running_mean_[i] * scale * |
| 259 | std::conj(running_mean_[i]) * scale); |
| 260 | } |
| 261 | |
| 262 | ++history_cursor_; |
| 263 | if (history_cursor_ >= window_size_) { |
| 264 | buffer_full_ = true; |
| 265 | history_cursor_ = 0; |
| 266 | } |
| 267 | } |
| 268 | } |
| 269 | |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 270 | void VarianceArray::Clear() { |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 271 | memset(running_mean_.get(), 0, sizeof(*running_mean_.get()) * num_freqs_); |
| 272 | memset(running_mean_sq_.get(), 0, |
| 273 | sizeof(*running_mean_sq_.get()) * num_freqs_); |
| 274 | memset(variance_.get(), 0, sizeof(*variance_.get()) * num_freqs_); |
| 275 | memset(conj_sum_.get(), 0, sizeof(*conj_sum_.get()) * num_freqs_); |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 276 | history_cursor_ = 0; |
| 277 | count_ = 0; |
| 278 | array_mean_ = 0.0f; |
| 279 | } |
| 280 | |
| 281 | void VarianceArray::ApplyScale(float scale) { |
| 282 | array_mean_ = 0.0f; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 283 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 284 | variance_[i] *= scale * scale; |
| 285 | array_mean_ += (variance_[i] - array_mean_) / (i + 1); |
| 286 | } |
| 287 | } |
| 288 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 289 | GainApplier::GainApplier(size_t freqs, float change_limit) |
ekmeyerson | 60d9b33 | 2015-08-14 10:35:55 -0700 | [diff] [blame] | 290 | : num_freqs_(freqs), |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 291 | change_limit_(change_limit), |
| 292 | target_(new float[freqs]()), |
| 293 | current_(new float[freqs]()) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 294 | for (size_t i = 0; i < freqs; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 295 | target_[i] = 1.0f; |
| 296 | current_[i] = 1.0f; |
| 297 | } |
| 298 | } |
| 299 | |
| 300 | void GainApplier::Apply(const complex<float>* in_block, |
| 301 | complex<float>* out_block) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 302 | for (size_t i = 0; i < num_freqs_; ++i) { |
ekm | 030249d | 2015-06-15 13:02:24 -0700 | [diff] [blame] | 303 | float factor = sqrtf(fabsf(current_[i])); |
| 304 | if (!std::isnormal(factor)) { |
| 305 | factor = 1.0f; |
| 306 | } |
| 307 | out_block[i] = factor * in_block[i]; |
| 308 | current_[i] = UpdateFactor(target_[i], current_[i], change_limit_); |
| 309 | } |
| 310 | } |
| 311 | |
| 312 | } // namespace intelligibility |
| 313 | |
| 314 | } // namespace webrtc |