pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "modules/audio_processing/transient/transient_detector.h" |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 12 | |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 13 | #include <float.h> |
| 14 | #include <math.h> |
| 15 | #include <string.h> |
| 16 | |
kwiberg | 85d8bb0 | 2016-02-16 20:39:36 -0800 | [diff] [blame] | 17 | #include <algorithm> |
| 18 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 19 | #include "modules/audio_processing/transient/common.h" |
| 20 | #include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h" |
| 21 | #include "modules/audio_processing/transient/moving_moments.h" |
| 22 | #include "modules/audio_processing/transient/wpd_tree.h" |
| 23 | #include "rtc_base/checks.h" |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 24 | |
| 25 | namespace webrtc { |
| 26 | |
| 27 | static const int kTransientLengthMs = 30; |
| 28 | static const int kChunksAtStartupLeftToDelete = |
| 29 | kTransientLengthMs / ts::kChunkSizeMs; |
| 30 | static const float kDetectThreshold = 16.f; |
| 31 | |
| 32 | TransientDetector::TransientDetector(int sample_rate_hz) |
| 33 | : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000), |
| 34 | last_first_moment_(), |
| 35 | last_second_moment_(), |
| 36 | chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete), |
| 37 | reference_energy_(1.f), |
| 38 | using_reference_(false) { |
kwiberg | 9e2be5f | 2016-09-14 05:23:22 -0700 | [diff] [blame] | 39 | RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz || |
| 40 | sample_rate_hz == ts::kSampleRate16kHz || |
| 41 | sample_rate_hz == ts::kSampleRate32kHz || |
| 42 | sample_rate_hz == ts::kSampleRate48kHz); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 43 | int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000; |
| 44 | // Adjustment to avoid data loss while downsampling, making |
| 45 | // |samples_per_chunk_| and |samples_per_transient| always divisible by |
| 46 | // |kLeaves|. |
| 47 | samples_per_chunk_ -= samples_per_chunk_ % kLeaves; |
| 48 | samples_per_transient -= samples_per_transient % kLeaves; |
| 49 | |
| 50 | tree_leaves_data_length_ = samples_per_chunk_ / kLeaves; |
| 51 | wpd_tree_.reset(new WPDTree(samples_per_chunk_, |
| 52 | kDaubechies8HighPassCoefficients, |
| 53 | kDaubechies8LowPassCoefficients, |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 54 | kDaubechies8CoefficientsLength, kLevels)); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 55 | for (size_t i = 0; i < kLeaves; ++i) { |
| 56 | moving_moments_[i].reset( |
| 57 | new MovingMoments(samples_per_transient / kLeaves)); |
| 58 | } |
| 59 | |
| 60 | first_moments_.reset(new float[tree_leaves_data_length_]); |
| 61 | second_moments_.reset(new float[tree_leaves_data_length_]); |
| 62 | |
| 63 | for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) { |
| 64 | previous_results_.push_back(0.f); |
| 65 | } |
| 66 | } |
| 67 | |
| 68 | TransientDetector::~TransientDetector() {} |
| 69 | |
| 70 | float TransientDetector::Detect(const float* data, |
| 71 | size_t data_length, |
| 72 | const float* reference_data, |
| 73 | size_t reference_length) { |
kwiberg | 9e2be5f | 2016-09-14 05:23:22 -0700 | [diff] [blame] | 74 | RTC_DCHECK(data); |
| 75 | RTC_DCHECK_EQ(samples_per_chunk_, data_length); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 76 | |
| 77 | // TODO(aluebs): Check if these errors can logically happen and if not assert |
| 78 | // on them. |
| 79 | if (wpd_tree_->Update(data, samples_per_chunk_) != 0) { |
| 80 | return -1.f; |
| 81 | } |
| 82 | |
| 83 | float result = 0.f; |
| 84 | |
| 85 | for (size_t i = 0; i < kLeaves; ++i) { |
| 86 | WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i); |
| 87 | |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 88 | moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_, |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 89 | first_moments_.get(), |
| 90 | second_moments_.get()); |
| 91 | |
| 92 | // Add value delayed (Use the last moments from the last call to Detect). |
| 93 | float unbiased_data = leaf->data()[0] - last_first_moment_[i]; |
| 94 | result += |
| 95 | unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN); |
| 96 | |
| 97 | // Add new values. |
| 98 | for (size_t j = 1; j < tree_leaves_data_length_; ++j) { |
| 99 | unbiased_data = leaf->data()[j] - first_moments_[j - 1]; |
| 100 | result += |
| 101 | unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN); |
| 102 | } |
| 103 | |
| 104 | last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1]; |
| 105 | last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1]; |
| 106 | } |
| 107 | |
| 108 | result /= tree_leaves_data_length_; |
| 109 | |
| 110 | result *= ReferenceDetectionValue(reference_data, reference_length); |
| 111 | |
| 112 | if (chunks_at_startup_left_to_delete_ > 0) { |
| 113 | chunks_at_startup_left_to_delete_--; |
| 114 | result = 0.f; |
| 115 | } |
| 116 | |
| 117 | if (result >= kDetectThreshold) { |
| 118 | result = 1.f; |
| 119 | } else { |
| 120 | // Get proportional value. |
| 121 | // Proportion achieved with a squared raised cosine function with domain |
| 122 | // [0, kDetectThreshold) and image [0, 1), it's always increasing. |
| 123 | const float horizontal_scaling = ts::kPi / kDetectThreshold; |
| 124 | const float kHorizontalShift = ts::kPi; |
| 125 | const float kVerticalScaling = 0.5f; |
| 126 | const float kVerticalShift = 1.f; |
| 127 | |
Yves Gerey | 665174f | 2018-06-19 15:03:05 +0200 | [diff] [blame] | 128 | result = |
| 129 | (cos(result * horizontal_scaling + kHorizontalShift) + kVerticalShift) * |
| 130 | kVerticalScaling; |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 131 | result *= result; |
| 132 | } |
| 133 | |
| 134 | previous_results_.pop_front(); |
| 135 | previous_results_.push_back(result); |
| 136 | |
| 137 | // In the current implementation we return the max of the current result and |
| 138 | // the previous results, so the high results have a width equals to |
| 139 | // |transient_length|. |
| 140 | return *std::max_element(previous_results_.begin(), previous_results_.end()); |
| 141 | } |
| 142 | |
| 143 | // Looks for the highest slope and compares it with the previous ones. |
| 144 | // An exponential transformation takes this to the [0, 1] range. This value is |
| 145 | // multiplied by the detection result to avoid false positives. |
| 146 | float TransientDetector::ReferenceDetectionValue(const float* data, |
| 147 | size_t length) { |
| 148 | if (data == NULL) { |
| 149 | using_reference_ = false; |
| 150 | return 1.f; |
| 151 | } |
| 152 | static const float kEnergyRatioThreshold = 0.2f; |
| 153 | static const float kReferenceNonLinearity = 20.f; |
| 154 | static const float kMemory = 0.99f; |
| 155 | float reference_energy = 0.f; |
| 156 | for (size_t i = 1; i < length; ++i) { |
| 157 | reference_energy += data[i] * data[i]; |
| 158 | } |
| 159 | if (reference_energy == 0.f) { |
| 160 | using_reference_ = false; |
| 161 | return 1.f; |
| 162 | } |
kwiberg | 9e2be5f | 2016-09-14 05:23:22 -0700 | [diff] [blame] | 163 | RTC_DCHECK_NE(0, reference_energy_); |
pbos@webrtc.org | 788acd1 | 2014-12-15 09:41:24 +0000 | [diff] [blame] | 164 | float result = 1.f / (1.f + exp(kReferenceNonLinearity * |
| 165 | (kEnergyRatioThreshold - |
| 166 | reference_energy / reference_energy_))); |
| 167 | reference_energy_ = |
| 168 | kMemory * reference_energy_ + (1.f - kMemory) * reference_energy; |
| 169 | |
| 170 | using_reference_ = true; |
| 171 | |
| 172 | return result; |
| 173 | } |
| 174 | |
| 175 | } // namespace webrtc |