blob: 8997d4c09206faae3a5c4ca41d420e9eeef58acd [file] [log] [blame]
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/transient/transient_detector.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000012
pbos@webrtc.org788acd12014-12-15 09:41:24 +000013#include <float.h>
14#include <math.h>
15#include <string.h>
kwiberg85d8bb02016-02-16 20:39:36 -080016#include <algorithm>
17
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020018#include "modules/audio_processing/transient/common.h"
19#include "modules/audio_processing/transient/daubechies_8_wavelet_coeffs.h"
20#include "modules/audio_processing/transient/moving_moments.h"
Yves Gerey988cc082018-10-23 12:03:01 +020021#include "modules/audio_processing/transient/wpd_node.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020022#include "modules/audio_processing/transient/wpd_tree.h"
23#include "rtc_base/checks.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000024
25namespace webrtc {
26
27static const int kTransientLengthMs = 30;
28static const int kChunksAtStartupLeftToDelete =
29 kTransientLengthMs / ts::kChunkSizeMs;
30static const float kDetectThreshold = 16.f;
31
32TransientDetector::TransientDetector(int sample_rate_hz)
33 : samples_per_chunk_(sample_rate_hz * ts::kChunkSizeMs / 1000),
34 last_first_moment_(),
35 last_second_moment_(),
36 chunks_at_startup_left_to_delete_(kChunksAtStartupLeftToDelete),
37 reference_energy_(1.f),
38 using_reference_(false) {
kwiberg9e2be5f2016-09-14 05:23:22 -070039 RTC_DCHECK(sample_rate_hz == ts::kSampleRate8kHz ||
40 sample_rate_hz == ts::kSampleRate16kHz ||
41 sample_rate_hz == ts::kSampleRate32kHz ||
42 sample_rate_hz == ts::kSampleRate48kHz);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000043 int samples_per_transient = sample_rate_hz * kTransientLengthMs / 1000;
44 // Adjustment to avoid data loss while downsampling, making
45 // |samples_per_chunk_| and |samples_per_transient| always divisible by
46 // |kLeaves|.
47 samples_per_chunk_ -= samples_per_chunk_ % kLeaves;
48 samples_per_transient -= samples_per_transient % kLeaves;
49
50 tree_leaves_data_length_ = samples_per_chunk_ / kLeaves;
51 wpd_tree_.reset(new WPDTree(samples_per_chunk_,
52 kDaubechies8HighPassCoefficients,
53 kDaubechies8LowPassCoefficients,
Yves Gerey665174f2018-06-19 15:03:05 +020054 kDaubechies8CoefficientsLength, kLevels));
pbos@webrtc.org788acd12014-12-15 09:41:24 +000055 for (size_t i = 0; i < kLeaves; ++i) {
56 moving_moments_[i].reset(
57 new MovingMoments(samples_per_transient / kLeaves));
58 }
59
60 first_moments_.reset(new float[tree_leaves_data_length_]);
61 second_moments_.reset(new float[tree_leaves_data_length_]);
62
63 for (int i = 0; i < kChunksAtStartupLeftToDelete; ++i) {
64 previous_results_.push_back(0.f);
65 }
66}
67
68TransientDetector::~TransientDetector() {}
69
70float TransientDetector::Detect(const float* data,
71 size_t data_length,
72 const float* reference_data,
73 size_t reference_length) {
kwiberg9e2be5f2016-09-14 05:23:22 -070074 RTC_DCHECK(data);
75 RTC_DCHECK_EQ(samples_per_chunk_, data_length);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000076
77 // TODO(aluebs): Check if these errors can logically happen and if not assert
78 // on them.
79 if (wpd_tree_->Update(data, samples_per_chunk_) != 0) {
80 return -1.f;
81 }
82
83 float result = 0.f;
84
85 for (size_t i = 0; i < kLeaves; ++i) {
86 WPDNode* leaf = wpd_tree_->NodeAt(kLevels, i);
87
Yves Gerey665174f2018-06-19 15:03:05 +020088 moving_moments_[i]->CalculateMoments(leaf->data(), tree_leaves_data_length_,
pbos@webrtc.org788acd12014-12-15 09:41:24 +000089 first_moments_.get(),
90 second_moments_.get());
91
92 // Add value delayed (Use the last moments from the last call to Detect).
93 float unbiased_data = leaf->data()[0] - last_first_moment_[i];
94 result +=
95 unbiased_data * unbiased_data / (last_second_moment_[i] + FLT_MIN);
96
97 // Add new values.
98 for (size_t j = 1; j < tree_leaves_data_length_; ++j) {
99 unbiased_data = leaf->data()[j] - first_moments_[j - 1];
100 result +=
101 unbiased_data * unbiased_data / (second_moments_[j - 1] + FLT_MIN);
102 }
103
104 last_first_moment_[i] = first_moments_[tree_leaves_data_length_ - 1];
105 last_second_moment_[i] = second_moments_[tree_leaves_data_length_ - 1];
106 }
107
108 result /= tree_leaves_data_length_;
109
110 result *= ReferenceDetectionValue(reference_data, reference_length);
111
112 if (chunks_at_startup_left_to_delete_ > 0) {
113 chunks_at_startup_left_to_delete_--;
114 result = 0.f;
115 }
116
117 if (result >= kDetectThreshold) {
118 result = 1.f;
119 } else {
120 // Get proportional value.
121 // Proportion achieved with a squared raised cosine function with domain
122 // [0, kDetectThreshold) and image [0, 1), it's always increasing.
123 const float horizontal_scaling = ts::kPi / kDetectThreshold;
124 const float kHorizontalShift = ts::kPi;
125 const float kVerticalScaling = 0.5f;
126 const float kVerticalShift = 1.f;
127
Yves Gerey665174f2018-06-19 15:03:05 +0200128 result =
129 (cos(result * horizontal_scaling + kHorizontalShift) + kVerticalShift) *
130 kVerticalScaling;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000131 result *= result;
132 }
133
134 previous_results_.pop_front();
135 previous_results_.push_back(result);
136
137 // In the current implementation we return the max of the current result and
138 // the previous results, so the high results have a width equals to
139 // |transient_length|.
140 return *std::max_element(previous_results_.begin(), previous_results_.end());
141}
142
143// Looks for the highest slope and compares it with the previous ones.
144// An exponential transformation takes this to the [0, 1] range. This value is
145// multiplied by the detection result to avoid false positives.
146float TransientDetector::ReferenceDetectionValue(const float* data,
147 size_t length) {
148 if (data == NULL) {
149 using_reference_ = false;
150 return 1.f;
151 }
152 static const float kEnergyRatioThreshold = 0.2f;
153 static const float kReferenceNonLinearity = 20.f;
154 static const float kMemory = 0.99f;
155 float reference_energy = 0.f;
156 for (size_t i = 1; i < length; ++i) {
157 reference_energy += data[i] * data[i];
158 }
159 if (reference_energy == 0.f) {
160 using_reference_ = false;
161 return 1.f;
162 }
kwiberg9e2be5f2016-09-14 05:23:22 -0700163 RTC_DCHECK_NE(0, reference_energy_);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000164 float result = 1.f / (1.f + exp(kReferenceNonLinearity *
165 (kEnergyRatioThreshold -
166 reference_energy / reference_energy_)));
167 reference_energy_ =
168 kMemory * reference_energy_ + (1.f - kMemory) * reference_energy;
169
170 using_reference_ = true;
171
172 return result;
173}
174
175} // namespace webrtc