/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_

#include <stddef.h>
#include <stdint.h>

#include <deque>
#include <memory>
#include <set>

#include "rtc_base/gtest_prod_util.h"

20namespace webrtc {
21
22class TransientDetector;
23
24// Detects transients in an audio stream and suppress them using a simple
25// restoration algorithm that attenuates unexpected spikes in the spectrum.
26class TransientSuppressor {
27 public:
28 TransientSuppressor();
29 ~TransientSuppressor();
30
31 int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
32
33 // Processes a |data| chunk, and returns it with keystrokes suppressed from
34 // it. The float format is assumed to be int16 ranged. If there are more than
35 // one channel, the chunks are concatenated one after the other in |data|.
36 // |data_length| must be equal to |data_length_|.
37 // |num_channels| must be equal to |num_channels_|.
38 // A sub-band, ideally the higher, can be used as |detection_data|. If it is
39 // NULL, |data| is used for the detection too. The |detection_data| is always
40 // assumed mono.
41 // If a reference signal (e.g. keyboard microphone) is available, it can be
42 // passed in as |reference_data|. It is assumed mono and must have the same
43 // length as |data|. NULL is accepted if unavailable.
44 // This suppressor performs better if voice information is available.
45 // |voice_probability| is the probability of voice being present in this chunk
46 // of audio. If voice information is not available, |voice_probability| must
47 // always be set to 1.
48 // |key_pressed| determines if a key was pressed on this audio chunk.
49 // Returns 0 on success and -1 otherwise.
50 int Suppress(float* data,
51 size_t data_length,
52 int num_channels,
53 const float* detection_data,
54 size_t detection_length,
55 const float* reference_data,
56 size_t reference_length,
57 float voice_probability,
58 bool key_pressed);
59
60 private:
61 FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
62 TypingDetectionLogicWorksAsExpectedForMono);
63 void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
64
65 void UpdateKeypress(bool key_pressed);
66 void UpdateRestoration(float voice_probability);
67
68 void UpdateBuffers(float* data);
69
70 void HardRestoration(float* spectral_mean);
71 void SoftRestoration(float* spectral_mean);
72
kwiberg85d8bb02016-02-16 20:39:36 -080073 std::unique_ptr<TransientDetector> detector_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000074
75 size_t data_length_;
76 size_t detection_length_;
77 size_t analysis_length_;
78 size_t buffer_delay_;
79 size_t complex_analysis_length_;
80 int num_channels_;
81 // Input buffer where the original samples are stored.
kwiberg85d8bb02016-02-16 20:39:36 -080082 std::unique_ptr<float[]> in_buffer_;
83 std::unique_ptr<float[]> detection_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000084 // Output buffer where the restored samples are stored.
kwiberg85d8bb02016-02-16 20:39:36 -080085 std::unique_ptr<float[]> out_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000086
87 // Arrays for fft.
kwiberg85d8bb02016-02-16 20:39:36 -080088 std::unique_ptr<size_t[]> ip_;
89 std::unique_ptr<float[]> wfft_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000090
kwiberg85d8bb02016-02-16 20:39:36 -080091 std::unique_ptr<float[]> spectral_mean_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000092
93 // Stores the data for the fft.
kwiberg85d8bb02016-02-16 20:39:36 -080094 std::unique_ptr<float[]> fft_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000095
kwiberg85d8bb02016-02-16 20:39:36 -080096 std::unique_ptr<float[]> magnitudes_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000097
98 const float* window_;
99
kwiberg85d8bb02016-02-16 20:39:36 -0800100 std::unique_ptr<float[]> mean_factor_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000101
102 float detector_smoothed_;
103
104 int keypress_counter_;
105 int chunks_since_keypress_;
106 bool detection_enabled_;
107 bool suppression_enabled_;
108
109 bool use_hard_restoration_;
110 int chunks_since_voice_change_;
111
112 uint32_t seed_;
113
114 bool using_reference_;
115};
116
117} // namespace webrtc

#endif  // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_