blob: 2322b8f2c1cacc578cc8db38d28db41050a582a4 [file] [log] [blame]
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
12#define MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_
pbos@webrtc.org788acd12014-12-15 09:41:24 +000013
Yves Gerey988cc082018-10-23 12:03:01 +020014#include <stddef.h>
15#include <stdint.h>
Jonas Olssona4d87372019-07-05 19:08:33 +020016
kwiberg85d8bb02016-02-16 20:39:36 -080017#include <memory>
pbos@webrtc.org788acd12014-12-15 09:41:24 +000018
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020019#include "rtc_base/gtest_prod_util.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000020
21namespace webrtc {
22
23class TransientDetector;
24
25// Detects transients in an audio stream and suppress them using a simple
26// restoration algorithm that attenuates unexpected spikes in the spectrum.
27class TransientSuppressor {
28 public:
29 TransientSuppressor();
30 ~TransientSuppressor();
31
32 int Initialize(int sample_rate_hz, int detector_rate_hz, int num_channels);
33
34 // Processes a |data| chunk, and returns it with keystrokes suppressed from
35 // it. The float format is assumed to be int16 ranged. If there are more than
36 // one channel, the chunks are concatenated one after the other in |data|.
37 // |data_length| must be equal to |data_length_|.
38 // |num_channels| must be equal to |num_channels_|.
39 // A sub-band, ideally the higher, can be used as |detection_data|. If it is
40 // NULL, |data| is used for the detection too. The |detection_data| is always
41 // assumed mono.
42 // If a reference signal (e.g. keyboard microphone) is available, it can be
43 // passed in as |reference_data|. It is assumed mono and must have the same
44 // length as |data|. NULL is accepted if unavailable.
45 // This suppressor performs better if voice information is available.
46 // |voice_probability| is the probability of voice being present in this chunk
47 // of audio. If voice information is not available, |voice_probability| must
48 // always be set to 1.
49 // |key_pressed| determines if a key was pressed on this audio chunk.
50 // Returns 0 on success and -1 otherwise.
51 int Suppress(float* data,
52 size_t data_length,
53 int num_channels,
54 const float* detection_data,
55 size_t detection_length,
56 const float* reference_data,
57 size_t reference_length,
58 float voice_probability,
59 bool key_pressed);
60
61 private:
62 FRIEND_TEST_ALL_PREFIXES(TransientSuppressorTest,
63 TypingDetectionLogicWorksAsExpectedForMono);
64 void Suppress(float* in_ptr, float* spectral_mean, float* out_ptr);
65
66 void UpdateKeypress(bool key_pressed);
67 void UpdateRestoration(float voice_probability);
68
69 void UpdateBuffers(float* data);
70
71 void HardRestoration(float* spectral_mean);
72 void SoftRestoration(float* spectral_mean);
73
kwiberg85d8bb02016-02-16 20:39:36 -080074 std::unique_ptr<TransientDetector> detector_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000075
76 size_t data_length_;
77 size_t detection_length_;
78 size_t analysis_length_;
79 size_t buffer_delay_;
80 size_t complex_analysis_length_;
81 int num_channels_;
82 // Input buffer where the original samples are stored.
kwiberg85d8bb02016-02-16 20:39:36 -080083 std::unique_ptr<float[]> in_buffer_;
84 std::unique_ptr<float[]> detection_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000085 // Output buffer where the restored samples are stored.
kwiberg85d8bb02016-02-16 20:39:36 -080086 std::unique_ptr<float[]> out_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000087
88 // Arrays for fft.
kwiberg85d8bb02016-02-16 20:39:36 -080089 std::unique_ptr<size_t[]> ip_;
90 std::unique_ptr<float[]> wfft_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000091
kwiberg85d8bb02016-02-16 20:39:36 -080092 std::unique_ptr<float[]> spectral_mean_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000093
94 // Stores the data for the fft.
kwiberg85d8bb02016-02-16 20:39:36 -080095 std::unique_ptr<float[]> fft_buffer_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000096
kwiberg85d8bb02016-02-16 20:39:36 -080097 std::unique_ptr<float[]> magnitudes_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000098
99 const float* window_;
100
kwiberg85d8bb02016-02-16 20:39:36 -0800101 std::unique_ptr<float[]> mean_factor_;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000102
103 float detector_smoothed_;
104
105 int keypress_counter_;
106 int chunks_since_keypress_;
107 bool detection_enabled_;
108 bool suppression_enabled_;
109
110 bool use_hard_restoration_;
111 int chunks_since_voice_change_;
112
113 uint32_t seed_;
114
115 bool using_reference_;
116};
117
118} // namespace webrtc
119
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200120#endif // MODULES_AUDIO_PROCESSING_TRANSIENT_TRANSIENT_SUPPRESSOR_H_