blob: 3442ee0af6f02cd17d0f57e7bd676e8b9e6a2280 [file] [log] [blame]
pbos@webrtc.org788acd12014-12-15 09:41:24 +00001/*
2 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/transient/transient_suppressor.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000012
13#include <stdlib.h>
14#include <stdio.h>
oprypin6e09d872017-08-31 03:21:39 -070015#include <string.h>
kwiberg85d8bb02016-02-16 20:39:36 -080016
17#include <memory>
pbos@webrtc.org788acd12014-12-15 09:41:24 +000018#include <string>
19
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "common_audio/include/audio_util.h"
21#include "modules/audio_processing/agc/agc.h"
22#include "modules/include/module_common_types.h"
23#include "rtc_base/flags.h"
24#include "test/gtest.h"
25#include "test/testsupport/fileutils.h"
Mirko Bonadei71207422017-09-15 13:58:09 +020026#include "typedefs.h" // NOLINT(build/include)
pbos@webrtc.org788acd12014-12-15 09:41:24 +000027
28DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
29DEFINE_string(detection_file_name,
30 "",
31 "PCM file that contains the detection signal.");
32DEFINE_string(reference_file_name,
33 "",
34 "PCM file that contains the reference signal.");
35
oprypin6e09d872017-08-31 03:21:39 -070036DEFINE_int(chunk_size_ms,
37 10,
38 "Time between each chunk of samples in milliseconds.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000039
oprypin6e09d872017-08-31 03:21:39 -070040DEFINE_int(sample_rate_hz,
41 16000,
42 "Sampling frequency of the signal in Hertz.");
43DEFINE_int(detection_rate_hz,
44 0,
45 "Sampling frequency of the detection signal in Hertz.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000046
oprypin6e09d872017-08-31 03:21:39 -070047DEFINE_int(num_channels, 1, "Number of channels.");
48
49DEFINE_bool(help, false, "Print this message.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000050
51namespace webrtc {
52
// Help text printed by main() when the command line is invalid or when help
// is requested.
// NOTE(review): the text refers to a "voice_threshold" setting, but no flag of
// that name is defined in this file - confirm whether the wording is stale.
const char kUsage[] =
    "\n"
    "Detects and suppresses transients from file.\n"
    "\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, computes its voice value and depending on the\n"
    "voice_threshold does the respective restoration. You can always get the\n"
    "all-voiced or all-unvoiced cases by setting the voice_threshold to 0 or\n"
    "1 respectively.\n"
    "\n";
63
64// Read next buffers from the test files (signed 16-bit host-endian PCM
65// format). audio_buffer has int16 samples, detection_buffer has float samples
66// with range [-32768,32767], and reference_buffer has float samples with range
67// [-1,1]. Return true iff all the buffers were filled completely.
68bool ReadBuffers(FILE* in_file,
69 size_t audio_buffer_size,
70 int num_channels,
71 int16_t* audio_buffer,
72 FILE* detection_file,
73 size_t detection_buffer_size,
74 float* detection_buffer,
75 FILE* reference_file,
76 float* reference_buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -080077 std::unique_ptr<int16_t[]> tmpbuf;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000078 int16_t* read_ptr = audio_buffer;
79 if (num_channels > 1) {
80 tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
81 read_ptr = tmpbuf.get();
82 }
83 if (fread(read_ptr,
84 sizeof(*read_ptr),
85 num_channels * audio_buffer_size,
86 in_file) != num_channels * audio_buffer_size) {
87 return false;
88 }
89 // De-interleave.
90 if (num_channels > 1) {
91 for (int i = 0; i < num_channels; ++i) {
92 for (size_t j = 0; j < audio_buffer_size; ++j) {
93 audio_buffer[i * audio_buffer_size + j] =
94 read_ptr[i + j * num_channels];
95 }
96 }
97 }
98 if (detection_file) {
kwiberg85d8bb02016-02-16 20:39:36 -080099 std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000100 if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
101 detection_file) != detection_buffer_size)
102 return false;
103 for (size_t i = 0; i < detection_buffer_size; ++i)
104 detection_buffer[i] = ibuf[i];
105 }
106 if (reference_file) {
kwiberg85d8bb02016-02-16 20:39:36 -0800107 std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000108 if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
109 != audio_buffer_size)
110 return false;
111 S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
112 }
113 return true;
114}
115
116// Write a number of samples to an open signed 16-bit host-endian PCM file.
117static void WritePCM(FILE* f,
118 size_t num_samples,
119 int num_channels,
120 const float* buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -0800121 std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000122 // Interleave.
123 for (int i = 0; i < num_channels; ++i) {
124 for (size_t j = 0; j < num_samples; ++j) {
125 ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
126 }
127 }
128 fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
129}
130
131// This application tests the transient suppression by providing a processed
132// PCM file, which has to be listened to in order to evaluate the
133// performance.
134// It gets an audio file, and its voice gain information, and the suppressor
135// process it giving the output file "suppressed_keystrokes.pcm".
136void void_main() {
137 // TODO(aluebs): Remove all FileWrappers.
138 // Prepare the input file.
oprypin6e09d872017-08-31 03:21:39 -0700139 FILE* in_file = fopen(FLAG_in_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000140 ASSERT_TRUE(in_file != NULL);
141
142 // Prepare the detection file.
143 FILE* detection_file = NULL;
oprypin6e09d872017-08-31 03:21:39 -0700144 if (strlen(FLAG_detection_file_name) > 0) {
145 detection_file = fopen(FLAG_detection_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000146 }
147
148 // Prepare the reference file.
149 FILE* reference_file = NULL;
oprypin6e09d872017-08-31 03:21:39 -0700150 if (strlen(FLAG_reference_file_name) > 0) {
151 reference_file = fopen(FLAG_reference_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000152 }
153
154 // Prepare the output file.
155 std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
156 FILE* out_file = fopen(out_file_name.c_str(), "wb");
157 ASSERT_TRUE(out_file != NULL);
158
oprypin6e09d872017-08-31 03:21:39 -0700159 int detection_rate_hz = FLAG_detection_rate_hz;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000160 if (detection_rate_hz == 0) {
oprypin6e09d872017-08-31 03:21:39 -0700161 detection_rate_hz = FLAG_sample_rate_hz;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000162 }
163
164 Agc agc;
165
166 TransientSuppressor suppressor;
167 suppressor.Initialize(
oprypin6e09d872017-08-31 03:21:39 -0700168 FLAG_sample_rate_hz, detection_rate_hz, FLAG_num_channels);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000169
170 const size_t audio_buffer_size =
oprypin6e09d872017-08-31 03:21:39 -0700171 FLAG_chunk_size_ms * FLAG_sample_rate_hz / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000172 const size_t detection_buffer_size =
oprypin6e09d872017-08-31 03:21:39 -0700173 FLAG_chunk_size_ms * detection_rate_hz / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000174
175 // int16 and float variants of the same data.
kwiberg85d8bb02016-02-16 20:39:36 -0800176 std::unique_ptr<int16_t[]> audio_buffer_i(
oprypin6e09d872017-08-31 03:21:39 -0700177 new int16_t[FLAG_num_channels * audio_buffer_size]);
kwiberg85d8bb02016-02-16 20:39:36 -0800178 std::unique_ptr<float[]> audio_buffer_f(
oprypin6e09d872017-08-31 03:21:39 -0700179 new float[FLAG_num_channels * audio_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000180
kwiberg85d8bb02016-02-16 20:39:36 -0800181 std::unique_ptr<float[]> detection_buffer, reference_buffer;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000182
183 if (detection_file)
184 detection_buffer.reset(new float[detection_buffer_size]);
185 if (reference_file)
186 reference_buffer.reset(new float[audio_buffer_size]);
187
188 while (ReadBuffers(in_file,
189 audio_buffer_size,
oprypin6e09d872017-08-31 03:21:39 -0700190 FLAG_num_channels,
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000191 audio_buffer_i.get(),
192 detection_file,
193 detection_buffer_size,
194 detection_buffer.get(),
195 reference_file,
196 reference_buffer.get())) {
197 ASSERT_EQ(0,
198 agc.Process(audio_buffer_i.get(),
199 static_cast<int>(audio_buffer_size),
oprypin6e09d872017-08-31 03:21:39 -0700200 FLAG_sample_rate_hz))
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000201 << "The AGC could not process the frame";
202
oprypin6e09d872017-08-31 03:21:39 -0700203 for (size_t i = 0; i < FLAG_num_channels * audio_buffer_size; ++i) {
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000204 audio_buffer_f[i] = audio_buffer_i[i];
205 }
206
207 ASSERT_EQ(0,
208 suppressor.Suppress(audio_buffer_f.get(),
209 audio_buffer_size,
oprypin6e09d872017-08-31 03:21:39 -0700210 FLAG_num_channels,
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000211 detection_buffer.get(),
212 detection_buffer_size,
213 reference_buffer.get(),
214 audio_buffer_size,
215 agc.voice_probability(),
216 true))
217 << "The transient suppressor could not suppress the frame";
218
219 // Write result to out file.
220 WritePCM(
oprypin6e09d872017-08-31 03:21:39 -0700221 out_file, audio_buffer_size, FLAG_num_channels, audio_buffer_f.get());
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000222 }
223
224 fclose(in_file);
225 if (detection_file) {
226 fclose(detection_file);
227 }
228 if (reference_file) {
229 fclose(reference_file);
230 }
231 fclose(out_file);
232}
233
234} // namespace webrtc
235
236int main(int argc, char* argv[]) {
oprypin6e09d872017-08-31 03:21:39 -0700237 if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) ||
238 FLAG_help || argc != 1) {
239 printf("%s", webrtc::kUsage);
240 if (FLAG_help) {
241 rtc::FlagList::Print(nullptr, false);
242 return 0;
243 }
244 return 1;
245 }
246 RTC_CHECK_GT(FLAG_chunk_size_ms, 0);
247 RTC_CHECK_GT(FLAG_sample_rate_hz, 0);
248 RTC_CHECK_GT(FLAG_num_channels, 0);
249
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000250 webrtc::void_main();
251 return 0;
252}