blob: 57bddb62c4069913284de15a5e0202e8fec71baa [file] [log] [blame]
/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/transient/transient_suppressor.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000012
pbos@webrtc.org788acd12014-12-15 09:41:24 +000013#include <stdio.h>
Yves Gerey665174f2018-06-19 15:03:05 +020014#include <stdlib.h>
oprypin6e09d872017-08-31 03:21:39 -070015#include <string.h>
kwiberg85d8bb02016-02-16 20:39:36 -080016
17#include <memory>
pbos@webrtc.org788acd12014-12-15 09:41:24 +000018#include <string>
19
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020020#include "common_audio/include/audio_util.h"
21#include "modules/audio_processing/agc/agc.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020022#include "rtc_base/flags.h"
23#include "test/gtest.h"
Steve Anton10542f22019-01-11 09:11:00 -080024#include "test/testsupport/file_utils.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000025
Mirko Bonadei2dfa9982018-10-18 11:35:32 +020026WEBRTC_DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
27WEBRTC_DEFINE_string(detection_file_name,
28 "",
29 "PCM file that contains the detection signal.");
30WEBRTC_DEFINE_string(reference_file_name,
31 "",
32 "PCM file that contains the reference signal.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000033
Mirko Bonadei2dfa9982018-10-18 11:35:32 +020034WEBRTC_DEFINE_int(chunk_size_ms,
35 10,
36 "Time between each chunk of samples in milliseconds.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000037
Mirko Bonadei2dfa9982018-10-18 11:35:32 +020038WEBRTC_DEFINE_int(sample_rate_hz,
39 16000,
40 "Sampling frequency of the signal in Hertz.");
41WEBRTC_DEFINE_int(detection_rate_hz,
42 0,
43 "Sampling frequency of the detection signal in Hertz.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000044
Mirko Bonadei2dfa9982018-10-18 11:35:32 +020045WEBRTC_DEFINE_int(num_channels, 1, "Number of channels.");
oprypin6e09d872017-08-31 03:21:39 -070046
Mirko Bonadei2dfa9982018-10-18 11:35:32 +020047WEBRTC_DEFINE_bool(help, false, "Print this message.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000048
49namespace webrtc {
50
// Usage text printed by main() when the command line is invalid or --help is
// given. It only mentions options that this tool defines as flags.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, computes the voice probability of each block\n"
    "and passes it to the transient suppressor. The suppressed signal is\n"
    "written to suppressed_keystrokes.pcm in the test output directory.\n\n";
61
62// Read next buffers from the test files (signed 16-bit host-endian PCM
63// format). audio_buffer has int16 samples, detection_buffer has float samples
64// with range [-32768,32767], and reference_buffer has float samples with range
65// [-1,1]. Return true iff all the buffers were filled completely.
66bool ReadBuffers(FILE* in_file,
67 size_t audio_buffer_size,
68 int num_channels,
69 int16_t* audio_buffer,
70 FILE* detection_file,
71 size_t detection_buffer_size,
72 float* detection_buffer,
73 FILE* reference_file,
74 float* reference_buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -080075 std::unique_ptr<int16_t[]> tmpbuf;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000076 int16_t* read_ptr = audio_buffer;
77 if (num_channels > 1) {
78 tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
79 read_ptr = tmpbuf.get();
80 }
Yves Gerey665174f2018-06-19 15:03:05 +020081 if (fread(read_ptr, sizeof(*read_ptr), num_channels * audio_buffer_size,
pbos@webrtc.org788acd12014-12-15 09:41:24 +000082 in_file) != num_channels * audio_buffer_size) {
83 return false;
84 }
85 // De-interleave.
86 if (num_channels > 1) {
87 for (int i = 0; i < num_channels; ++i) {
88 for (size_t j = 0; j < audio_buffer_size; ++j) {
89 audio_buffer[i * audio_buffer_size + j] =
90 read_ptr[i + j * num_channels];
91 }
92 }
93 }
94 if (detection_file) {
kwiberg85d8bb02016-02-16 20:39:36 -080095 std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +000096 if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
97 detection_file) != detection_buffer_size)
98 return false;
99 for (size_t i = 0; i < detection_buffer_size; ++i)
100 detection_buffer[i] = ibuf[i];
101 }
102 if (reference_file) {
kwiberg85d8bb02016-02-16 20:39:36 -0800103 std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
Yves Gerey665174f2018-06-19 15:03:05 +0200104 if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) !=
105 audio_buffer_size)
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000106 return false;
107 S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
108 }
109 return true;
110}
111
112// Write a number of samples to an open signed 16-bit host-endian PCM file.
113static void WritePCM(FILE* f,
114 size_t num_samples,
115 int num_channels,
116 const float* buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -0800117 std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000118 // Interleave.
119 for (int i = 0; i < num_channels; ++i) {
120 for (size_t j = 0; j < num_samples; ++j) {
121 ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
122 }
123 }
124 fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
125}
126
127// This application tests the transient suppression by providing a processed
128// PCM file, which has to be listened to in order to evaluate the
129// performance.
130// It gets an audio file, and its voice gain information, and the suppressor
131// process it giving the output file "suppressed_keystrokes.pcm".
132void void_main() {
133 // TODO(aluebs): Remove all FileWrappers.
134 // Prepare the input file.
oprypin6e09d872017-08-31 03:21:39 -0700135 FILE* in_file = fopen(FLAG_in_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000136 ASSERT_TRUE(in_file != NULL);
137
138 // Prepare the detection file.
139 FILE* detection_file = NULL;
oprypin6e09d872017-08-31 03:21:39 -0700140 if (strlen(FLAG_detection_file_name) > 0) {
141 detection_file = fopen(FLAG_detection_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000142 }
143
144 // Prepare the reference file.
145 FILE* reference_file = NULL;
oprypin6e09d872017-08-31 03:21:39 -0700146 if (strlen(FLAG_reference_file_name) > 0) {
147 reference_file = fopen(FLAG_reference_file_name, "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000148 }
149
150 // Prepare the output file.
151 std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
152 FILE* out_file = fopen(out_file_name.c_str(), "wb");
153 ASSERT_TRUE(out_file != NULL);
154
oprypin6e09d872017-08-31 03:21:39 -0700155 int detection_rate_hz = FLAG_detection_rate_hz;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000156 if (detection_rate_hz == 0) {
oprypin6e09d872017-08-31 03:21:39 -0700157 detection_rate_hz = FLAG_sample_rate_hz;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000158 }
159
160 Agc agc;
161
162 TransientSuppressor suppressor;
Yves Gerey665174f2018-06-19 15:03:05 +0200163 suppressor.Initialize(FLAG_sample_rate_hz, detection_rate_hz,
164 FLAG_num_channels);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000165
166 const size_t audio_buffer_size =
oprypin6e09d872017-08-31 03:21:39 -0700167 FLAG_chunk_size_ms * FLAG_sample_rate_hz / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000168 const size_t detection_buffer_size =
oprypin6e09d872017-08-31 03:21:39 -0700169 FLAG_chunk_size_ms * detection_rate_hz / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000170
171 // int16 and float variants of the same data.
kwiberg85d8bb02016-02-16 20:39:36 -0800172 std::unique_ptr<int16_t[]> audio_buffer_i(
oprypin6e09d872017-08-31 03:21:39 -0700173 new int16_t[FLAG_num_channels * audio_buffer_size]);
kwiberg85d8bb02016-02-16 20:39:36 -0800174 std::unique_ptr<float[]> audio_buffer_f(
oprypin6e09d872017-08-31 03:21:39 -0700175 new float[FLAG_num_channels * audio_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000176
kwiberg85d8bb02016-02-16 20:39:36 -0800177 std::unique_ptr<float[]> detection_buffer, reference_buffer;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000178
179 if (detection_file)
180 detection_buffer.reset(new float[detection_buffer_size]);
181 if (reference_file)
182 reference_buffer.reset(new float[audio_buffer_size]);
183
Yves Gerey665174f2018-06-19 15:03:05 +0200184 while (ReadBuffers(in_file, audio_buffer_size, FLAG_num_channels,
185 audio_buffer_i.get(), detection_file,
186 detection_buffer_size, detection_buffer.get(),
187 reference_file, reference_buffer.get())) {
188 agc.Process(audio_buffer_i.get(), static_cast<int>(audio_buffer_size),
Jonas Olsson645b0272018-02-15 15:16:27 +0100189 FLAG_sample_rate_hz);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000190
oprypin6e09d872017-08-31 03:21:39 -0700191 for (size_t i = 0; i < FLAG_num_channels * audio_buffer_size; ++i) {
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000192 audio_buffer_f[i] = audio_buffer_i[i];
193 }
194
Yves Gerey665174f2018-06-19 15:03:05 +0200195 ASSERT_EQ(0, suppressor.Suppress(audio_buffer_f.get(), audio_buffer_size,
196 FLAG_num_channels, detection_buffer.get(),
197 detection_buffer_size,
198 reference_buffer.get(), audio_buffer_size,
199 agc.voice_probability(), true))
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000200 << "The transient suppressor could not suppress the frame";
201
202 // Write result to out file.
Yves Gerey665174f2018-06-19 15:03:05 +0200203 WritePCM(out_file, audio_buffer_size, FLAG_num_channels,
204 audio_buffer_f.get());
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000205 }
206
207 fclose(in_file);
208 if (detection_file) {
209 fclose(detection_file);
210 }
211 if (reference_file) {
212 fclose(reference_file);
213 }
214 fclose(out_file);
215}
216
217} // namespace webrtc
218
219int main(int argc, char* argv[]) {
Yves Gerey665174f2018-06-19 15:03:05 +0200220 if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) || FLAG_help ||
221 argc != 1) {
oprypin6e09d872017-08-31 03:21:39 -0700222 printf("%s", webrtc::kUsage);
223 if (FLAG_help) {
224 rtc::FlagList::Print(nullptr, false);
225 return 0;
226 }
227 return 1;
228 }
229 RTC_CHECK_GT(FLAG_chunk_size_ms, 0);
230 RTC_CHECK_GT(FLAG_sample_rate_hz, 0);
231 RTC_CHECK_GT(FLAG_num_channels, 0);
232
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000233 webrtc::void_main();
234 return 0;
235}