/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
pbos@webrtc.org788acd12014-12-15 09:41:24 +000011#include <stdio.h>
Yves Gerey665174f2018-06-19 15:03:05 +020012#include <stdlib.h>
oprypin6e09d872017-08-31 03:21:39 -070013#include <string.h>
kwiberg85d8bb02016-02-16 20:39:36 -080014
15#include <memory>
pbos@webrtc.org788acd12014-12-15 09:41:24 +000016#include <string>
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020017#include <vector>
pbos@webrtc.org788acd12014-12-15 09:41:24 +000018
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020019#include "absl/flags/flag.h"
20#include "absl/flags/parse.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020021#include "common_audio/include/audio_util.h"
22#include "modules/audio_processing/agc/agc.h"
Jonas Olssona4d87372019-07-05 19:08:33 +020023#include "modules/audio_processing/transient/transient_suppressor.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020024#include "test/gtest.h"
Steve Anton10542f22019-01-11 09:11:00 -080025#include "test/testsupport/file_utils.h"
pbos@webrtc.org788acd12014-12-15 09:41:24 +000026
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020027ABSL_FLAG(std::string, in_file_name, "", "PCM file that contains the signal.");
28ABSL_FLAG(std::string,
29 detection_file_name,
30 "",
31 "PCM file that contains the detection signal.");
32ABSL_FLAG(std::string,
33 reference_file_name,
34 "",
35 "PCM file that contains the reference signal.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000036
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020037ABSL_FLAG(int,
38 chunk_size_ms,
39 10,
40 "Time between each chunk of samples in milliseconds.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000041
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020042ABSL_FLAG(int,
43 sample_rate_hz,
44 16000,
45 "Sampling frequency of the signal in Hertz.");
46ABSL_FLAG(int,
47 detection_rate_hz,
48 0,
49 "Sampling frequency of the detection signal in Hertz.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000050
Mirko Bonadei2ab97f62019-07-18 13:44:12 +020051ABSL_FLAG(int, num_channels, 1, "Number of channels.");
pbos@webrtc.org788acd12014-12-15 09:41:24 +000052
53namespace webrtc {
54
// Usage text printed by main() when unexpected positional arguments are
// given. It only refers to flags that this tool actually defines.
const char kUsage[] =
    "\nDetects and suppresses transients from file.\n\n"
    "This application loads the signal from the in_file_name with a specific\n"
    "num_channels and sample_rate_hz, the detection signal from the\n"
    "detection_file_name with a specific detection_rate_hz, and the reference\n"
    "signal from the reference_file_name with sample_rate_hz, divides them\n"
    "into chunk_size_ms blocks, computes the voice probability of each block\n"
    "and runs the transient suppressor on it. The result is written to\n"
    "suppressed_keystrokes.pcm in the test output directory.\n\n";
65
66// Read next buffers from the test files (signed 16-bit host-endian PCM
67// format). audio_buffer has int16 samples, detection_buffer has float samples
68// with range [-32768,32767], and reference_buffer has float samples with range
69// [-1,1]. Return true iff all the buffers were filled completely.
70bool ReadBuffers(FILE* in_file,
71 size_t audio_buffer_size,
72 int num_channels,
73 int16_t* audio_buffer,
74 FILE* detection_file,
75 size_t detection_buffer_size,
76 float* detection_buffer,
77 FILE* reference_file,
78 float* reference_buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -080079 std::unique_ptr<int16_t[]> tmpbuf;
pbos@webrtc.org788acd12014-12-15 09:41:24 +000080 int16_t* read_ptr = audio_buffer;
81 if (num_channels > 1) {
82 tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
83 read_ptr = tmpbuf.get();
84 }
Yves Gerey665174f2018-06-19 15:03:05 +020085 if (fread(read_ptr, sizeof(*read_ptr), num_channels * audio_buffer_size,
pbos@webrtc.org788acd12014-12-15 09:41:24 +000086 in_file) != num_channels * audio_buffer_size) {
87 return false;
88 }
89 // De-interleave.
90 if (num_channels > 1) {
91 for (int i = 0; i < num_channels; ++i) {
92 for (size_t j = 0; j < audio_buffer_size; ++j) {
93 audio_buffer[i * audio_buffer_size + j] =
94 read_ptr[i + j * num_channels];
95 }
96 }
97 }
98 if (detection_file) {
kwiberg85d8bb02016-02-16 20:39:36 -080099 std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000100 if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
101 detection_file) != detection_buffer_size)
102 return false;
103 for (size_t i = 0; i < detection_buffer_size; ++i)
104 detection_buffer[i] = ibuf[i];
105 }
106 if (reference_file) {
kwiberg85d8bb02016-02-16 20:39:36 -0800107 std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
Yves Gerey665174f2018-06-19 15:03:05 +0200108 if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file) !=
109 audio_buffer_size)
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000110 return false;
111 S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
112 }
113 return true;
114}
115
116// Write a number of samples to an open signed 16-bit host-endian PCM file.
117static void WritePCM(FILE* f,
118 size_t num_samples,
119 int num_channels,
120 const float* buffer) {
kwiberg85d8bb02016-02-16 20:39:36 -0800121 std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000122 // Interleave.
123 for (int i = 0; i < num_channels; ++i) {
124 for (size_t j = 0; j < num_samples; ++j) {
125 ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
126 }
127 }
128 fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
129}
130
131// This application tests the transient suppression by providing a processed
132// PCM file, which has to be listened to in order to evaluate the
133// performance.
134// It gets an audio file, and its voice gain information, and the suppressor
135// process it giving the output file "suppressed_keystrokes.pcm".
136void void_main() {
137 // TODO(aluebs): Remove all FileWrappers.
138 // Prepare the input file.
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200139 FILE* in_file = fopen(absl::GetFlag(FLAGS_in_file_name).c_str(), "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000140 ASSERT_TRUE(in_file != NULL);
141
142 // Prepare the detection file.
143 FILE* detection_file = NULL;
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200144 if (!absl::GetFlag(FLAGS_detection_file_name).empty()) {
145 detection_file =
146 fopen(absl::GetFlag(FLAGS_detection_file_name).c_str(), "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000147 }
148
149 // Prepare the reference file.
150 FILE* reference_file = NULL;
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200151 if (!absl::GetFlag(FLAGS_reference_file_name).empty()) {
152 reference_file =
153 fopen(absl::GetFlag(FLAGS_reference_file_name).c_str(), "rb");
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000154 }
155
156 // Prepare the output file.
157 std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
158 FILE* out_file = fopen(out_file_name.c_str(), "wb");
159 ASSERT_TRUE(out_file != NULL);
160
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200161 int detection_rate_hz = absl::GetFlag(FLAGS_detection_rate_hz);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000162 if (detection_rate_hz == 0) {
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200163 detection_rate_hz = absl::GetFlag(FLAGS_sample_rate_hz);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000164 }
165
166 Agc agc;
167
168 TransientSuppressor suppressor;
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200169 suppressor.Initialize(absl::GetFlag(FLAGS_sample_rate_hz), detection_rate_hz,
170 absl::GetFlag(FLAGS_num_channels));
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000171
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200172 const size_t audio_buffer_size = absl::GetFlag(FLAGS_chunk_size_ms) *
173 absl::GetFlag(FLAGS_sample_rate_hz) / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000174 const size_t detection_buffer_size =
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200175 absl::GetFlag(FLAGS_chunk_size_ms) * detection_rate_hz / 1000;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000176
177 // int16 and float variants of the same data.
kwiberg85d8bb02016-02-16 20:39:36 -0800178 std::unique_ptr<int16_t[]> audio_buffer_i(
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200179 new int16_t[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
kwiberg85d8bb02016-02-16 20:39:36 -0800180 std::unique_ptr<float[]> audio_buffer_f(
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200181 new float[absl::GetFlag(FLAGS_num_channels) * audio_buffer_size]);
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000182
kwiberg85d8bb02016-02-16 20:39:36 -0800183 std::unique_ptr<float[]> detection_buffer, reference_buffer;
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000184
185 if (detection_file)
186 detection_buffer.reset(new float[detection_buffer_size]);
187 if (reference_file)
188 reference_buffer.reset(new float[audio_buffer_size]);
189
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200190 while (ReadBuffers(
191 in_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
192 audio_buffer_i.get(), detection_file, detection_buffer_size,
193 detection_buffer.get(), reference_file, reference_buffer.get())) {
Yves Gerey665174f2018-06-19 15:03:05 +0200194 agc.Process(audio_buffer_i.get(), static_cast<int>(audio_buffer_size),
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200195 absl::GetFlag(FLAGS_sample_rate_hz));
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000196
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200197 for (size_t i = 0;
198 i < absl::GetFlag(FLAGS_num_channels) * audio_buffer_size; ++i) {
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000199 audio_buffer_f[i] = audio_buffer_i[i];
200 }
201
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200202 ASSERT_EQ(0, suppressor.Suppress(
203 audio_buffer_f.get(), audio_buffer_size,
204 absl::GetFlag(FLAGS_num_channels), detection_buffer.get(),
205 detection_buffer_size, reference_buffer.get(),
206 audio_buffer_size, agc.voice_probability(), true))
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000207 << "The transient suppressor could not suppress the frame";
208
209 // Write result to out file.
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200210 WritePCM(out_file, audio_buffer_size, absl::GetFlag(FLAGS_num_channels),
Yves Gerey665174f2018-06-19 15:03:05 +0200211 audio_buffer_f.get());
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000212 }
213
214 fclose(in_file);
215 if (detection_file) {
216 fclose(detection_file);
217 }
218 if (reference_file) {
219 fclose(reference_file);
220 }
221 fclose(out_file);
222}
223
224} // namespace webrtc
225
226int main(int argc, char* argv[]) {
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200227 std::vector<char*> args = absl::ParseCommandLine(argc, argv);
228 if (args.size() != 1) {
oprypin6e09d872017-08-31 03:21:39 -0700229 printf("%s", webrtc::kUsage);
oprypin6e09d872017-08-31 03:21:39 -0700230 return 1;
231 }
Mirko Bonadei2ab97f62019-07-18 13:44:12 +0200232 RTC_CHECK_GT(absl::GetFlag(FLAGS_chunk_size_ms), 0);
233 RTC_CHECK_GT(absl::GetFlag(FLAGS_sample_rate_hz), 0);
234 RTC_CHECK_GT(absl::GetFlag(FLAGS_num_channels), 0);
oprypin6e09d872017-08-31 03:21:39 -0700235
pbos@webrtc.org788acd12014-12-15 09:41:24 +0000236 webrtc::void_main();
237 return 0;
238}