Blame - modules/audio_processing/transient/transient_suppression_test.cc - webrtc.googlesource.com/src

blob: 3442ee0af6f02cd17d0f57e7bd676e8b9e6a2280 [file] [log] [blame]

pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	11	#include "modules/audio_processing/transient/transient_suppressor.h"
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	12
				13	#include <stdlib.h>
				14	#include <stdio.h>
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	15	#include <string.h>
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	16
				17	#include <memory>
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	18	#include <string>
				19
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	20	#include "common_audio/include/audio_util.h"
				21	#include "modules/audio_processing/agc/agc.h"
				22	#include "modules/include/module_common_types.h"
				23	#include "rtc_base/flags.h"
				24	#include "test/gtest.h"
				25	#include "test/testsupport/fileutils.h"
Mirko Bonadei	7120742	2017-09-15 13:58:09 +0200	[diff] [blame^]	26	#include "typedefs.h" // NOLINT(build/include)
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	27
				28	DEFINE_string(in_file_name, "", "PCM file that contains the signal.");
				29	DEFINE_string(detection_file_name,
				30	"",
				31	"PCM file that contains the detection signal.");
				32	DEFINE_string(reference_file_name,
				33	"",
				34	"PCM file that contains the reference signal.");
				35
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	36	DEFINE_int(chunk_size_ms,
				37	10,
				38	"Time between each chunk of samples in milliseconds.");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	39
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	40	DEFINE_int(sample_rate_hz,
				41	16000,
				42	"Sampling frequency of the signal in Hertz.");
				43	DEFINE_int(detection_rate_hz,
				44	0,
				45	"Sampling frequency of the detection signal in Hertz.");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	46
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	47	DEFINE_int(num_channels, 1, "Number of channels.");
				48
				49	DEFINE_bool(help, false, "Print this message.");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	50
				51	namespace webrtc {
				52
				// Usage text printed by main() when flag parsing fails, --help is given, or
				// unexpected positional arguments remain. It only names flags that this tool
				// actually defines.
				const char kUsage[] =
				    "\nDetects and suppresses transients from file.\n\n"
				    "This application loads the signal from the in_file_name with a specific\n"
				    "num_channels and sample_rate_hz, the detection signal from the\n"
				    "detection_file_name with a specific detection_rate_hz, and the reference\n"
				    "signal from the reference_file_name with sample_rate_hz, divides them\n"
				    "into chunk_size_ms blocks, computes the voice probability of each block\n"
				    "and does the respective restoration. If detection_rate_hz is 0, the\n"
				    "sample_rate_hz is used for the detection signal as well. The result is\n"
				    "written to suppressed_keystrokes.pcm in the test output directory.\n\n";
				63
				64	// Read next buffers from the test files (signed 16-bit host-endian PCM
				65	// format). audio_buffer has int16 samples, detection_buffer has float samples
				66	// with range [-32768,32767], and reference_buffer has float samples with range
				67	// [-1,1]. Return true iff all the buffers were filled completely.
				68	bool ReadBuffers(FILE* in_file,
				69	size_t audio_buffer_size,
				70	int num_channels,
				71	int16_t* audio_buffer,
				72	FILE* detection_file,
				73	size_t detection_buffer_size,
				74	float* detection_buffer,
				75	FILE* reference_file,
				76	float* reference_buffer) {
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	77	std::unique_ptr<int16_t[]> tmpbuf;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	78	int16_t* read_ptr = audio_buffer;
				79	if (num_channels > 1) {
				80	tmpbuf.reset(new int16_t[num_channels * audio_buffer_size]);
				81	read_ptr = tmpbuf.get();
				82	}
				83	if (fread(read_ptr,
				84	sizeof(*read_ptr),
				85	num_channels * audio_buffer_size,
				86	in_file) != num_channels * audio_buffer_size) {
				87	return false;
				88	}
				89	// De-interleave.
				90	if (num_channels > 1) {
				91	for (int i = 0; i < num_channels; ++i) {
				92	for (size_t j = 0; j < audio_buffer_size; ++j) {
				93	audio_buffer[i * audio_buffer_size + j] =
				94	read_ptr[i + j * num_channels];
				95	}
				96	}
				97	}
				98	if (detection_file) {
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	99	std::unique_ptr<int16_t[]> ibuf(new int16_t[detection_buffer_size]);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	100	if (fread(ibuf.get(), sizeof(ibuf[0]), detection_buffer_size,
				101	detection_file) != detection_buffer_size)
				102	return false;
				103	for (size_t i = 0; i < detection_buffer_size; ++i)
				104	detection_buffer[i] = ibuf[i];
				105	}
				106	if (reference_file) {
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	107	std::unique_ptr<int16_t[]> ibuf(new int16_t[audio_buffer_size]);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	108	if (fread(ibuf.get(), sizeof(ibuf[0]), audio_buffer_size, reference_file)
				109	!= audio_buffer_size)
				110	return false;
				111	S16ToFloat(ibuf.get(), audio_buffer_size, reference_buffer);
				112	}
				113	return true;
				114	}
				115
				116	// Write a number of samples to an open signed 16-bit host-endian PCM file.
				117	static void WritePCM(FILE* f,
				118	size_t num_samples,
				119	int num_channels,
				120	const float* buffer) {
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	121	std::unique_ptr<int16_t[]> ibuf(new int16_t[num_channels * num_samples]);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	122	// Interleave.
				123	for (int i = 0; i < num_channels; ++i) {
				124	for (size_t j = 0; j < num_samples; ++j) {
				125	ibuf[i + j * num_channels] = FloatS16ToS16(buffer[i * num_samples + j]);
				126	}
				127	}
				128	fwrite(ibuf.get(), sizeof(ibuf[0]), num_channels * num_samples, f);
				129	}
				130
				131	// This application tests the transient suppression by providing a processed
				132	// PCM file, which has to be listened to in order to evaluate the
				133	// performance.
				134	// It gets an audio file, and its voice gain information, and the suppressor
				135	// process it giving the output file "suppressed_keystrokes.pcm".
				136	void void_main() {
				137	// TODO(aluebs): Remove all FileWrappers.
				138	// Prepare the input file.
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	139	FILE* in_file = fopen(FLAG_in_file_name, "rb");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	140	ASSERT_TRUE(in_file != NULL);
				141
				142	// Prepare the detection file.
				143	FILE* detection_file = NULL;
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	144	if (strlen(FLAG_detection_file_name) > 0) {
				145	detection_file = fopen(FLAG_detection_file_name, "rb");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	146	}
				147
				148	// Prepare the reference file.
				149	FILE* reference_file = NULL;
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	150	if (strlen(FLAG_reference_file_name) > 0) {
				151	reference_file = fopen(FLAG_reference_file_name, "rb");
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	152	}
				153
				154	// Prepare the output file.
				155	std::string out_file_name = test::OutputPath() + "suppressed_keystrokes.pcm";
				156	FILE* out_file = fopen(out_file_name.c_str(), "wb");
				157	ASSERT_TRUE(out_file != NULL);
				158
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	159	int detection_rate_hz = FLAG_detection_rate_hz;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	160	if (detection_rate_hz == 0) {
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	161	detection_rate_hz = FLAG_sample_rate_hz;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	162	}
				163
				164	Agc agc;
				165
				166	TransientSuppressor suppressor;
				167	suppressor.Initialize(
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	168	FLAG_sample_rate_hz, detection_rate_hz, FLAG_num_channels);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	169
				170	const size_t audio_buffer_size =
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	171	FLAG_chunk_size_ms * FLAG_sample_rate_hz / 1000;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	172	const size_t detection_buffer_size =
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	173	FLAG_chunk_size_ms * detection_rate_hz / 1000;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	174
				175	// int16 and float variants of the same data.
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	176	std::unique_ptr<int16_t[]> audio_buffer_i(
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	177	new int16_t[FLAG_num_channels * audio_buffer_size]);
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	178	std::unique_ptr<float[]> audio_buffer_f(
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	179	new float[FLAG_num_channels * audio_buffer_size]);
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	180
kwiberg	85d8bb0	2016-02-16 20:39:36 -0800	[diff] [blame]	181	std::unique_ptr<float[]> detection_buffer, reference_buffer;
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	182
				183	if (detection_file)
				184	detection_buffer.reset(new float[detection_buffer_size]);
				185	if (reference_file)
				186	reference_buffer.reset(new float[audio_buffer_size]);
				187
				188	while (ReadBuffers(in_file,
				189	audio_buffer_size,
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	190	FLAG_num_channels,
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	191	audio_buffer_i.get(),
				192	detection_file,
				193	detection_buffer_size,
				194	detection_buffer.get(),
				195	reference_file,
				196	reference_buffer.get())) {
				197	ASSERT_EQ(0,
				198	agc.Process(audio_buffer_i.get(),
				199	static_cast<int>(audio_buffer_size),
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	200	FLAG_sample_rate_hz))
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	201	<< "The AGC could not process the frame";
				202
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	203	for (size_t i = 0; i < FLAG_num_channels * audio_buffer_size; ++i) {
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	204	audio_buffer_f[i] = audio_buffer_i[i];
				205	}
				206
				207	ASSERT_EQ(0,
				208	suppressor.Suppress(audio_buffer_f.get(),
				209	audio_buffer_size,
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	210	FLAG_num_channels,
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	211	detection_buffer.get(),
				212	detection_buffer_size,
				213	reference_buffer.get(),
				214	audio_buffer_size,
				215	agc.voice_probability(),
				216	true))
				217	<< "The transient suppressor could not suppress the frame";
				218
				219	// Write result to out file.
				220	WritePCM(
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	221	out_file, audio_buffer_size, FLAG_num_channels, audio_buffer_f.get());
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	222	}
				223
				224	fclose(in_file);
				225	if (detection_file) {
				226	fclose(detection_file);
				227	}
				228	if (reference_file) {
				229	fclose(reference_file);
				230	}
				231	fclose(out_file);
				232	}
				233
				234	} // namespace webrtc
				235
				236	int main(int argc, char* argv[]) {
oprypin	6e09d87	2017-08-31 03:21:39 -0700	[diff] [blame]	237	if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true) \|\|
				238	FLAG_help \|\| argc != 1) {
				239	printf("%s", webrtc::kUsage);
				240	if (FLAG_help) {
				241	rtc::FlagList::Print(nullptr, false);
				242	return 0;
				243	}
				244	return 1;
				245	}
				246	RTC_CHECK_GT(FLAG_chunk_size_ms, 0);
				247	RTC_CHECK_GT(FLAG_sample_rate_hz, 0);
				248	RTC_CHECK_GT(FLAG_num_channels, 0);
				249
pbos@webrtc.org	788acd1	2014-12-15 09:41:24 +0000	[diff] [blame]	250	webrtc::void_main();
				251	return 0;
				252	}