blob: 6045e9fd15ba241cdb24dff861ffe4ce275bfbfa [file] [log] [blame]
ekm030249d2015-06-15 13:02:24 -07001/*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
aluebs0a007592016-02-26 17:17:38 -080011#include "webrtc/common_audio/channel_buffer.h"
Alejandro Luebs32348192016-02-17 20:04:19 -080012#include "webrtc/common_audio/include/audio_util.h"
ekmdb4fecf2015-06-22 17:49:08 -070013#include "webrtc/common_audio/wav_file.h"
aluebsc466bad2016-02-10 12:03:00 -080014#include "webrtc/modules/audio_processing/audio_buffer.h"
ekm030249d2015-06-15 13:02:24 -070015#include "webrtc/modules/audio_processing/intelligibility/intelligibility_enhancer.h"
aluebsc466bad2016-02-10 12:03:00 -080016#include "webrtc/modules/audio_processing/noise_suppression_impl.h"
Edward Lemurc20978e2017-07-06 19:44:34 +020017#include "webrtc/rtc_base/criticalsection.h"
oprypin6e09d872017-08-31 03:21:39 -070018#include "webrtc/rtc_base/flags.h"
ekm030249d2015-06-15 13:02:24 -070019
20using std::complex;
ekmdb4fecf2015-06-22 17:49:08 -070021
22namespace webrtc {
pkastingb297c5a2015-07-22 15:17:22 -070023namespace {
ekmdb4fecf2015-06-22 17:49:08 -070024
ekmdb4fecf2015-06-22 17:49:08 -070025DEFINE_string(clear_file, "speech.wav", "Input file with clear speech.");
26DEFINE_string(noise_file, "noise.wav", "Input file with noise data.");
Alejandro Luebs4458d092016-02-18 19:16:08 -080027DEFINE_string(out_file, "proc_enhanced.wav", "Enhanced output file.");
oprypin6e09d872017-08-31 03:21:39 -070028DEFINE_bool(help, false, "Print this message.");
ekm030249d2015-06-15 13:02:24 -070029
oprypin6e09d872017-08-31 03:21:39 -070030int int_main(int argc, char* argv[]) {
31 if (rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true)) {
32 return 1;
33 }
34 if (FLAG_help) {
35 rtc::FlagList::Print(nullptr, false);
36 return 0;
37 }
38 if (argc != 1) {
39 printf("\n\nInput files must be little-endian 16-bit signed raw PCM.\n");
40 return 0;
41 }
ekm030249d2015-06-15 13:02:24 -070042
oprypin6e09d872017-08-31 03:21:39 -070043 WavReader in_file(FLAG_clear_file);
44 WavReader noise_file(FLAG_noise_file);
45 WavWriter out_file(FLAG_out_file, in_file.sample_rate(),
aluebs0a007592016-02-26 17:17:38 -080046 in_file.num_channels());
aluebsc466bad2016-02-10 12:03:00 -080047 rtc::CriticalSection crit;
48 NoiseSuppressionImpl ns(&crit);
Alejandro Luebsef009252016-09-20 14:51:56 -070049 IntelligibilityEnhancer enh(in_file.sample_rate(), in_file.num_channels(), 1u,
Alex Luebs57ae8292016-03-09 16:24:34 +010050 NoiseSuppressionImpl::num_noise_bins());
Alejandro Luebs18fcbcf2016-02-22 15:57:38 -080051 ns.Initialize(noise_file.num_channels(), noise_file.sample_rate());
aluebsc466bad2016-02-10 12:03:00 -080052 ns.Enable(true);
aluebs0a007592016-02-26 17:17:38 -080053 const size_t in_samples = noise_file.sample_rate() / 100;
54 const size_t noise_samples = noise_file.sample_rate() / 100;
55 std::vector<float> in(in_samples * in_file.num_channels());
56 std::vector<float> noise(noise_samples * noise_file.num_channels());
57 ChannelBuffer<float> in_buf(in_samples, in_file.num_channels());
58 ChannelBuffer<float> noise_buf(noise_samples, noise_file.num_channels());
59 AudioBuffer capture_audio(noise_samples, noise_file.num_channels(),
60 noise_samples, noise_file.num_channels(),
61 noise_samples);
Alejandro Luebsef009252016-09-20 14:51:56 -070062 AudioBuffer render_audio(in_samples, in_file.num_channels(), in_samples,
63 in_file.num_channels(), in_samples);
64 StreamConfig noise_config(noise_file.sample_rate(),
65 noise_file.num_channels());
66 StreamConfig in_config(in_file.sample_rate(), in_file.num_channels());
aluebs0a007592016-02-26 17:17:38 -080067 while (in_file.ReadSamples(in.size(), in.data()) == in.size() &&
68 noise_file.ReadSamples(noise.size(), noise.data()) == noise.size()) {
aluebs0a007592016-02-26 17:17:38 -080069 FloatS16ToFloat(noise.data(), noise.size(), noise.data());
Alejandro Luebsef009252016-09-20 14:51:56 -070070 FloatS16ToFloat(in.data(), in.size(), in.data());
aluebs0a007592016-02-26 17:17:38 -080071 Deinterleave(in.data(), in_buf.num_frames(), in_buf.num_channels(),
72 in_buf.channels());
73 Deinterleave(noise.data(), noise_buf.num_frames(), noise_buf.num_channels(),
74 noise_buf.channels());
Alejandro Luebsef009252016-09-20 14:51:56 -070075 capture_audio.CopyFrom(noise_buf.channels(), noise_config);
76 render_audio.CopyFrom(in_buf.channels(), in_config);
aluebsc466bad2016-02-10 12:03:00 -080077 ns.AnalyzeCaptureAudio(&capture_audio);
78 ns.ProcessCaptureAudio(&capture_audio);
Alejandro Luebsef009252016-09-20 14:51:56 -070079 enh.SetCaptureNoiseEstimate(ns.NoiseEstimate(), 1);
80 enh.ProcessRenderAudio(&render_audio);
81 render_audio.CopyTo(in_config, in_buf.channels());
aluebs0a007592016-02-26 17:17:38 -080082 Interleave(in_buf.channels(), in_buf.num_frames(), in_buf.num_channels(),
83 in.data());
Alejandro Luebsef009252016-09-20 14:51:56 -070084 FloatToFloatS16(in.data(), in.size(), in.data());
aluebs0a007592016-02-26 17:17:38 -080085 out_file.WriteSamples(in.data(), in.size());
ekm030249d2015-06-15 13:02:24 -070086 }
oprypin6e09d872017-08-31 03:21:39 -070087
88 return 0;
ekmb7553df2015-06-16 18:57:32 -070089}
aluebsc555b992015-06-16 20:26:16 -070090
pkastingb297c5a2015-07-22 15:17:22 -070091} // namespace
ekmdb4fecf2015-06-22 17:49:08 -070092} // namespace webrtc
93
94int main(int argc, char* argv[]) {
oprypin6e09d872017-08-31 03:21:39 -070095 return webrtc::int_main(argc, argv);
ekmdb4fecf2015-06-22 17:49:08 -070096}