blob: a451d0a4cdf8547b7921604d5d2abff8909ca7e2 [file] [log] [blame]
andrew@webrtc.org08df9b22014-12-16 20:57:15 +00001/*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include <stdio.h>
mgraczyk@chromium.org4ddde2e2015-01-29 22:39:44 +000012#include <sstream>
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +000013#include <string>
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000014
15#include "gflags/gflags.h"
16#include "webrtc/base/checks.h"
kwiberg@webrtc.org00b8f6b2015-02-26 14:34:55 +000017#include "webrtc/base/scoped_ptr.h"
kjellander@webrtc.org035e9122015-01-28 19:57:00 +000018#include "webrtc/common_audio/channel_buffer.h"
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000019#include "webrtc/common_audio/wav_file.h"
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000020#include "webrtc/modules/audio_processing/include/audio_processing.h"
21#include "webrtc/modules/audio_processing/test/test_utils.h"
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000022
// Input/output selection. main() requires exactly one of -c and -dump to be
// set, and currently rejects -dump as not yet implemented.
23DEFINE_string(dump, "", "The name of the debug dump file to read from.");
24DEFINE_string(c, "", "The name of the capture input file to read from.");
25DEFINE_string(o, "out.wav", "Name of the capture output file to write to.");
// For the two flags below, 0 means "use the value of the input file" (see
// main()).
26DEFINE_int32(o_channels, 0, "Number of output channels. Defaults to input.");
27DEFINE_int32(o_sample_rate, 0, "Output sample rate in Hz. Defaults to input.");
// Microphone array geometry, read by get_array_geometry() when beamforming is
// enabled. At most one of mic_spacing and mic_positions may be specified.
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +000028DEFINE_double(mic_spacing, 0.0,
mgraczyk@chromium.org4ddde2e2015-01-29 22:39:44 +000029 "Alternate way to specify mic_positions. "
30 "Assumes uniform linear array with specified spacings.");
31DEFINE_string(mic_positions, "",
32 "Space delimited cartesian coordinates of microphones in meters. "
33 "The coordinates of each point are contiguous. "
34 "For a two element array: \"x1 y1 z1 x2 y2 z2\"");
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000035
// Per-component toggles. In main() each one is OR-ed with -all, except that
// echo cancellation is only configured when a -dump file is in use.
36DEFINE_bool(aec, false, "Enable echo cancellation.");
37DEFINE_bool(agc, false, "Enable automatic gain control.");
38DEFINE_bool(hpf, false, "Enable high-pass filtering.");
39DEFINE_bool(ns, false, "Enable noise suppression.");
40DEFINE_bool(ts, false, "Enable transient suppression.");
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +000041DEFINE_bool(bf, false, "Enable beamforming.");
andrew@webrtc.org08df9b22014-12-16 20:57:15 +000042DEFINE_bool(all, false, "Enable all components.");
43
// -1 (the default) means the noise suppression level is not set explicitly.
44DEFINE_int32(ns_level, -1, "Noise suppression level [0 - 3].");
45
// Number of processing chunks per second of audio. main() sizes its channel
// buffers to sample_rate / kChunksPerSecond frames, i.e. 10 ms per chunk.
46static const int kChunksPerSecond = 100;
// Usage text registered with gflags through google::SetUsageMessage() in
// main().
47static const char kUsage[] =
48 "Command-line tool to run audio processing on WAV files. Accepts either\n"
49 "an input capture WAV file or protobuf debug dump and writes to an output\n"
50 "WAV file.\n"
51 "\n"
52 "All components are disabled by default. If any bi-directional components\n"
53 "are enabled, only debug dump files are permitted.";
54
55namespace webrtc {
56
mgraczyk@chromium.org4ddde2e2015-01-29 22:39:44 +000057namespace {
58
// Returns a vector<T> parsed from whitespace delimited values in to_parse,
// or an empty vector if the string could not be parsed.
//
// Every whitespace-separated token must convert to a T in its entirety: a
// single malformed token (e.g. "abc" or "3abc") makes the whole call return an
// empty vector instead of the values that happened to precede it. An empty or
// all-whitespace string also yields an empty vector.
template <typename T>
std::vector<T> parse_list(std::string to_parse) {
  std::vector<T> values;

  std::istringstream tokens(to_parse);
  std::string token;
  while (tokens >> token) {
    std::istringstream token_stream(token);
    T value = T();
    // Reject the token if the conversion fails or leaves characters unread;
    // peek() reports EOF only when the token was consumed completely.
    if (!(token_stream >> value) ||
        token_stream.peek() != std::istringstream::traits_type::eof()) {
      return std::vector<T>();
    }
    values.push_back(value);
  }

  return values;
}
73
74// Parses the array geometry from the command line.
75//
76// If a vector with size != num_mics is returned, an error has occurred and an
77// appropriate error message has been printed to stdout.
78std::vector<Point> get_array_geometry(size_t num_mics) {
79 std::vector<Point> result;
80 result.reserve(num_mics);
81
82 if (FLAGS_mic_positions.length()) {
83 CHECK(FLAGS_mic_spacing == 0.0 &&
84 "mic_positions and mic_spacing should not both be specified");
85
86 const std::vector<float> values = parse_list<float>(FLAGS_mic_positions);
87 if (values.size() != 3 * num_mics) {
88 fprintf(stderr,
89 "Could not parse mic_positions or incorrect number of points.\n");
90 } else {
91 for (size_t i = 0; i < values.size(); i += 3) {
92 double x = values[i + 0];
93 double y = values[i + 1];
94 double z = values[i + 2];
95 result.push_back(Point(x, y, z));
96 }
97 }
98 } else {
99 if (FLAGS_mic_spacing <= 0) {
100 fprintf(stderr,
101 "mic_spacing must a positive value when beamforming is enabled.\n");
102 } else {
103 for (size_t i = 0; i < num_mics; ++i) {
aluebs@webrtc.org1d883942015-03-05 20:38:21 +0000104 result.push_back(Point(i * FLAGS_mic_spacing, 0.f, 0.f));
mgraczyk@chromium.org4ddde2e2015-01-29 22:39:44 +0000105 }
106 }
107 }
108
109 return result;
110}
111
112} // namespace
113
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000114int main(int argc, char* argv[]) {
115 {
116 const std::string program_name = argv[0];
117 const std::string usage = kUsage;
118 google::SetUsageMessage(usage);
119 }
120 google::ParseCommandLineFlags(&argc, &argv, true);
121
122 if (!((FLAGS_c == "") ^ (FLAGS_dump == ""))) {
123 fprintf(stderr,
124 "An input file must be specified with either -c or -dump.\n");
125 return 1;
126 }
127 if (FLAGS_dump != "") {
128 fprintf(stderr, "FIXME: the -dump option is not yet implemented.\n");
129 return 1;
130 }
131
132 WavReader c_file(FLAGS_c);
133 // If the output format is uninitialized, use the input format.
134 int o_channels = FLAGS_o_channels;
135 if (!o_channels)
136 o_channels = c_file.num_channels();
137 int o_sample_rate = FLAGS_o_sample_rate;
138 if (!o_sample_rate)
139 o_sample_rate = c_file.sample_rate();
140 WavWriter o_file(FLAGS_o, o_sample_rate, o_channels);
141
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000142 Config config;
143 config.Set<ExperimentalNs>(new ExperimentalNs(FLAGS_ts || FLAGS_all));
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +0000144
145 if (FLAGS_bf || FLAGS_all) {
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +0000146 const size_t num_mics = c_file.num_channels();
mgraczyk@chromium.org4ddde2e2015-01-29 22:39:44 +0000147 const std::vector<Point> array_geometry = get_array_geometry(num_mics);
148 if (array_geometry.size() != num_mics) {
149 return 1;
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +0000150 }
151
152 config.Set<Beamforming>(new Beamforming(true, array_geometry));
153 }
154
kwiberg@webrtc.org00b8f6b2015-02-26 14:34:55 +0000155 rtc::scoped_ptr<AudioProcessing> ap(AudioProcessing::Create(config));
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000156 if (FLAGS_dump != "") {
157 CHECK_EQ(kNoErr, ap->echo_cancellation()->Enable(FLAGS_aec || FLAGS_all));
158 } else if (FLAGS_aec) {
159 fprintf(stderr, "-aec requires a -dump file.\n");
160 return -1;
161 }
162 CHECK_EQ(kNoErr, ap->gain_control()->Enable(FLAGS_agc || FLAGS_all));
163 CHECK_EQ(kNoErr, ap->gain_control()->set_mode(GainControl::kFixedDigital));
164 CHECK_EQ(kNoErr, ap->high_pass_filter()->Enable(FLAGS_hpf || FLAGS_all));
165 CHECK_EQ(kNoErr, ap->noise_suppression()->Enable(FLAGS_ns || FLAGS_all));
166 if (FLAGS_ns_level != -1)
167 CHECK_EQ(kNoErr, ap->noise_suppression()->set_level(
168 static_cast<NoiseSuppression::Level>(FLAGS_ns_level)));
169
mgraczyk@chromium.org5a92b782015-01-15 01:28:36 +0000170 printf("Input file: %s\nChannels: %d, Sample rate: %d Hz\n\n",
171 FLAGS_c.c_str(), c_file.num_channels(), c_file.sample_rate());
172 printf("Output file: %s\nChannels: %d, Sample rate: %d Hz\n\n",
173 FLAGS_o.c_str(), o_file.num_channels(), o_file.sample_rate());
174
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000175 ChannelBuffer<float> c_buf(c_file.sample_rate() / kChunksPerSecond,
176 c_file.num_channels());
177 ChannelBuffer<float> o_buf(o_file.sample_rate() / kChunksPerSecond,
178 o_file.num_channels());
179
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +0000180 const size_t c_length =
181 static_cast<size_t>(c_buf.num_channels() * c_buf.num_frames());
182 const size_t o_length =
183 static_cast<size_t>(o_buf.num_channels() * o_buf.num_frames());
kwiberg@webrtc.org00b8f6b2015-02-26 14:34:55 +0000184 rtc::scoped_ptr<float[]> c_interleaved(new float[c_length]);
185 rtc::scoped_ptr<float[]> o_interleaved(new float[o_length]);
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000186 while (c_file.ReadSamples(c_length, c_interleaved.get()) == c_length) {
187 FloatS16ToFloat(c_interleaved.get(), c_length, c_interleaved.get());
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +0000188 Deinterleave(c_interleaved.get(), c_buf.num_frames(),
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000189 c_buf.num_channels(), c_buf.channels());
190
191 CHECK_EQ(kNoErr,
192 ap->ProcessStream(c_buf.channels(),
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +0000193 c_buf.num_frames(),
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000194 c_file.sample_rate(),
195 LayoutFromChannels(c_buf.num_channels()),
196 o_file.sample_rate(),
197 LayoutFromChannels(o_buf.num_channels()),
198 o_buf.channels()));
199
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +0000200 Interleave(o_buf.channels(), o_buf.num_frames(),
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000201 o_buf.num_channels(), o_interleaved.get());
aluebs@webrtc.orgd35a5c32015-02-10 22:52:15 +0000202 FloatToFloatS16(o_interleaved.get(), o_length, o_interleaved.get());
203 o_file.WriteSamples(o_interleaved.get(), o_length);
andrew@webrtc.org08df9b22014-12-16 20:57:15 +0000204 }
205
206 return 0;
207}
208
209} // namespace webrtc
210
211int main(int argc, char* argv[]) {
212 return webrtc::main(argc, argv);
213}