pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | |
| 12 | #include <math.h> |
| 13 | #include <stdio.h> |
| 14 | #include <stdlib.h> |
| 15 | |
| 16 | #include <algorithm> |
kwiberg | bfefb03 | 2016-05-01 14:53:46 -0700 | [diff] [blame] | 17 | #include <memory> |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 18 | |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 19 | #include "webrtc/modules/audio_processing/agc/agc.h" |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 20 | #include "webrtc/modules/audio_processing/agc/loudness_histogram.h" |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 21 | #include "webrtc/modules/audio_processing/agc/utility.h" |
aluebs | ecf6b81 | 2015-06-25 12:28:48 -0700 | [diff] [blame] | 22 | #include "webrtc/modules/audio_processing/vad/common.h" |
| 23 | #include "webrtc/modules/audio_processing/vad/pitch_based_vad.h" |
| 24 | #include "webrtc/modules/audio_processing/vad/standalone_vad.h" |
kwiberg | ac9f876 | 2016-09-30 22:29:43 -0700 | [diff] [blame] | 25 | #include "webrtc/modules/audio_processing/vad/vad_audio_proc.h" |
Henrik Kjellander | ff761fb | 2015-11-04 08:31:52 +0100 | [diff] [blame] | 26 | #include "webrtc/modules/include/module_common_types.h" |
Edward Lemur | c20978e | 2017-07-06 19:44:34 +0200 | [diff] [blame] | 27 | #include "webrtc/rtc_base/flags.h" |
| 28 | #include "webrtc/rtc_base/safe_minmax.h" |
kwiberg | ac9f876 | 2016-09-30 22:29:43 -0700 | [diff] [blame] | 29 | #include "webrtc/test/gtest.h" |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 30 | |
// Window size handed to LoudnessHistogram::Create(); it also scales the
// activity threshold when a frame is classified as active or inactive.
static const int kAgcAnalWindowSamples = 100;
// Default for the --activity_threshold flag and for AgcStat's threshold.
static const float kDefaultActivityThreshold = 0.3f;

// Command-line flags of the tool. The positional argument (input PCM file) is
// not a flag and is read from argv[1].

// When true, the stand-alone VAD is fed with audio and queried for activity.
DEFINE_bool(standalone_vad, true, "enable stand-alone VAD");
// Required: ground-truth VAD file.
// NOTE(review): the reader in void_main consumes one uint8_t per frame, while
// the help text says 'int' format -- confirm which one is intended.
DEFINE_string(true_vad, "", "name of a file containing true VAD in 'int'"
              " format");
// Optional: per-frame video activity probabilities (one double per frame).
DEFINE_string(video_vad, "", "name of a file containing video VAD (activity"
              " probabilities) in double format. One activity per 10ms is"
              " required. If no file is given the video information is not"
              " incorporated. Negative activity is interpreted as video is"
              " not adapted and the statistics are not computed during"
              " the learning phase. Note that the negative video activities"
              " are ONLY allowed at the beginning.");
// Optional: file the summary statistics are appended to.
DEFINE_string(result, "", "name of a file to write the results. The results"
              " will be appended to the end of the file. This is optional.");
// Optional: file that receives the audio-content value of every frame.
DEFINE_string(audio_content, "", "name of a file where audio content is written"
              " to, in double format.");
// Fraction of kAgcAnalWindowSamples the audio content must exceed for a frame
// to be reported as active.
DEFINE_float(activity_threshold, kDefaultActivityThreshold,
             "Activity threshold");
// Prints the flag list and exits.
DEFINE_bool(help, false, "prints this message");
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 51 | |
| 52 | namespace webrtc { |
| 53 | |
| 54 | // TODO(turajs) A new CL will be committed soon where ExtractFeatures will |
| 55 | // notify the caller of "silence" input, instead of bailing out. We would not |
| 56 | // need the following function when such a change is made. |
| 57 | |
| 58 | // Add some dither to quiet frames. This avoids the ExtractFeatures skip a |
| 59 | // silence frame. Otherwise true VAD would drift with respect to the audio. |
| 60 | // We only consider mono inputs. |
| 61 | static void DitherSilence(AudioFrame* frame) { |
Peter Kasting | 6955870 | 2016-01-12 16:26:35 -0800 | [diff] [blame] | 62 | ASSERT_EQ(1u, frame->num_channels_); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 63 | const double kRmsSilence = 5; |
| 64 | const double sum_squared_silence = kRmsSilence * kRmsSilence * |
| 65 | frame->samples_per_channel_; |
| 66 | double sum_squared = 0; |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 67 | int16_t* frame_data = frame->mutable_data(); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 68 | for (size_t n = 0; n < frame->samples_per_channel_; n++) |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 69 | sum_squared += frame_data[n] * frame_data[n]; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 70 | if (sum_squared <= sum_squared_silence) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 71 | for (size_t n = 0; n < frame->samples_per_channel_; n++) |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 72 | frame_data[n] = (rand() & 0xF) - 8; // NOLINT: ignore non-threadsafe. |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 73 | } |
| 74 | } |
| 75 | |
| 76 | class AgcStat { |
| 77 | public: |
| 78 | AgcStat() |
| 79 | : video_index_(0), |
| 80 | activity_threshold_(kDefaultActivityThreshold), |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 81 | audio_content_(LoudnessHistogram::Create(kAgcAnalWindowSamples)), |
aluebs | ecf6b81 | 2015-06-25 12:28:48 -0700 | [diff] [blame] | 82 | audio_processing_(new VadAudioProc()), |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 83 | vad_(new PitchBasedVad()), |
| 84 | standalone_vad_(StandaloneVad::Create()), |
| 85 | audio_content_fid_(NULL) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 86 | for (size_t n = 0; n < kMaxNumFrames; n++) |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 87 | video_vad_[n] = 0.5; |
| 88 | } |
| 89 | |
| 90 | ~AgcStat() { |
| 91 | if (audio_content_fid_ != NULL) { |
| 92 | fclose(audio_content_fid_); |
| 93 | } |
| 94 | } |
| 95 | |
| 96 | void set_audio_content_file(FILE* audio_content_fid) { |
| 97 | audio_content_fid_ = audio_content_fid; |
| 98 | } |
| 99 | |
| 100 | int AddAudio(const AudioFrame& frame, double p_video, |
| 101 | int* combined_vad) { |
| 102 | if (frame.num_channels_ != 1 || |
| 103 | frame.samples_per_channel_ != |
| 104 | kSampleRateHz / 100 || |
| 105 | frame.sample_rate_hz_ != kSampleRateHz) |
| 106 | return -1; |
| 107 | video_vad_[video_index_++] = p_video; |
| 108 | AudioFeatures features; |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 109 | const int16_t* frame_data = frame.data(); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 110 | audio_processing_->ExtractFeatures( |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 111 | frame_data, frame.samples_per_channel_, &features); |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 112 | if (FLAG_standalone_vad) { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 113 | standalone_vad_->AddAudio(frame_data, |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 114 | frame.samples_per_channel_); |
| 115 | } |
| 116 | if (features.num_frames > 0) { |
| 117 | double p[kMaxNumFrames] = {0.5, 0.5, 0.5, 0.5}; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 118 | if (FLAG_standalone_vad) { |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 119 | standalone_vad_->GetActivity(p, kMaxNumFrames); |
| 120 | } |
| 121 | // TODO(turajs) combining and limiting are used in the source files as |
| 122 | // well they can be moved to utility. |
| 123 | // Combine Video and stand-alone VAD. |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 124 | for (size_t n = 0; n < features.num_frames; n++) { |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 125 | double p_active = p[n] * video_vad_[n]; |
| 126 | double p_passive = (1 - p[n]) * (1 - video_vad_[n]); |
kwiberg | 0703856 | 2017-06-12 11:40:47 -0700 | [diff] [blame] | 127 | p[n] = rtc::SafeClamp(p_active / (p_active + p_passive), 0.01, 0.99); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 128 | } |
| 129 | if (vad_->VoicingProbability(features, p) < 0) |
| 130 | return -1; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 131 | for (size_t n = 0; n < features.num_frames; n++) { |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 132 | audio_content_->Update(features.rms[n], p[n]); |
| 133 | double ac = audio_content_->AudioContent(); |
| 134 | if (audio_content_fid_ != NULL) { |
| 135 | fwrite(&ac, sizeof(ac), 1, audio_content_fid_); |
| 136 | } |
| 137 | if (ac > kAgcAnalWindowSamples * activity_threshold_) { |
| 138 | combined_vad[n] = 1; |
| 139 | } else { |
| 140 | combined_vad[n] = 0; |
| 141 | } |
| 142 | } |
| 143 | video_index_ = 0; |
| 144 | } |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 145 | return static_cast<int>(features.num_frames); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 146 | } |
| 147 | |
| 148 | void Reset() { |
| 149 | audio_content_->Reset(); |
| 150 | } |
| 151 | |
| 152 | void SetActivityThreshold(double activity_threshold) { |
| 153 | activity_threshold_ = activity_threshold; |
| 154 | } |
| 155 | |
| 156 | private: |
| 157 | int video_index_; |
| 158 | double activity_threshold_; |
| 159 | double video_vad_[kMaxNumFrames]; |
peah | bbe4233 | 2016-06-08 06:42:02 -0700 | [diff] [blame] | 160 | std::unique_ptr<LoudnessHistogram> audio_content_; |
kwiberg | bfefb03 | 2016-05-01 14:53:46 -0700 | [diff] [blame] | 161 | std::unique_ptr<VadAudioProc> audio_processing_; |
| 162 | std::unique_ptr<PitchBasedVad> vad_; |
| 163 | std::unique_ptr<StandaloneVad> standalone_vad_; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 164 | |
| 165 | FILE* audio_content_fid_; |
| 166 | }; |
| 167 | |
| 168 | |
| 169 | void void_main(int argc, char* argv[]) { |
| 170 | webrtc::AgcStat agc_stat; |
| 171 | |
| 172 | FILE* pcm_fid = fopen(argv[1], "rb"); |
| 173 | ASSERT_TRUE(pcm_fid != NULL) << "Cannot open PCM file " << argv[1]; |
| 174 | |
| 175 | if (argc < 2) { |
| 176 | fprintf(stderr, "\nNot Enough arguments\n"); |
| 177 | } |
| 178 | |
| 179 | FILE* true_vad_fid = NULL; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 180 | ASSERT_GT(strlen(FLAG_true_vad), 0u) << "Specify the file containing true " |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 181 | "VADs using --true_vad flag."; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 182 | true_vad_fid = fopen(FLAG_true_vad, "rb"); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 183 | ASSERT_TRUE(true_vad_fid != NULL) << "Cannot open the active list " << |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 184 | FLAG_true_vad; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 185 | |
| 186 | FILE* results_fid = NULL; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 187 | if (strlen(FLAG_result) > 0) { |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 188 | // True if this is the first time writing to this function and we add a |
| 189 | // header to the beginning of the file. |
| 190 | bool write_header; |
| 191 | // Open in the read mode. If it fails, the file doesn't exist and has to |
| 192 | // write a header for it. Otherwise no need to write a header. |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 193 | results_fid = fopen(FLAG_result, "r"); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 194 | if (results_fid == NULL) { |
| 195 | write_header = true; |
| 196 | } else { |
| 197 | fclose(results_fid); |
| 198 | write_header = false; |
| 199 | } |
| 200 | // Open in append mode. |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 201 | results_fid = fopen(FLAG_result, "a"); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 202 | ASSERT_TRUE(results_fid != NULL) << "Cannot open the file, " << |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 203 | FLAG_result << ", to write the results."; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 204 | // Write the header if required. |
| 205 | if (write_header) { |
| 206 | fprintf(results_fid, "%% Total Active, Misdetection, " |
| 207 | "Total inactive, False Positive, On-sets, Missed segments, " |
| 208 | "Average response\n"); |
| 209 | } |
| 210 | } |
| 211 | |
| 212 | FILE* video_vad_fid = NULL; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 213 | if (strlen(FLAG_video_vad) > 0) { |
| 214 | video_vad_fid = fopen(FLAG_video_vad, "rb"); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 215 | ASSERT_TRUE(video_vad_fid != NULL) << "Cannot open the file, " << |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 216 | FLAG_video_vad << " to read video-based VAD decisions.\n"; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 217 | } |
| 218 | |
| 219 | // AgsStat will be the owner of this file and will close it at its |
| 220 | // destructor. |
| 221 | FILE* audio_content_fid = NULL; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 222 | if (strlen(FLAG_audio_content) > 0) { |
| 223 | audio_content_fid = fopen(FLAG_audio_content, "wb"); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 224 | ASSERT_TRUE(audio_content_fid != NULL) << "Cannot open file, " << |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 225 | FLAG_audio_content << " to write audio-content.\n"; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 226 | agc_stat.set_audio_content_file(audio_content_fid); |
| 227 | } |
| 228 | |
| 229 | webrtc::AudioFrame frame; |
| 230 | frame.num_channels_ = 1; |
| 231 | frame.sample_rate_hz_ = 16000; |
| 232 | frame.samples_per_channel_ = frame.sample_rate_hz_ / 100; |
| 233 | const size_t kSamplesToRead = frame.num_channels_ * |
| 234 | frame.samples_per_channel_; |
| 235 | |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 236 | agc_stat.SetActivityThreshold(FLAG_activity_threshold); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 237 | |
| 238 | int ret_val = 0; |
| 239 | int num_frames = 0; |
| 240 | int agc_vad[kMaxNumFrames]; |
| 241 | uint8_t true_vad[kMaxNumFrames]; |
| 242 | double p_video = 0.5; |
| 243 | int total_active = 0; |
| 244 | int total_passive = 0; |
| 245 | int total_false_positive = 0; |
| 246 | int total_missed_detection = 0; |
| 247 | int onset_adaptation = 0; |
| 248 | int num_onsets = 0; |
| 249 | bool onset = false; |
| 250 | uint8_t previous_true_vad = 0; |
| 251 | int num_not_adapted = 0; |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 252 | size_t true_vad_index = 0; |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 253 | bool in_false_positive_region = false; |
| 254 | int total_false_positive_duration = 0; |
| 255 | bool video_adapted = false; |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 256 | while (kSamplesToRead == fread(frame.mutable_data(), sizeof(int16_t), |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 257 | kSamplesToRead, pcm_fid)) { |
| 258 | assert(true_vad_index < kMaxNumFrames); |
| 259 | ASSERT_EQ(1u, fread(&true_vad[true_vad_index], sizeof(*true_vad), 1, |
| 260 | true_vad_fid)) |
| 261 | << "Size mismatch between True-VAD and the PCM file.\n"; |
| 262 | if (video_vad_fid != NULL) { |
| 263 | ASSERT_EQ(1u, fread(&p_video, sizeof(p_video), 1, video_vad_fid)) << |
| 264 | "Not enough video-based VAD probabilities."; |
| 265 | } |
| 266 | |
| 267 | // Negative video activity indicates that the video-based VAD is not yet |
| 268 | // adapted. Disregards the learning phase in statistics. |
| 269 | if (p_video < 0) { |
| 270 | if (video_adapted) { |
| 271 | fprintf(stderr, "Negative video probabilities ONLY allowed at the " |
| 272 | "beginning of the sequence, not in the middle.\n"); |
| 273 | exit(1); |
| 274 | } |
| 275 | continue; |
| 276 | } else { |
| 277 | video_adapted = true; |
| 278 | } |
| 279 | |
| 280 | num_frames++; |
| 281 | uint8_t last_true_vad; |
| 282 | if (true_vad_index == 0) { |
| 283 | last_true_vad = previous_true_vad; |
| 284 | } else { |
| 285 | last_true_vad = true_vad[true_vad_index - 1]; |
| 286 | } |
| 287 | if (last_true_vad == 1 && true_vad[true_vad_index] == 0) { |
| 288 | agc_stat.Reset(); |
| 289 | } |
| 290 | true_vad_index++; |
| 291 | |
| 292 | DitherSilence(&frame); |
| 293 | |
| 294 | ret_val = agc_stat.AddAudio(frame, p_video, agc_vad); |
| 295 | ASSERT_GE(ret_val, 0); |
| 296 | |
| 297 | if (ret_val > 0) { |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 298 | ASSERT_EQ(true_vad_index, static_cast<size_t>(ret_val)); |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 299 | for (int n = 0; n < ret_val; n++) { |
| 300 | if (true_vad[n] == 1) { |
| 301 | total_active++; |
| 302 | if (previous_true_vad == 0) { |
| 303 | num_onsets++; |
| 304 | onset = true; |
| 305 | } |
| 306 | if (agc_vad[n] == 0) { |
| 307 | total_missed_detection++; |
| 308 | if (onset) |
| 309 | onset_adaptation++; |
| 310 | } else { |
| 311 | in_false_positive_region = false; |
| 312 | onset = false; |
| 313 | } |
| 314 | } else if (true_vad[n] == 0) { |
| 315 | // Check if |on_set| flag is still up. If so it means that we totally |
| 316 | // missed an active region |
| 317 | if (onset) |
| 318 | num_not_adapted++; |
| 319 | onset = false; |
| 320 | |
| 321 | total_passive++; |
| 322 | if (agc_vad[n] == 1) { |
| 323 | total_false_positive++; |
| 324 | in_false_positive_region = true; |
| 325 | } |
| 326 | if (in_false_positive_region) { |
| 327 | total_false_positive_duration++; |
| 328 | } |
| 329 | } else { |
| 330 | ASSERT_TRUE(false) << "Invalid value for true-VAD.\n"; |
| 331 | } |
| 332 | previous_true_vad = true_vad[n]; |
| 333 | } |
| 334 | true_vad_index = 0; |
| 335 | } |
| 336 | } |
| 337 | |
| 338 | if (results_fid != NULL) { |
| 339 | fprintf(results_fid, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
| 340 | total_active, |
| 341 | total_missed_detection, |
| 342 | total_passive, |
| 343 | total_false_positive, |
| 344 | num_onsets, |
| 345 | num_not_adapted, |
| 346 | static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
| 347 | static_cast<float>(total_false_positive_duration) / |
| 348 | (total_passive + 1e-12)); |
| 349 | } |
| 350 | fprintf(stdout, "%4d %4d %4d %4d %4d %4d %4.0f %4.0f\n", |
| 351 | total_active, |
| 352 | total_missed_detection, |
| 353 | total_passive, |
| 354 | total_false_positive, |
| 355 | num_onsets, |
| 356 | num_not_adapted, |
| 357 | static_cast<float>(onset_adaptation) / (num_onsets + 1e-12), |
| 358 | static_cast<float>(total_false_positive_duration) / |
| 359 | (total_passive + 1e-12)); |
| 360 | |
| 361 | fclose(true_vad_fid); |
| 362 | fclose(pcm_fid); |
| 363 | if (video_vad_fid != NULL) { |
| 364 | fclose(video_vad_fid); |
| 365 | } |
| 366 | if (results_fid != NULL) { |
| 367 | fclose(results_fid); |
| 368 | } |
| 369 | } |
| 370 | |
| 371 | } // namespace webrtc |
| 372 | |
| 373 | int main(int argc, char* argv[]) { |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 374 | if (argc == 1) { |
| 375 | // Print usage information. |
| 376 | std::cout << |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 377 | "\nCompute the number of misdetected and false-positive frames. Not\n" |
| 378 | " that for each frame of audio (10 ms) there should be one true\n" |
| 379 | " activity. If any video-based activity is given, there should also be\n" |
| 380 | " one probability per frame.\n" |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 381 | "Run with --help for more details on available flags.\n" |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 382 | "\nUsage:\n\n" |
| 383 | "activity_metric input_pcm [options]\n" |
| 384 | "where 'input_pcm' is the input audio sampled at 16 kHz in 16 bits " |
| 385 | "format.\n\n"; |
kjellander | 4fa5be4 | 2017-05-16 00:01:23 -0700 | [diff] [blame] | 386 | return 0; |
| 387 | } |
| 388 | rtc::FlagList::SetFlagsFromCommandLine(&argc, argv, true); |
| 389 | if (FLAG_help) { |
| 390 | rtc::FlagList::Print(nullptr, false); |
| 391 | return 0; |
| 392 | } |
pbos@webrtc.org | a7f7772 | 2014-12-15 16:33:16 +0000 | [diff] [blame] | 393 | webrtc::void_main(argc, argv); |
| 394 | return 0; |
| 395 | } |