peah | bdbceef | 2016-03-20 09:53:32 -0700 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2016 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | #include <vector> |
| 11 | |
Henrik Kjellander | dca1e09 | 2017-07-01 16:42:22 +0200 | [diff] [blame^] | 12 | #include "webrtc/base/array_view.h" |
peah | bdbceef | 2016-03-20 09:53:32 -0700 | [diff] [blame] | 13 | #include "webrtc/modules/audio_processing/audio_buffer.h" |
peah | bdbceef | 2016-03-20 09:53:32 -0700 | [diff] [blame] | 14 | #include "webrtc/modules/audio_processing/test/audio_buffer_tools.h" |
| 15 | #include "webrtc/modules/audio_processing/test/bitexactness_tools.h" |
kwiberg | ac9f876 | 2016-09-30 22:29:43 -0700 | [diff] [blame] | 16 | #include "webrtc/modules/audio_processing/voice_detection_impl.h" |
| 17 | #include "webrtc/test/gtest.h" |
peah | bdbceef | 2016-03-20 09:53:32 -0700 | [diff] [blame] | 18 | |
| 19 | namespace webrtc { |
| 20 | namespace { |
| 21 | |
// Number of frames each bit-exactness run feeds through the voice detector
// before the detector outputs are compared against the references.
constexpr int kNumFramesToProcess = 1000;
| 23 | |
| 24 | // Process one frame of data and produce the output. |
| 25 | void ProcessOneFrame(int sample_rate_hz, |
| 26 | AudioBuffer* audio_buffer, |
| 27 | VoiceDetectionImpl* voice_detection) { |
| 28 | if (sample_rate_hz > AudioProcessing::kSampleRate16kHz) { |
| 29 | audio_buffer->SplitIntoFrequencyBands(); |
| 30 | } |
| 31 | |
| 32 | voice_detection->ProcessCaptureAudio(audio_buffer); |
| 33 | } |
| 34 | |
| 35 | // Processes a specified amount of frames, verifies the results and reports |
| 36 | // any errors. |
| 37 | void RunBitexactnessTest(int sample_rate_hz, |
| 38 | size_t num_channels, |
| 39 | int frame_size_ms_reference, |
| 40 | bool stream_has_voice_reference, |
| 41 | VoiceDetection::Likelihood likelihood_reference) { |
| 42 | rtc::CriticalSection crit_capture; |
| 43 | VoiceDetectionImpl voice_detection(&crit_capture); |
| 44 | voice_detection.Initialize(sample_rate_hz > 16000 ? 16000 : sample_rate_hz); |
| 45 | voice_detection.Enable(true); |
| 46 | |
| 47 | int samples_per_channel = rtc::CheckedDivExact(sample_rate_hz, 100); |
| 48 | const StreamConfig capture_config(sample_rate_hz, num_channels, false); |
| 49 | AudioBuffer capture_buffer( |
| 50 | capture_config.num_frames(), capture_config.num_channels(), |
| 51 | capture_config.num_frames(), capture_config.num_channels(), |
| 52 | capture_config.num_frames()); |
| 53 | test::InputAudioFile capture_file( |
| 54 | test::GetApmCaptureTestVectorFileName(sample_rate_hz)); |
| 55 | std::vector<float> capture_input(samples_per_channel * num_channels); |
| 56 | for (int frame_no = 0; frame_no < kNumFramesToProcess; ++frame_no) { |
| 57 | ReadFloatSamplesFromStereoFile(samples_per_channel, num_channels, |
| 58 | &capture_file, capture_input); |
| 59 | |
| 60 | test::CopyVectorToAudioBuffer(capture_config, capture_input, |
| 61 | &capture_buffer); |
| 62 | |
| 63 | ProcessOneFrame(sample_rate_hz, &capture_buffer, &voice_detection); |
| 64 | } |
| 65 | |
| 66 | int frame_size_ms = voice_detection.frame_size_ms(); |
| 67 | bool stream_has_voice = voice_detection.stream_has_voice(); |
| 68 | VoiceDetection::Likelihood likelihood = voice_detection.likelihood(); |
| 69 | |
| 70 | // Compare the outputs to the references. |
| 71 | EXPECT_EQ(frame_size_ms_reference, frame_size_ms); |
| 72 | EXPECT_EQ(stream_has_voice_reference, stream_has_voice); |
| 73 | EXPECT_EQ(likelihood_reference, likelihood); |
| 74 | } |
| 75 | |
| 76 | const int kFrameSizeMsReference = 10; |
| 77 | const bool kStreamHasVoiceReference = true; |
| 78 | const VoiceDetection::Likelihood kLikelihoodReference = |
| 79 | VoiceDetection::kLowLikelihood; |
| 80 | |
| 81 | } // namespace |
| 82 | |
| 83 | TEST(VoiceDetectionBitExactnessTest, Mono8kHz) { |
| 84 | RunBitexactnessTest(8000, 1, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 85 | kLikelihoodReference); |
| 86 | } |
| 87 | |
| 88 | TEST(VoiceDetectionBitExactnessTest, Mono16kHz) { |
| 89 | RunBitexactnessTest(16000, 1, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 90 | kLikelihoodReference); |
| 91 | } |
| 92 | |
| 93 | TEST(VoiceDetectionBitExactnessTest, Mono32kHz) { |
| 94 | RunBitexactnessTest(32000, 1, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 95 | kLikelihoodReference); |
| 96 | } |
| 97 | |
| 98 | TEST(VoiceDetectionBitExactnessTest, Mono48kHz) { |
| 99 | RunBitexactnessTest(48000, 1, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 100 | kLikelihoodReference); |
| 101 | } |
| 102 | |
| 103 | TEST(VoiceDetectionBitExactnessTest, Stereo8kHz) { |
| 104 | RunBitexactnessTest(8000, 2, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 105 | kLikelihoodReference); |
| 106 | } |
| 107 | |
| 108 | TEST(VoiceDetectionBitExactnessTest, Stereo16kHz) { |
| 109 | RunBitexactnessTest(16000, 2, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 110 | kLikelihoodReference); |
| 111 | } |
| 112 | |
| 113 | TEST(VoiceDetectionBitExactnessTest, Stereo32kHz) { |
| 114 | RunBitexactnessTest(32000, 2, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 115 | kLikelihoodReference); |
| 116 | } |
| 117 | |
| 118 | TEST(VoiceDetectionBitExactnessTest, Stereo48kHz) { |
| 119 | RunBitexactnessTest(48000, 2, kFrameSizeMsReference, kStreamHasVoiceReference, |
| 120 | kLikelihoodReference); |
| 121 | } |
| 122 | |
| 123 | } // namespace webrtc |