blob: 2774e35571d17cb65b31605a0a74c6e51ab6b3d7 [file] [log] [blame]
Sam Zackrisson0824c6f2019-10-07 14:03:56 +02001/*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#include "modules/audio_processing/voice_detection.h"
12
13#include "api/audio/audio_frame.h"
14#include "common_audio/vad/include/webrtc_vad.h"
15#include "modules/audio_processing/audio_buffer.h"
16#include "rtc_base/checks.h"
17
18namespace webrtc {
19class VoiceDetection::Vad {
20 public:
21 Vad() {
22 state_ = WebRtcVad_Create();
23 RTC_CHECK(state_);
24 int error = WebRtcVad_Init(state_);
25 RTC_DCHECK_EQ(0, error);
26 }
27 ~Vad() { WebRtcVad_Free(state_); }
28
29 Vad(Vad&) = delete;
30 Vad& operator=(Vad&) = delete;
31
32 VadInst* state() { return state_; }
33
34 private:
35 VadInst* state_ = nullptr;
36};
37
38VoiceDetection::VoiceDetection(int sample_rate_hz, Likelihood likelihood)
39 : sample_rate_hz_(sample_rate_hz),
40 frame_size_samples_(static_cast<size_t>(sample_rate_hz_ / 100)),
41 likelihood_(likelihood),
42 vad_(new Vad()) {
43 int mode = 2;
44 switch (likelihood) {
45 case VoiceDetection::kVeryLowLikelihood:
46 mode = 3;
47 break;
48 case VoiceDetection::kLowLikelihood:
49 mode = 2;
50 break;
51 case VoiceDetection::kModerateLikelihood:
52 mode = 1;
53 break;
54 case VoiceDetection::kHighLikelihood:
55 mode = 0;
56 break;
57 default:
58 RTC_NOTREACHED();
59 break;
60 }
61 int error = WebRtcVad_set_mode(vad_->state(), mode);
62 RTC_DCHECK_EQ(0, error);
63}
64
65VoiceDetection::~VoiceDetection() {}
66
67bool VoiceDetection::ProcessCaptureAudio(AudioBuffer* audio) {
68 RTC_DCHECK_GE(AudioBuffer::kMaxSplitFrameLength,
69 audio->num_frames_per_band());
70 std::array<int16_t, AudioBuffer::kMaxSplitFrameLength> mixed_low_pass_data;
71 rtc::ArrayView<const int16_t> mixed_low_pass(mixed_low_pass_data.data(),
72 audio->num_frames_per_band());
73 if (audio->num_channels() == 1) {
74 FloatS16ToS16(audio->split_bands_const(0)[kBand0To8kHz],
75 audio->num_frames_per_band(), mixed_low_pass_data.data());
76 } else {
77 const int num_channels = static_cast<int>(audio->num_channels());
78 for (size_t i = 0; i < audio->num_frames_per_band(); ++i) {
79 int32_t value =
80 FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[0][i]);
81 for (int j = 1; j < num_channels; ++j) {
82 value += FloatS16ToS16(audio->split_channels_const(kBand0To8kHz)[j][i]);
83 }
84 mixed_low_pass_data[i] = value / num_channels;
85 }
86 }
87
88 int vad_ret = WebRtcVad_Process(vad_->state(), sample_rate_hz_,
89 mixed_low_pass.data(), frame_size_samples_);
90 RTC_DCHECK(vad_ret == 0 || vad_ret == 1);
91 return vad_ret == 0 ? false : true;
92}
93} // namespace webrtc