blob: 79d44e647c5cac4c2242467b9b7b53216b819670 [file] [log] [blame]
Sam Zackrisson0824c6f2019-10-07 14:03:56 +02001/*
2 * Copyright (c) 2019 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
10
11#ifndef MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
12#define MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_
13
14#include <stddef.h>
15
16#include <memory>
17
18#include "modules/audio_processing/include/audio_processing.h"
19
namespace webrtc {

class AudioBuffer;

// The voice activity detection (VAD) component analyzes the stream to
// determine if voice is present.
class VoiceDetection {
 public:
  // Specifies the likelihood that a frame will be declared to contain voice.
  // A higher value makes it more likely that speech will not be clipped, at
  // the expense of more noise being detected as voice.
  enum Likelihood {
    kVeryLowLikelihood,
    kLowLikelihood,
    kModerateLikelihood,
    kHighLikelihood
  };

  // Constructs a detector from a sample rate (in Hz, going by the parameter
  // name) and a likelihood setting. Defined outside this header.
  VoiceDetection(int sample_rate_hz, Likelihood likelihood);

  // Declared here and defined out of line: `Vad` is an incomplete type in
  // this header, so the `std::unique_ptr<Vad>` member can only be destroyed
  // in a translation unit where `Vad` is complete.
  ~VoiceDetection();

  // Not copyable. The copy operations use the canonical `const&` signatures so
  // that any attempted copy (including from a const object or a temporary)
  // selects the deleted overload and yields a clear "deleted function"
  // diagnostic. Because copy operations are user-declared, no move operations
  // are implicitly generated either, so the type is also not movable.
  VoiceDetection(const VoiceDetection&) = delete;
  VoiceDetection& operator=(const VoiceDetection&) = delete;

  // Returns true if voice is detected in the current frame.
  bool ProcessCaptureAudio(AudioBuffer* audio);

  // Returns the likelihood setting stored in this object.
  Likelihood likelihood() const { return likelihood_; }

 private:
  // Implementation helper; only forward-declared here and defined elsewhere.
  class Vad;

  int sample_rate_hz_;
  size_t frame_size_samples_;
  Likelihood likelihood_;
  std::unique_ptr<Vad> vad_;
};
}  // namespace webrtc
58
59#endif // MODULES_AUDIO_PROCESSING_VOICE_DETECTION_H_