niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
bjornv@webrtc.org | 152c34c | 2012-01-23 12:36:46 +0000 | [diff] [blame] | 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | |
| 12 | /* |
| 13 | * This header file includes the VAD API calls. Specific function calls are given below. |
| 14 | */ |
| 15 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 16 | #ifndef COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT |
| 17 | #define COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 18 | |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 19 | #include <stddef.h> |
| 20 | |
Mirko Bonadei | 7120742 | 2017-09-15 13:58:09 +0200 | [diff] [blame^] | 21 | #include "typedefs.h" // NOLINT(build/include) |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 22 | |
// Handle type for a VAD instance. Only the struct tag is named in this header,
// and every API call below takes the instance through a VadInst pointer.
typedef struct WebRtcVadInst VadInst;
| 24 | |
| 25 | #ifdef __cplusplus |
bjornv@webrtc.org | ed700db | 2012-03-12 12:17:26 +0000 | [diff] [blame] | 26 | extern "C" { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 27 | #endif |
| 28 | |
bjornv@webrtc.org | 26e8a58 | 2012-01-31 14:42:50 +0000 | [diff] [blame] | 29 | // Creates an instance to the VAD structure. |
Bjorn Volcker | de4703c | 2015-05-27 07:22:58 +0200 | [diff] [blame] | 30 | VadInst* WebRtcVad_Create(); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 31 | |
bjornv@webrtc.org | 26e8a58 | 2012-01-31 14:42:50 +0000 | [diff] [blame] | 32 | // Frees the dynamic memory of a specified VAD instance. |
| 33 | // |
| 34 | // - handle [i] : Pointer to VAD instance that should be freed. |
bjornv@webrtc.org | 2a79672 | 2014-04-22 04:45:35 +0000 | [diff] [blame] | 35 | void WebRtcVad_Free(VadInst* handle); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 36 | |
bjornv@webrtc.org | ed700db | 2012-03-12 12:17:26 +0000 | [diff] [blame] | 37 | // Initializes a VAD instance. |
| 38 | // |
| 39 | // - handle [i/o] : Instance that should be initialized. |
| 40 | // |
| 41 | // returns : 0 - (OK), |
deadbeef | 922246a | 2017-02-26 04:18:12 -0800 | [diff] [blame] | 42 | // -1 - (null pointer or Default mode could not be set). |
bjornv@webrtc.org | ed700db | 2012-03-12 12:17:26 +0000 | [diff] [blame] | 43 | int WebRtcVad_Init(VadInst* handle); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 44 | |
bjornv@webrtc.org | 78f0cdc | 2012-03-27 11:06:29 +0000 | [diff] [blame] | 45 | // Sets the VAD operating mode. A more aggressive (higher mode) VAD is more |
| 46 | // restrictive in reporting speech. Put in other words the probability of being |
| 47 | // speech when the VAD returns 1 is increased with increasing mode. As a |
| 48 | // consequence also the missed detection rate goes up. |
| 49 | // |
| 50 | // - handle [i/o] : VAD instance. |
| 51 | // - mode [i] : Aggressiveness mode (0, 1, 2, or 3). |
| 52 | // |
| 53 | // returns : 0 - (OK), |
deadbeef | 922246a | 2017-02-26 04:18:12 -0800 | [diff] [blame] | 54 | // -1 - (null pointer, mode could not be set or the VAD instance |
bjornv@webrtc.org | 78f0cdc | 2012-03-27 11:06:29 +0000 | [diff] [blame] | 55 | // has not been initialized). |
| 56 | int WebRtcVad_set_mode(VadInst* handle, int mode); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 57 | |
bjornv@webrtc.org | b1c3276 | 2012-06-12 08:19:24 +0000 | [diff] [blame] | 58 | // Calculates a VAD decision for the |audio_frame|. For valid sampling rates |
| 59 | // frame lengths, see the description of WebRtcVad_ValidRatesAndFrameLengths(). |
| 60 | // |
| 61 | // - handle [i/o] : VAD Instance. Needs to be initialized by |
| 62 | // WebRtcVad_Init() before call. |
| 63 | // - fs [i] : Sampling frequency (Hz): 8000, 16000, or 32000 |
| 64 | // - audio_frame [i] : Audio frame buffer. |
| 65 | // - frame_length [i] : Length of audio frame buffer in number of samples. |
| 66 | // |
| 67 | // returns : 1 - (Active Voice), |
| 68 | // 0 - (Non-active Voice), |
| 69 | // -1 - (Error) |
andrew@webrtc.org | 65f9338 | 2014-04-30 16:44:13 +0000 | [diff] [blame] | 70 | int WebRtcVad_Process(VadInst* handle, int fs, const int16_t* audio_frame, |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 71 | size_t frame_length); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 72 | |
// Tells whether |rate| and |frame_length| form a supported combination.
// Supported frames are 10, 20 and 30 ms long, at 8000, 16000 or 32000 Hz.
//
// - rate         [i] : Sampling frequency (Hz).
// - frame_length [i] : Speech frame buffer length in number of samples.
//
// returns            :  0 - (valid combination),
//                      -1 - (invalid combination)
int WebRtcVad_ValidRateAndFrameLength(int rate, size_t frame_length);
bjornv@webrtc.org | b1c3276 | 2012-06-12 08:19:24 +0000 | [diff] [blame] | 81 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 82 | #ifdef __cplusplus |
| 83 | } |
| 84 | #endif |
| 85 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 86 | #endif // COMMON_AUDIO_VAD_INCLUDE_WEBRTC_VAD_H_ // NOLINT |