blob: 9bce896dfe5500225086de326ce3e8a66e53af33 [file] [log] [blame]
peahfaed4ab2016-04-05 14:57:48 -07001/*
2 * Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
3 *
4 * Use of this source code is governed by a BSD-style license
5 * that can be found in the LICENSE file in the root of the source
6 * tree. An additional intellectual property rights grant can be found
7 * in the file PATENTS. All contributing project authors may
8 * be found in the AUTHORS file in the root of the source tree.
9 */
andrew@webrtc.org325cff02014-10-01 17:42:18 +000010
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#ifndef COMMON_AUDIO_BLOCKER_H_
12#define COMMON_AUDIO_BLOCKER_H_
andrew@webrtc.org325cff02014-10-01 17:42:18 +000013
peahfaed4ab2016-04-05 14:57:48 -070014#include <memory>
kwibergc2b785d2016-02-24 05:22:32 -080015
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020016#include "common_audio/audio_ring_buffer.h"
17#include "common_audio/channel_buffer.h"
peahfaed4ab2016-04-05 14:57:48 -070018
19namespace webrtc {
20
// Interface implemented by clients of Blocker to process audio in the time
// domain, one block at a time. By the time ProcessBlock() is invoked the input
// has already been windowed; the output it produces will be windowed
// afterwards. The number of input channels must be >= the number of output
// channels.
class BlockerCallback {
 public:
  virtual ~BlockerCallback() = default;

  // Processes one block of audio, reading from the read-only |input| channel
  // pointers and writing the result through the |output| channel pointers.
  virtual void ProcessBlock(const float* const* input,
                            size_t num_frames,
                            size_t num_input_channels,
                            size_t num_output_channels,
                            float* const* output) = 0;
};
34
35// The main purpose of Blocker is to abstract away the fact that often we
36// receive a different number of audio frames than our transform takes. For
37// example, most FFTs work best when the fft-size is a power of 2, but suppose
38// we receive 20ms of audio at a sample rate of 48000. That comes to 960 frames
39// of audio, which is not a power of 2. Blocker allows us to specify the
40// transform and all other necessary processing via the Process() callback
41// function without any constraints on the transform-size
42// (read: |block_size_|) or received-audio-size (read: |chunk_size_|).
43// We handle this for the multichannel audio case, allowing for different
44// numbers of input and output channels (for example, beamforming takes 2 or
45// more input channels and returns 1 output channel). Audio signals are
46// represented as deinterleaved floats in the range [-1, 1].
47//
48// Blocker is responsible for:
49// - blocking audio while handling potential discontinuities on the edges
50// of chunks
51// - windowing blocks before sending them to Process()
52// - windowing processed blocks, and overlap-adding them together before
53// sending back a processed chunk
54//
55// To use blocker:
56// 1. Impelment a BlockerCallback object |bc|.
57// 2. Instantiate a Blocker object |b|, passing in |bc|.
58// 3. As you receive audio, call b.ProcessChunk() to get processed audio.
59//
60// A small amount of delay is added to the first received chunk to deal with
61// the difference in chunk/block sizes. This delay is <= chunk_size.
62//
63// Ownership of window is retained by the caller. That is, Blocker makes a
64// copy of window and does not attempt to delete it.
65class Blocker {
66 public:
67 Blocker(size_t chunk_size,
68 size_t block_size,
69 size_t num_input_channels,
70 size_t num_output_channels,
71 const float* window,
72 size_t shift_amount,
73 BlockerCallback* callback);
kwiberg942c8512016-08-29 13:10:29 -070074 ~Blocker();
peahfaed4ab2016-04-05 14:57:48 -070075
76 void ProcessChunk(const float* const* input,
77 size_t chunk_size,
78 size_t num_input_channels,
79 size_t num_output_channels,
80 float* const* output);
81
Alejandro Luebsef009252016-09-20 14:51:56 -070082 size_t initial_delay() const { return initial_delay_; }
83
peahfaed4ab2016-04-05 14:57:48 -070084 private:
85 const size_t chunk_size_;
86 const size_t block_size_;
87 const size_t num_input_channels_;
88 const size_t num_output_channels_;
89
90 // The number of frames of delay to add at the beginning of the first chunk.
91 const size_t initial_delay_;
92
93 // The frame index into the input buffer where the first block should be read
94 // from. This is necessary because shift_amount_ is not necessarily a
95 // multiple of chunk_size_, so blocks won't line up at the start of the
96 // buffer.
97 size_t frame_offset_;
98
99 // Since blocks nearly always overlap, there are certain blocks that require
100 // frames from the end of one chunk and the beginning of the next chunk. The
101 // input and output buffers are responsible for saving those frames between
102 // calls to ProcessChunk().
103 //
104 // Both contain |initial delay| + |chunk_size| frames. The input is a fairly
105 // standard FIFO, but due to the overlap-add it's harder to use an
106 // AudioRingBuffer for the output.
107 AudioRingBuffer input_buffer_;
108 ChannelBuffer<float> output_buffer_;
109
110 // Space for the input block (can't wrap because of windowing).
111 ChannelBuffer<float> input_block_;
112
113 // Space for the output block (can't wrap because of overlap/add).
114 ChannelBuffer<float> output_block_;
115
116 std::unique_ptr<float[]> window_;
117
118 // The amount of frames between the start of contiguous blocks. For example,
119 // |shift_amount_| = |block_size_| / 2 for a Hann window.
120 size_t shift_amount_;
121
122 BlockerCallback* callback_;
123};
124
125} // namespace webrtc
126
Mirko Bonadei92ea95e2017-09-15 06:47:31 +0200127#endif // COMMON_AUDIO_BLOCKER_H_