blob: 586fc4660b4248342fd0cf0765e8234c95a53ca7 [file] [log] [blame]
/*
 *  Copyright (c) 2016 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
11#include "audio/audio_transport_impl.h"
12
13#include <algorithm>
14#include <memory>
15#include <utility>
16
17#include "audio/utility/audio_frame_operations.h"
18#include "call/audio_send_stream.h"
19#include "rtc_base/logging.h"
20#include "voice_engine/utility.h"
21
22namespace webrtc {
23
24namespace {
25
26// We want to process at the lowest sample rate and channel count possible
27// without losing information. Choose the lowest native rate at least equal to
28// the minimum of input and codec rates, choose lowest channel count, and
29// configure the audio frame.
30void InitializeCaptureFrame(int input_sample_rate,
31 int send_sample_rate_hz,
32 size_t input_num_channels,
33 size_t send_num_channels,
34 AudioFrame* audio_frame) {
35 RTC_DCHECK(audio_frame);
36 int min_processing_rate_hz = std::min(input_sample_rate, send_sample_rate_hz);
37 for (int native_rate_hz : AudioProcessing::kNativeSampleRatesHz) {
38 audio_frame->sample_rate_hz_ = native_rate_hz;
39 if (audio_frame->sample_rate_hz_ >= min_processing_rate_hz) {
40 break;
41 }
42 }
43 audio_frame->num_channels_ = std::min(input_num_channels, send_num_channels);
44}
45
46void ProcessCaptureFrame(int analog_level,
47 uint32_t delay_ms,
48 bool key_pressed,
49 bool swap_stereo_channels,
50 AudioProcessing* audio_processing,
51 AudioFrame* audio_frame) {
52 RTC_DCHECK(audio_processing);
53 RTC_DCHECK(audio_frame);
54 RTC_DCHECK(
55 !audio_processing->echo_cancellation()->is_drift_compensation_enabled());
56 GainControl* agc = audio_processing->gain_control();
57 int error = agc->set_stream_analog_level(analog_level);
58 RTC_DCHECK_EQ(0, error) <<
59 "set_stream_analog_level failed: analog_level = " << analog_level;
60 audio_processing->set_stream_delay_ms(delay_ms);
61 audio_processing->set_stream_key_pressed(key_pressed);
62 error = audio_processing->ProcessStream(audio_frame);
63 RTC_DCHECK_EQ(0, error) << "ProcessStream() error: " << error;
64 if (swap_stereo_channels) {
65 AudioFrameOperations::SwapStereoChannels(audio_frame);
66 }
67}
68
69// Resample audio in |frame| to given sample rate preserving the
70// channel count and place the result in |destination|.
71int Resample(const AudioFrame& frame,
72 const int destination_sample_rate,
73 PushResampler<int16_t>* resampler,
74 int16_t* destination) {
75 const int number_of_channels = static_cast<int>(frame.num_channels_);
76 const int target_number_of_samples_per_channel =
77 destination_sample_rate / 100;
78 resampler->InitializeIfNeeded(frame.sample_rate_hz_, destination_sample_rate,
79 number_of_channels);
80
81 // TODO(yujo): make resampler take an AudioFrame, and add special case
82 // handling of muted frames.
83 return resampler->Resample(
84 frame.data(), frame.samples_per_channel_ * number_of_channels,
85 destination, number_of_channels * target_number_of_samples_per_channel);
86}
87} // namespace
88
89AudioTransportImpl::AudioTransportImpl(AudioMixer* mixer,
90 AudioProcessing* audio_processing,
91 AudioDeviceModule* audio_device_module)
92 : audio_processing_(audio_processing),
93 audio_device_module_(audio_device_module),
94 mixer_(mixer) {
95 RTC_DCHECK(mixer);
96 RTC_DCHECK(audio_processing);
97 RTC_DCHECK(audio_device_module);
98}
99
100AudioTransportImpl::~AudioTransportImpl() {}
101
102// Not used in Chromium. Process captured audio and distribute to all sending
103// streams, and try to do this at the lowest possible sample rate.
104int32_t AudioTransportImpl::RecordedDataIsAvailable(
105 const void* audio_data,
106 const size_t number_of_frames,
107 const size_t bytes_per_sample,
108 const size_t number_of_channels,
109 const uint32_t sample_rate,
110 const uint32_t audio_delay_milliseconds,
111 const int32_t /*clock_drift*/,
112 const uint32_t volume,
113 const bool key_pressed,
114 uint32_t& /*new_mic_volume*/) { // NOLINT: to avoid changing APIs
115 RTC_DCHECK(audio_data);
116 RTC_DCHECK_GE(number_of_channels, 1);
117 RTC_DCHECK_LE(number_of_channels, 2);
118 RTC_DCHECK_EQ(2 * number_of_channels, bytes_per_sample);
119 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
120 // 100 = 1 second / data duration (10 ms).
121 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
122 RTC_DCHECK_LE(bytes_per_sample * number_of_frames * number_of_channels,
123 AudioFrame::kMaxDataSizeBytes);
124
125 // TODO(solenberg): Remove volume handling since it is now always 0.
126 uint16_t voe_mic_level = 0;
127 {
128 constexpr uint32_t kMaxVolumeLevel = 255;
129 uint32_t max_volume = 0;
130
131 // Check for zero to skip this calculation; the consumer may use this to
132 // indicate no volume is available.
133 if (volume != 0) {
134 // Scale from ADM to VoE level range
135 if (audio_device_module_->MaxMicrophoneVolume(&max_volume) == 0) {
136 if (max_volume != 0) {
137 voe_mic_level = static_cast<uint16_t>(
138 (volume * kMaxVolumeLevel + static_cast<int>(max_volume / 2)) /
139 max_volume);
140 }
141 }
142 // We learned that on certain systems (e.g Linux) the voe_mic_level
143 // can be greater than the maxVolumeLevel therefore
144 // we are going to cap the voe_mic_level to the maxVolumeLevel
145 // and change the maxVolume to volume if it turns out that
146 // the voe_mic_level is indeed greater than the maxVolumeLevel.
147 if (voe_mic_level > kMaxVolumeLevel) {
148 voe_mic_level = kMaxVolumeLevel;
149 max_volume = volume;
150 }
151 }
152 }
153
154 int send_sample_rate_hz = 0;
155 size_t send_num_channels = 0;
156 bool swap_stereo_channels = false;
157 {
158 rtc::CritScope lock(&capture_lock_);
159 send_sample_rate_hz = send_sample_rate_hz_;
160 send_num_channels = send_num_channels_;
161 swap_stereo_channels = swap_stereo_channels_;
162 }
163
164 std::unique_ptr<AudioFrame> audio_frame(new AudioFrame());
165 InitializeCaptureFrame(sample_rate, send_sample_rate_hz,
166 number_of_channels, send_num_channels,
167 audio_frame.get());
168 voe::RemixAndResample(static_cast<const int16_t*>(audio_data),
169 number_of_frames, number_of_channels, sample_rate,
170 &capture_resampler_, audio_frame.get());
171 ProcessCaptureFrame(voe_mic_level, audio_delay_milliseconds, key_pressed,
172 swap_stereo_channels, audio_processing_,
173 audio_frame.get());
174
175 // Typing detection (utilizes the APM/VAD decision). We let the VAD determine
176 // if we're using this feature or not.
177 // TODO(solenberg): is_enabled() takes a lock. Work around that.
178 bool typing_detected = false;
179 if (audio_processing_->voice_detection()->is_enabled()) {
180 if (audio_frame->vad_activity_ != AudioFrame::kVadUnknown) {
181 bool vad_active = audio_frame->vad_activity_ == AudioFrame::kVadActive;
182 typing_detected = typing_detection_.Process(key_pressed, vad_active);
183 }
184 }
185
186 // Measure audio level of speech after all processing.
187 double sample_duration = static_cast<double>(number_of_frames) / sample_rate;
188 audio_level_.ComputeLevel(*audio_frame.get(), sample_duration);
189
190 // Copy frame and push to each sending stream. The copy is required since an
191 // encoding task will be posted internally to each stream.
192 {
193 rtc::CritScope lock(&capture_lock_);
194 typing_noise_detected_ = typing_detected;
195
196 RTC_DCHECK_GT(audio_frame->samples_per_channel_, 0);
197 if (!sending_streams_.empty()) {
198 auto it = sending_streams_.begin();
199 while (++it != sending_streams_.end()) {
200 std::unique_ptr<AudioFrame> audio_frame_copy(new AudioFrame());
201 audio_frame_copy->CopyFrom(*audio_frame.get());
202 (*it)->SendAudioData(std::move(audio_frame_copy));
203 }
204 // Send the original frame to the first stream w/o copying.
205 (*sending_streams_.begin())->SendAudioData(std::move(audio_frame));
206 }
207 }
208
209 return 0;
210}
211
212// Mix all received streams, feed the result to the AudioProcessing module, then
213// resample the result to the requested output rate.
214int32_t AudioTransportImpl::NeedMorePlayData(const size_t nSamples,
215 const size_t nBytesPerSample,
216 const size_t nChannels,
217 const uint32_t samplesPerSec,
218 void* audioSamples,
219 size_t& nSamplesOut,
220 int64_t* elapsed_time_ms,
221 int64_t* ntp_time_ms) {
222 RTC_DCHECK_EQ(sizeof(int16_t) * nChannels, nBytesPerSample);
223 RTC_DCHECK_GE(nChannels, 1);
224 RTC_DCHECK_LE(nChannels, 2);
225 RTC_DCHECK_GE(
226 samplesPerSec,
227 static_cast<uint32_t>(AudioProcessing::NativeRate::kSampleRate8kHz));
228
229 // 100 = 1 second / data duration (10 ms).
230 RTC_DCHECK_EQ(nSamples * 100, samplesPerSec);
231 RTC_DCHECK_LE(nBytesPerSample * nSamples * nChannels,
232 AudioFrame::kMaxDataSizeBytes);
233
234 mixer_->Mix(nChannels, &mixed_frame_);
235 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
236 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
237
238 const auto error = audio_processing_->ProcessReverseStream(&mixed_frame_);
239 RTC_DCHECK_EQ(error, AudioProcessing::kNoError);
240
241 nSamplesOut = Resample(mixed_frame_, samplesPerSec, &render_resampler_,
242 static_cast<int16_t*>(audioSamples));
243 RTC_DCHECK_EQ(nSamplesOut, nChannels * nSamples);
244 return 0;
245}
246
247// Used by Chromium - same as NeedMorePlayData() but because Chrome has its
248// own APM instance, does not call audio_processing_->ProcessReverseStream().
249void AudioTransportImpl::PullRenderData(int bits_per_sample,
250 int sample_rate,
251 size_t number_of_channels,
252 size_t number_of_frames,
253 void* audio_data,
254 int64_t* elapsed_time_ms,
255 int64_t* ntp_time_ms) {
256 RTC_DCHECK_EQ(bits_per_sample, 16);
257 RTC_DCHECK_GE(number_of_channels, 1);
258 RTC_DCHECK_LE(number_of_channels, 2);
259 RTC_DCHECK_GE(sample_rate, AudioProcessing::NativeRate::kSampleRate8kHz);
260
261 // 100 = 1 second / data duration (10 ms).
262 RTC_DCHECK_EQ(number_of_frames * 100, sample_rate);
263
264 // 8 = bits per byte.
265 RTC_DCHECK_LE(bits_per_sample / 8 * number_of_frames * number_of_channels,
266 AudioFrame::kMaxDataSizeBytes);
267 mixer_->Mix(number_of_channels, &mixed_frame_);
268 *elapsed_time_ms = mixed_frame_.elapsed_time_ms_;
269 *ntp_time_ms = mixed_frame_.ntp_time_ms_;
270
271 auto output_samples = Resample(mixed_frame_, sample_rate, &render_resampler_,
272 static_cast<int16_t*>(audio_data));
273 RTC_DCHECK_EQ(output_samples, number_of_channels * number_of_frames);
274}
275
276void AudioTransportImpl::UpdateSendingStreams(
277 std::vector<AudioSendStream*> streams, int send_sample_rate_hz,
278 size_t send_num_channels) {
279 rtc::CritScope lock(&capture_lock_);
280 sending_streams_ = std::move(streams);
281 send_sample_rate_hz_ = send_sample_rate_hz;
282 send_num_channels_ = send_num_channels;
283}
284
285void AudioTransportImpl::SetStereoChannelSwapping(bool enable) {
286 rtc::CritScope lock(&capture_lock_);
287 swap_stereo_channels_ = enable;
288}
289
290bool AudioTransportImpl::typing_noise_detected() const {
291 rtc::CritScope lock(&capture_lock_);
292 return typing_noise_detected_;
293}
294} // namespace webrtc