niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
andrew@webrtc.org | 02d7174 | 2012-04-24 19:47:00 +0000 | [diff] [blame] | 2 | * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 11 | #include "audio/utility/audio_frame_operations.h" |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 12 | |
Raphael Kubo da Costa | 7ce3091 | 2018-04-16 11:17:10 +0200 | [diff] [blame] | 13 | #include <string.h> |
Jonas Olsson | a4d8737 | 2019-07-05 19:08:33 +0200 | [diff] [blame] | 14 | |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 15 | #include <algorithm> |
Yves Gerey | 988cc08 | 2018-10-23 12:03:01 +0200 | [diff] [blame] | 16 | #include <cstdint> |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 17 | #include <utility> |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 18 | |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 19 | #include "common_audio/include/audio_util.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 20 | #include "rtc_base/checks.h" |
Karl Wiberg | e40468b | 2017-11-22 10:42:26 +0100 | [diff] [blame] | 21 | #include "rtc_base/numerics/safe_conversions.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 22 | |
| 23 | namespace webrtc { |
namespace {

// Maximum length of the mute fade ramp, in samples per channel.
// Corresponds to 2.7ms @ 48kHz, 4ms @ 32kHz, 8ms @ 16kHz.
constexpr size_t kMuteFadeFrames = 128;
// Per-sample gain step used when the ramp spans |kMuteFadeFrames| samples.
constexpr float kMuteFadeInc = 1.0f / kMuteFadeFrames;

}  // namespace
| 31 | |
| 32 | void AudioFrameOperations::Add(const AudioFrame& frame_to_add, |
| 33 | AudioFrame* result_frame) { |
| 34 | // Sanity check. |
| 35 | RTC_DCHECK(result_frame); |
| 36 | RTC_DCHECK_GT(result_frame->num_channels_, 0); |
| 37 | RTC_DCHECK_EQ(result_frame->num_channels_, frame_to_add.num_channels_); |
| 38 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 39 | bool no_previous_data = result_frame->muted(); |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 40 | if (result_frame->samples_per_channel_ != frame_to_add.samples_per_channel_) { |
| 41 | // Special case we have no data to start with. |
| 42 | RTC_DCHECK_EQ(result_frame->samples_per_channel_, 0); |
| 43 | result_frame->samples_per_channel_ = frame_to_add.samples_per_channel_; |
| 44 | no_previous_data = true; |
| 45 | } |
| 46 | |
| 47 | if (result_frame->vad_activity_ == AudioFrame::kVadActive || |
| 48 | frame_to_add.vad_activity_ == AudioFrame::kVadActive) { |
| 49 | result_frame->vad_activity_ = AudioFrame::kVadActive; |
| 50 | } else if (result_frame->vad_activity_ == AudioFrame::kVadUnknown || |
| 51 | frame_to_add.vad_activity_ == AudioFrame::kVadUnknown) { |
| 52 | result_frame->vad_activity_ = AudioFrame::kVadUnknown; |
| 53 | } |
| 54 | |
| 55 | if (result_frame->speech_type_ != frame_to_add.speech_type_) |
| 56 | result_frame->speech_type_ = AudioFrame::kUndefined; |
| 57 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 58 | if (!frame_to_add.muted()) { |
| 59 | const int16_t* in_data = frame_to_add.data(); |
| 60 | int16_t* out_data = result_frame->mutable_data(); |
| 61 | size_t length = |
| 62 | frame_to_add.samples_per_channel_ * frame_to_add.num_channels_; |
| 63 | if (no_previous_data) { |
| 64 | std::copy(in_data, in_data + length, out_data); |
| 65 | } else { |
| 66 | for (size_t i = 0; i < length; i++) { |
| 67 | const int32_t wrap_guard = static_cast<int32_t>(out_data[i]) + |
| 68 | static_cast<int32_t>(in_data[i]); |
| 69 | out_data[i] = rtc::saturated_cast<int16_t>(wrap_guard); |
| 70 | } |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 71 | } |
| 72 | } |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 73 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 74 | |
andrew@webrtc.org | 4ecea3e | 2012-06-27 03:25:31 +0000 | [diff] [blame] | 75 | int AudioFrameOperations::MonoToStereo(AudioFrame* frame) { |
| 76 | if (frame->num_channels_ != 1) { |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 77 | return -1; |
| 78 | } |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 79 | UpmixChannels(2, frame); |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 80 | return 0; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 81 | } |
| 82 | |
andrew@webrtc.org | 4ecea3e | 2012-06-27 03:25:31 +0000 | [diff] [blame] | 83 | int AudioFrameOperations::StereoToMono(AudioFrame* frame) { |
| 84 | if (frame->num_channels_ != 2) { |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 85 | return -1; |
| 86 | } |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 87 | DownmixChannels(1, frame); |
| 88 | return frame->num_channels_ == 1 ? 0 : -1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 89 | } |
| 90 | |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 91 | void AudioFrameOperations::QuadToStereo(const int16_t* src_audio, |
| 92 | size_t samples_per_channel, |
| 93 | int16_t* dst_audio) { |
| 94 | for (size_t i = 0; i < samples_per_channel; i++) { |
| 95 | dst_audio[i * 2] = |
| 96 | (static_cast<int32_t>(src_audio[4 * i]) + src_audio[4 * i + 1]) >> 1; |
| 97 | dst_audio[i * 2 + 1] = |
| 98 | (static_cast<int32_t>(src_audio[4 * i + 2]) + src_audio[4 * i + 3]) >> |
| 99 | 1; |
| 100 | } |
| 101 | } |
| 102 | |
| 103 | int AudioFrameOperations::QuadToStereo(AudioFrame* frame) { |
| 104 | if (frame->num_channels_ != 4) { |
| 105 | return -1; |
| 106 | } |
| 107 | |
| 108 | RTC_DCHECK_LE(frame->samples_per_channel_ * 4, |
| 109 | AudioFrame::kMaxDataSizeSamples); |
| 110 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 111 | if (!frame->muted()) { |
| 112 | QuadToStereo(frame->data(), frame->samples_per_channel_, |
| 113 | frame->mutable_data()); |
| 114 | } |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 115 | frame->num_channels_ = 2; |
| 116 | |
| 117 | return 0; |
| 118 | } |
| 119 | |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 120 | void AudioFrameOperations::DownmixChannels(const int16_t* src_audio, |
| 121 | size_t src_channels, |
| 122 | size_t samples_per_channel, |
| 123 | size_t dst_channels, |
| 124 | int16_t* dst_audio) { |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 125 | if (src_channels > 1 && dst_channels == 1) { |
| 126 | DownmixInterleavedToMono(src_audio, samples_per_channel, src_channels, |
| 127 | dst_audio); |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 128 | return; |
| 129 | } else if (src_channels == 4 && dst_channels == 2) { |
| 130 | QuadToStereo(src_audio, samples_per_channel, dst_audio); |
| 131 | return; |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 132 | } |
| 133 | |
| 134 | RTC_NOTREACHED() << "src_channels: " << src_channels |
| 135 | << ", dst_channels: " << dst_channels; |
| 136 | } |
| 137 | |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 138 | void AudioFrameOperations::DownmixChannels(size_t dst_channels, |
| 139 | AudioFrame* frame) { |
| 140 | RTC_DCHECK_LE(frame->samples_per_channel_ * frame->num_channels_, |
| 141 | AudioFrame::kMaxDataSizeSamples); |
| 142 | if (frame->num_channels_ > 1 && dst_channels == 1) { |
| 143 | if (!frame->muted()) { |
| 144 | DownmixInterleavedToMono(frame->data(), frame->samples_per_channel_, |
| 145 | frame->num_channels_, frame->mutable_data()); |
| 146 | } |
| 147 | frame->num_channels_ = 1; |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 148 | } else if (frame->num_channels_ == 4 && dst_channels == 2) { |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 149 | int err = QuadToStereo(frame); |
| 150 | RTC_DCHECK_EQ(err, 0); |
| 151 | } else { |
| 152 | RTC_NOTREACHED() << "src_channels: " << frame->num_channels_ |
| 153 | << ", dst_channels: " << dst_channels; |
| 154 | } |
| 155 | } |
| 156 | |
| 157 | void AudioFrameOperations::UpmixChannels(size_t target_number_of_channels, |
| 158 | AudioFrame* frame) { |
| 159 | RTC_DCHECK_EQ(frame->num_channels_, 1); |
| 160 | RTC_DCHECK_LE(frame->samples_per_channel_ * target_number_of_channels, |
| 161 | AudioFrame::kMaxDataSizeSamples); |
| 162 | |
| 163 | if (frame->num_channels_ != 1 || |
| 164 | frame->samples_per_channel_ * target_number_of_channels > |
| 165 | AudioFrame::kMaxDataSizeSamples) { |
| 166 | return; |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 167 | } |
| 168 | |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 169 | if (!frame->muted()) { |
| 170 | // Up-mixing done in place. Going backwards through the frame ensure nothing |
| 171 | // is irrevocably overwritten. |
| 172 | for (int i = frame->samples_per_channel_ - 1; i >= 0; i--) { |
| 173 | for (size_t j = 0; j < target_number_of_channels; ++j) { |
| 174 | frame->mutable_data()[target_number_of_channels * i + j] = |
| 175 | frame->data()[i]; |
| 176 | } |
| 177 | } |
| 178 | } |
| 179 | frame->num_channels_ = target_number_of_channels; |
jens.nielsen | 228c268 | 2017-03-01 05:11:22 -0800 | [diff] [blame] | 180 | } |
| 181 | |
andrew@webrtc.org | 02d7174 | 2012-04-24 19:47:00 +0000 | [diff] [blame] | 182 | void AudioFrameOperations::SwapStereoChannels(AudioFrame* frame) { |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 183 | RTC_DCHECK(frame); |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 184 | if (frame->num_channels_ != 2 || frame->muted()) { |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 185 | return; |
| 186 | } |
andrew@webrtc.org | 1c7bfe0 | 2012-04-26 00:20:28 +0000 | [diff] [blame] | 187 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 188 | int16_t* frame_data = frame->mutable_data(); |
Peter Kasting | dce40cf | 2015-08-24 14:52:23 -0700 | [diff] [blame] | 189 | for (size_t i = 0; i < frame->samples_per_channel_ * 2; i += 2) { |
Alex Loiko | b4977de | 2019-01-28 16:38:38 +0100 | [diff] [blame] | 190 | std::swap(frame_data[i], frame_data[i + 1]); |
andrew@webrtc.org | 02d7174 | 2012-04-24 19:47:00 +0000 | [diff] [blame] | 191 | } |
| 192 | } |
| 193 | |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 194 | void AudioFrameOperations::Mute(AudioFrame* frame, |
| 195 | bool previous_frame_muted, |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 196 | bool current_frame_muted) { |
| 197 | RTC_DCHECK(frame); |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 198 | if (!previous_frame_muted && !current_frame_muted) { |
| 199 | // Not muted, don't touch. |
| 200 | } else if (previous_frame_muted && current_frame_muted) { |
| 201 | // Frame fully muted. |
| 202 | size_t total_samples = frame->samples_per_channel_ * frame->num_channels_; |
| 203 | RTC_DCHECK_GE(AudioFrame::kMaxDataSizeSamples, total_samples); |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 204 | frame->Mute(); |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 205 | } else { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 206 | // Fade is a no-op on a muted frame. |
| 207 | if (frame->muted()) { |
| 208 | return; |
| 209 | } |
| 210 | |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 211 | // Limit number of samples to fade, if frame isn't long enough. |
| 212 | size_t count = kMuteFadeFrames; |
| 213 | float inc = kMuteFadeInc; |
| 214 | if (frame->samples_per_channel_ < kMuteFadeFrames) { |
| 215 | count = frame->samples_per_channel_; |
| 216 | if (count > 0) { |
| 217 | inc = 1.0f / count; |
| 218 | } |
| 219 | } |
| 220 | |
| 221 | size_t start = 0; |
| 222 | size_t end = count; |
| 223 | float start_g = 0.0f; |
| 224 | if (current_frame_muted) { |
| 225 | // Fade out the last |count| samples of frame. |
| 226 | RTC_DCHECK(!previous_frame_muted); |
| 227 | start = frame->samples_per_channel_ - count; |
| 228 | end = frame->samples_per_channel_; |
| 229 | start_g = 1.0f; |
| 230 | inc = -inc; |
| 231 | } else { |
| 232 | // Fade in the first |count| samples of frame. |
| 233 | RTC_DCHECK(previous_frame_muted); |
| 234 | } |
| 235 | |
| 236 | // Perform fade. |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 237 | int16_t* frame_data = frame->mutable_data(); |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 238 | size_t channels = frame->num_channels_; |
| 239 | for (size_t j = 0; j < channels; ++j) { |
| 240 | float g = start_g; |
| 241 | for (size_t i = start * channels; i < end * channels; i += channels) { |
| 242 | g += inc; |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 243 | frame_data[i + j] *= g; |
solenberg | 1c2af8e | 2016-03-24 10:36:00 -0700 | [diff] [blame] | 244 | } |
| 245 | } |
| 246 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 247 | } |
| 248 | |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 249 | void AudioFrameOperations::Mute(AudioFrame* frame) { |
| 250 | Mute(frame, true, true); |
| 251 | } |
| 252 | |
| 253 | void AudioFrameOperations::ApplyHalfGain(AudioFrame* frame) { |
| 254 | RTC_DCHECK(frame); |
| 255 | RTC_DCHECK_GT(frame->num_channels_, 0); |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 256 | if (frame->num_channels_ < 1 || frame->muted()) { |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 257 | return; |
| 258 | } |
| 259 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 260 | int16_t* frame_data = frame->mutable_data(); |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 261 | for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
| 262 | i++) { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 263 | frame_data[i] = frame_data[i] >> 1; |
aleloi | 6321b49 | 2016-12-05 01:46:09 -0800 | [diff] [blame] | 264 | } |
| 265 | } |
| 266 | |
oprypin | 67fdb80 | 2017-03-09 06:25:06 -0800 | [diff] [blame] | 267 | int AudioFrameOperations::Scale(float left, float right, AudioFrame* frame) { |
| 268 | if (frame->num_channels_ != 2) { |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 269 | return -1; |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 270 | } else if (frame->muted()) { |
| 271 | return 0; |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 272 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 273 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 274 | int16_t* frame_data = frame->mutable_data(); |
oprypin | 67fdb80 | 2017-03-09 06:25:06 -0800 | [diff] [blame] | 275 | for (size_t i = 0; i < frame->samples_per_channel_; i++) { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 276 | frame_data[2 * i] = static_cast<int16_t>(left * frame_data[2 * i]); |
| 277 | frame_data[2 * i + 1] = static_cast<int16_t>(right * frame_data[2 * i + 1]); |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 278 | } |
| 279 | return 0; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 280 | } |
| 281 | |
oprypin | 67fdb80 | 2017-03-09 06:25:06 -0800 | [diff] [blame] | 282 | int AudioFrameOperations::ScaleWithSat(float scale, AudioFrame* frame) { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 283 | if (frame->muted()) { |
| 284 | return 0; |
| 285 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 286 | |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 287 | int16_t* frame_data = frame->mutable_data(); |
oprypin | 67fdb80 | 2017-03-09 06:25:06 -0800 | [diff] [blame] | 288 | for (size_t i = 0; i < frame->samples_per_channel_ * frame->num_channels_; |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 289 | i++) { |
yujo | 36b1a5f | 2017-06-12 12:45:32 -0700 | [diff] [blame] | 290 | frame_data[i] = rtc::saturated_cast<int16_t>(scale * frame_data[i]); |
andrew@webrtc.org | 9c4f6a5 | 2012-04-26 22:32:03 +0000 | [diff] [blame] | 291 | } |
| 292 | return 0; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 293 | } |
pbos@webrtc.org | d900e8b | 2013-07-03 15:12:26 +0000 | [diff] [blame] | 294 | } // namespace webrtc |