Karl Wiberg | 2224294 | 2015-07-03 04:04:33 +0200 | [diff] [blame] | 1 | /* |
| 2 | * Copyright (c) 2015 The WebRTC project authors. All Rights Reserved. |
| 3 | * |
| 4 | * Use of this source code is governed by a BSD-style license |
| 5 | * that can be found in the LICENSE file in the root of the source |
| 6 | * tree. An additional intellectual property rights grant can be found |
| 7 | * in the file PATENTS. All contributing project authors may |
| 8 | * be found in the AUTHORS file in the root of the source tree. |
| 9 | */ |
| 10 | |
| 11 | #include <algorithm> |
| 12 | #include <numeric> |
| 13 | #include <sstream> |
| 14 | #include <vector> |
| 15 | |
| 16 | #include "testing/gtest/include/gtest/gtest.h" |
| 17 | #include "webrtc/base/buffer.h" |
| 18 | #include "webrtc/modules/audio_coding/codecs/isac/fix/interface/audio_encoder_isacfix.h" |
| 19 | #include "webrtc/modules/audio_coding/codecs/isac/main/interface/audio_encoder_isac.h" |
| 20 | #include "webrtc/modules/audio_coding/neteq/tools/input_audio_file.h" |
| 21 | #include "webrtc/test/testsupport/fileutils.h" |
| 22 | |
| 23 | namespace webrtc { |
| 24 | |
| 25 | namespace { |
| 26 | |
| 27 | std::vector<int16_t> LoadSpeechData() { |
| 28 | webrtc::test::InputAudioFile input_file( |
| 29 | webrtc::test::ResourcePath("audio_coding/testfile32kHz", "pcm")); |
| 30 | static const int kIsacNumberOfSamples = 32 * 60; // 60 ms at 32 kHz |
| 31 | std::vector<int16_t> speech_data(kIsacNumberOfSamples); |
| 32 | input_file.Read(kIsacNumberOfSamples, speech_data.data()); |
| 33 | return speech_data; |
| 34 | } |
| 35 | |
| 36 | template <typename T> |
| 37 | IsacBandwidthInfo GetBwInfo(typename T::instance_type* inst) { |
| 38 | IsacBandwidthInfo bi; |
| 39 | T::GetBandwidthInfo(inst, &bi); |
| 40 | EXPECT_TRUE(bi.in_use); |
| 41 | return bi; |
| 42 | } |
| 43 | |
| 44 | template <typename T> |
| 45 | rtc::Buffer EncodePacket(typename T::instance_type* inst, |
| 46 | const IsacBandwidthInfo* bi, |
| 47 | const int16_t* speech_data, |
| 48 | int framesize_ms) { |
| 49 | rtc::Buffer output(1000); |
| 50 | for (int i = 0;; ++i) { |
| 51 | if (bi) |
| 52 | T::SetBandwidthInfo(inst, bi); |
| 53 | int encoded_bytes = T::Encode(inst, speech_data, output.data()); |
| 54 | if (i + 1 == framesize_ms / 10) { |
| 55 | EXPECT_GT(encoded_bytes, 0); |
| 56 | EXPECT_LE(static_cast<size_t>(encoded_bytes), output.size()); |
| 57 | output.SetSize(encoded_bytes); |
| 58 | return output; |
| 59 | } |
| 60 | EXPECT_EQ(0, encoded_bytes); |
| 61 | } |
| 62 | } |
| 63 | |
// Simulates a channel with a fixed capacity, keeping track of the RTP time at
// which the most recently sent data has finished passing through it.
class BoundedCapacityChannel final {
 public:
  // |rate_bits_per_second| is the channel capacity. The constructor is
  // explicit so that a plain int is never silently converted into a channel.
  explicit BoundedCapacityChannel(int rate_bits_per_second)
      : current_time_rtp_(0),
        channel_rate_bytes_per_sample_(rate_bits_per_second /
                                       (8.0 * kSamplesPerSecond)) {}

  // Simulate sending the given number of bytes at the given RTP time. Returns
  // the new current RTP time after the sending is done.
  int Send(int send_time_rtp, int nbytes) {
    // Sending cannot start before the previous send has completed.
    const int start_time_rtp = std::max(current_time_rtp_, send_time_rtp);
    // The transmission time is computed in floating point; the resulting RTP
    // time is deliberately truncated to a whole number of samples.
    current_time_rtp_ = static_cast<int>(
        start_time_rtp + nbytes / channel_rate_bytes_per_sample_);
    return current_time_rtp_;
  }

 private:
  // RTP time at which the channel finished its latest send.
  int current_time_rtp_;
  // The somewhat strange unit for channel rate, bytes per sample, is because
  // RTP time is measured in samples:
  const double channel_rate_bytes_per_sample_;
  static const int kSamplesPerSecond = 16000;
};
| 86 | |
// Per-codec, per-mode test parameters. The primary template is intentionally
// empty; the explicit specializations supply |time_to_settle| and
// ExpectedRateBitsPerSecond().
template <typename Codec, bool kAdaptive>
struct TestParam {};
| 89 | |
| 90 | template <> |
| 91 | struct TestParam<IsacFloat, true> { |
| 92 | static const int time_to_settle = 200; |
| 93 | static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
| 94 | return rate_bits_per_second; |
| 95 | } |
| 96 | }; |
| 97 | |
| 98 | template <> |
| 99 | struct TestParam<IsacFix, true> { |
| 100 | static const int time_to_settle = 350; |
| 101 | static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
| 102 | // For some reason, IsacFix fails to adapt to the channel's actual |
| 103 | // bandwidth. Instead, it settles on a few hundred packets at 10kbit/s, |
| 104 | // then a few hundred at 5kbit/s, then a few hundred at 10kbit/s, and so |
| 105 | // on. The 200 packets starting at 350 are in the middle of the first |
| 106 | // 10kbit/s run. |
| 107 | return 10000; |
| 108 | } |
| 109 | }; |
| 110 | |
| 111 | template <> |
| 112 | struct TestParam<IsacFloat, false> { |
| 113 | static const int time_to_settle = 0; |
| 114 | static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
| 115 | return 32000; |
| 116 | } |
| 117 | }; |
| 118 | |
| 119 | template <> |
| 120 | struct TestParam<IsacFix, false> { |
| 121 | static const int time_to_settle = 0; |
| 122 | static int ExpectedRateBitsPerSecond(int rate_bits_per_second) { |
| 123 | return 16000; |
| 124 | } |
| 125 | }; |
| 126 | |
| 127 | // Test that the iSAC encoder produces identical output whether or not we use a |
| 128 | // conjoined encoder+decoder pair or a separate encoder and decoder that |
| 129 | // communicate BW estimation info explicitly. |
| 130 | template <typename T, bool adaptive> |
| 131 | void TestGetSetBandwidthInfo(const int16_t* speech_data, |
| 132 | int rate_bits_per_second) { |
| 133 | using Param = TestParam<T, adaptive>; |
| 134 | const int framesize_ms = adaptive ? 60 : 30; |
| 135 | |
| 136 | // Conjoined encoder/decoder pair: |
| 137 | typename T::instance_type* encdec; |
| 138 | ASSERT_EQ(0, T::Create(&encdec)); |
| 139 | ASSERT_EQ(0, T::EncoderInit(encdec, adaptive ? 0 : 1)); |
| 140 | ASSERT_EQ(0, T::DecoderInit(encdec)); |
| 141 | |
| 142 | // Disjoint encoder/decoder pair: |
| 143 | typename T::instance_type* enc; |
| 144 | ASSERT_EQ(0, T::Create(&enc)); |
| 145 | ASSERT_EQ(0, T::EncoderInit(enc, adaptive ? 0 : 1)); |
| 146 | typename T::instance_type* dec; |
| 147 | ASSERT_EQ(0, T::Create(&dec)); |
| 148 | ASSERT_EQ(0, T::DecoderInit(dec)); |
| 149 | |
| 150 | // 0. Get initial BW info from decoder. |
| 151 | auto bi = GetBwInfo<T>(dec); |
| 152 | |
| 153 | BoundedCapacityChannel channel1(rate_bits_per_second), |
| 154 | channel2(rate_bits_per_second); |
| 155 | std::vector<size_t> packet_sizes; |
| 156 | for (int i = 0; i < Param::time_to_settle + 200; ++i) { |
| 157 | std::ostringstream ss; |
| 158 | ss << " i = " << i; |
| 159 | SCOPED_TRACE(ss.str()); |
| 160 | |
| 161 | // 1. Encode 6 * 10 ms (adaptive) or 3 * 10 ms (nonadaptive). The separate |
| 162 | // encoder is given the BW info before each encode call. |
| 163 | auto bitstream1 = |
| 164 | EncodePacket<T>(encdec, nullptr, speech_data, framesize_ms); |
| 165 | auto bitstream2 = EncodePacket<T>(enc, &bi, speech_data, framesize_ms); |
| 166 | EXPECT_EQ(bitstream1, bitstream2); |
| 167 | if (i > Param::time_to_settle) |
| 168 | packet_sizes.push_back(bitstream1.size()); |
| 169 | |
| 170 | // 2. Deliver the encoded data to the decoders (but don't actually ask them |
| 171 | // to decode it; that's not necessary). Then get new BW info from the |
| 172 | // separate decoder. |
| 173 | const int samples_per_packet = 16 * framesize_ms; |
| 174 | const int send_time = i * samples_per_packet; |
| 175 | EXPECT_EQ(0, T::UpdateBwEstimate( |
| 176 | encdec, bitstream1.data(), bitstream1.size(), i, send_time, |
| 177 | channel1.Send(send_time, bitstream1.size()))); |
| 178 | EXPECT_EQ(0, T::UpdateBwEstimate( |
| 179 | dec, bitstream2.data(), bitstream2.size(), i, send_time, |
| 180 | channel2.Send(send_time, bitstream2.size()))); |
| 181 | bi = GetBwInfo<T>(dec); |
| 182 | } |
| 183 | |
| 184 | EXPECT_EQ(0, T::Free(encdec)); |
| 185 | EXPECT_EQ(0, T::Free(enc)); |
| 186 | EXPECT_EQ(0, T::Free(dec)); |
| 187 | |
| 188 | // The average send bitrate is close to the channel's capacity. |
| 189 | double avg_size = |
| 190 | std::accumulate(packet_sizes.begin(), packet_sizes.end(), 0) / |
| 191 | static_cast<double>(packet_sizes.size()); |
| 192 | double avg_rate_bits_per_second = 8.0 * avg_size / (framesize_ms * 1e-3); |
| 193 | double expected_rate_bits_per_second = |
| 194 | Param::ExpectedRateBitsPerSecond(rate_bits_per_second); |
| 195 | EXPECT_GT(avg_rate_bits_per_second / expected_rate_bits_per_second, 0.95); |
| 196 | EXPECT_LT(avg_rate_bits_per_second / expected_rate_bits_per_second, 1.06); |
| 197 | |
| 198 | // The largest packet isn't that large, and the smallest not that small. |
| 199 | size_t min_size = *std::min_element(packet_sizes.begin(), packet_sizes.end()); |
| 200 | size_t max_size = *std::max_element(packet_sizes.begin(), packet_sizes.end()); |
| 201 | double size_range = max_size - min_size; |
| 202 | EXPECT_LE(size_range / avg_size, 0.16); |
| 203 | } |
| 204 | |
| 205 | } // namespace |
| 206 | |
| 207 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat12kAdaptive) { |
| 208 | TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 12000); |
| 209 | } |
| 210 | |
| 211 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat15kAdaptive) { |
| 212 | TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 15000); |
| 213 | } |
| 214 | |
| 215 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat19kAdaptive) { |
| 216 | TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 19000); |
| 217 | } |
| 218 | |
| 219 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat22kAdaptive) { |
| 220 | TestGetSetBandwidthInfo<IsacFloat, true>(LoadSpeechData().data(), 22000); |
| 221 | } |
| 222 | |
| 223 | TEST(IsacCommonTest, GetSetBandwidthInfoFix12kAdaptive) { |
| 224 | TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 12000); |
| 225 | } |
| 226 | |
| 227 | TEST(IsacCommonTest, GetSetBandwidthInfoFix15kAdaptive) { |
| 228 | TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 15000); |
| 229 | } |
| 230 | |
| 231 | TEST(IsacCommonTest, GetSetBandwidthInfoFix19kAdaptive) { |
| 232 | TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 19000); |
| 233 | } |
| 234 | |
| 235 | TEST(IsacCommonTest, GetSetBandwidthInfoFix22kAdaptive) { |
| 236 | TestGetSetBandwidthInfo<IsacFix, true>(LoadSpeechData().data(), 22000); |
| 237 | } |
| 238 | |
| 239 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat12k) { |
| 240 | TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 12000); |
| 241 | } |
| 242 | |
| 243 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat15k) { |
| 244 | TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 15000); |
| 245 | } |
| 246 | |
| 247 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat19k) { |
| 248 | TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 19000); |
| 249 | } |
| 250 | |
| 251 | TEST(IsacCommonTest, GetSetBandwidthInfoFloat22k) { |
| 252 | TestGetSetBandwidthInfo<IsacFloat, false>(LoadSpeechData().data(), 22000); |
| 253 | } |
| 254 | |
| 255 | TEST(IsacCommonTest, GetSetBandwidthInfoFix12k) { |
| 256 | TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 12000); |
| 257 | } |
| 258 | |
| 259 | TEST(IsacCommonTest, GetSetBandwidthInfoFix15k) { |
| 260 | TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 15000); |
| 261 | } |
| 262 | |
| 263 | TEST(IsacCommonTest, GetSetBandwidthInfoFix19k) { |
| 264 | TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 19000); |
| 265 | } |
| 266 | |
| 267 | TEST(IsacCommonTest, GetSetBandwidthInfoFix22k) { |
| 268 | TestGetSetBandwidthInfo<IsacFix, false>(LoadSpeechData().data(), 22000); |
| 269 | } |
| 270 | |
| 271 | } // namespace webrtc |