/*
 *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
| 11 | #include "webrtc/modules/audio_coding/neteq4/delay_manager.h" |
| 12 | |
| 13 | #include <assert.h> |
| 14 | #include <math.h> |
| 15 | |
| 16 | #include <algorithm> // max, min |
| 17 | |
| 18 | #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" |
| 19 | #include "webrtc/modules/audio_coding/neteq4/delay_peak_detector.h" |
| 20 | #include "webrtc/system_wrappers/interface/logging.h" |
| 21 | |
| 22 | namespace webrtc { |
| 23 | |
| 24 | DelayManager::DelayManager(int max_packets_in_buffer, |
| 25 | DelayPeakDetector* peak_detector) |
| 26 | : first_packet_received_(false), |
| 27 | max_packets_in_buffer_(max_packets_in_buffer), |
| 28 | iat_vector_(kMaxIat + 1, 0), |
| 29 | iat_factor_(0), |
| 30 | packet_iat_count_ms_(0), |
| 31 | base_target_level_(4), // In Q0 domain. |
| 32 | target_level_(base_target_level_ << 8), // In Q8 domain. |
| 33 | packet_len_ms_(0), |
| 34 | streaming_mode_(false), |
| 35 | last_seq_no_(0), |
| 36 | last_timestamp_(0), |
| 37 | extra_delay_ms_(0), |
| 38 | iat_cumulative_sum_(0), |
| 39 | max_iat_cumulative_sum_(0), |
| 40 | max_timer_ms_(0), |
| 41 | peak_detector_(*peak_detector), |
| 42 | last_pack_cng_or_dtmf_(1) { |
| 43 | assert(peak_detector); // Should never be NULL. |
| 44 | Reset(); |
| 45 | } |
| 46 | |
| 47 | // Set the histogram vector to an exponentially decaying distribution |
| 48 | // iat_vector_[i] = 0.5^(i+1), i = 0, 1, 2, ... |
| 49 | // iat_vector_ is in Q30. |
| 50 | void DelayManager::ResetHistogram() { |
| 51 | // Set temp_prob to (slightly more than) 1 in Q14. This ensures that the sum |
| 52 | // of iat_vector_ is 1. |
| 53 | uint16_t temp_prob = 0x4002; // 16384 + 2 = 100000000000010 binary. |
| 54 | IATVector::iterator it = iat_vector_.begin(); |
| 55 | for (; it < iat_vector_.end(); it++) { |
| 56 | temp_prob >>= 1; |
| 57 | (*it) = temp_prob << 16; |
| 58 | } |
| 59 | base_target_level_ = 4; |
| 60 | target_level_ = base_target_level_ << 8; |
| 61 | } |
| 62 | |
| 63 | int DelayManager::Update(uint16_t sequence_number, |
| 64 | uint32_t timestamp, |
| 65 | int sample_rate_hz) { |
| 66 | if (sample_rate_hz <= 0) { |
| 67 | return -1; |
| 68 | } |
| 69 | |
| 70 | if (!first_packet_received_) { |
| 71 | // Prepare for next packet arrival. |
| 72 | packet_iat_count_ms_ = 0; |
| 73 | last_seq_no_ = sequence_number; |
| 74 | last_timestamp_ = timestamp; |
| 75 | first_packet_received_ = true; |
| 76 | return 0; |
| 77 | } |
| 78 | |
| 79 | // Try calculating packet length from current and previous timestamps. |
| 80 | // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy |
| 81 | // bit-exactness. |
| 82 | int packet_len_ms; |
| 83 | if ((timestamp <= last_timestamp_) || (sequence_number <= last_seq_no_)) { |
| 84 | // Wrong timestamp or sequence order; use stored value. |
| 85 | packet_len_ms = packet_len_ms_; |
| 86 | } else { |
| 87 | // Calculate timestamps per packet and derive packet length in ms. |
| 88 | int packet_len_samp = |
| 89 | static_cast<uint32_t>(timestamp - last_timestamp_) / |
| 90 | static_cast<uint16_t>(sequence_number - last_seq_no_); |
| 91 | packet_len_ms = (1000 * packet_len_samp) / sample_rate_hz; |
| 92 | } |
| 93 | |
| 94 | if (packet_len_ms > 0) { |
| 95 | // Cannot update statistics unless |packet_len_ms| is valid. |
| 96 | // Calculate inter-arrival time (IAT) in integer "packet times" |
| 97 | // (rounding down). This is the value used as index to the histogram |
| 98 | // vector |iat_vector_|. |
| 99 | int iat_packets = packet_iat_count_ms_ / packet_len_ms; |
| 100 | |
| 101 | if (streaming_mode_) { |
| 102 | UpdateCumulativeSums(packet_len_ms, sequence_number); |
| 103 | } |
| 104 | |
| 105 | // Check for discontinuous packet sequence and re-ordering. |
| 106 | if (sequence_number > last_seq_no_ + 1) { |
| 107 | // TODO(hlundin): Take care of wrap-around. Not done yet due to legacy |
| 108 | // bit-exactness. |
| 109 | // Compensate for gap in the sequence numbers. Reduce IAT with the |
| 110 | // expected extra time due to lost packets, but ensure that the IAT is |
| 111 | // not negative. |
| 112 | iat_packets -= sequence_number - last_seq_no_ - 1; |
| 113 | iat_packets = std::max(iat_packets, 0); |
| 114 | } else if (sequence_number < last_seq_no_) { |
| 115 | // TODO(hlundin): Take care of wrap-around. |
| 116 | // Compensate for re-ordering. |
| 117 | iat_packets += last_seq_no_ + 1 - sequence_number; |
| 118 | } |
| 119 | |
| 120 | // Saturate IAT at maximum value. |
| 121 | const int max_iat = kMaxIat; |
| 122 | iat_packets = std::min(iat_packets, max_iat); |
| 123 | UpdateHistogram(iat_packets); |
| 124 | // Calculate new |target_level_| based on updated statistics. |
| 125 | target_level_ = CalculateTargetLevel(iat_packets); |
| 126 | if (streaming_mode_) { |
| 127 | target_level_ = std::max(target_level_, max_iat_cumulative_sum_); |
| 128 | } |
| 129 | |
| 130 | LimitTargetLevel(); |
| 131 | } // End if (packet_len_ms > 0). |
| 132 | |
| 133 | // Prepare for next packet arrival. |
| 134 | packet_iat_count_ms_ = 0; |
| 135 | last_seq_no_ = sequence_number; |
| 136 | last_timestamp_ = timestamp; |
| 137 | return 0; |
| 138 | } |
| 139 | |
| 140 | void DelayManager::UpdateCumulativeSums(int packet_len_ms, |
| 141 | uint16_t sequence_number) { |
| 142 | // Calculate IAT in Q8, including fractions of a packet (i.e., more |
| 143 | // accurate than |iat_packets|. |
| 144 | int iat_packets_q8 = (packet_iat_count_ms_ << 8) / packet_len_ms; |
| 145 | // Calculate cumulative sum IAT with sequence number compensation. The sum |
| 146 | // is zero if there is no clock-drift. |
| 147 | iat_cumulative_sum_ += (iat_packets_q8 - |
| 148 | (static_cast<int>(sequence_number - last_seq_no_) << 8)); |
| 149 | // Subtract drift term. |
| 150 | iat_cumulative_sum_ -= kCumulativeSumDrift; |
| 151 | // Ensure not negative. |
| 152 | iat_cumulative_sum_ = std::max(iat_cumulative_sum_, 0); |
| 153 | if (iat_cumulative_sum_ > max_iat_cumulative_sum_) { |
| 154 | // Found a new maximum. |
| 155 | max_iat_cumulative_sum_ = iat_cumulative_sum_; |
| 156 | max_timer_ms_ = 0; |
| 157 | } |
| 158 | if (max_timer_ms_ > kMaxStreamingPeakPeriodMs) { |
| 159 | // Too long since the last maximum was observed; decrease max value. |
| 160 | max_iat_cumulative_sum_ -= kCumulativeSumDrift; |
| 161 | } |
| 162 | } |
| 163 | |
| 164 | // Each element in the vector is first multiplied by the forgetting factor |
| 165 | // |iat_factor_|. Then the vector element indicated by |iat_packets| is then |
| 166 | // increased (additive) by 1 - |iat_factor_|. This way, the probability of |
| 167 | // |iat_packets| is slightly increased, while the sum of the histogram remains |
| 168 | // constant (=1). |
| 169 | // Due to inaccuracies in the fixed-point arithmetic, the histogram may no |
| 170 | // longer sum up to 1 (in Q30) after the update. To correct this, a correction |
| 171 | // term is added or subtracted from the first element (or elements) of the |
| 172 | // vector. |
| 173 | // The forgetting factor |iat_factor_| is also updated. When the DelayManager |
| 174 | // is reset, the factor is set to 0 to facilitate rapid convergence in the |
| 175 | // beginning. With each update of the histogram, the factor is increased towards |
| 176 | // the steady-state value |kIatFactor_|. |
| 177 | void DelayManager::UpdateHistogram(size_t iat_packets) { |
| 178 | assert(iat_packets < iat_vector_.size()); |
| 179 | int vector_sum = 0; // Sum up the vector elements as they are processed. |
| 180 | // Multiply each element in |iat_vector_| with |iat_factor_|. |
| 181 | for (IATVector::iterator it = iat_vector_.begin(); |
| 182 | it != iat_vector_.end(); ++it) { |
| 183 | *it = (static_cast<int64_t>(*it) * iat_factor_) >> 15; |
| 184 | vector_sum += *it; |
| 185 | } |
| 186 | |
| 187 | // Increase the probability for the currently observed inter-arrival time |
| 188 | // by 1 - |iat_factor_|. The factor is in Q15, |iat_vector_| in Q30. |
| 189 | // Thus, left-shift 15 steps to obtain result in Q30. |
| 190 | iat_vector_[iat_packets] += (32768 - iat_factor_) << 15; |
| 191 | vector_sum += (32768 - iat_factor_) << 15; // Add to vector sum. |
| 192 | |
| 193 | // |iat_vector_| should sum up to 1 (in Q30), but it may not due to |
| 194 | // fixed-point rounding errors. |
| 195 | vector_sum -= 1 << 30; // Should be zero. Compensate if not. |
| 196 | if (vector_sum != 0) { |
| 197 | // Modify a few values early in |iat_vector_|. |
| 198 | int flip_sign = vector_sum > 0 ? -1 : 1; |
| 199 | IATVector::iterator it = iat_vector_.begin(); |
| 200 | while (it != iat_vector_.end() && abs(vector_sum) > 0) { |
| 201 | // Add/subtract 1/16 of the element, but not more than |vector_sum|. |
| 202 | int correction = flip_sign * std::min(abs(vector_sum), (*it) >> 4); |
| 203 | *it += correction; |
| 204 | vector_sum += correction; |
| 205 | ++it; |
| 206 | } |
| 207 | } |
| 208 | assert(vector_sum == 0); // Verify that the above is correct. |
| 209 | |
| 210 | // Update |iat_factor_| (changes only during the first seconds after a reset). |
| 211 | // The factor converges to |kIatFactor_|. |
| 212 | iat_factor_ += (kIatFactor_ - iat_factor_ + 3) >> 2; |
| 213 | } |
| 214 | |
| 215 | // Enforces upper limit for |target_level_|. The limit is chosen to be |
| 216 | // 75% of |max_packets_in_buffer_|, to leave some headroom for natural |
| 217 | // fluctuations around the target. If an extra delay is requested, the |
| 218 | // cap is lowered even further. Note that in practice, this does not have |
| 219 | // any impact, since the target level is far below the buffer capacity in |
| 220 | // all reasonable cases. |
| 221 | // TODO(hlundin): Move this check to the buffer logistics class. |
| 222 | void DelayManager::LimitTargetLevel() { |
| 223 | int max_buffer_len = max_packets_in_buffer_; |
| 224 | if (extra_delay_ms_ > 0 && packet_len_ms_ > 0) { |
| 225 | max_buffer_len -= extra_delay_ms_ / packet_len_ms_; |
| 226 | max_buffer_len = std::max(max_buffer_len, 1); // Sanity check. |
| 227 | } |
| 228 | max_buffer_len = (3 * (max_buffer_len << 8)) / 4; // Shift to Q8, then 75%. |
| 229 | target_level_ = std::min(target_level_, max_buffer_len); |
| 230 | } |
| 231 | |
| 232 | int DelayManager::CalculateTargetLevel(int iat_packets) { |
| 233 | int limit_probability = kLimitProbability; |
| 234 | if (streaming_mode_) { |
| 235 | limit_probability = kLimitProbabilityStreaming; |
| 236 | } |
| 237 | |
| 238 | // Calculate target buffer level from inter-arrival time histogram. |
| 239 | // Find the |iat_index| for which the probability of observing an |
| 240 | // inter-arrival time larger than or equal to |iat_index| is less than or |
| 241 | // equal to |limit_probability|. The sought probability is estimated using |
| 242 | // the histogram as the reverse cumulant PDF, i.e., the sum of elements from |
| 243 | // the end up until |iat_index|. Now, since the sum of all elements is 1 |
| 244 | // (in Q30) by definition, and since the solution is often a low value for |
| 245 | // |iat_index|, it is more efficient to start with |sum| = 1 and subtract |
| 246 | // elements from the start of the histogram. |
| 247 | size_t index = 0; // Start from the beginning of |iat_vector_|. |
| 248 | int sum = 1 << 30; // Assign to 1 in Q30. |
| 249 | sum -= iat_vector_[index]; // Ensure that target level is >= 1. |
| 250 | |
| 251 | do { |
| 252 | // Subtract the probabilities one by one until the sum is no longer greater |
| 253 | // than limit_probability. |
| 254 | ++index; |
| 255 | sum -= iat_vector_[index]; |
| 256 | } while ((sum > limit_probability) && (index < iat_vector_.size() - 1)); |
| 257 | |
| 258 | // This is the base value for the target buffer level. |
| 259 | int target_level = index; |
| 260 | base_target_level_ = index; |
| 261 | |
| 262 | // Update detector for delay peaks. |
| 263 | bool delay_peak_found = peak_detector_.Update(iat_packets, target_level); |
| 264 | if (delay_peak_found) { |
| 265 | target_level = std::max(static_cast<int>(target_level), |
| 266 | peak_detector_.MaxPeakHeight()); |
| 267 | } |
| 268 | |
| 269 | // Sanity check. |target_level| must be strictly positive. |
| 270 | target_level = std::max(target_level, 1); |
| 271 | // Scale to Q8 and assign to member variable. |
| 272 | target_level_ = target_level << 8; |
| 273 | return target_level_; |
| 274 | } |
| 275 | |
| 276 | int DelayManager::SetPacketAudioLength(int length_ms) { |
| 277 | if (length_ms <= 0) { |
| 278 | LOG_F(LS_ERROR) << "length_ms = " << length_ms; |
| 279 | return -1; |
| 280 | } |
| 281 | packet_len_ms_ = length_ms; |
| 282 | peak_detector_.SetPacketAudioLength(packet_len_ms_); |
| 283 | packet_iat_count_ms_ = 0; |
| 284 | last_pack_cng_or_dtmf_ = 1; // TODO(hlundin): Legacy. Remove? |
| 285 | return 0; |
| 286 | } |
| 287 | |
| 288 | |
| 289 | void DelayManager::Reset() { |
| 290 | packet_len_ms_ = 0; // Packet size unknown. |
| 291 | streaming_mode_ = false; |
| 292 | peak_detector_.Reset(); |
| 293 | ResetHistogram(); // Resets target levels too. |
| 294 | iat_factor_ = 0; // Adapt the histogram faster for the first few packets. |
| 295 | packet_iat_count_ms_ = 0; |
| 296 | max_timer_ms_ = 0; |
| 297 | iat_cumulative_sum_ = 0; |
| 298 | max_iat_cumulative_sum_ = 0; |
| 299 | last_pack_cng_or_dtmf_ = 1; |
| 300 | } |
| 301 | |
| 302 | int DelayManager::AverageIAT() const { |
| 303 | int32_t sum_q24 = 0; |
| 304 | assert(iat_vector_.size() == 65); // Algorithm is hard-coded for this size. |
| 305 | for (size_t i = 0; i < iat_vector_.size(); ++i) { |
| 306 | // Shift 6 to fit worst case: 2^30 * 64. |
| 307 | sum_q24 += (iat_vector_[i] >> 6) * i; |
| 308 | } |
| 309 | // Subtract the nominal inter-arrival time 1 = 2^24 in Q24. |
| 310 | sum_q24 -= (1 << 24); |
| 311 | // Multiply with 1000000 / 2^24 = 15625 / 2^18 to get in parts-per-million. |
| 312 | // Shift 7 to Q17 first, then multiply with 15625 and shift another 11. |
| 313 | return ((sum_q24 >> 7) * 15625) >> 11; |
| 314 | } |
| 315 | |
| 316 | bool DelayManager::PeakFound() const { |
| 317 | return peak_detector_.peak_found(); |
| 318 | } |
| 319 | |
| 320 | void DelayManager::UpdateCounters(int elapsed_time_ms) { |
| 321 | packet_iat_count_ms_ += elapsed_time_ms; |
| 322 | peak_detector_.IncrementCounter(elapsed_time_ms); |
| 323 | max_timer_ms_ += elapsed_time_ms; |
| 324 | } |
| 325 | |
| 326 | void DelayManager::BufferLimits(int* lower_limit, int* higher_limit) const { |
| 327 | if (!lower_limit || !higher_limit) { |
| 328 | LOG_F(LS_ERROR) << "NULL pointers supplied as input"; |
| 329 | assert(false); |
| 330 | return; |
| 331 | } |
| 332 | |
| 333 | int extra_delay_packets_q8 = 0; |
| 334 | int window_20ms = 0x7FFF; // Default large value for legacy bit-exactness. |
| 335 | if (packet_len_ms_ > 0) { |
| 336 | extra_delay_packets_q8 = (extra_delay_ms_ << 8) / packet_len_ms_; |
| 337 | window_20ms = (20 << 8) / packet_len_ms_; |
| 338 | } |
| 339 | // |lower_limit| is 75% of |target_level_| + extra delay. |
| 340 | // |target_level_| is in Q8 already. |
| 341 | *lower_limit = (target_level_ * 3) / 4 + extra_delay_packets_q8; |
| 342 | // |higher_limit| is equal to |target_level_| + extra delay, but should at |
| 343 | // least be 20 ms higher than |lower_limit_|. |
| 344 | *higher_limit = std::max(target_level_ + extra_delay_packets_q8, |
| 345 | *lower_limit + window_20ms); |
| 346 | } |
| 347 | |
| 348 | int DelayManager::TargetLevel() const { |
| 349 | if (packet_len_ms_ > 0) { |
| 350 | // Add |extra_delay_ms_| converted to packets in Q8. |
| 351 | return target_level_ + (extra_delay_ms_ << 8) / packet_len_ms_; |
| 352 | } else { |
| 353 | // Cannot convert |extra_delay_ms_|; simply return |target_level_|. |
| 354 | return target_level_; |
| 355 | } |
| 356 | } |
| 357 | |
| 358 | void DelayManager::LastDecoderType(NetEqDecoder decoder_type) { |
| 359 | if (decoder_type == kDecoderAVT || |
| 360 | decoder_type == kDecoderCNGnb || |
| 361 | decoder_type == kDecoderCNGwb || |
| 362 | decoder_type == kDecoderCNGswb32kHz || |
| 363 | decoder_type == kDecoderCNGswb48kHz) { |
| 364 | last_pack_cng_or_dtmf_ = 1; |
| 365 | } else if (last_pack_cng_or_dtmf_ != 0) { |
| 366 | last_pack_cng_or_dtmf_ = -1; |
| 367 | } |
| 368 | } |
| 369 | } // namespace webrtc |