Update a ton of audio code to use size_t more correctly and in general reduce
use of int16_t/uint16_t.

This is the upshot of a recommendation by henrik.lundin and kwiberg on an original small change ( https://webrtc-codereview.appspot.com/42569004/#ps1 ) to stop using int16_t just because values could fit in it, and is similar in nature to a previous "mass change to use size_t more" ( https://webrtc-codereview.appspot.com/23129004/ ) which also needed to be split up for review but to land all at once, since, like adding "const", such changes tend to cause a lot of transitive effects.

This was reviewed and approved in pieces:
https://codereview.webrtc.org/1224093003
https://codereview.webrtc.org/1224123002
https://codereview.webrtc.org/1224163002
https://codereview.webrtc.org/1225133003
https://codereview.webrtc.org/1225173002
https://codereview.webrtc.org/1227163003
https://codereview.webrtc.org/1227203003
https://codereview.webrtc.org/1227213002
https://codereview.webrtc.org/1227893002
https://codereview.webrtc.org/1228793004
https://codereview.webrtc.org/1228803003
https://codereview.webrtc.org/1228823002
https://codereview.webrtc.org/1228823003
https://codereview.webrtc.org/1228843002
https://codereview.webrtc.org/1230693002
https://codereview.webrtc.org/1231713002

The change is being landed as TBR to all the folks who reviewed the above.

BUG=chromium:81439
TEST=none
R=andrew@webrtc.org, pbos@webrtc.org
TBR=aluebs, andrew, asapersson, henrika, hlundin, jan.skoglund, kwiberg, minyue, pbos, pthatcher

Review URL: https://codereview.webrtc.org/1230503003 .

Cr-Commit-Position: refs/heads/master@{#9768}
diff --git a/webrtc/modules/audio_coding/neteq/neteq_impl.cc b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
index 636ae87..d890acb 100644
--- a/webrtc/modules/audio_coding/neteq/neteq_impl.cc
+++ b/webrtc/modules/audio_coding/neteq/neteq_impl.cc
@@ -16,6 +16,7 @@
 #include <algorithm>
 
 #include "webrtc/base/logging.h"
+#include "webrtc/base/safe_conversions.h"
 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
 #include "webrtc/modules/audio_coding/codecs/audio_decoder.h"
 #include "webrtc/modules/audio_coding/neteq/accelerate.h"
@@ -104,7 +105,7 @@
   }
   fs_hz_ = fs;
   fs_mult_ = fs / 8000;
-  output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
+  output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
   decoder_frame_length_ = 3 * output_size_samples_;
   WebRtcSpl_Init();
   if (create_components) {
@@ -154,7 +155,7 @@
 }
 
 int NetEqImpl::GetAudio(size_t max_length, int16_t* output_audio,
-                        int* samples_per_channel, int* num_channels,
+                        size_t* samples_per_channel, int* num_channels,
                         NetEqOutputType* type) {
   CriticalSectionScoped lock(crit_sect_.get());
   LOG(LS_VERBOSE) << "GetAudio";
@@ -305,10 +306,10 @@
 int NetEqImpl::NetworkStatistics(NetEqNetworkStatistics* stats) {
   CriticalSectionScoped lock(crit_sect_.get());
   assert(decoder_database_.get());
-  const int total_samples_in_buffers =
+  const size_t total_samples_in_buffers =
       packet_buffer_->NumSamplesInBuffer(decoder_database_.get(),
                                          decoder_frame_length_) +
-      static_cast<int>(sync_buffer_->FutureLength());
+      sync_buffer_->FutureLength();
   assert(delay_manager_.get());
   assert(decision_logic_.get());
   stats_.GetNetworkStatistics(fs_hz_, total_samples_in_buffers,
@@ -603,7 +604,7 @@
   }
 
   // Insert packets in buffer.
-  int temp_bufsize = packet_buffer_->NumPacketsInBuffer();
+  size_t temp_bufsize = packet_buffer_->NumPacketsInBuffer();
   ret = packet_buffer_->InsertPacketList(
       &packet_list,
       *decoder_database_,
@@ -665,7 +666,8 @@
     if ((temp_bufsize > 0) &&
         (temp_bufsize != decision_logic_->packet_length_samples())) {
       decision_logic_->set_packet_length_samples(temp_bufsize);
-      delay_manager_->SetPacketAudioLength((1000 * temp_bufsize) / fs_hz_);
+      delay_manager_->SetPacketAudioLength(
+          static_cast<int>((1000 * temp_bufsize) / fs_hz_));
     }
 
     // Update statistics.
@@ -688,7 +690,7 @@
 
 int NetEqImpl::GetAudioInternal(size_t max_length,
                                 int16_t* output,
-                                int* samples_per_channel,
+                                size_t* samples_per_channel,
                                 int* num_channels) {
   PacketList packet_list;
   DtmfEvent dtmf_event;
@@ -712,7 +714,7 @@
   assert(vad_.get());
   bool sid_frame_available =
       (operation == kRfc3389Cng && !packet_list.empty());
-  vad_->Update(decoded_buffer_.get(), length, speech_type,
+  vad_->Update(decoded_buffer_.get(), static_cast<size_t>(length), speech_type,
                sid_frame_available, fs_hz_);
 
   algorithm_buffer_->Clear();
@@ -811,12 +813,11 @@
     LOG(LS_WARNING) << "Output array is too short. " << max_length << " < " <<
         output_size_samples_ << " * " << sync_buffer_->Channels();
     num_output_samples = max_length;
-    num_output_samples_per_channel = static_cast<int>(
-        max_length / sync_buffer_->Channels());
+    num_output_samples_per_channel = max_length / sync_buffer_->Channels();
   }
-  const int samples_from_sync =
-      static_cast<int>(sync_buffer_->GetNextAudioInterleaved(
-          num_output_samples_per_channel, output));
+  const size_t samples_from_sync =
+      sync_buffer_->GetNextAudioInterleaved(num_output_samples_per_channel,
+                                            output);
   *num_channels = static_cast<int>(sync_buffer_->Channels());
   LOG(LS_VERBOSE) << "Sync buffer (" << *num_channels << " channel(s)):" <<
       " insert " << algorithm_buffer_->Size() << " samples, extract " <<
@@ -922,7 +923,8 @@
       last_mode_ == kModePreemptiveExpandSuccess ||
       last_mode_ == kModePreemptiveExpandLowEnergy) {
     // Subtract (samples_left + output_size_samples_) from sampleMemory.
-    decision_logic_->AddSampleMemory(-(samples_left + output_size_samples_));
+    decision_logic_->AddSampleMemory(
+        -(samples_left + rtc::checked_cast<int>(output_size_samples_)));
   }
 
   // Check if it is time to play a DTMF event.
@@ -947,8 +949,10 @@
   // Check if we already have enough samples in the |sync_buffer_|. If so,
   // change decision to normal, unless the decision was merge, accelerate, or
   // preemptive expand.
-  if (samples_left >= output_size_samples_ && *operation != kMerge &&
-      *operation != kAccelerate && *operation != kFastAccelerate &&
+  if (samples_left >= rtc::checked_cast<int>(output_size_samples_) &&
+      *operation != kMerge &&
+      *operation != kAccelerate &&
+      *operation != kFastAccelerate &&
       *operation != kPreemptiveExpand) {
     *operation = kNormal;
     return 0;
@@ -996,10 +1000,10 @@
     stats_.ResetMcu();
   }
 
-  int required_samples = output_size_samples_;
-  const int samples_10_ms = 80 * fs_mult_;
-  const int samples_20_ms = 2 * samples_10_ms;
-  const int samples_30_ms = 3 * samples_10_ms;
+  size_t required_samples = output_size_samples_;
+  const size_t samples_10_ms = static_cast<size_t>(80 * fs_mult_);
+  const size_t samples_20_ms = 2 * samples_10_ms;
+  const size_t samples_30_ms = 3 * samples_10_ms;
 
   switch (*operation) {
     case kExpand: {
@@ -1028,17 +1032,17 @@
     case kAccelerate:
     case kFastAccelerate: {
       // In order to do an accelerate we need at least 30 ms of audio data.
-      if (samples_left >= samples_30_ms) {
+      if (samples_left >= static_cast<int>(samples_30_ms)) {
         // Already have enough data, so we do not need to extract any more.
         decision_logic_->set_sample_memory(samples_left);
         decision_logic_->set_prev_time_scale(true);
         return 0;
-      } else if (samples_left >= samples_10_ms &&
+      } else if (samples_left >= static_cast<int>(samples_10_ms) &&
           decoder_frame_length_ >= samples_30_ms) {
         // Avoid decoding more data as it might overflow the playout buffer.
         *operation = kNormal;
         return 0;
-      } else if (samples_left < samples_20_ms &&
+      } else if (samples_left < static_cast<int>(samples_20_ms) &&
           decoder_frame_length_ < samples_30_ms) {
         // Build up decoded data by decoding at least 20 ms of audio data. Do
         // not perform accelerate yet, but wait until we only need to do one
@@ -1056,8 +1060,8 @@
     case kPreemptiveExpand: {
       // In order to do a preemptive expand we need at least 30 ms of decoded
       // audio data.
-      if ((samples_left >= samples_30_ms) ||
-          (samples_left >= samples_10_ms &&
+      if ((samples_left >= static_cast<int>(samples_30_ms)) ||
+          (samples_left >= static_cast<int>(samples_10_ms) &&
               decoder_frame_length_ >= samples_30_ms)) {
         // Already have enough data, so we do not need to extract any more.
         // Or, avoid decoding more data as it might overflow the playout buffer.
@@ -1066,7 +1070,7 @@
         decision_logic_->set_prev_time_scale(true);
         return 0;
       }
-      if (samples_left < samples_20_ms &&
+      if (samples_left < static_cast<int>(samples_20_ms) &&
           decoder_frame_length_ < samples_30_ms) {
         // Build up decoded data by decoding at least 20 ms of audio data.
         // Still try to perform preemptive expand.
@@ -1123,7 +1127,7 @@
 
   if (*operation == kAccelerate || *operation == kFastAccelerate) {
     // Check that we have enough data (30ms) to do accelerate.
-    if (extracted_samples + samples_left < samples_30_ms) {
+    if (extracted_samples + samples_left < static_cast<int>(samples_30_ms)) {
       // TODO(hlundin): Write test for this.
       // Not enough, do normal operation instead.
       *operation = kNormal;
@@ -1274,7 +1278,7 @@
       memset(&decoded_buffer_[*decoded_length], 0,
              decoder_frame_length_ * decoder->Channels() *
                  sizeof(decoded_buffer_[0]));
-      decode_length = decoder_frame_length_;
+      decode_length = rtc::checked_cast<int>(decoder_frame_length_);
     } else if (!packet->primary) {
       // This is a redundant payload; call the special decoder method.
       LOG(LS_VERBOSE) << "Decoding packet (redundant):" <<
@@ -1307,7 +1311,7 @@
       *decoded_length += decode_length;
       // Update |decoder_frame_length_| with number of samples per channel.
       decoder_frame_length_ =
-          decode_length / static_cast<int>(decoder->Channels());
+          static_cast<size_t>(decode_length) / decoder->Channels();
       LOG(LS_VERBOSE) << "Decoded " << decode_length << " samples ("
                       << decoder->Channels() << " channel(s) -> "
                       << decoder_frame_length_ << " samples per channel)";
@@ -1366,11 +1370,11 @@
                         AudioDecoder::SpeechType speech_type, bool play_dtmf) {
   assert(mute_factor_array_.get());
   assert(merge_.get());
-  int new_length = merge_->Process(decoded_buffer, decoded_length,
-                                   mute_factor_array_.get(),
-                                   algorithm_buffer_.get());
-  int expand_length_correction = new_length -
-      static_cast<int>(decoded_length / algorithm_buffer_->Channels());
+  size_t new_length = merge_->Process(decoded_buffer, decoded_length,
+                                      mute_factor_array_.get(),
+                                      algorithm_buffer_.get());
+  size_t expand_length_correction = new_length -
+      decoded_length / algorithm_buffer_->Channels();
 
   // Update in-call and post-call statistics.
   if (expand_->MuteFactor(0) == 0) {
@@ -1394,10 +1398,10 @@
 
 int NetEqImpl::DoExpand(bool play_dtmf) {
   while ((sync_buffer_->FutureLength() - expand_->overlap_length()) <
-      static_cast<size_t>(output_size_samples_)) {
+      output_size_samples_) {
     algorithm_buffer_->Clear();
     int return_value = expand_->Process(algorithm_buffer_.get());
-    int length = static_cast<int>(algorithm_buffer_->Size());
+    size_t length = algorithm_buffer_->Size();
 
     // Update in-call and post-call statistics.
     if (expand_->MuteFactor(0) == 0) {
@@ -1428,7 +1432,8 @@
                             AudioDecoder::SpeechType speech_type,
                             bool play_dtmf,
                             bool fast_accelerate) {
-  const size_t required_samples = 240 * fs_mult_;  // Must have 30 ms.
+  const size_t required_samples =
+      static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
   size_t borrowed_samples_per_channel = 0;
   size_t num_channels = algorithm_buffer_->Channels();
   size_t decoded_length_per_channel = decoded_length / num_channels;
@@ -1444,7 +1449,7 @@
     decoded_length = required_samples * num_channels;
   }
 
-  int16_t samples_removed;
+  size_t samples_removed;
   Accelerate::ReturnCodes return_code =
       accelerate_->Process(decoded_buffer, decoded_length, fast_accelerate,
                            algorithm_buffer_.get(), &samples_removed);
@@ -1501,20 +1506,20 @@
                                   size_t decoded_length,
                                   AudioDecoder::SpeechType speech_type,
                                   bool play_dtmf) {
-  const size_t required_samples = 240 * fs_mult_;  // Must have 30 ms.
+  const size_t required_samples =
+      static_cast<size_t>(240 * fs_mult_);  // Must have 30 ms.
   size_t num_channels = algorithm_buffer_->Channels();
-  int borrowed_samples_per_channel = 0;
-  int old_borrowed_samples_per_channel = 0;
+  size_t borrowed_samples_per_channel = 0;
+  size_t old_borrowed_samples_per_channel = 0;
   size_t decoded_length_per_channel = decoded_length / num_channels;
   if (decoded_length_per_channel < required_samples) {
     // Must move data from the |sync_buffer_| in order to get 30 ms.
-    borrowed_samples_per_channel = static_cast<int>(required_samples -
-        decoded_length_per_channel);
+    borrowed_samples_per_channel =
+        required_samples - decoded_length_per_channel;
     // Calculate how many of these were already played out.
-    const int future_length = static_cast<int>(sync_buffer_->FutureLength());
     old_borrowed_samples_per_channel =
-        (borrowed_samples_per_channel > future_length) ?
-        (borrowed_samples_per_channel - future_length) : 0;
+        (borrowed_samples_per_channel > sync_buffer_->FutureLength()) ?
+        (borrowed_samples_per_channel - sync_buffer_->FutureLength()) : 0;
     memmove(&decoded_buffer[borrowed_samples_per_channel * num_channels],
             decoded_buffer,
             sizeof(int16_t) * decoded_length);
@@ -1523,9 +1528,9 @@
     decoded_length = required_samples * num_channels;
   }
 
-  int16_t samples_added;
+  size_t samples_added;
   PreemptiveExpand::ReturnCodes return_code = preemptive_expand_->Process(
-      decoded_buffer, static_cast<int>(decoded_length),
+      decoded_buffer, decoded_length,
       old_borrowed_samples_per_channel,
       algorithm_buffer_.get(), &samples_added);
   stats_.PreemptiveExpandedSamples(samples_added);
@@ -1719,17 +1724,14 @@
 
 void NetEqImpl::DoAlternativePlc(bool increase_timestamp) {
   AudioDecoder* decoder = decoder_database_->GetActiveDecoder();
-  int length;
+  size_t length;
   if (decoder && decoder->HasDecodePlc()) {
     // Use the decoder's packet-loss concealment.
     // TODO(hlundin): Will probably need a longer buffer for multi-channel.
     int16_t decoded_buffer[kMaxFrameSize];
     length = decoder->DecodePlc(1, decoded_buffer);
-    if (length > 0) {
+    if (length > 0)
       algorithm_buffer_->PushBackInterleaved(decoded_buffer, length);
-    } else {
-      length = 0;
-    }
   } else {
     // Do simple zero-stuffing.
     length = output_size_samples_;
@@ -1746,14 +1748,14 @@
 int NetEqImpl::DtmfOverdub(const DtmfEvent& dtmf_event, size_t num_channels,
                            int16_t* output) const {
   size_t out_index = 0;
-  int overdub_length = output_size_samples_;  // Default value.
+  size_t overdub_length = output_size_samples_;  // Default value.
 
   if (sync_buffer_->dtmf_index() > sync_buffer_->next_index()) {
     // Special operation for transition from "DTMF only" to "DTMF overdub".
     out_index = std::min(
         sync_buffer_->dtmf_index() - sync_buffer_->next_index(),
-        static_cast<size_t>(output_size_samples_));
-    overdub_length = output_size_samples_ - static_cast<int>(out_index);
+        output_size_samples_);
+    overdub_length = output_size_samples_ - out_index;
   }
 
   AudioMultiVector dtmf_output(num_channels);
@@ -1765,13 +1767,14 @@
   if (dtmf_return_value == 0) {
     dtmf_return_value = dtmf_tone_generator_->Generate(overdub_length,
                                                        &dtmf_output);
-    assert((size_t) overdub_length == dtmf_output.Size());
+    assert(overdub_length == dtmf_output.Size());
   }
   dtmf_output.ReadInterleaved(overdub_length, &output[out_index]);
   return dtmf_return_value < 0 ? dtmf_return_value : 0;
 }
 
-int NetEqImpl::ExtractPackets(int required_samples, PacketList* packet_list) {
+int NetEqImpl::ExtractPackets(size_t required_samples,
+                              PacketList* packet_list) {
   bool first_packet = true;
   uint8_t prev_payload_type = 0;
   uint32_t prev_timestamp = 0;
@@ -1790,7 +1793,7 @@
   // Packet extraction loop.
   do {
     timestamp_ = header->timestamp;
-    int discard_count = 0;
+    size_t discard_count = 0;
     Packet* packet = packet_buffer_->GetNextPacket(&discard_count);
     // |header| may be invalid after the |packet_buffer_| operation.
     header = NULL;
@@ -1819,7 +1822,7 @@
         packet->header.payloadType);
     if (decoder) {
       if (packet->sync_packet) {
-        packet_duration = decoder_frame_length_;
+        packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
       } else {
         if (packet->primary) {
           packet_duration = decoder->PacketDuration(packet->payload,
@@ -1838,7 +1841,7 @@
     if (packet_duration <= 0) {
       // Decoder did not return a packet duration. Assume that the packet
       // contains the same number of samples as the previous one.
-      packet_duration = decoder_frame_length_;
+      packet_duration = rtc::checked_cast<int>(decoder_frame_length_);
     }
     extracted_samples = packet->header.timestamp - first_timestamp +
         packet_duration;
@@ -1848,7 +1851,7 @@
     next_packet_available = false;
     if (header && prev_payload_type == header->payloadType) {
       int16_t seq_no_diff = header->sequenceNumber - prev_sequence_number;
-      int32_t ts_diff = header->timestamp - prev_timestamp;
+      size_t ts_diff = header->timestamp - prev_timestamp;
       if (seq_no_diff == 1 ||
           (seq_no_diff == 0 && ts_diff == decoder_frame_length_)) {
         // The next sequence number is available, or the next part of a packet
@@ -1857,7 +1860,8 @@
       }
       prev_sequence_number = header->sequenceNumber;
     }
-  } while (extracted_samples < required_samples && next_packet_available);
+  } while (extracted_samples < rtc::checked_cast<int>(required_samples) &&
+           next_packet_available);
 
   if (extracted_samples > 0) {
     // Delete old packets only when we are going to decode something. Otherwise,
@@ -1886,7 +1890,7 @@
 
   fs_hz_ = fs_hz;
   fs_mult_ = fs_hz / 8000;
-  output_size_samples_ = kOutputSizeMs * 8 * fs_mult_;
+  output_size_samples_ = static_cast<size_t>(kOutputSizeMs * 8 * fs_mult_);
   decoder_frame_length_ = 3 * output_size_samples_;  // Initialize to 30ms.
 
   last_mode_ = kModeNormal;
@@ -1931,9 +1935,7 @@
   accelerate_.reset(
       accelerate_factory_->Create(fs_hz, channels, *background_noise_));
   preemptive_expand_.reset(preemptive_expand_factory_->Create(
-      fs_hz, channels,
-      *background_noise_,
-      static_cast<int>(expand_->overlap_length())));
+      fs_hz, channels, *background_noise_, expand_->overlap_length()));
 
   // Delete ComfortNoise object and create a new one.
   comfort_noise_.reset(new ComfortNoise(fs_hz, decoder_database_.get(),