Reland of Opus multistream.

This is a reland of
https://webrtc-review.googlesource.com/c/src/+/111750.

This time we don't use the multistream encoder/decoder unless we have
to, i.e. when the number of channels is greater than 2. Pros: downstream
projects no longer crash from exhausted stack space, and the typical
case is a few percent more efficient (because multistream adds some
overhead). Cons: messy C code with "union" types and #define macros,
and probably more maintenance.

Bug: webrtc:8649
Change-Id: I4253a5e0c382f67ac7c6731dc6602a31e6779e63
Reviewed-on: https://webrtc-review.googlesource.com/c/120049
Commit-Queue: Alex Loiko <aleloi@webrtc.org>
Reviewed-by: Minyue Li <minyue@webrtc.org>
Cr-Commit-Position: refs/heads/master@{#26445}
diff --git a/modules/audio_coding/codecs/opus/opus_interface.c b/modules/audio_coding/codecs/opus/opus_interface.c
index d219098..3369f8e 100644
--- a/modules/audio_coding/codecs/opus/opus_interface.c
+++ b/modules/audio_coding/codecs/opus/opus_interface.c
@@ -37,6 +37,40 @@
   kWebRtcOpusDefaultFrameSize = 960,
 };
 
+int16_t GetSurroundParameters(int channels,
+                              int *streams,
+                              int *coupled_streams,
+                              unsigned char *mapping) {
+  int opus_error;
+  int ret = 0;
+  // Create a throwaway surround encoder solely so that Opus fills in
+  // 'streams', 'coupled_streams' and 'mapping' for this channel count.
+  OpusMSEncoder* ms_encoder_ptr = opus_multistream_surround_encoder_create(
+      48000,
+      channels,
+      /* mapping family */ channels <= 2 ? 0 : 1,
+      streams,
+      coupled_streams,
+      mapping,
+      OPUS_APPLICATION_VOIP, // Application type shouldn't affect
+                             // streams/mapping values.
+      &opus_error);
+
+  // Creation is not expected to fail; if it does, report -1 and set
+  // '*streams' and '*coupled_streams' to -1 ('mapping' is not touched here).
+  if (opus_error != OPUS_OK || ms_encoder_ptr == NULL) {
+    ret = -1;
+    *streams = -1;
+    *coupled_streams = -1;
+  }
+
+  // The encoder was only a means of obtaining the parameters; destroy it.
+  if (ms_encoder_ptr != NULL) {
+    opus_multistream_encoder_destroy(ms_encoder_ptr);
+  }
+  return ret;  // 0 on success, -1 on failure.
+}
+
 int16_t WebRtcOpus_EncoderCreate(OpusEncInst** inst,
                                  size_t channels,
                                  int32_t application) {
@@ -55,13 +89,34 @@
       return -1;
   }
 
-  OpusEncInst* state = calloc(1, sizeof(OpusEncInst));
+  OpusEncInst* state = (OpusEncInst*)calloc(1, sizeof(OpusEncInst));
   RTC_DCHECK(state);
 
   int error;
-  state->encoder = opus_encoder_create(48000, (int)channels, opus_app,
-                                       &error);
-  if (error != OPUS_OK || !state->encoder) {
+  if (channels <= 2) {
+    state->encoder.encoder = opus_encoder_create(48000, (int)channels, opus_app,
+                                         &error);
+
+  } else {
+    unsigned char mapping[255];
+    memset(mapping, 0, 255);
+    int streams = -1;
+    int coupled_streams = -1;
+
+    state->encoder.multistream_encoder =
+        opus_multistream_surround_encoder_create(
+        48000,
+        channels,
+        /* mapping family */ 1,
+        &streams,
+        &coupled_streams,
+        mapping,
+        opus_app,
+        &error);
+  }
+
+  if (error != OPUS_OK || (!state->encoder.encoder &&
+                           !state->encoder.multistream_encoder)) {
     WebRtcOpus_EncoderFree(state);
     return -1;
   }
@@ -75,7 +130,11 @@
 
 int16_t WebRtcOpus_EncoderFree(OpusEncInst* inst) {
   if (inst) {
-    opus_encoder_destroy(inst->encoder);
+    if (inst->channels <= 2) {
+      opus_encoder_destroy(inst->encoder.encoder);
+    } else {
+      opus_multistream_encoder_destroy(inst->encoder.multistream_encoder);
+    }
     free(inst);
     return 0;
   } else {
@@ -94,11 +153,19 @@
     return -1;
   }
 
-  res = opus_encode(inst->encoder,
-                    (const opus_int16*)audio_in,
-                    (int)samples,
-                    encoded,
-                    (opus_int32)length_encoded_buffer);
+  if (inst->channels <= 2) {
+    res = opus_encode(inst->encoder.encoder,
+                      (const opus_int16*)audio_in,
+                      (int)samples,
+                      encoded,
+                      (opus_int32)length_encoded_buffer);
+  } else {
+    res = opus_multistream_encode(inst->encoder.multistream_encoder,
+                                  (const opus_int16*)audio_in,
+                                  (int)samples,
+                                  encoded,
+                                  (opus_int32)length_encoded_buffer);
+  }
 
   if (res <= 0) {
     return -1;
@@ -120,9 +187,15 @@
   return res;
 }
 
+#define ENCODER_CTL(inst, vargs) (                                      \
+    inst->channels <= 2 ?                                               \
+    opus_encoder_ctl(inst->encoder.encoder, vargs)                      \
+    : opus_multistream_encoder_ctl(inst->encoder.multistream_encoder, vargs))
+
+
 int16_t WebRtcOpus_SetBitRate(OpusEncInst* inst, int32_t rate) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_BITRATE(rate));
+    return ENCODER_CTL(inst, OPUS_SET_BITRATE(rate));
   } else {
     return -1;
   }
@@ -130,8 +203,7 @@
 
 int16_t WebRtcOpus_SetPacketLossRate(OpusEncInst* inst, int32_t loss_rate) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder,
-                            OPUS_SET_PACKET_LOSS_PERC(loss_rate));
+    return ENCODER_CTL(inst, OPUS_SET_PACKET_LOSS_PERC(loss_rate));
   } else {
     return -1;
   }
@@ -154,13 +226,52 @@
   } else {
     set_bandwidth = OPUS_BANDWIDTH_FULLBAND;
   }
-  return opus_encoder_ctl(inst->encoder,
-                          OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
+  return ENCODER_CTL(inst, OPUS_SET_MAX_BANDWIDTH(set_bandwidth));
+}
+
+int16_t WebRtcOpus_GetMaxPlaybackRate(OpusEncInst* const inst,
+                                      int32_t* result_hz) {
+  if (inst->channels <= 2) {  // NOTE(review): 'inst' is not NULL-checked.
+    if (opus_encoder_ctl(
+            inst->encoder.encoder,
+            OPUS_GET_MAX_BANDWIDTH(result_hz)) == OPUS_OK) {
+      return 0;
+    }
+    return -1;
+  }
+
+  opus_int32 max_bandwidth;
+  int s;
+  int ret;
+
+  max_bandwidth = 0;  // 0 doubles as "no stream queried yet".
+  ret = OPUS_OK;
+  s = 0;
+  while (ret == OPUS_OK) {  // Query each stream's encoder: s = 0, 1, ...
+    OpusEncoder *enc;
+    opus_int32 bandwidth;
+
+    ret = ENCODER_CTL(inst, OPUS_MULTISTREAM_GET_ENCODER_STATE(s, &enc));
+    if (ret == OPUS_BAD_ARG)  // Presumably 's' is past the last stream.
+      break;
+    if (ret != OPUS_OK)
+      return -1;
+    if (opus_encoder_ctl(enc, OPUS_GET_MAX_BANDWIDTH(&bandwidth)) != OPUS_OK)
+      return -1;
+
+    if (max_bandwidth != 0 && max_bandwidth != bandwidth)  // Streams differ.
+      return -1;
+
+    max_bandwidth = bandwidth;
+    s++;
+  }
+  *result_hz = max_bandwidth;  // NOTE(review): Hz or OPUS_BANDWIDTH_*? Confirm.
+  return 0;
 }
 
 int16_t WebRtcOpus_EnableFec(OpusEncInst* inst) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(1));
+    return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(1));
   } else {
     return -1;
   }
@@ -168,7 +279,7 @@
 
 int16_t WebRtcOpus_DisableFec(OpusEncInst* inst) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_INBAND_FEC(0));
+    return ENCODER_CTL(inst, OPUS_SET_INBAND_FEC(0));
   } else {
     return -1;
   }
@@ -184,21 +295,21 @@
   // last long during a pure silence, if the signal type is not forced.
   // TODO(minyue): Remove the signal type forcing when Opus DTX works properly
   // without it.
-  int ret = opus_encoder_ctl(inst->encoder,
-                             OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
+  int ret = ENCODER_CTL(inst,
+                        OPUS_SET_SIGNAL(OPUS_SIGNAL_VOICE));
   if (ret != OPUS_OK)
     return ret;
 
-  return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(1));
+  return ENCODER_CTL(inst, OPUS_SET_DTX(1));
 }
 
 int16_t WebRtcOpus_DisableDtx(OpusEncInst* inst) {
   if (inst) {
-    int ret = opus_encoder_ctl(inst->encoder,
-                               OPUS_SET_SIGNAL(OPUS_AUTO));
+    int ret = ENCODER_CTL(inst,
+                          OPUS_SET_SIGNAL(OPUS_AUTO));
     if (ret != OPUS_OK)
       return ret;
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_DTX(0));
+    return ENCODER_CTL(inst, OPUS_SET_DTX(0));
   } else {
     return -1;
   }
@@ -206,7 +317,7 @@
 
 int16_t WebRtcOpus_EnableCbr(OpusEncInst* inst) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(0));
+    return ENCODER_CTL(inst, OPUS_SET_VBR(0));
   } else {
     return -1;
   }
@@ -214,7 +325,7 @@
 
 int16_t WebRtcOpus_DisableCbr(OpusEncInst* inst) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_VBR(1));
+    return ENCODER_CTL(inst, OPUS_SET_VBR(1));
   } else {
     return -1;
   }
@@ -222,7 +333,8 @@
 
 int16_t WebRtcOpus_SetComplexity(OpusEncInst* inst, int32_t complexity) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_COMPLEXITY(complexity));
+    return ENCODER_CTL(inst,
+                       OPUS_SET_COMPLEXITY(complexity));
   } else {
     return -1;
   }
@@ -233,7 +345,8 @@
     return -1;
   }
   int32_t bandwidth;
-  if (opus_encoder_ctl(inst->encoder, OPUS_GET_BANDWIDTH(&bandwidth)) == 0) {
+  if (ENCODER_CTL(inst,
+                  OPUS_GET_BANDWIDTH(&bandwidth)) == 0) {
     return bandwidth;
   } else {
     return -1;
@@ -243,7 +356,8 @@
 
 int16_t WebRtcOpus_SetBandwidth(OpusEncInst* inst, int32_t bandwidth) {
   if (inst) {
-    return opus_encoder_ctl(inst->encoder, OPUS_SET_BANDWIDTH(bandwidth));
+    return ENCODER_CTL(inst,
+                       OPUS_SET_BANDWIDTH(bandwidth));
   } else {
     return -1;
   }
@@ -253,11 +367,11 @@
   if (!inst)
     return -1;
   if (num_channels == 0) {
-    return opus_encoder_ctl(inst->encoder,
+    return ENCODER_CTL(inst,
                             OPUS_SET_FORCE_CHANNELS(OPUS_AUTO));
   } else if (num_channels == 1 || num_channels == 2) {
-    return opus_encoder_ctl(inst->encoder,
-                            OPUS_SET_FORCE_CHANNELS(num_channels));
+    return ENCODER_CTL(inst,
+                       OPUS_SET_FORCE_CHANNELS(num_channels));
   } else {
     return -1;
   }
@@ -268,16 +382,37 @@
   OpusDecInst* state;
 
   if (inst != NULL) {
-    /* Create Opus decoder state. */
+    // Create Opus decoder state.
     state = (OpusDecInst*) calloc(1, sizeof(OpusDecInst));
     if (state == NULL) {
       return -1;
     }
 
-    /* Create new memory, always at 48000 Hz. */
-    state->decoder = opus_decoder_create(48000, (int)channels, &error);
-    if (error == OPUS_OK && state->decoder != NULL) {
-      /* Creation of memory all ok. */
+    if (channels <= 2) {
+      state->decoder.decoder = opus_decoder_create(48000,
+                                                   (int)channels, &error);
+    } else {
+      unsigned char mapping[255];
+      memset(mapping, 0, 255);
+      int streams = -1;
+      int coupled_streams = -1;
+      if (GetSurroundParameters(channels, &streams,
+                                &coupled_streams, mapping) != 0) {
+        free(state);
+        return -1;
+      }
+
+      // Create new memory, always at 48000 Hz.
+      state->decoder.multistream_decoder = opus_multistream_decoder_create(
+          48000, (int)channels,
+          /* streams = */ streams,
+          /* coupled streams = */ coupled_streams,
+          mapping,
+          &error);
+    }
+    if (error == OPUS_OK && (state->decoder.decoder ||
+                             state->decoder.multistream_decoder)) {
+      // Creation of memory all ok.
       state->channels = channels;
       state->prev_decoded_samples = kWebRtcOpusDefaultFrameSize;
       state->in_dtx_mode = 0;
@@ -285,9 +420,12 @@
       return 0;
     }
 
-    /* If memory allocation was unsuccessful, free the entire state. */
-    if (state->decoder) {
-      opus_decoder_destroy(state->decoder);
+    // If memory allocation was unsuccessful, free the entire state.
+    if (state->decoder.decoder) {
+      opus_decoder_destroy(state->decoder.decoder);
+
+    } else if (state->decoder.multistream_decoder) {
+      opus_multistream_decoder_destroy(state->decoder.multistream_decoder);
     }
     free(state);
   }
@@ -296,7 +434,11 @@
 
 int16_t WebRtcOpus_DecoderFree(OpusDecInst* inst) {
   if (inst) {
-    opus_decoder_destroy(inst->decoder);
+    if (inst->channels <= 2) {
+      opus_decoder_destroy(inst->decoder.decoder);
+    } else if (inst->channels > 2) {
+      opus_multistream_decoder_destroy(inst->decoder.multistream_decoder);
+    }
     free(inst);
     return 0;
   } else {
@@ -309,7 +451,12 @@
 }
 
 void WebRtcOpus_DecoderInit(OpusDecInst* inst) {
-  opus_decoder_ctl(inst->decoder, OPUS_RESET_STATE);
+  if (inst->channels <= 2) {
+    opus_decoder_ctl(inst->decoder.decoder, OPUS_RESET_STATE);
+  } else {
+    opus_multistream_decoder_ctl(inst->decoder.multistream_decoder,
+                                 OPUS_RESET_STATE);
+  }
   inst->in_dtx_mode = 0;
 }
 
@@ -324,6 +471,10 @@
     // fact a 1-byte TOC with a 1-byte payload. That will be erroneously
     // interpreted as comfort noise output, but such a payload is probably
     // faulty anyway.
+
+    // TODO(webrtc:10218): This is wrong for multistream opus. Then there are
+    // several single-stream packets glued together with some packet size
+    // bytes in between. See https://tools.ietf.org/html/rfc6716#appendix-B
     inst->in_dtx_mode = 1;
     return 2;  // Comfort noise.
   } else {
@@ -338,8 +489,15 @@
 static int DecodeNative(OpusDecInst* inst, const uint8_t* encoded,
                         size_t encoded_bytes, int frame_size,
                         int16_t* decoded, int16_t* audio_type, int decode_fec) {
-  int res = opus_decode(inst->decoder, encoded, (opus_int32)encoded_bytes,
-                        (opus_int16*)decoded, frame_size, decode_fec);
+  int res = -1;
+  if (inst->channels <= 2) {
+    res = opus_decode(inst->decoder.decoder, encoded, (opus_int32)encoded_bytes,
+                      (opus_int16*)decoded, frame_size, decode_fec);
+  } else {
+    res = opus_multistream_decode(
+        inst->decoder.multistream_decoder, encoded, (opus_int32)encoded_bytes,
+        (opus_int16*)decoded, frame_size, decode_fec);
+  }
 
   if (res <= 0)
     return -1;