Formalized Real 16-bit FFT for APM.
It also prepares for introducing Real 16-bit FFT Neon code from Openmax to SPL. CL https://webrtc-codereview.appspot.com/1819004/ takes care of that, but this CL is a prerequisite of that one.
Tested audioproc with an offline file. Bit exact.

R=andrew@webrtc.org, rtoy@google.com

Review URL: https://webrtc-codereview.appspot.com/1830004

git-svn-id: http://webrtc.googlecode.com/svn/trunk@4390 4adac7df-926f-26a2-2b94-8c16560cd09d
diff --git a/webrtc/common_audio/signal_processing/include/real_fft.h b/webrtc/common_audio/signal_processing/include/real_fft.h
index 8d6280c..579a305 100644
--- a/webrtc/common_audio/signal_processing/include/real_fft.h
+++ b/webrtc/common_audio/signal_processing/include/real_fft.h
@@ -13,70 +13,112 @@
 
 #include "webrtc/typedefs.h"
 
+// For ComplexFFT(), the maximum fft order is 10;
+// for OpenMax FFT in ARM, it is 12;
+// WebRTC APM uses orders of only 7 and 8.
+enum {kMaxFFTOrder = 10};
+
 struct RealFFT;
 
 #ifdef __cplusplus
 extern "C" {
 #endif
 
+typedef struct RealFFT* (*CreateRealFFT)(int order);
+typedef void (*FreeRealFFT)(struct RealFFT* self);
 typedef int (*RealForwardFFT)(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out);
 typedef int (*RealInverseFFT)(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out);
 
+extern CreateRealFFT WebRtcSpl_CreateRealFFT;
+extern FreeRealFFT WebRtcSpl_FreeRealFFT;
 extern RealForwardFFT WebRtcSpl_RealForwardFFT;
 extern RealInverseFFT WebRtcSpl_RealInverseFFT;
 
-struct RealFFT* WebRtcSpl_CreateRealFFT(int order);
-void WebRtcSpl_FreeRealFFT(struct RealFFT* self);
+struct RealFFT* WebRtcSpl_CreateRealFFTC(int order);
+void WebRtcSpl_FreeRealFFTC(struct RealFFT* self);
 
-// TODO(kma): Implement FFT functions for real signals.
+#if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
+struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order);
+void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self);
+#endif
 
-// Compute the forward FFT for a complex signal of length 2^order.
+// Compute an FFT for a real-valued signal of length of 2^order,
+// where 1 < order <= MAX_FFT_ORDER. Transform length is determined by the
+// specification structure, which must be initialized prior to calling the FFT
+// function with WebRtcSpl_CreateRealFFT().
+// The relationship between the input and output sequences can
+// be expressed in terms of the DFT, i.e.:
+//     x[n] = (2^(-scalefactor)/N)  . SUM[k=0,...,N-1] X[k].e^(jnk.2.pi/N)
+//     n=0,1,2,...N-1
+//     N=2^order.
+// The conjugate-symmetric output sequence is represented using a CCS vector,
+// which is of length N+2, and is organized as follows:
+//     Index:      0  1  2  3  4  5   . . .   N-2       N-1       N       N+1
+//     Component:  R0 0  R1 I1 R2 I2  . . .   R[N/2-1]  I[N/2-1]  R[N/2]  0
+// where R[n] and I[n], respectively, denote the real and imaginary components
+// for FFT bin 'n'. Bins  are numbered from 0 to N/2, where N is the FFT length.
+// Bin index 0 corresponds to the DC component, and bin index N/2 corresponds to
+// the foldover frequency.
+//
 // Input Arguments:
 //   self - pointer to preallocated and initialized FFT specification structure.
-//   data_in - the input signal.
+//   real_data_in - the input signal. For an ARM Neon platform, it must be
+//                  aligned on a 32-byte boundary.
 //
 // Output Arguments:
-//   data_out - the output signal; must be different to data_in.
+//   complex_data_out - the output complex signal with (2^order + 2) 16-bit
+//                      elements. For an ARM Neon platform, it must be different
+//                      from real_data_in, and aligned on a 32-byte boundary.
 //
 // Return Value:
 //   0  - FFT calculation is successful.
-//   -1 - Error
-//
+//   -1 - Error with bad arguments (NULL pointers).
 int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out);
 
 #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
 int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out);
+                                 const int16_t* real_data_in,
+                                 int16_t* complex_data_out);
 #endif
 
-// Compute the inverse FFT for a complex signal of length 2^order.
+// Compute the inverse FFT for a conjugate-symmetric input sequence of length of
+// 2^order, where 1 < order <= MAX_FFT_ORDER. Transform length is determined by
+// the specification structure, which must be initialized prior to calling the
+// FFT function with WebRtcSpl_CreateRealFFT().
+// For a transform of length M, the input sequence is represented using a packed
+// CCS vector of length M+2, which is explained in the comments for
+// WebRtcSpl_RealForwardFFTC above.
+//
 // Input Arguments:
 //   self - pointer to preallocated and initialized FFT specification structure.
-//   data_in - the input signal.
+//   complex_data_in - the input complex signal with (2^order + 2) 16-bit
+//                     elements. For an ARM Neon platform, it must be aligned on
+//                     a 32-byte boundary.
 //
 // Output Arguments:
-//   data_out - the output signal; must be different to data_in.
+//   real_data_out - the output real signal. For an ARM Neon platform, it must
+//                   be different to complex_data_in, and aligned on a 32-byte
+//                   boundary.
 //
 // Return Value:
-//   0 or a positive number - a value that the elements in the |data_out| should
-//                            be shifted left with in order to get correct
-//                            physical values.
-//   -1                     - Error
+//   0 or a positive number - a value that the elements in the |real_data_out|
+//                            should be shifted left with in order to get
+//                            correct physical values.
+//   -1 - Error with bad arguments (NULL pointers).
 int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out);
 
 #if (defined WEBRTC_DETECT_ARM_NEON) || (defined WEBRTC_ARCH_ARM_NEON)
 int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out);
+                                 const int16_t* complex_data_in,
+                                 int16_t* real_data_out);
 #endif
 
 #ifdef __cplusplus
diff --git a/webrtc/common_audio/signal_processing/real_fft.c b/webrtc/common_audio/signal_processing/real_fft.c
index bd54432..fc5be9a 100644
--- a/webrtc/common_audio/signal_processing/real_fft.c
+++ b/webrtc/common_audio/signal_processing/real_fft.c
@@ -18,55 +18,109 @@
   int order;
 };
 
-struct RealFFT* WebRtcSpl_CreateRealFFT(int order) {
+struct RealFFT* WebRtcSpl_CreateRealFFTC(int order) {
   struct RealFFT* self = NULL;
 
-  // This constraint comes from ComplexFFT().
-  if (order > 10 || order < 0) {
+  if (order > kMaxFFTOrder || order < 0) {
     return NULL;
   }
 
   self = malloc(sizeof(struct RealFFT));
+  if (self == NULL) {
+    return NULL;
+  }
   self->order = order;
 
   return self;
 }
 
-void WebRtcSpl_FreeRealFFT(struct RealFFT* self) {
-  free(self);
+void WebRtcSpl_FreeRealFFTC(struct RealFFT* self) {
+  if (self != NULL) {
+    free(self);
+  }
 }
 
-// WebRtcSpl_ComplexFFT and WebRtcSpl_ComplexIFFT use in-place algorithm,
-// so copy data from data_in to data_out in the next two functions.
+// The C version FFT functions (i.e. WebRtcSpl_RealForwardFFTC and
+// WebRtcSpl_RealInverseFFTC) are real-valued FFT wrappers for complex-valued
+// FFT implementation in SPL.
 
 int WebRtcSpl_RealForwardFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out) {
-  memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
-  WebRtcSpl_ComplexBitReverse(data_out, self->order);
-  return WebRtcSpl_ComplexFFT(data_out, self->order, 1);
+                              const int16_t* real_data_in,
+                              int16_t* complex_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // The complex-value FFT implementation needs a buffer to hold 2^order
+  // 16-bit COMPLEX numbers, for both time and frequency data.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // Insert zeros to the imaginary parts for complex forward FFT input.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    complex_buffer[j] = real_data_in[i];
+    complex_buffer[j + 1] = 0;
+  };
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexFFT(complex_buffer, self->order, 1);
+
+  // For real FFT output, use only the first N + 2 elements from
+  // complex forward FFT.
+  memcpy(complex_data_out, complex_buffer, sizeof(int16_t) * (n + 2));
+
+  return result;
 }
 
 int WebRtcSpl_RealInverseFFTC(struct RealFFT* self,
-                              const int16_t* data_in,
-                              int16_t* data_out) {
-  memcpy(data_out, data_in, sizeof(int16_t) * (1 << (self->order + 1)));
-  WebRtcSpl_ComplexBitReverse(data_out, self->order);
-  return WebRtcSpl_ComplexIFFT(data_out, self->order, 1);
+                              const int16_t* complex_data_in,
+                              int16_t* real_data_out) {
+  int i = 0;
+  int j = 0;
+  int result = 0;
+  int n = 1 << self->order;
+  // Create the buffer specific to complex-valued FFT implementation.
+  int16_t complex_buffer[2 << kMaxFFTOrder];
+
+  // For n-point FFT, first copy the first n + 2 elements into complex
+  // FFT, then construct the remaining n - 2 elements by real FFT's
+  // conjugate-symmetric properties.
+  memcpy(complex_buffer, complex_data_in, sizeof(int16_t) * (n + 2));
+  for (i = n + 2; i < 2 * n; i += 2) {
+    complex_buffer[i] = complex_data_in[2 * n - i];
+    complex_buffer[i + 1] = -complex_data_in[2 * n - i + 1];
+  }
+
+  WebRtcSpl_ComplexBitReverse(complex_buffer, self->order);
+  result = WebRtcSpl_ComplexIFFT(complex_buffer, self->order, 1);
+
+  // Strip out the imaginary parts of the complex inverse FFT output.
+  for (i = 0, j = 0; i < n; i += 1, j += 2) {
+    real_data_out[i] = complex_buffer[j];
+  }
+
+  return result;
 }
 
 #if defined(WEBRTC_DETECT_ARM_NEON) || defined(WEBRTC_ARCH_ARM_NEON)
 // TODO(kma): Replace the following function bodies into optimized functions
 // for ARM Neon.
+struct RealFFT* WebRtcSpl_CreateRealFFTNeon(int order) {
+  return WebRtcSpl_CreateRealFFTC(order);
+}
+
+void WebRtcSpl_FreeRealFFTNeon(struct RealFFT* self) {
+  WebRtcSpl_FreeRealFFTC(self);
+}
+
 int WebRtcSpl_RealForwardFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out) {
-  return WebRtcSpl_RealForwardFFTC(self, data_in, data_out);
+                                 const int16_t* real_data_in,
+                                 int16_t* complex_data_out) {
+  return WebRtcSpl_RealForwardFFTC(self, real_data_in, complex_data_out);
 }
 
 int WebRtcSpl_RealInverseFFTNeon(struct RealFFT* self,
-                                 const int16_t* data_in,
-                                 int16_t* data_out) {
-  return WebRtcSpl_RealInverseFFTC(self, data_in, data_out);
+                                 const int16_t* complex_data_in,
+                                 int16_t* real_data_out) {
+  return WebRtcSpl_RealInverseFFTC(self, complex_data_in, real_data_out);
 }
-#endif
+#endif  // WEBRTC_DETECT_ARM_NEON || WEBRTC_ARCH_ARM_NEON
diff --git a/webrtc/common_audio/signal_processing/real_fft_unittest.cc b/webrtc/common_audio/signal_processing/real_fft_unittest.cc
index 5dc1c89..fa98836 100644
--- a/webrtc/common_audio/signal_processing/real_fft_unittest.cc
+++ b/webrtc/common_audio/signal_processing/real_fft_unittest.cc
@@ -17,9 +17,17 @@
 namespace webrtc {
 namespace {
 
-const int kOrder = 4;
-const int kLength = 1 << (kOrder + 1);  // +1 to hold complex data.
-const int16_t kRefData[kLength] = {
+// FFT order.
+const int kOrder = 5;
+// Lengths for real FFT's time and frequency bufffers.
+// For N-point FFT, the length requirements from API are N and N+2 respectively.
+const int kTimeDataLength = 1 << kOrder;
+const int kFreqDataLength = (1 << kOrder) + 2;
+// For complex FFT's time and freq buffer. The implementation requires
+// 2*N 16-bit words.
+const int kComplexFftDataLength = 2 << kOrder;
+// Reference data for time signal.
+const int16_t kRefData[kTimeDataLength] = {
   11739, 6848, -8688, 31980, -30295, 25242, 27085, 19410,
   -26299, 15607, -10791, 11778, -23819, 14498, -25772, 10076,
   1173, 6848, -8688, 31980, -30295, 2522, 27085, 19410,
@@ -40,36 +48,58 @@
   EXPECT_TRUE(fft == NULL);
 }
 
-// TODO(andrew): This won't always be the case, but verifies the current code
-// at least.
-TEST_F(RealFFTTest, RealAndComplexAreIdentical) {
-  int16_t real_data[kLength] = {0};
-  int16_t real_data_out[kLength] = {0};
-  int16_t complex_data[kLength] = {0};
-  memcpy(real_data, kRefData, sizeof(kRefData));
-  memcpy(complex_data, kRefData, sizeof(kRefData));
+TEST_F(RealFFTTest, RealAndComplexMatch) {
+  int i = 0;
+  int j = 0;
+  int16_t real_fft_time[kTimeDataLength] = {0};
+  int16_t real_fft_freq[kFreqDataLength] = {0};
+  // One common buffer for complex FFT's time and frequency data.
+  int16_t complex_fft_buff[kComplexFftDataLength] = {0};
 
+  // Prepare the inputs to forward FFT's.
+  memcpy(real_fft_time, kRefData, sizeof(kRefData));
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    complex_fft_buff[j] = kRefData[i];
+    complex_fft_buff[j + 1] = 0;  // Insert zero's to imaginary parts.
+  };
+
+  // Create and run real forward FFT.
   RealFFT* fft = WebRtcSpl_CreateRealFFT(kOrder);
   EXPECT_TRUE(fft != NULL);
+  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_fft_time, real_fft_freq));
 
-  EXPECT_EQ(0, WebRtcSpl_RealForwardFFT(fft, real_data, real_data_out));
-  WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
-  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_data, kOrder, 1));
+  // Run complex forward FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  EXPECT_EQ(0, WebRtcSpl_ComplexFFT(complex_fft_buff, kOrder, 1));
 
-  for (int i = 0; i < kLength; i++) {
-    EXPECT_EQ(real_data_out[i], complex_data[i]);
+  // Verify the results between complex and real forward FFT.
+  for (i = 0; i < kFreqDataLength; i++) {
+    EXPECT_EQ(real_fft_freq[i], complex_fft_buff[i]);
   }
 
-  memcpy(complex_data, kRefData, sizeof(kRefData));
+  // Prepare the inputs to inverse real FFT.
+  // We use whatever data in complex_fft_buff[] since we don't care
+  // about data contents. Only kFreqDataLength 16-bit words are copied
+  // from complex_fft_buff to real_fft_freq since remaining words (2nd half)
+  // are conjugate-symmetric to the first half in theory.
+  memcpy(real_fft_freq, complex_fft_buff, sizeof(real_fft_freq));
 
-  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_data, real_data_out);
+  // Run real inverse FFT.
+  int real_scale = WebRtcSpl_RealInverseFFT(fft, real_fft_freq, real_fft_time);
   EXPECT_GE(real_scale, 0);
-  WebRtcSpl_ComplexBitReverse(complex_data, kOrder);
-  int complex_scale = WebRtcSpl_ComplexIFFT(complex_data, kOrder, 1);
+
+  // Run complex inverse FFT.
+  WebRtcSpl_ComplexBitReverse(complex_fft_buff, kOrder);
+  int complex_scale = WebRtcSpl_ComplexIFFT(complex_fft_buff, kOrder, 1);
+
+  // Verify the results between complex and real inverse FFT.
+  // They are not bit-exact, since complex IFFT doesn't produce
+  // exactly conjugate-symmetric data (between first and second half).
   EXPECT_EQ(real_scale, complex_scale);
-  for (int i = 0; i < kLength; i++) {
-    EXPECT_EQ(real_data_out[i], complex_data[i]);
+  for (i = 0, j = 0; i < kTimeDataLength; i += 1, j += 2) {
+    EXPECT_LE(abs(real_fft_time[i] - complex_fft_buff[j]), 1);
   }
+
   WebRtcSpl_FreeRealFFT(fft);
 }
 
diff --git a/webrtc/common_audio/signal_processing/spl_init.c b/webrtc/common_audio/signal_processing/spl_init.c
index 1645f63..4387cc8 100644
--- a/webrtc/common_audio/signal_processing/spl_init.c
+++ b/webrtc/common_audio/signal_processing/spl_init.c
@@ -28,6 +28,8 @@
 CrossCorrelation WebRtcSpl_CrossCorrelation;
 DownsampleFast WebRtcSpl_DownsampleFast;
 ScaleAndAddVectorsWithRound WebRtcSpl_ScaleAndAddVectorsWithRound;
+CreateRealFFT WebRtcSpl_CreateRealFFT;
+FreeRealFFT WebRtcSpl_FreeRealFFT;
 RealForwardFFT WebRtcSpl_RealForwardFFT;
 RealInverseFFT WebRtcSpl_RealInverseFFT;
 
@@ -45,6 +47,8 @@
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastC;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
 }
@@ -63,6 +67,8 @@
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFastNeon;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundNeon;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTNeon;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTNeon;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTNeon;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTNeon;
 }
@@ -80,6 +86,8 @@
   WebRtcSpl_DownsampleFast = WebRtcSpl_DownsampleFast_mips;
   WebRtcSpl_ScaleAndAddVectorsWithRound =
       WebRtcSpl_ScaleAndAddVectorsWithRoundC;
+  WebRtcSpl_CreateRealFFT = WebRtcSpl_CreateRealFFTC;
+  WebRtcSpl_FreeRealFFT = WebRtcSpl_FreeRealFFTC;
   WebRtcSpl_RealForwardFFT = WebRtcSpl_RealForwardFFTC;
   WebRtcSpl_RealInverseFFT = WebRtcSpl_RealInverseFFTC;
 #if defined(MIPS_DSP_R1_LE)