blob: c3333cea5beb000a9f52263c4f1f78c992b407d8 [file] [log] [blame]
/*
 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
 * Copyright Takuya OOURA, 1996-2001
 *
 * You may use, copy, modify and distribute this code for any purpose (include
 * commercial use) and without fee. Please refer to this package when you modify
 * this code.
 *
 * Changes by the WebRTC authors:
 *    - Trivial type modifications.
 *    - Minimal code subset to do rdft of length 128.
 *    - Optimizations because of known length.
 *    - Removed the global variables by moving the code into a class in order
 *      to make it thread safe.
 *
 *  All changes are covered by the WebRTC license and IP grant:
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
23
Jiawei Oud3c642b2018-01-09 09:15:37 -080024#include "modules/audio_processing/utility/ooura_fft.h"
ajm@google.comce7c2a22011-08-04 01:50:00 +000025
niklase@google.com470e71d2011-07-07 08:21:25 +000026#include <math.h>
27
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020028#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
Niels Möllera12c42a2018-07-25 16:05:48 +020029#include "rtc_base/system/arch.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020030#include "system_wrappers/include/cpu_features_wrapper.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000031
peah81b92912016-10-06 06:46:20 -070032namespace webrtc {
niklase@google.com470e71d2011-07-07 08:21:25 +000033
peah81b92912016-10-06 06:46:20 -070034namespace {
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +000035
peah81b92912016-10-06 06:46:20 -070036#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
andrew@webrtc.org13b2d462013-10-08 23:41:42 +000037static void cft1st_128_C(float* a) {
niklase@google.com470e71d2011-07-07 08:21:25 +000038 const int n = 128;
39 int j, k1, k2;
40 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
41 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
42
bjornv@webrtc.orgcd9b90a2014-06-30 12:05:18 +000043 // The processing of the first set of elements was simplified in C to avoid
44 // some operations (multiplication by zero or one, addition of two elements
45 // multiplied by the same weight, ...).
niklase@google.com470e71d2011-07-07 08:21:25 +000046 x0r = a[0] + a[2];
47 x0i = a[1] + a[3];
48 x1r = a[0] - a[2];
49 x1i = a[1] - a[3];
50 x2r = a[4] + a[6];
51 x2i = a[5] + a[7];
52 x3r = a[4] - a[6];
53 x3i = a[5] - a[7];
54 a[0] = x0r + x2r;
55 a[1] = x0i + x2i;
56 a[4] = x0r - x2r;
57 a[5] = x0i - x2i;
58 a[2] = x1r - x3i;
59 a[3] = x1i + x3r;
60 a[6] = x1r + x3i;
61 a[7] = x1i - x3r;
62 wk1r = rdft_w[2];
63 x0r = a[8] + a[10];
64 x0i = a[9] + a[11];
65 x1r = a[8] - a[10];
66 x1i = a[9] - a[11];
67 x2r = a[12] + a[14];
68 x2i = a[13] + a[15];
69 x3r = a[12] - a[14];
70 x3i = a[13] - a[15];
71 a[8] = x0r + x2r;
72 a[9] = x0i + x2i;
73 a[12] = x2i - x0i;
74 a[13] = x0r - x2r;
75 x0r = x1r - x3i;
76 x0i = x1i + x3r;
77 a[10] = wk1r * (x0r - x0i);
78 a[11] = wk1r * (x0r + x0i);
79 x0r = x3i + x1r;
80 x0i = x3r - x1i;
81 a[14] = wk1r * (x0i - x0r);
82 a[15] = wk1r * (x0i + x0r);
83 k1 = 0;
84 for (j = 16; j < n; j += 16) {
85 k1 += 2;
86 k2 = 2 * k1;
cduvivier@google.com0e07d822011-07-25 23:54:20 +000087 wk2r = rdft_w[k1 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +000088 wk2i = rdft_w[k1 + 1];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000089 wk1r = rdft_w[k2 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +000090 wk1i = rdft_w[k2 + 1];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000091 wk3r = rdft_wk3ri_first[k1 + 0];
92 wk3i = rdft_wk3ri_first[k1 + 1];
93 x0r = a[j + 0] + a[j + 2];
niklase@google.com470e71d2011-07-07 08:21:25 +000094 x0i = a[j + 1] + a[j + 3];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000095 x1r = a[j + 0] - a[j + 2];
niklase@google.com470e71d2011-07-07 08:21:25 +000096 x1i = a[j + 1] - a[j + 3];
97 x2r = a[j + 4] + a[j + 6];
98 x2i = a[j + 5] + a[j + 7];
99 x3r = a[j + 4] - a[j + 6];
100 x3i = a[j + 5] - a[j + 7];
cduvivier@google.com0e07d822011-07-25 23:54:20 +0000101 a[j + 0] = x0r + x2r;
niklase@google.com470e71d2011-07-07 08:21:25 +0000102 a[j + 1] = x0i + x2i;
103 x0r -= x2r;
104 x0i -= x2i;
105 a[j + 4] = wk2r * x0r - wk2i * x0i;
106 a[j + 5] = wk2r * x0i + wk2i * x0r;
107 x0r = x1r - x3i;
108 x0i = x1i + x3r;
109 a[j + 2] = wk1r * x0r - wk1i * x0i;
110 a[j + 3] = wk1r * x0i + wk1i * x0r;
111 x0r = x1r + x3i;
112 x0i = x1i - x3r;
113 a[j + 6] = wk3r * x0r - wk3i * x0i;
114 a[j + 7] = wk3r * x0i + wk3i * x0r;
115 wk1r = rdft_w[k2 + 2];
116 wk1i = rdft_w[k2 + 3];
cduvivier@google.com0e07d822011-07-25 23:54:20 +0000117 wk3r = rdft_wk3ri_second[k1 + 0];
118 wk3i = rdft_wk3ri_second[k1 + 1];
niklase@google.com470e71d2011-07-07 08:21:25 +0000119 x0r = a[j + 8] + a[j + 10];
120 x0i = a[j + 9] + a[j + 11];
121 x1r = a[j + 8] - a[j + 10];
122 x1i = a[j + 9] - a[j + 11];
123 x2r = a[j + 12] + a[j + 14];
124 x2i = a[j + 13] + a[j + 15];
125 x3r = a[j + 12] - a[j + 14];
126 x3i = a[j + 13] - a[j + 15];
127 a[j + 8] = x0r + x2r;
128 a[j + 9] = x0i + x2i;
129 x0r -= x2r;
130 x0i -= x2i;
131 a[j + 12] = -wk2i * x0r - wk2r * x0i;
132 a[j + 13] = -wk2i * x0i + wk2r * x0r;
133 x0r = x1r - x3i;
134 x0i = x1i + x3r;
135 a[j + 10] = wk1r * x0r - wk1i * x0i;
136 a[j + 11] = wk1r * x0i + wk1i * x0r;
137 x0r = x1r + x3i;
138 x0i = x1i - x3r;
139 a[j + 14] = wk3r * x0r - wk3i * x0i;
140 a[j + 15] = wk3r * x0i + wk3i * x0r;
141 }
142}
143
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000144static void cftmdl_128_C(float* a) {
cduvivier@google.com288c8692011-08-22 21:55:33 +0000145 const int l = 8;
niklase@google.com470e71d2011-07-07 08:21:25 +0000146 const int n = 128;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000147 const int m = 32;
148 int j0, j1, j2, j3, k, k1, k2, m2;
niklase@google.com470e71d2011-07-07 08:21:25 +0000149 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
150 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
151
cduvivier@google.com288c8692011-08-22 21:55:33 +0000152 for (j0 = 0; j0 < l; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000153 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000154 j2 = j0 + 16;
155 j3 = j0 + 24;
156 x0r = a[j0 + 0] + a[j1 + 0];
157 x0i = a[j0 + 1] + a[j1 + 1];
158 x1r = a[j0 + 0] - a[j1 + 0];
159 x1i = a[j0 + 1] - a[j1 + 1];
160 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000161 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000162 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000163 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000164 a[j0 + 0] = x0r + x2r;
165 a[j0 + 1] = x0i + x2i;
166 a[j2 + 0] = x0r - x2r;
niklase@google.com470e71d2011-07-07 08:21:25 +0000167 a[j2 + 1] = x0i - x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000168 a[j1 + 0] = x1r - x3i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000169 a[j1 + 1] = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000170 a[j3 + 0] = x1r + x3i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000171 a[j3 + 1] = x1i - x3r;
172 }
173 wk1r = rdft_w[2];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000174 for (j0 = m; j0 < l + m; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000175 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000176 j2 = j0 + 16;
177 j3 = j0 + 24;
178 x0r = a[j0 + 0] + a[j1 + 0];
179 x0i = a[j0 + 1] + a[j1 + 1];
180 x1r = a[j0 + 0] - a[j1 + 0];
181 x1i = a[j0 + 1] - a[j1 + 1];
182 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000183 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000184 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000185 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000186 a[j0 + 0] = x0r + x2r;
187 a[j0 + 1] = x0i + x2i;
188 a[j2 + 0] = x2i - x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000189 a[j2 + 1] = x0r - x2r;
190 x0r = x1r - x3i;
191 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000192 a[j1 + 0] = wk1r * (x0r - x0i);
niklase@google.com470e71d2011-07-07 08:21:25 +0000193 a[j1 + 1] = wk1r * (x0r + x0i);
194 x0r = x3i + x1r;
195 x0i = x3r - x1i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000196 a[j3 + 0] = wk1r * (x0i - x0r);
niklase@google.com470e71d2011-07-07 08:21:25 +0000197 a[j3 + 1] = wk1r * (x0i + x0r);
198 }
199 k1 = 0;
200 m2 = 2 * m;
201 for (k = m2; k < n; k += m2) {
202 k1 += 2;
203 k2 = 2 * k1;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000204 wk2r = rdft_w[k1 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000205 wk2i = rdft_w[k1 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000206 wk1r = rdft_w[k2 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000207 wk1i = rdft_w[k2 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000208 wk3r = rdft_wk3ri_first[k1 + 0];
209 wk3i = rdft_wk3ri_first[k1 + 1];
210 for (j0 = k; j0 < l + k; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000211 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000212 j2 = j0 + 16;
213 j3 = j0 + 24;
214 x0r = a[j0 + 0] + a[j1 + 0];
215 x0i = a[j0 + 1] + a[j1 + 1];
216 x1r = a[j0 + 0] - a[j1 + 0];
217 x1i = a[j0 + 1] - a[j1 + 1];
218 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000219 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000220 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000221 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000222 a[j0 + 0] = x0r + x2r;
223 a[j0 + 1] = x0i + x2i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000224 x0r -= x2r;
225 x0i -= x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000226 a[j2 + 0] = wk2r * x0r - wk2i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000227 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
228 x0r = x1r - x3i;
229 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000230 a[j1 + 0] = wk1r * x0r - wk1i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000231 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
232 x0r = x1r + x3i;
233 x0i = x1i - x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000234 a[j3 + 0] = wk3r * x0r - wk3i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000235 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
236 }
237 wk1r = rdft_w[k2 + 2];
238 wk1i = rdft_w[k2 + 3];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000239 wk3r = rdft_wk3ri_second[k1 + 0];
240 wk3i = rdft_wk3ri_second[k1 + 1];
241 for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000242 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000243 j2 = j0 + 16;
244 j3 = j0 + 24;
245 x0r = a[j0 + 0] + a[j1 + 0];
246 x0i = a[j0 + 1] + a[j1 + 1];
247 x1r = a[j0 + 0] - a[j1 + 0];
248 x1i = a[j0 + 1] - a[j1 + 1];
249 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000250 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000251 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000252 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000253 a[j0 + 0] = x0r + x2r;
254 a[j0 + 1] = x0i + x2i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000255 x0r -= x2r;
256 x0i -= x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000257 a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000258 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
259 x0r = x1r - x3i;
260 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000261 a[j1 + 0] = wk1r * x0r - wk1i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000262 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
263 x0r = x1r + x3i;
264 x0i = x1i - x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000265 a[j3 + 0] = wk3r * x0r - wk3i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000266 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
267 }
268 }
269}
270
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000271static void rftfsub_128_C(float* a) {
272 const float* c = rdft_w + 32;
niklase@google.com470e71d2011-07-07 08:21:25 +0000273 int j1, j2, k1, k2;
274 float wkr, wki, xr, xi, yr, yi;
275
276 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
277 k2 = 128 - j2;
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000278 k1 = 32 - j1;
niklase@google.com470e71d2011-07-07 08:21:25 +0000279 wkr = 0.5f - c[k1];
280 wki = c[j1];
281 xr = a[j2 + 0] - a[k2 + 0];
282 xi = a[j2 + 1] + a[k2 + 1];
283 yr = wkr * xr - wki * xi;
284 yi = wkr * xi + wki * xr;
285 a[j2 + 0] -= yr;
286 a[j2 + 1] -= yi;
287 a[k2 + 0] += yr;
288 a[k2 + 1] -= yi;
289 }
290}
291
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000292static void rftbsub_128_C(float* a) {
293 const float* c = rdft_w + 32;
niklase@google.com470e71d2011-07-07 08:21:25 +0000294 int j1, j2, k1, k2;
295 float wkr, wki, xr, xi, yr, yi;
296
297 a[1] = -a[1];
298 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
299 k2 = 128 - j2;
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000300 k1 = 32 - j1;
niklase@google.com470e71d2011-07-07 08:21:25 +0000301 wkr = 0.5f - c[k1];
302 wki = c[j1];
303 xr = a[j2 + 0] - a[k2 + 0];
304 xi = a[j2 + 1] + a[k2 + 1];
305 yr = wkr * xr + wki * xi;
306 yi = wkr * xi - wki * xr;
307 a[j2 + 0] = a[j2 + 0] - yr;
308 a[j2 + 1] = yi - a[j2 + 1];
309 a[k2 + 0] = yr + a[k2 + 0];
310 a[k2 + 1] = yi - a[k2 + 1];
311 }
312 a[65] = -a[65];
313}
peah81b92912016-10-06 06:46:20 -0700314#endif
niklase@google.com470e71d2011-07-07 08:21:25 +0000315
peah81b92912016-10-06 06:46:20 -0700316} // namespace
317
318OouraFft::OouraFft() {
319#if defined(WEBRTC_ARCH_X86_FAMILY)
320 use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0);
321#else
322 use_sse2_ = false;
323#endif
324}
325
326OouraFft::~OouraFft() = default;
327
328void OouraFft::Fft(float* a) const {
niklase@google.com470e71d2011-07-07 08:21:25 +0000329 float xi;
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +0000330 bitrv2_128(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000331 cftfsub_128(a);
332 rftfsub_128(a);
333 xi = a[0] - a[1];
334 a[0] += a[1];
335 a[1] = xi;
336}
peah81b92912016-10-06 06:46:20 -0700337void OouraFft::InverseFft(float* a) const {
niklase@google.com470e71d2011-07-07 08:21:25 +0000338 a[1] = 0.5f * (a[0] - a[1]);
339 a[0] -= a[1];
340 rftbsub_128(a);
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +0000341 bitrv2_128(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000342 cftbsub_128(a);
343}
344
peah81b92912016-10-06 06:46:20 -0700345void OouraFft::cft1st_128(float* a) const {
346#if defined(MIPS_FPU_LE)
347 cft1st_128_mips(a);
348#elif defined(WEBRTC_HAS_NEON)
349 cft1st_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700350#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700351 if (use_sse2_) {
352 cft1st_128_SSE2(a);
353 } else {
354 cft1st_128_C(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000355 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700356#else
357 cft1st_128_C(a);
andrew@webrtc.orgc8d012f2012-01-13 19:43:09 +0000358#endif
peah81b92912016-10-06 06:46:20 -0700359}
360void OouraFft::cftmdl_128(float* a) const {
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +0000361#if defined(MIPS_FPU_LE)
peah81b92912016-10-06 06:46:20 -0700362 cftmdl_128_mips(a);
363#elif defined(WEBRTC_HAS_NEON)
364 cftmdl_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700365#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700366 if (use_sse2_) {
367 cftmdl_128_SSE2(a);
368 } else {
369 cftmdl_128_C(a);
370 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700371#else
372 cftmdl_128_C(a);
bjornv@webrtc.orgcd9b90a2014-06-30 12:05:18 +0000373#endif
niklase@google.com470e71d2011-07-07 08:21:25 +0000374}
peah81b92912016-10-06 06:46:20 -0700375void OouraFft::rftfsub_128(float* a) const {
376#if defined(MIPS_FPU_LE)
377 rftfsub_128_mips(a);
378#elif defined(WEBRTC_HAS_NEON)
379 rftfsub_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700380#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700381 if (use_sse2_) {
382 rftfsub_128_SSE2(a);
383 } else {
384 rftfsub_128_C(a);
385 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700386#else
387 rftfsub_128_C(a);
peah81b92912016-10-06 06:46:20 -0700388#endif
389}
390
391void OouraFft::rftbsub_128(float* a) const {
392#if defined(MIPS_FPU_LE)
393 rftbsub_128_mips(a);
394#elif defined(WEBRTC_HAS_NEON)
395 rftbsub_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700396#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700397 if (use_sse2_) {
398 rftbsub_128_SSE2(a);
399 } else {
400 rftbsub_128_C(a);
401 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700402#else
403 rftbsub_128_C(a);
peah81b92912016-10-06 06:46:20 -0700404#endif
405}
406
407void OouraFft::cftbsub_128(float* a) const {
408 int j, j1, j2, j3, l;
409 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
410
411 cft1st_128(a);
412 cftmdl_128(a);
413 l = 32;
414
415 for (j = 0; j < l; j += 2) {
416 j1 = j + l;
417 j2 = j1 + l;
418 j3 = j2 + l;
419 x0r = a[j] + a[j1];
420 x0i = -a[j + 1] - a[j1 + 1];
421 x1r = a[j] - a[j1];
422 x1i = -a[j + 1] + a[j1 + 1];
423 x2r = a[j2] + a[j3];
424 x2i = a[j2 + 1] + a[j3 + 1];
425 x3r = a[j2] - a[j3];
426 x3i = a[j2 + 1] - a[j3 + 1];
427 a[j] = x0r + x2r;
428 a[j + 1] = x0i - x2i;
429 a[j2] = x0r - x2r;
430 a[j2 + 1] = x0i + x2i;
431 a[j1] = x1r - x3i;
432 a[j1 + 1] = x1i - x3r;
433 a[j3] = x1r + x3i;
434 a[j3 + 1] = x1i + x3r;
435 }
436}
437
438void OouraFft::cftfsub_128(float* a) const {
439 int j, j1, j2, j3, l;
440 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
441
442 cft1st_128(a);
443 cftmdl_128(a);
444 l = 32;
445 for (j = 0; j < l; j += 2) {
446 j1 = j + l;
447 j2 = j1 + l;
448 j3 = j2 + l;
449 x0r = a[j] + a[j1];
450 x0i = a[j + 1] + a[j1 + 1];
451 x1r = a[j] - a[j1];
452 x1i = a[j + 1] - a[j1 + 1];
453 x2r = a[j2] + a[j3];
454 x2i = a[j2 + 1] + a[j3 + 1];
455 x3r = a[j2] - a[j3];
456 x3i = a[j2 + 1] - a[j3 + 1];
457 a[j] = x0r + x2r;
458 a[j + 1] = x0i + x2i;
459 a[j2] = x0r - x2r;
460 a[j2 + 1] = x0i - x2i;
461 a[j1] = x1r - x3i;
462 a[j1 + 1] = x1i + x3r;
463 a[j3] = x1r + x3i;
464 a[j3 + 1] = x1i - x3r;
465 }
466}
467
468void OouraFft::bitrv2_128(float* a) const {
469 /*
470 Following things have been attempted but are no faster:
471 (a) Storing the swap indexes in a LUT (index calculations are done
472 for 'free' while waiting on memory/L1).
473 (b) Consolidate the load/store of two consecutive floats by a 64 bit
474 integer (execution is memory/L1 bound).
475 (c) Do a mix of floats and 64 bit integer to maximize register
476 utilization (execution is memory/L1 bound).
477 (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
478 (e) Hard-coding of the offsets to completely eliminates index
479 calculations.
480 */
481
482 unsigned int j, j1, k, k1;
483 float xr, xi, yr, yi;
484
485 const int ip[4] = {0, 64, 32, 96};
486 for (k = 0; k < 4; k++) {
487 for (j = 0; j < k; j++) {
488 j1 = 2 * j + ip[k];
489 k1 = 2 * k + ip[j];
490 xr = a[j1 + 0];
491 xi = a[j1 + 1];
492 yr = a[k1 + 0];
493 yi = a[k1 + 1];
494 a[j1 + 0] = yr;
495 a[j1 + 1] = yi;
496 a[k1 + 0] = xr;
497 a[k1 + 1] = xi;
498 j1 += 8;
499 k1 += 16;
500 xr = a[j1 + 0];
501 xi = a[j1 + 1];
502 yr = a[k1 + 0];
503 yi = a[k1 + 1];
504 a[j1 + 0] = yr;
505 a[j1 + 1] = yi;
506 a[k1 + 0] = xr;
507 a[k1 + 1] = xi;
508 j1 += 8;
509 k1 -= 8;
510 xr = a[j1 + 0];
511 xi = a[j1 + 1];
512 yr = a[k1 + 0];
513 yi = a[k1 + 1];
514 a[j1 + 0] = yr;
515 a[j1 + 1] = yi;
516 a[k1 + 0] = xr;
517 a[k1 + 1] = xi;
518 j1 += 8;
519 k1 += 16;
520 xr = a[j1 + 0];
521 xi = a[j1 + 1];
522 yr = a[k1 + 0];
523 yi = a[k1 + 1];
524 a[j1 + 0] = yr;
525 a[j1 + 1] = yi;
526 a[k1 + 0] = xr;
527 a[k1 + 1] = xi;
528 }
529 j1 = 2 * k + 8 + ip[k];
530 k1 = j1 + 8;
531 xr = a[j1 + 0];
532 xi = a[j1 + 1];
533 yr = a[k1 + 0];
534 yi = a[k1 + 1];
535 a[j1 + 0] = yr;
536 a[j1 + 1] = yi;
537 a[k1 + 0] = xr;
538 a[k1 + 1] = xi;
539 }
540}
541
542} // namespace webrtc