blob: 8628bd39f0aecc2191c3d4a252112d4d3422f4a4 [file] [log] [blame]
niklase@google.com470e71d2011-07-07 08:21:25 +00001/*
2 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
3 * Copyright Takuya OOURA, 1996-2001
4 *
5 * You may use, copy, modify and distribute this code for any purpose (include
6 * commercial use) and without fee. Please refer to this package when you modify
7 * this code.
8 *
9 * Changes by the WebRTC authors:
10 * - Trivial type modifications.
11 * - Minimal code subset to do rdft of length 128.
12 * - Optimizations because of known length.
peah81b92912016-10-06 06:46:20 -070013 * - Removed the global variables by moving the code into a class in order
14 * to make it thread-safe.
niklase@google.com470e71d2011-07-07 08:21:25 +000015 *
16 * All changes are covered by the WebRTC license and IP grant:
17 * Use of this source code is governed by a BSD-style license
18 * that can be found in the LICENSE file in the root of the source
19 * tree. An additional intellectual property rights grant can be found
20 * in the file PATENTS. All contributing project authors may
21 * be found in the AUTHORS file in the root of the source tree.
22 */
23
Jiawei Oud3c642b2018-01-09 09:15:37 -080024#include "modules/audio_processing/utility/ooura_fft.h"
ajm@google.comce7c2a22011-08-04 01:50:00 +000025
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020026#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
Niels Möllera12c42a2018-07-25 16:05:48 +020027#include "rtc_base/system/arch.h"
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020028#include "system_wrappers/include/cpu_features_wrapper.h"
niklase@google.com470e71d2011-07-07 08:21:25 +000029
peah81b92912016-10-06 06:46:20 -070030namespace webrtc {
niklase@google.com470e71d2011-07-07 08:21:25 +000031
peah81b92912016-10-06 06:46:20 -070032namespace {
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +000033
peah81b92912016-10-06 06:46:20 -070034#if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON))
andrew@webrtc.org13b2d462013-10-08 23:41:42 +000035static void cft1st_128_C(float* a) {
niklase@google.com470e71d2011-07-07 08:21:25 +000036 const int n = 128;
37 int j, k1, k2;
38 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
39 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
40
bjornv@webrtc.orgcd9b90a2014-06-30 12:05:18 +000041 // The processing of the first set of elements was simplified in C to avoid
42 // some operations (multiplication by zero or one, addition of two elements
43 // multiplied by the same weight, ...).
niklase@google.com470e71d2011-07-07 08:21:25 +000044 x0r = a[0] + a[2];
45 x0i = a[1] + a[3];
46 x1r = a[0] - a[2];
47 x1i = a[1] - a[3];
48 x2r = a[4] + a[6];
49 x2i = a[5] + a[7];
50 x3r = a[4] - a[6];
51 x3i = a[5] - a[7];
52 a[0] = x0r + x2r;
53 a[1] = x0i + x2i;
54 a[4] = x0r - x2r;
55 a[5] = x0i - x2i;
56 a[2] = x1r - x3i;
57 a[3] = x1i + x3r;
58 a[6] = x1r + x3i;
59 a[7] = x1i - x3r;
60 wk1r = rdft_w[2];
61 x0r = a[8] + a[10];
62 x0i = a[9] + a[11];
63 x1r = a[8] - a[10];
64 x1i = a[9] - a[11];
65 x2r = a[12] + a[14];
66 x2i = a[13] + a[15];
67 x3r = a[12] - a[14];
68 x3i = a[13] - a[15];
69 a[8] = x0r + x2r;
70 a[9] = x0i + x2i;
71 a[12] = x2i - x0i;
72 a[13] = x0r - x2r;
73 x0r = x1r - x3i;
74 x0i = x1i + x3r;
75 a[10] = wk1r * (x0r - x0i);
76 a[11] = wk1r * (x0r + x0i);
77 x0r = x3i + x1r;
78 x0i = x3r - x1i;
79 a[14] = wk1r * (x0i - x0r);
80 a[15] = wk1r * (x0i + x0r);
81 k1 = 0;
82 for (j = 16; j < n; j += 16) {
83 k1 += 2;
84 k2 = 2 * k1;
cduvivier@google.com0e07d822011-07-25 23:54:20 +000085 wk2r = rdft_w[k1 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +000086 wk2i = rdft_w[k1 + 1];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000087 wk1r = rdft_w[k2 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +000088 wk1i = rdft_w[k2 + 1];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000089 wk3r = rdft_wk3ri_first[k1 + 0];
90 wk3i = rdft_wk3ri_first[k1 + 1];
91 x0r = a[j + 0] + a[j + 2];
niklase@google.com470e71d2011-07-07 08:21:25 +000092 x0i = a[j + 1] + a[j + 3];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000093 x1r = a[j + 0] - a[j + 2];
niklase@google.com470e71d2011-07-07 08:21:25 +000094 x1i = a[j + 1] - a[j + 3];
95 x2r = a[j + 4] + a[j + 6];
96 x2i = a[j + 5] + a[j + 7];
97 x3r = a[j + 4] - a[j + 6];
98 x3i = a[j + 5] - a[j + 7];
cduvivier@google.com0e07d822011-07-25 23:54:20 +000099 a[j + 0] = x0r + x2r;
niklase@google.com470e71d2011-07-07 08:21:25 +0000100 a[j + 1] = x0i + x2i;
101 x0r -= x2r;
102 x0i -= x2i;
103 a[j + 4] = wk2r * x0r - wk2i * x0i;
104 a[j + 5] = wk2r * x0i + wk2i * x0r;
105 x0r = x1r - x3i;
106 x0i = x1i + x3r;
107 a[j + 2] = wk1r * x0r - wk1i * x0i;
108 a[j + 3] = wk1r * x0i + wk1i * x0r;
109 x0r = x1r + x3i;
110 x0i = x1i - x3r;
111 a[j + 6] = wk3r * x0r - wk3i * x0i;
112 a[j + 7] = wk3r * x0i + wk3i * x0r;
113 wk1r = rdft_w[k2 + 2];
114 wk1i = rdft_w[k2 + 3];
cduvivier@google.com0e07d822011-07-25 23:54:20 +0000115 wk3r = rdft_wk3ri_second[k1 + 0];
116 wk3i = rdft_wk3ri_second[k1 + 1];
niklase@google.com470e71d2011-07-07 08:21:25 +0000117 x0r = a[j + 8] + a[j + 10];
118 x0i = a[j + 9] + a[j + 11];
119 x1r = a[j + 8] - a[j + 10];
120 x1i = a[j + 9] - a[j + 11];
121 x2r = a[j + 12] + a[j + 14];
122 x2i = a[j + 13] + a[j + 15];
123 x3r = a[j + 12] - a[j + 14];
124 x3i = a[j + 13] - a[j + 15];
125 a[j + 8] = x0r + x2r;
126 a[j + 9] = x0i + x2i;
127 x0r -= x2r;
128 x0i -= x2i;
129 a[j + 12] = -wk2i * x0r - wk2r * x0i;
130 a[j + 13] = -wk2i * x0i + wk2r * x0r;
131 x0r = x1r - x3i;
132 x0i = x1i + x3r;
133 a[j + 10] = wk1r * x0r - wk1i * x0i;
134 a[j + 11] = wk1r * x0i + wk1i * x0r;
135 x0r = x1r + x3i;
136 x0i = x1i - x3r;
137 a[j + 14] = wk3r * x0r - wk3i * x0i;
138 a[j + 15] = wk3r * x0i + wk3i * x0r;
139 }
140}
141
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000142static void cftmdl_128_C(float* a) {
cduvivier@google.com288c8692011-08-22 21:55:33 +0000143 const int l = 8;
niklase@google.com470e71d2011-07-07 08:21:25 +0000144 const int n = 128;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000145 const int m = 32;
146 int j0, j1, j2, j3, k, k1, k2, m2;
niklase@google.com470e71d2011-07-07 08:21:25 +0000147 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
148 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
149
cduvivier@google.com288c8692011-08-22 21:55:33 +0000150 for (j0 = 0; j0 < l; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000151 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000152 j2 = j0 + 16;
153 j3 = j0 + 24;
154 x0r = a[j0 + 0] + a[j1 + 0];
155 x0i = a[j0 + 1] + a[j1 + 1];
156 x1r = a[j0 + 0] - a[j1 + 0];
157 x1i = a[j0 + 1] - a[j1 + 1];
158 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000159 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000160 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000161 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000162 a[j0 + 0] = x0r + x2r;
163 a[j0 + 1] = x0i + x2i;
164 a[j2 + 0] = x0r - x2r;
niklase@google.com470e71d2011-07-07 08:21:25 +0000165 a[j2 + 1] = x0i - x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000166 a[j1 + 0] = x1r - x3i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000167 a[j1 + 1] = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000168 a[j3 + 0] = x1r + x3i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000169 a[j3 + 1] = x1i - x3r;
170 }
171 wk1r = rdft_w[2];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000172 for (j0 = m; j0 < l + m; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000173 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000174 j2 = j0 + 16;
175 j3 = j0 + 24;
176 x0r = a[j0 + 0] + a[j1 + 0];
177 x0i = a[j0 + 1] + a[j1 + 1];
178 x1r = a[j0 + 0] - a[j1 + 0];
179 x1i = a[j0 + 1] - a[j1 + 1];
180 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000181 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000182 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000183 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000184 a[j0 + 0] = x0r + x2r;
185 a[j0 + 1] = x0i + x2i;
186 a[j2 + 0] = x2i - x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000187 a[j2 + 1] = x0r - x2r;
188 x0r = x1r - x3i;
189 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000190 a[j1 + 0] = wk1r * (x0r - x0i);
niklase@google.com470e71d2011-07-07 08:21:25 +0000191 a[j1 + 1] = wk1r * (x0r + x0i);
192 x0r = x3i + x1r;
193 x0i = x3r - x1i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000194 a[j3 + 0] = wk1r * (x0i - x0r);
niklase@google.com470e71d2011-07-07 08:21:25 +0000195 a[j3 + 1] = wk1r * (x0i + x0r);
196 }
197 k1 = 0;
198 m2 = 2 * m;
199 for (k = m2; k < n; k += m2) {
200 k1 += 2;
201 k2 = 2 * k1;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000202 wk2r = rdft_w[k1 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000203 wk2i = rdft_w[k1 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000204 wk1r = rdft_w[k2 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000205 wk1i = rdft_w[k2 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000206 wk3r = rdft_wk3ri_first[k1 + 0];
207 wk3i = rdft_wk3ri_first[k1 + 1];
208 for (j0 = k; j0 < l + k; j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000209 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000210 j2 = j0 + 16;
211 j3 = j0 + 24;
212 x0r = a[j0 + 0] + a[j1 + 0];
213 x0i = a[j0 + 1] + a[j1 + 1];
214 x1r = a[j0 + 0] - a[j1 + 0];
215 x1i = a[j0 + 1] - a[j1 + 1];
216 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000217 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000218 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000219 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000220 a[j0 + 0] = x0r + x2r;
221 a[j0 + 1] = x0i + x2i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000222 x0r -= x2r;
223 x0i -= x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000224 a[j2 + 0] = wk2r * x0r - wk2i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000225 a[j2 + 1] = wk2r * x0i + wk2i * x0r;
226 x0r = x1r - x3i;
227 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000228 a[j1 + 0] = wk1r * x0r - wk1i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000229 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
230 x0r = x1r + x3i;
231 x0i = x1i - x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000232 a[j3 + 0] = wk3r * x0r - wk3i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000233 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
234 }
235 wk1r = rdft_w[k2 + 2];
236 wk1i = rdft_w[k2 + 3];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000237 wk3r = rdft_wk3ri_second[k1 + 0];
238 wk3i = rdft_wk3ri_second[k1 + 1];
239 for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000240 j1 = j0 + 8;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000241 j2 = j0 + 16;
242 j3 = j0 + 24;
243 x0r = a[j0 + 0] + a[j1 + 0];
244 x0i = a[j0 + 1] + a[j1 + 1];
245 x1r = a[j0 + 0] - a[j1 + 0];
246 x1i = a[j0 + 1] - a[j1 + 1];
247 x2r = a[j2 + 0] + a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000248 x2i = a[j2 + 1] + a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000249 x3r = a[j2 + 0] - a[j3 + 0];
niklase@google.com470e71d2011-07-07 08:21:25 +0000250 x3i = a[j2 + 1] - a[j3 + 1];
cduvivier@google.com288c8692011-08-22 21:55:33 +0000251 a[j0 + 0] = x0r + x2r;
252 a[j0 + 1] = x0i + x2i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000253 x0r -= x2r;
254 x0i -= x2i;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000255 a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000256 a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
257 x0r = x1r - x3i;
258 x0i = x1i + x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000259 a[j1 + 0] = wk1r * x0r - wk1i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000260 a[j1 + 1] = wk1r * x0i + wk1i * x0r;
261 x0r = x1r + x3i;
262 x0i = x1i - x3r;
cduvivier@google.com288c8692011-08-22 21:55:33 +0000263 a[j3 + 0] = wk3r * x0r - wk3i * x0i;
niklase@google.com470e71d2011-07-07 08:21:25 +0000264 a[j3 + 1] = wk3r * x0i + wk3i * x0r;
265 }
266 }
267}
268
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000269static void rftfsub_128_C(float* a) {
270 const float* c = rdft_w + 32;
niklase@google.com470e71d2011-07-07 08:21:25 +0000271 int j1, j2, k1, k2;
272 float wkr, wki, xr, xi, yr, yi;
273
274 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
275 k2 = 128 - j2;
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000276 k1 = 32 - j1;
niklase@google.com470e71d2011-07-07 08:21:25 +0000277 wkr = 0.5f - c[k1];
278 wki = c[j1];
279 xr = a[j2 + 0] - a[k2 + 0];
280 xi = a[j2 + 1] + a[k2 + 1];
281 yr = wkr * xr - wki * xi;
282 yi = wkr * xi + wki * xr;
283 a[j2 + 0] -= yr;
284 a[j2 + 1] -= yi;
285 a[k2 + 0] += yr;
286 a[k2 + 1] -= yi;
287 }
288}
289
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000290static void rftbsub_128_C(float* a) {
291 const float* c = rdft_w + 32;
niklase@google.com470e71d2011-07-07 08:21:25 +0000292 int j1, j2, k1, k2;
293 float wkr, wki, xr, xi, yr, yi;
294
295 a[1] = -a[1];
296 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
297 k2 = 128 - j2;
andrew@webrtc.org13b2d462013-10-08 23:41:42 +0000298 k1 = 32 - j1;
niklase@google.com470e71d2011-07-07 08:21:25 +0000299 wkr = 0.5f - c[k1];
300 wki = c[j1];
301 xr = a[j2 + 0] - a[k2 + 0];
302 xi = a[j2 + 1] + a[k2 + 1];
303 yr = wkr * xr + wki * xi;
304 yi = wkr * xi - wki * xr;
305 a[j2 + 0] = a[j2 + 0] - yr;
306 a[j2 + 1] = yi - a[j2 + 1];
307 a[k2 + 0] = yr + a[k2 + 0];
308 a[k2 + 1] = yi - a[k2 + 1];
309 }
310 a[65] = -a[65];
311}
peah81b92912016-10-06 06:46:20 -0700312#endif
niklase@google.com470e71d2011-07-07 08:21:25 +0000313
peah81b92912016-10-06 06:46:20 -0700314} // namespace
315
316OouraFft::OouraFft() {
317#if defined(WEBRTC_ARCH_X86_FAMILY)
318 use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0);
319#else
320 use_sse2_ = false;
321#endif
322}
323
324OouraFft::~OouraFft() = default;
325
326void OouraFft::Fft(float* a) const {
niklase@google.com470e71d2011-07-07 08:21:25 +0000327 float xi;
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +0000328 bitrv2_128(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000329 cftfsub_128(a);
330 rftfsub_128(a);
331 xi = a[0] - a[1];
332 a[0] += a[1];
333 a[1] = xi;
334}
peah81b92912016-10-06 06:46:20 -0700335void OouraFft::InverseFft(float* a) const {
niklase@google.com470e71d2011-07-07 08:21:25 +0000336 a[1] = 0.5f * (a[0] - a[1]);
337 a[0] -= a[1];
338 rftbsub_128(a);
cd@webrtc.org85b4a1b2012-04-10 21:25:17 +0000339 bitrv2_128(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000340 cftbsub_128(a);
341}
342
peah81b92912016-10-06 06:46:20 -0700343void OouraFft::cft1st_128(float* a) const {
344#if defined(MIPS_FPU_LE)
345 cft1st_128_mips(a);
346#elif defined(WEBRTC_HAS_NEON)
347 cft1st_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700348#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700349 if (use_sse2_) {
350 cft1st_128_SSE2(a);
351 } else {
352 cft1st_128_C(a);
niklase@google.com470e71d2011-07-07 08:21:25 +0000353 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700354#else
355 cft1st_128_C(a);
andrew@webrtc.orgc8d012f2012-01-13 19:43:09 +0000356#endif
peah81b92912016-10-06 06:46:20 -0700357}
358void OouraFft::cftmdl_128(float* a) const {
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +0000359#if defined(MIPS_FPU_LE)
peah81b92912016-10-06 06:46:20 -0700360 cftmdl_128_mips(a);
361#elif defined(WEBRTC_HAS_NEON)
362 cftmdl_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700363#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700364 if (use_sse2_) {
365 cftmdl_128_SSE2(a);
366 } else {
367 cftmdl_128_C(a);
368 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700369#else
370 cftmdl_128_C(a);
bjornv@webrtc.orgcd9b90a2014-06-30 12:05:18 +0000371#endif
niklase@google.com470e71d2011-07-07 08:21:25 +0000372}
peah81b92912016-10-06 06:46:20 -0700373void OouraFft::rftfsub_128(float* a) const {
374#if defined(MIPS_FPU_LE)
375 rftfsub_128_mips(a);
376#elif defined(WEBRTC_HAS_NEON)
377 rftfsub_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700378#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700379 if (use_sse2_) {
380 rftfsub_128_SSE2(a);
381 } else {
382 rftfsub_128_C(a);
383 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700384#else
385 rftfsub_128_C(a);
peah81b92912016-10-06 06:46:20 -0700386#endif
387}
388
389void OouraFft::rftbsub_128(float* a) const {
390#if defined(MIPS_FPU_LE)
391 rftbsub_128_mips(a);
392#elif defined(WEBRTC_HAS_NEON)
393 rftbsub_128_neon(a);
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700394#elif defined(WEBRTC_ARCH_X86_FAMILY)
peah81b92912016-10-06 06:46:20 -0700395 if (use_sse2_) {
396 rftbsub_128_SSE2(a);
397 } else {
398 rftbsub_128_C(a);
399 }
Gordana.Cmiljanovic11f72b12016-10-27 23:44:09 -0700400#else
401 rftbsub_128_C(a);
peah81b92912016-10-06 06:46:20 -0700402#endif
403}
404
405void OouraFft::cftbsub_128(float* a) const {
406 int j, j1, j2, j3, l;
407 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
408
409 cft1st_128(a);
410 cftmdl_128(a);
411 l = 32;
412
413 for (j = 0; j < l; j += 2) {
414 j1 = j + l;
415 j2 = j1 + l;
416 j3 = j2 + l;
417 x0r = a[j] + a[j1];
418 x0i = -a[j + 1] - a[j1 + 1];
419 x1r = a[j] - a[j1];
420 x1i = -a[j + 1] + a[j1 + 1];
421 x2r = a[j2] + a[j3];
422 x2i = a[j2 + 1] + a[j3 + 1];
423 x3r = a[j2] - a[j3];
424 x3i = a[j2 + 1] - a[j3 + 1];
425 a[j] = x0r + x2r;
426 a[j + 1] = x0i - x2i;
427 a[j2] = x0r - x2r;
428 a[j2 + 1] = x0i + x2i;
429 a[j1] = x1r - x3i;
430 a[j1 + 1] = x1i - x3r;
431 a[j3] = x1r + x3i;
432 a[j3 + 1] = x1i + x3r;
433 }
434}
435
436void OouraFft::cftfsub_128(float* a) const {
437 int j, j1, j2, j3, l;
438 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
439
440 cft1st_128(a);
441 cftmdl_128(a);
442 l = 32;
443 for (j = 0; j < l; j += 2) {
444 j1 = j + l;
445 j2 = j1 + l;
446 j3 = j2 + l;
447 x0r = a[j] + a[j1];
448 x0i = a[j + 1] + a[j1 + 1];
449 x1r = a[j] - a[j1];
450 x1i = a[j + 1] - a[j1 + 1];
451 x2r = a[j2] + a[j3];
452 x2i = a[j2 + 1] + a[j3 + 1];
453 x3r = a[j2] - a[j3];
454 x3i = a[j2 + 1] - a[j3 + 1];
455 a[j] = x0r + x2r;
456 a[j + 1] = x0i + x2i;
457 a[j2] = x0r - x2r;
458 a[j2 + 1] = x0i - x2i;
459 a[j1] = x1r - x3i;
460 a[j1 + 1] = x1i + x3r;
461 a[j3] = x1r + x3i;
462 a[j3 + 1] = x1i - x3r;
463 }
464}
465
466void OouraFft::bitrv2_128(float* a) const {
467 /*
468 Following things have been attempted but are no faster:
469 (a) Storing the swap indexes in a LUT (index calculations are done
470 for 'free' while waiting on memory/L1).
471 (b) Consolidate the load/store of two consecutive floats by a 64 bit
472 integer (execution is memory/L1 bound).
473 (c) Do a mix of floats and 64 bit integer to maximize register
474 utilization (execution is memory/L1 bound).
475 (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
476 (e) Hard-coding of the offsets to completely eliminates index
477 calculations.
478 */
479
480 unsigned int j, j1, k, k1;
481 float xr, xi, yr, yi;
482
483 const int ip[4] = {0, 64, 32, 96};
484 for (k = 0; k < 4; k++) {
485 for (j = 0; j < k; j++) {
486 j1 = 2 * j + ip[k];
487 k1 = 2 * k + ip[j];
488 xr = a[j1 + 0];
489 xi = a[j1 + 1];
490 yr = a[k1 + 0];
491 yi = a[k1 + 1];
492 a[j1 + 0] = yr;
493 a[j1 + 1] = yi;
494 a[k1 + 0] = xr;
495 a[k1 + 1] = xi;
496 j1 += 8;
497 k1 += 16;
498 xr = a[j1 + 0];
499 xi = a[j1 + 1];
500 yr = a[k1 + 0];
501 yi = a[k1 + 1];
502 a[j1 + 0] = yr;
503 a[j1 + 1] = yi;
504 a[k1 + 0] = xr;
505 a[k1 + 1] = xi;
506 j1 += 8;
507 k1 -= 8;
508 xr = a[j1 + 0];
509 xi = a[j1 + 1];
510 yr = a[k1 + 0];
511 yi = a[k1 + 1];
512 a[j1 + 0] = yr;
513 a[j1 + 1] = yi;
514 a[k1 + 0] = xr;
515 a[k1 + 1] = xi;
516 j1 += 8;
517 k1 += 16;
518 xr = a[j1 + 0];
519 xi = a[j1 + 1];
520 yr = a[k1 + 0];
521 yi = a[k1 + 1];
522 a[j1 + 0] = yr;
523 a[j1 + 1] = yi;
524 a[k1 + 0] = xr;
525 a[k1 + 1] = xi;
526 }
527 j1 = 2 * k + 8 + ip[k];
528 k1 = j1 + 8;
529 xr = a[j1 + 0];
530 xi = a[j1 + 1];
531 yr = a[k1 + 0];
532 yi = a[k1 + 1];
533 a[j1 + 0] = yr;
534 a[j1 + 1] = yi;
535 a[k1 + 0] = xr;
536 a[k1 + 1] = xi;
537 }
538}
539
540} // namespace webrtc