niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
| 2 | * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html |
| 3 | * Copyright Takuya OOURA, 1996-2001 |
| 4 | * |
| 5 | * You may use, copy, modify and distribute this code for any purpose (include |
| 6 | * commercial use) and without fee. Please refer to this package when you modify |
| 7 | * this code. |
| 8 | * |
| 9 | * Changes by the WebRTC authors: |
| 10 | * - Trivial type modifications. |
| 11 | * - Minimal code subset to do rdft of length 128. |
| 12 | * - Optimizations because of known length. |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 13 | * - Removed the global variables by moving the code into a class in order |
| 14 | * to make it thread safe. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 15 | * |
| 16 | * All changes are covered by the WebRTC license and IP grant: |
| 17 | * Use of this source code is governed by a BSD-style license |
| 18 | * that can be found in the LICENSE file in the root of the source |
| 19 | * tree. An additional intellectual property rights grant can be found |
| 20 | * in the file PATENTS. All contributing project authors may |
| 21 | * be found in the AUTHORS file in the root of the source tree. |
| 22 | */ |
| 23 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 24 | #include "webrtc/modules/audio_processing//utility/ooura_fft.h" |
ajm@google.com | ce7c2a2 | 2011-08-04 01:50:00 +0000 | [diff] [blame] | 25 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 26 | #include <math.h> |
| 27 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 28 | #include "webrtc/modules/audio_processing/utility/ooura_fft_tables_common.h" |
Henrik Kjellander | 98f5351 | 2015-10-28 18:17:40 +0100 | [diff] [blame] | 29 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" |
pbos@webrtc.org | 7fad4b8 | 2013-05-28 08:11:59 +0000 | [diff] [blame] | 30 | #include "webrtc/typedefs.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 31 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 32 | namespace webrtc { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 33 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 34 | namespace { |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 35 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 36 | #if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON)) |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 37 | static void cft1st_128_C(float* a) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 38 | const int n = 128; |
| 39 | int j, k1, k2; |
| 40 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 41 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 42 | |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 43 | // The processing of the first set of elements was simplified in C to avoid |
| 44 | // some operations (multiplication by zero or one, addition of two elements |
| 45 | // multiplied by the same weight, ...). |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 46 | x0r = a[0] + a[2]; |
| 47 | x0i = a[1] + a[3]; |
| 48 | x1r = a[0] - a[2]; |
| 49 | x1i = a[1] - a[3]; |
| 50 | x2r = a[4] + a[6]; |
| 51 | x2i = a[5] + a[7]; |
| 52 | x3r = a[4] - a[6]; |
| 53 | x3i = a[5] - a[7]; |
| 54 | a[0] = x0r + x2r; |
| 55 | a[1] = x0i + x2i; |
| 56 | a[4] = x0r - x2r; |
| 57 | a[5] = x0i - x2i; |
| 58 | a[2] = x1r - x3i; |
| 59 | a[3] = x1i + x3r; |
| 60 | a[6] = x1r + x3i; |
| 61 | a[7] = x1i - x3r; |
| 62 | wk1r = rdft_w[2]; |
| 63 | x0r = a[8] + a[10]; |
| 64 | x0i = a[9] + a[11]; |
| 65 | x1r = a[8] - a[10]; |
| 66 | x1i = a[9] - a[11]; |
| 67 | x2r = a[12] + a[14]; |
| 68 | x2i = a[13] + a[15]; |
| 69 | x3r = a[12] - a[14]; |
| 70 | x3i = a[13] - a[15]; |
| 71 | a[8] = x0r + x2r; |
| 72 | a[9] = x0i + x2i; |
| 73 | a[12] = x2i - x0i; |
| 74 | a[13] = x0r - x2r; |
| 75 | x0r = x1r - x3i; |
| 76 | x0i = x1i + x3r; |
| 77 | a[10] = wk1r * (x0r - x0i); |
| 78 | a[11] = wk1r * (x0r + x0i); |
| 79 | x0r = x3i + x1r; |
| 80 | x0i = x3r - x1i; |
| 81 | a[14] = wk1r * (x0i - x0r); |
| 82 | a[15] = wk1r * (x0i + x0r); |
| 83 | k1 = 0; |
| 84 | for (j = 16; j < n; j += 16) { |
| 85 | k1 += 2; |
| 86 | k2 = 2 * k1; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 87 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 88 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 89 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 90 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 91 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 92 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 93 | x0r = a[j + 0] + a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 94 | x0i = a[j + 1] + a[j + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 95 | x1r = a[j + 0] - a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 96 | x1i = a[j + 1] - a[j + 3]; |
| 97 | x2r = a[j + 4] + a[j + 6]; |
| 98 | x2i = a[j + 5] + a[j + 7]; |
| 99 | x3r = a[j + 4] - a[j + 6]; |
| 100 | x3i = a[j + 5] - a[j + 7]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 101 | a[j + 0] = x0r + x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 102 | a[j + 1] = x0i + x2i; |
| 103 | x0r -= x2r; |
| 104 | x0i -= x2i; |
| 105 | a[j + 4] = wk2r * x0r - wk2i * x0i; |
| 106 | a[j + 5] = wk2r * x0i + wk2i * x0r; |
| 107 | x0r = x1r - x3i; |
| 108 | x0i = x1i + x3r; |
| 109 | a[j + 2] = wk1r * x0r - wk1i * x0i; |
| 110 | a[j + 3] = wk1r * x0i + wk1i * x0r; |
| 111 | x0r = x1r + x3i; |
| 112 | x0i = x1i - x3r; |
| 113 | a[j + 6] = wk3r * x0r - wk3i * x0i; |
| 114 | a[j + 7] = wk3r * x0i + wk3i * x0r; |
| 115 | wk1r = rdft_w[k2 + 2]; |
| 116 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 117 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 118 | wk3i = rdft_wk3ri_second[k1 + 1]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 119 | x0r = a[j + 8] + a[j + 10]; |
| 120 | x0i = a[j + 9] + a[j + 11]; |
| 121 | x1r = a[j + 8] - a[j + 10]; |
| 122 | x1i = a[j + 9] - a[j + 11]; |
| 123 | x2r = a[j + 12] + a[j + 14]; |
| 124 | x2i = a[j + 13] + a[j + 15]; |
| 125 | x3r = a[j + 12] - a[j + 14]; |
| 126 | x3i = a[j + 13] - a[j + 15]; |
| 127 | a[j + 8] = x0r + x2r; |
| 128 | a[j + 9] = x0i + x2i; |
| 129 | x0r -= x2r; |
| 130 | x0i -= x2i; |
| 131 | a[j + 12] = -wk2i * x0r - wk2r * x0i; |
| 132 | a[j + 13] = -wk2i * x0i + wk2r * x0r; |
| 133 | x0r = x1r - x3i; |
| 134 | x0i = x1i + x3r; |
| 135 | a[j + 10] = wk1r * x0r - wk1i * x0i; |
| 136 | a[j + 11] = wk1r * x0i + wk1i * x0r; |
| 137 | x0r = x1r + x3i; |
| 138 | x0i = x1i - x3r; |
| 139 | a[j + 14] = wk3r * x0r - wk3i * x0i; |
| 140 | a[j + 15] = wk3r * x0i + wk3i * x0r; |
| 141 | } |
| 142 | } |
| 143 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 144 | static void cftmdl_128_C(float* a) { |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 145 | const int l = 8; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 146 | const int n = 128; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 147 | const int m = 32; |
| 148 | int j0, j1, j2, j3, k, k1, k2, m2; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 149 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 150 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 151 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 152 | for (j0 = 0; j0 < l; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 153 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 154 | j2 = j0 + 16; |
| 155 | j3 = j0 + 24; |
| 156 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 157 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 158 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 159 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 160 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 161 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 162 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 163 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 164 | a[j0 + 0] = x0r + x2r; |
| 165 | a[j0 + 1] = x0i + x2i; |
| 166 | a[j2 + 0] = x0r - x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 167 | a[j2 + 1] = x0i - x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 168 | a[j1 + 0] = x1r - x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 169 | a[j1 + 1] = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 170 | a[j3 + 0] = x1r + x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 171 | a[j3 + 1] = x1i - x3r; |
| 172 | } |
| 173 | wk1r = rdft_w[2]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 174 | for (j0 = m; j0 < l + m; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 175 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 176 | j2 = j0 + 16; |
| 177 | j3 = j0 + 24; |
| 178 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 179 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 180 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 181 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 182 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 183 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 184 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 185 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 186 | a[j0 + 0] = x0r + x2r; |
| 187 | a[j0 + 1] = x0i + x2i; |
| 188 | a[j2 + 0] = x2i - x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 189 | a[j2 + 1] = x0r - x2r; |
| 190 | x0r = x1r - x3i; |
| 191 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 192 | a[j1 + 0] = wk1r * (x0r - x0i); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 193 | a[j1 + 1] = wk1r * (x0r + x0i); |
| 194 | x0r = x3i + x1r; |
| 195 | x0i = x3r - x1i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 196 | a[j3 + 0] = wk1r * (x0i - x0r); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 197 | a[j3 + 1] = wk1r * (x0i + x0r); |
| 198 | } |
| 199 | k1 = 0; |
| 200 | m2 = 2 * m; |
| 201 | for (k = m2; k < n; k += m2) { |
| 202 | k1 += 2; |
| 203 | k2 = 2 * k1; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 204 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 205 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 206 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 207 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 208 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 209 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 210 | for (j0 = k; j0 < l + k; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 211 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 212 | j2 = j0 + 16; |
| 213 | j3 = j0 + 24; |
| 214 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 215 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 216 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 217 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 218 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 219 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 220 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 221 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 222 | a[j0 + 0] = x0r + x2r; |
| 223 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 224 | x0r -= x2r; |
| 225 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 226 | a[j2 + 0] = wk2r * x0r - wk2i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 227 | a[j2 + 1] = wk2r * x0i + wk2i * x0r; |
| 228 | x0r = x1r - x3i; |
| 229 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 230 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 231 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 232 | x0r = x1r + x3i; |
| 233 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 234 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 235 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 236 | } |
| 237 | wk1r = rdft_w[k2 + 2]; |
| 238 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 239 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 240 | wk3i = rdft_wk3ri_second[k1 + 1]; |
| 241 | for (j0 = k + m; j0 < l + (k + m); j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 242 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 243 | j2 = j0 + 16; |
| 244 | j3 = j0 + 24; |
| 245 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 246 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 247 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 248 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 249 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 250 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 251 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 252 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 253 | a[j0 + 0] = x0r + x2r; |
| 254 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 255 | x0r -= x2r; |
| 256 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 257 | a[j2 + 0] = -wk2i * x0r - wk2r * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 258 | a[j2 + 1] = -wk2i * x0i + wk2r * x0r; |
| 259 | x0r = x1r - x3i; |
| 260 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 261 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 262 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 263 | x0r = x1r + x3i; |
| 264 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 265 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 266 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 267 | } |
| 268 | } |
| 269 | } |
| 270 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 271 | static void rftfsub_128_C(float* a) { |
| 272 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 273 | int j1, j2, k1, k2; |
| 274 | float wkr, wki, xr, xi, yr, yi; |
| 275 | |
| 276 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 277 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 278 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 279 | wkr = 0.5f - c[k1]; |
| 280 | wki = c[j1]; |
| 281 | xr = a[j2 + 0] - a[k2 + 0]; |
| 282 | xi = a[j2 + 1] + a[k2 + 1]; |
| 283 | yr = wkr * xr - wki * xi; |
| 284 | yi = wkr * xi + wki * xr; |
| 285 | a[j2 + 0] -= yr; |
| 286 | a[j2 + 1] -= yi; |
| 287 | a[k2 + 0] += yr; |
| 288 | a[k2 + 1] -= yi; |
| 289 | } |
| 290 | } |
| 291 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 292 | static void rftbsub_128_C(float* a) { |
| 293 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 294 | int j1, j2, k1, k2; |
| 295 | float wkr, wki, xr, xi, yr, yi; |
| 296 | |
| 297 | a[1] = -a[1]; |
| 298 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 299 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 300 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 301 | wkr = 0.5f - c[k1]; |
| 302 | wki = c[j1]; |
| 303 | xr = a[j2 + 0] - a[k2 + 0]; |
| 304 | xi = a[j2 + 1] + a[k2 + 1]; |
| 305 | yr = wkr * xr + wki * xi; |
| 306 | yi = wkr * xi - wki * xr; |
| 307 | a[j2 + 0] = a[j2 + 0] - yr; |
| 308 | a[j2 + 1] = yi - a[j2 + 1]; |
| 309 | a[k2 + 0] = yr + a[k2 + 0]; |
| 310 | a[k2 + 1] = yi - a[k2 + 1]; |
| 311 | } |
| 312 | a[65] = -a[65]; |
| 313 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 314 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 315 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 316 | |
| 317 | } // namespace |
| 318 | |
| 319 | OouraFft::OouraFft() { |
| 320 | #if defined(WEBRTC_ARCH_X86_FAMILY) |
| 321 | use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0); |
| 322 | #else |
| 323 | use_sse2_ = false; |
| 324 | #endif |
| 325 | } |
| 326 | |
| 327 | OouraFft::~OouraFft() = default; |
| 328 | |
| 329 | void OouraFft::Fft(float* a) const { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 330 | float xi; |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 331 | bitrv2_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 332 | cftfsub_128(a); |
| 333 | rftfsub_128(a); |
| 334 | xi = a[0] - a[1]; |
| 335 | a[0] += a[1]; |
| 336 | a[1] = xi; |
| 337 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 338 | void OouraFft::InverseFft(float* a) const { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 339 | a[1] = 0.5f * (a[0] - a[1]); |
| 340 | a[0] -= a[1]; |
| 341 | rftbsub_128(a); |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 342 | bitrv2_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 343 | cftbsub_128(a); |
| 344 | } |
| 345 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 346 | void OouraFft::cft1st_128(float* a) const { |
| 347 | #if defined(MIPS_FPU_LE) |
| 348 | cft1st_128_mips(a); |
| 349 | #elif defined(WEBRTC_HAS_NEON) |
| 350 | cft1st_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 351 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 352 | if (use_sse2_) { |
| 353 | cft1st_128_SSE2(a); |
| 354 | } else { |
| 355 | cft1st_128_C(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 356 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 357 | #else |
| 358 | cft1st_128_C(a); |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame] | 359 | #endif |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 360 | } |
| 361 | void OouraFft::cftmdl_128(float* a) const { |
andrew@webrtc.org | c0907ef | 2014-02-21 00:13:31 +0000 | [diff] [blame] | 362 | #if defined(MIPS_FPU_LE) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 363 | cftmdl_128_mips(a); |
| 364 | #elif defined(WEBRTC_HAS_NEON) |
| 365 | cftmdl_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 366 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 367 | if (use_sse2_) { |
| 368 | cftmdl_128_SSE2(a); |
| 369 | } else { |
| 370 | cftmdl_128_C(a); |
| 371 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 372 | #else |
| 373 | cftmdl_128_C(a); |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 374 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 375 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 376 | void OouraFft::rftfsub_128(float* a) const { |
| 377 | #if defined(MIPS_FPU_LE) |
| 378 | rftfsub_128_mips(a); |
| 379 | #elif defined(WEBRTC_HAS_NEON) |
| 380 | rftfsub_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 381 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 382 | if (use_sse2_) { |
| 383 | rftfsub_128_SSE2(a); |
| 384 | } else { |
| 385 | rftfsub_128_C(a); |
| 386 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 387 | #else |
| 388 | rftfsub_128_C(a); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 389 | #endif |
| 390 | } |
| 391 | |
| 392 | void OouraFft::rftbsub_128(float* a) const { |
| 393 | #if defined(MIPS_FPU_LE) |
| 394 | rftbsub_128_mips(a); |
| 395 | #elif defined(WEBRTC_HAS_NEON) |
| 396 | rftbsub_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 397 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 398 | if (use_sse2_) { |
| 399 | rftbsub_128_SSE2(a); |
| 400 | } else { |
| 401 | rftbsub_128_C(a); |
| 402 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 403 | #else |
| 404 | rftbsub_128_C(a); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 405 | #endif |
| 406 | } |
| 407 | |
| 408 | void OouraFft::cftbsub_128(float* a) const { |
| 409 | int j, j1, j2, j3, l; |
| 410 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 411 | |
| 412 | cft1st_128(a); |
| 413 | cftmdl_128(a); |
| 414 | l = 32; |
| 415 | |
| 416 | for (j = 0; j < l; j += 2) { |
| 417 | j1 = j + l; |
| 418 | j2 = j1 + l; |
| 419 | j3 = j2 + l; |
| 420 | x0r = a[j] + a[j1]; |
| 421 | x0i = -a[j + 1] - a[j1 + 1]; |
| 422 | x1r = a[j] - a[j1]; |
| 423 | x1i = -a[j + 1] + a[j1 + 1]; |
| 424 | x2r = a[j2] + a[j3]; |
| 425 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 426 | x3r = a[j2] - a[j3]; |
| 427 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 428 | a[j] = x0r + x2r; |
| 429 | a[j + 1] = x0i - x2i; |
| 430 | a[j2] = x0r - x2r; |
| 431 | a[j2 + 1] = x0i + x2i; |
| 432 | a[j1] = x1r - x3i; |
| 433 | a[j1 + 1] = x1i - x3r; |
| 434 | a[j3] = x1r + x3i; |
| 435 | a[j3 + 1] = x1i + x3r; |
| 436 | } |
| 437 | } |
| 438 | |
| 439 | void OouraFft::cftfsub_128(float* a) const { |
| 440 | int j, j1, j2, j3, l; |
| 441 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 442 | |
| 443 | cft1st_128(a); |
| 444 | cftmdl_128(a); |
| 445 | l = 32; |
| 446 | for (j = 0; j < l; j += 2) { |
| 447 | j1 = j + l; |
| 448 | j2 = j1 + l; |
| 449 | j3 = j2 + l; |
| 450 | x0r = a[j] + a[j1]; |
| 451 | x0i = a[j + 1] + a[j1 + 1]; |
| 452 | x1r = a[j] - a[j1]; |
| 453 | x1i = a[j + 1] - a[j1 + 1]; |
| 454 | x2r = a[j2] + a[j3]; |
| 455 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 456 | x3r = a[j2] - a[j3]; |
| 457 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 458 | a[j] = x0r + x2r; |
| 459 | a[j + 1] = x0i + x2i; |
| 460 | a[j2] = x0r - x2r; |
| 461 | a[j2 + 1] = x0i - x2i; |
| 462 | a[j1] = x1r - x3i; |
| 463 | a[j1 + 1] = x1i + x3r; |
| 464 | a[j3] = x1r + x3i; |
| 465 | a[j3 + 1] = x1i - x3r; |
| 466 | } |
| 467 | } |
| 468 | |
| 469 | void OouraFft::bitrv2_128(float* a) const { |
| 470 | /* |
| 471 | Following things have been attempted but are no faster: |
| 472 | (a) Storing the swap indexes in a LUT (index calculations are done |
| 473 | for 'free' while waiting on memory/L1). |
| 474 | (b) Consolidate the load/store of two consecutive floats by a 64 bit |
| 475 | integer (execution is memory/L1 bound). |
| 476 | (c) Do a mix of floats and 64 bit integer to maximize register |
| 477 | utilization (execution is memory/L1 bound). |
| 478 | (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). |
| 479 | (e) Hard-coding of the offsets to completely eliminates index |
| 480 | calculations. |
| 481 | */ |
| 482 | |
| 483 | unsigned int j, j1, k, k1; |
| 484 | float xr, xi, yr, yi; |
| 485 | |
| 486 | const int ip[4] = {0, 64, 32, 96}; |
| 487 | for (k = 0; k < 4; k++) { |
| 488 | for (j = 0; j < k; j++) { |
| 489 | j1 = 2 * j + ip[k]; |
| 490 | k1 = 2 * k + ip[j]; |
| 491 | xr = a[j1 + 0]; |
| 492 | xi = a[j1 + 1]; |
| 493 | yr = a[k1 + 0]; |
| 494 | yi = a[k1 + 1]; |
| 495 | a[j1 + 0] = yr; |
| 496 | a[j1 + 1] = yi; |
| 497 | a[k1 + 0] = xr; |
| 498 | a[k1 + 1] = xi; |
| 499 | j1 += 8; |
| 500 | k1 += 16; |
| 501 | xr = a[j1 + 0]; |
| 502 | xi = a[j1 + 1]; |
| 503 | yr = a[k1 + 0]; |
| 504 | yi = a[k1 + 1]; |
| 505 | a[j1 + 0] = yr; |
| 506 | a[j1 + 1] = yi; |
| 507 | a[k1 + 0] = xr; |
| 508 | a[k1 + 1] = xi; |
| 509 | j1 += 8; |
| 510 | k1 -= 8; |
| 511 | xr = a[j1 + 0]; |
| 512 | xi = a[j1 + 1]; |
| 513 | yr = a[k1 + 0]; |
| 514 | yi = a[k1 + 1]; |
| 515 | a[j1 + 0] = yr; |
| 516 | a[j1 + 1] = yi; |
| 517 | a[k1 + 0] = xr; |
| 518 | a[k1 + 1] = xi; |
| 519 | j1 += 8; |
| 520 | k1 += 16; |
| 521 | xr = a[j1 + 0]; |
| 522 | xi = a[j1 + 1]; |
| 523 | yr = a[k1 + 0]; |
| 524 | yi = a[k1 + 1]; |
| 525 | a[j1 + 0] = yr; |
| 526 | a[j1 + 1] = yi; |
| 527 | a[k1 + 0] = xr; |
| 528 | a[k1 + 1] = xi; |
| 529 | } |
| 530 | j1 = 2 * k + 8 + ip[k]; |
| 531 | k1 = j1 + 8; |
| 532 | xr = a[j1 + 0]; |
| 533 | xi = a[j1 + 1]; |
| 534 | yr = a[k1 + 0]; |
| 535 | yi = a[k1 + 1]; |
| 536 | a[j1 + 0] = yr; |
| 537 | a[j1 + 1] = yi; |
| 538 | a[k1 + 0] = xr; |
| 539 | a[k1 + 1] = xi; |
| 540 | } |
| 541 | } |
| 542 | |
| 543 | } // namespace webrtc |