niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
| 2 | * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html |
| 3 | * Copyright Takuya OOURA, 1996-2001 |
| 4 | * |
| 5 | * You may use, copy, modify and distribute this code for any purpose (include |
| 6 | * commercial use) and without fee. Please refer to this package when you modify |
| 7 | * this code. |
| 8 | * |
| 9 | * Changes by the WebRTC authors: |
| 10 | * - Trivial type modifications. |
| 11 | * - Minimal code subset to do rdft of length 128. |
| 12 | * - Optimizations because of known length. |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 13 | * - Removed the global variables by moving the code in to a class in order |
| 14 | * to make it thread safe. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 15 | * |
| 16 | * All changes are covered by the WebRTC license and IP grant: |
| 17 | * Use of this source code is governed by a BSD-style license |
| 18 | * that can be found in the LICENSE file in the root of the source |
| 19 | * tree. An additional intellectual property rights grant can be found |
| 20 | * in the file PATENTS. All contributing project authors may |
| 21 | * be found in the AUTHORS file in the root of the source tree. |
| 22 | */ |
| 23 | |
Jiawei Ou | d3c642b | 2018-01-09 09:15:37 -0800 | [diff] [blame] | 24 | #include "modules/audio_processing/utility/ooura_fft.h" |
ajm@google.com | ce7c2a2 | 2011-08-04 01:50:00 +0000 | [diff] [blame] | 25 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 26 | #include "modules/audio_processing/utility/ooura_fft_tables_common.h" |
Niels Möller | a12c42a | 2018-07-25 16:05:48 +0200 | [diff] [blame] | 27 | #include "rtc_base/system/arch.h" |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 28 | #include "system_wrappers/include/cpu_features_wrapper.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 29 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 30 | namespace webrtc { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 31 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 32 | namespace { |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 33 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 34 | #if !(defined(MIPS_FPU_LE) || defined(WEBRTC_HAS_NEON)) |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 35 | static void cft1st_128_C(float* a) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 36 | const int n = 128; |
| 37 | int j, k1, k2; |
| 38 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 39 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 40 | |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 41 | // The processing of the first set of elements was simplified in C to avoid |
| 42 | // some operations (multiplication by zero or one, addition of two elements |
| 43 | // multiplied by the same weight, ...). |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 44 | x0r = a[0] + a[2]; |
| 45 | x0i = a[1] + a[3]; |
| 46 | x1r = a[0] - a[2]; |
| 47 | x1i = a[1] - a[3]; |
| 48 | x2r = a[4] + a[6]; |
| 49 | x2i = a[5] + a[7]; |
| 50 | x3r = a[4] - a[6]; |
| 51 | x3i = a[5] - a[7]; |
| 52 | a[0] = x0r + x2r; |
| 53 | a[1] = x0i + x2i; |
| 54 | a[4] = x0r - x2r; |
| 55 | a[5] = x0i - x2i; |
| 56 | a[2] = x1r - x3i; |
| 57 | a[3] = x1i + x3r; |
| 58 | a[6] = x1r + x3i; |
| 59 | a[7] = x1i - x3r; |
| 60 | wk1r = rdft_w[2]; |
| 61 | x0r = a[8] + a[10]; |
| 62 | x0i = a[9] + a[11]; |
| 63 | x1r = a[8] - a[10]; |
| 64 | x1i = a[9] - a[11]; |
| 65 | x2r = a[12] + a[14]; |
| 66 | x2i = a[13] + a[15]; |
| 67 | x3r = a[12] - a[14]; |
| 68 | x3i = a[13] - a[15]; |
| 69 | a[8] = x0r + x2r; |
| 70 | a[9] = x0i + x2i; |
| 71 | a[12] = x2i - x0i; |
| 72 | a[13] = x0r - x2r; |
| 73 | x0r = x1r - x3i; |
| 74 | x0i = x1i + x3r; |
| 75 | a[10] = wk1r * (x0r - x0i); |
| 76 | a[11] = wk1r * (x0r + x0i); |
| 77 | x0r = x3i + x1r; |
| 78 | x0i = x3r - x1i; |
| 79 | a[14] = wk1r * (x0i - x0r); |
| 80 | a[15] = wk1r * (x0i + x0r); |
| 81 | k1 = 0; |
| 82 | for (j = 16; j < n; j += 16) { |
| 83 | k1 += 2; |
| 84 | k2 = 2 * k1; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 85 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 86 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 87 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 88 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 89 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 90 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 91 | x0r = a[j + 0] + a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 92 | x0i = a[j + 1] + a[j + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 93 | x1r = a[j + 0] - a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 94 | x1i = a[j + 1] - a[j + 3]; |
| 95 | x2r = a[j + 4] + a[j + 6]; |
| 96 | x2i = a[j + 5] + a[j + 7]; |
| 97 | x3r = a[j + 4] - a[j + 6]; |
| 98 | x3i = a[j + 5] - a[j + 7]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 99 | a[j + 0] = x0r + x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 100 | a[j + 1] = x0i + x2i; |
| 101 | x0r -= x2r; |
| 102 | x0i -= x2i; |
| 103 | a[j + 4] = wk2r * x0r - wk2i * x0i; |
| 104 | a[j + 5] = wk2r * x0i + wk2i * x0r; |
| 105 | x0r = x1r - x3i; |
| 106 | x0i = x1i + x3r; |
| 107 | a[j + 2] = wk1r * x0r - wk1i * x0i; |
| 108 | a[j + 3] = wk1r * x0i + wk1i * x0r; |
| 109 | x0r = x1r + x3i; |
| 110 | x0i = x1i - x3r; |
| 111 | a[j + 6] = wk3r * x0r - wk3i * x0i; |
| 112 | a[j + 7] = wk3r * x0i + wk3i * x0r; |
| 113 | wk1r = rdft_w[k2 + 2]; |
| 114 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 115 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 116 | wk3i = rdft_wk3ri_second[k1 + 1]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 117 | x0r = a[j + 8] + a[j + 10]; |
| 118 | x0i = a[j + 9] + a[j + 11]; |
| 119 | x1r = a[j + 8] - a[j + 10]; |
| 120 | x1i = a[j + 9] - a[j + 11]; |
| 121 | x2r = a[j + 12] + a[j + 14]; |
| 122 | x2i = a[j + 13] + a[j + 15]; |
| 123 | x3r = a[j + 12] - a[j + 14]; |
| 124 | x3i = a[j + 13] - a[j + 15]; |
| 125 | a[j + 8] = x0r + x2r; |
| 126 | a[j + 9] = x0i + x2i; |
| 127 | x0r -= x2r; |
| 128 | x0i -= x2i; |
| 129 | a[j + 12] = -wk2i * x0r - wk2r * x0i; |
| 130 | a[j + 13] = -wk2i * x0i + wk2r * x0r; |
| 131 | x0r = x1r - x3i; |
| 132 | x0i = x1i + x3r; |
| 133 | a[j + 10] = wk1r * x0r - wk1i * x0i; |
| 134 | a[j + 11] = wk1r * x0i + wk1i * x0r; |
| 135 | x0r = x1r + x3i; |
| 136 | x0i = x1i - x3r; |
| 137 | a[j + 14] = wk3r * x0r - wk3i * x0i; |
| 138 | a[j + 15] = wk3r * x0i + wk3i * x0r; |
| 139 | } |
| 140 | } |
| 141 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 142 | static void cftmdl_128_C(float* a) { |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 143 | const int l = 8; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 144 | const int n = 128; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 145 | const int m = 32; |
| 146 | int j0, j1, j2, j3, k, k1, k2, m2; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 147 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 148 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 149 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 150 | for (j0 = 0; j0 < l; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 151 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 152 | j2 = j0 + 16; |
| 153 | j3 = j0 + 24; |
| 154 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 155 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 156 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 157 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 158 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 159 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 160 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 161 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 162 | a[j0 + 0] = x0r + x2r; |
| 163 | a[j0 + 1] = x0i + x2i; |
| 164 | a[j2 + 0] = x0r - x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 165 | a[j2 + 1] = x0i - x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 166 | a[j1 + 0] = x1r - x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 167 | a[j1 + 1] = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 168 | a[j3 + 0] = x1r + x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 169 | a[j3 + 1] = x1i - x3r; |
| 170 | } |
| 171 | wk1r = rdft_w[2]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 172 | for (j0 = m; j0 < l + m; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 173 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 174 | j2 = j0 + 16; |
| 175 | j3 = j0 + 24; |
| 176 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 177 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 178 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 179 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 180 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 181 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 182 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 183 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 184 | a[j0 + 0] = x0r + x2r; |
| 185 | a[j0 + 1] = x0i + x2i; |
| 186 | a[j2 + 0] = x2i - x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 187 | a[j2 + 1] = x0r - x2r; |
| 188 | x0r = x1r - x3i; |
| 189 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 190 | a[j1 + 0] = wk1r * (x0r - x0i); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 191 | a[j1 + 1] = wk1r * (x0r + x0i); |
| 192 | x0r = x3i + x1r; |
| 193 | x0i = x3r - x1i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 194 | a[j3 + 0] = wk1r * (x0i - x0r); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 195 | a[j3 + 1] = wk1r * (x0i + x0r); |
| 196 | } |
| 197 | k1 = 0; |
| 198 | m2 = 2 * m; |
| 199 | for (k = m2; k < n; k += m2) { |
| 200 | k1 += 2; |
| 201 | k2 = 2 * k1; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 202 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 203 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 204 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 205 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 206 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 207 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 208 | for (j0 = k; j0 < l + k; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 209 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 210 | j2 = j0 + 16; |
| 211 | j3 = j0 + 24; |
| 212 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 213 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 214 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 215 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 216 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 217 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 218 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 219 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 220 | a[j0 + 0] = x0r + x2r; |
| 221 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 222 | x0r -= x2r; |
| 223 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 224 | a[j2 + 0] = wk2r * x0r - wk2i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 225 | a[j2 + 1] = wk2r * x0i + wk2i * x0r; |
| 226 | x0r = x1r - x3i; |
| 227 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 228 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 229 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 230 | x0r = x1r + x3i; |
| 231 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 232 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 233 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 234 | } |
| 235 | wk1r = rdft_w[k2 + 2]; |
| 236 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 237 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 238 | wk3i = rdft_wk3ri_second[k1 + 1]; |
| 239 | for (j0 = k + m; j0 < l + (k + m); j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 240 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 241 | j2 = j0 + 16; |
| 242 | j3 = j0 + 24; |
| 243 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 244 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 245 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 246 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 247 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 248 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 249 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 250 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 251 | a[j0 + 0] = x0r + x2r; |
| 252 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 253 | x0r -= x2r; |
| 254 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 255 | a[j2 + 0] = -wk2i * x0r - wk2r * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 256 | a[j2 + 1] = -wk2i * x0i + wk2r * x0r; |
| 257 | x0r = x1r - x3i; |
| 258 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 259 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 260 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 261 | x0r = x1r + x3i; |
| 262 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 263 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 264 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 265 | } |
| 266 | } |
| 267 | } |
| 268 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 269 | static void rftfsub_128_C(float* a) { |
| 270 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 271 | int j1, j2, k1, k2; |
| 272 | float wkr, wki, xr, xi, yr, yi; |
| 273 | |
| 274 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 275 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 276 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 277 | wkr = 0.5f - c[k1]; |
| 278 | wki = c[j1]; |
| 279 | xr = a[j2 + 0] - a[k2 + 0]; |
| 280 | xi = a[j2 + 1] + a[k2 + 1]; |
| 281 | yr = wkr * xr - wki * xi; |
| 282 | yi = wkr * xi + wki * xr; |
| 283 | a[j2 + 0] -= yr; |
| 284 | a[j2 + 1] -= yi; |
| 285 | a[k2 + 0] += yr; |
| 286 | a[k2 + 1] -= yi; |
| 287 | } |
| 288 | } |
| 289 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 290 | static void rftbsub_128_C(float* a) { |
| 291 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 292 | int j1, j2, k1, k2; |
| 293 | float wkr, wki, xr, xi, yr, yi; |
| 294 | |
| 295 | a[1] = -a[1]; |
| 296 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 297 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 298 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 299 | wkr = 0.5f - c[k1]; |
| 300 | wki = c[j1]; |
| 301 | xr = a[j2 + 0] - a[k2 + 0]; |
| 302 | xi = a[j2 + 1] + a[k2 + 1]; |
| 303 | yr = wkr * xr + wki * xi; |
| 304 | yi = wkr * xi - wki * xr; |
| 305 | a[j2 + 0] = a[j2 + 0] - yr; |
| 306 | a[j2 + 1] = yi - a[j2 + 1]; |
| 307 | a[k2 + 0] = yr + a[k2 + 0]; |
| 308 | a[k2 + 1] = yi - a[k2 + 1]; |
| 309 | } |
| 310 | a[65] = -a[65]; |
| 311 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 312 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 313 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 314 | } // namespace |
| 315 | |
| 316 | OouraFft::OouraFft() { |
| 317 | #if defined(WEBRTC_ARCH_X86_FAMILY) |
| 318 | use_sse2_ = (WebRtc_GetCPUInfo(kSSE2) != 0); |
| 319 | #else |
| 320 | use_sse2_ = false; |
| 321 | #endif |
| 322 | } |
| 323 | |
| 324 | OouraFft::~OouraFft() = default; |
| 325 | |
| 326 | void OouraFft::Fft(float* a) const { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 327 | float xi; |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 328 | bitrv2_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 329 | cftfsub_128(a); |
| 330 | rftfsub_128(a); |
| 331 | xi = a[0] - a[1]; |
| 332 | a[0] += a[1]; |
| 333 | a[1] = xi; |
| 334 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 335 | void OouraFft::InverseFft(float* a) const { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 336 | a[1] = 0.5f * (a[0] - a[1]); |
| 337 | a[0] -= a[1]; |
| 338 | rftbsub_128(a); |
cd@webrtc.org | 85b4a1b | 2012-04-10 21:25:17 +0000 | [diff] [blame] | 339 | bitrv2_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 340 | cftbsub_128(a); |
| 341 | } |
| 342 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 343 | void OouraFft::cft1st_128(float* a) const { |
| 344 | #if defined(MIPS_FPU_LE) |
| 345 | cft1st_128_mips(a); |
| 346 | #elif defined(WEBRTC_HAS_NEON) |
| 347 | cft1st_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 348 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 349 | if (use_sse2_) { |
| 350 | cft1st_128_SSE2(a); |
| 351 | } else { |
| 352 | cft1st_128_C(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 353 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 354 | #else |
| 355 | cft1st_128_C(a); |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame] | 356 | #endif |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 357 | } |
| 358 | void OouraFft::cftmdl_128(float* a) const { |
andrew@webrtc.org | c0907ef | 2014-02-21 00:13:31 +0000 | [diff] [blame] | 359 | #if defined(MIPS_FPU_LE) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 360 | cftmdl_128_mips(a); |
| 361 | #elif defined(WEBRTC_HAS_NEON) |
| 362 | cftmdl_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 363 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 364 | if (use_sse2_) { |
| 365 | cftmdl_128_SSE2(a); |
| 366 | } else { |
| 367 | cftmdl_128_C(a); |
| 368 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 369 | #else |
| 370 | cftmdl_128_C(a); |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 371 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 372 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 373 | void OouraFft::rftfsub_128(float* a) const { |
| 374 | #if defined(MIPS_FPU_LE) |
| 375 | rftfsub_128_mips(a); |
| 376 | #elif defined(WEBRTC_HAS_NEON) |
| 377 | rftfsub_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 378 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 379 | if (use_sse2_) { |
| 380 | rftfsub_128_SSE2(a); |
| 381 | } else { |
| 382 | rftfsub_128_C(a); |
| 383 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 384 | #else |
| 385 | rftfsub_128_C(a); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 386 | #endif |
| 387 | } |
| 388 | |
| 389 | void OouraFft::rftbsub_128(float* a) const { |
| 390 | #if defined(MIPS_FPU_LE) |
| 391 | rftbsub_128_mips(a); |
| 392 | #elif defined(WEBRTC_HAS_NEON) |
| 393 | rftbsub_128_neon(a); |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 394 | #elif defined(WEBRTC_ARCH_X86_FAMILY) |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 395 | if (use_sse2_) { |
| 396 | rftbsub_128_SSE2(a); |
| 397 | } else { |
| 398 | rftbsub_128_C(a); |
| 399 | } |
Gordana.Cmiljanovic | 11f72b1 | 2016-10-27 23:44:09 -0700 | [diff] [blame] | 400 | #else |
| 401 | rftbsub_128_C(a); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 402 | #endif |
| 403 | } |
| 404 | |
| 405 | void OouraFft::cftbsub_128(float* a) const { |
| 406 | int j, j1, j2, j3, l; |
| 407 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 408 | |
| 409 | cft1st_128(a); |
| 410 | cftmdl_128(a); |
| 411 | l = 32; |
| 412 | |
| 413 | for (j = 0; j < l; j += 2) { |
| 414 | j1 = j + l; |
| 415 | j2 = j1 + l; |
| 416 | j3 = j2 + l; |
| 417 | x0r = a[j] + a[j1]; |
| 418 | x0i = -a[j + 1] - a[j1 + 1]; |
| 419 | x1r = a[j] - a[j1]; |
| 420 | x1i = -a[j + 1] + a[j1 + 1]; |
| 421 | x2r = a[j2] + a[j3]; |
| 422 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 423 | x3r = a[j2] - a[j3]; |
| 424 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 425 | a[j] = x0r + x2r; |
| 426 | a[j + 1] = x0i - x2i; |
| 427 | a[j2] = x0r - x2r; |
| 428 | a[j2 + 1] = x0i + x2i; |
| 429 | a[j1] = x1r - x3i; |
| 430 | a[j1 + 1] = x1i - x3r; |
| 431 | a[j3] = x1r + x3i; |
| 432 | a[j3 + 1] = x1i + x3r; |
| 433 | } |
| 434 | } |
| 435 | |
| 436 | void OouraFft::cftfsub_128(float* a) const { |
| 437 | int j, j1, j2, j3, l; |
| 438 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 439 | |
| 440 | cft1st_128(a); |
| 441 | cftmdl_128(a); |
| 442 | l = 32; |
| 443 | for (j = 0; j < l; j += 2) { |
| 444 | j1 = j + l; |
| 445 | j2 = j1 + l; |
| 446 | j3 = j2 + l; |
| 447 | x0r = a[j] + a[j1]; |
| 448 | x0i = a[j + 1] + a[j1 + 1]; |
| 449 | x1r = a[j] - a[j1]; |
| 450 | x1i = a[j + 1] - a[j1 + 1]; |
| 451 | x2r = a[j2] + a[j3]; |
| 452 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 453 | x3r = a[j2] - a[j3]; |
| 454 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 455 | a[j] = x0r + x2r; |
| 456 | a[j + 1] = x0i + x2i; |
| 457 | a[j2] = x0r - x2r; |
| 458 | a[j2 + 1] = x0i - x2i; |
| 459 | a[j1] = x1r - x3i; |
| 460 | a[j1 + 1] = x1i + x3r; |
| 461 | a[j3] = x1r + x3i; |
| 462 | a[j3 + 1] = x1i - x3r; |
| 463 | } |
| 464 | } |
| 465 | |
| 466 | void OouraFft::bitrv2_128(float* a) const { |
| 467 | /* |
| 468 | Following things have been attempted but are no faster: |
| 469 | (a) Storing the swap indexes in a LUT (index calculations are done |
| 470 | for 'free' while waiting on memory/L1). |
| 471 | (b) Consolidate the load/store of two consecutive floats by a 64 bit |
| 472 | integer (execution is memory/L1 bound). |
| 473 | (c) Do a mix of floats and 64 bit integer to maximize register |
| 474 | utilization (execution is memory/L1 bound). |
| 475 | (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5). |
| 476 | (e) Hard-coding of the offsets to completely eliminates index |
| 477 | calculations. |
| 478 | */ |
| 479 | |
| 480 | unsigned int j, j1, k, k1; |
| 481 | float xr, xi, yr, yi; |
| 482 | |
| 483 | const int ip[4] = {0, 64, 32, 96}; |
| 484 | for (k = 0; k < 4; k++) { |
| 485 | for (j = 0; j < k; j++) { |
| 486 | j1 = 2 * j + ip[k]; |
| 487 | k1 = 2 * k + ip[j]; |
| 488 | xr = a[j1 + 0]; |
| 489 | xi = a[j1 + 1]; |
| 490 | yr = a[k1 + 0]; |
| 491 | yi = a[k1 + 1]; |
| 492 | a[j1 + 0] = yr; |
| 493 | a[j1 + 1] = yi; |
| 494 | a[k1 + 0] = xr; |
| 495 | a[k1 + 1] = xi; |
| 496 | j1 += 8; |
| 497 | k1 += 16; |
| 498 | xr = a[j1 + 0]; |
| 499 | xi = a[j1 + 1]; |
| 500 | yr = a[k1 + 0]; |
| 501 | yi = a[k1 + 1]; |
| 502 | a[j1 + 0] = yr; |
| 503 | a[j1 + 1] = yi; |
| 504 | a[k1 + 0] = xr; |
| 505 | a[k1 + 1] = xi; |
| 506 | j1 += 8; |
| 507 | k1 -= 8; |
| 508 | xr = a[j1 + 0]; |
| 509 | xi = a[j1 + 1]; |
| 510 | yr = a[k1 + 0]; |
| 511 | yi = a[k1 + 1]; |
| 512 | a[j1 + 0] = yr; |
| 513 | a[j1 + 1] = yi; |
| 514 | a[k1 + 0] = xr; |
| 515 | a[k1 + 1] = xi; |
| 516 | j1 += 8; |
| 517 | k1 += 16; |
| 518 | xr = a[j1 + 0]; |
| 519 | xi = a[j1 + 1]; |
| 520 | yr = a[k1 + 0]; |
| 521 | yi = a[k1 + 1]; |
| 522 | a[j1 + 0] = yr; |
| 523 | a[j1 + 1] = yi; |
| 524 | a[k1 + 0] = xr; |
| 525 | a[k1 + 1] = xi; |
| 526 | } |
| 527 | j1 = 2 * k + 8 + ip[k]; |
| 528 | k1 = j1 + 8; |
| 529 | xr = a[j1 + 0]; |
| 530 | xi = a[j1 + 1]; |
| 531 | yr = a[k1 + 0]; |
| 532 | yi = a[k1 + 1]; |
| 533 | a[j1 + 0] = yr; |
| 534 | a[j1 + 1] = yi; |
| 535 | a[k1 + 0] = xr; |
| 536 | a[k1 + 1] = xi; |
| 537 | } |
| 538 | } |
| 539 | |
| 540 | } // namespace webrtc |