niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
| 2 | * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html |
| 3 | * Copyright Takuya OOURA, 1996-2001 |
| 4 | * |
| 5 | * You may use, copy, modify and distribute this code for any purpose (include |
| 6 | * commercial use) and without fee. Please refer to this package when you modify |
| 7 | * this code. |
| 8 | * |
| 9 | * Changes by the WebRTC authors: |
| 10 | * - Trivial type modifications. |
| 11 | * - Minimal code subset to do rdft of length 128. |
| 12 | * - Optimizations because of known length. |
| 13 | * |
| 14 | * All changes are covered by the WebRTC license and IP grant: |
| 15 | * Use of this source code is governed by a BSD-style license |
| 16 | * that can be found in the LICENSE file in the root of the source |
| 17 | * tree. An additional intellectual property rights grant can be found |
| 18 | * in the file PATENTS. All contributing project authors may |
| 19 | * be found in the AUTHORS file in the root of the source tree. |
| 20 | */ |
| 21 | |
ajm@google.com | ce7c2a2 | 2011-08-04 01:50:00 +0000 | [diff] [blame] | 22 | #include "aec_rdft.h" |
| 23 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 24 | #include <math.h> |
| 25 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 26 | #include "system_wrappers/interface/cpu_features_wrapper.h" |
ajm@google.com | ce7c2a2 | 2011-08-04 01:50:00 +0000 | [diff] [blame] | 27 | #include "typedefs.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 28 | |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 29 | // constants shared by all paths (C, SSE2). |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 30 | float rdft_w[64]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 31 | // constants used by the C path. |
| 32 | float rdft_wk3ri_first[32]; |
| 33 | float rdft_wk3ri_second[32]; |
| 34 | // constants used by SSE2 but initialized in C path. |
| 35 | ALIGN16_BEG float ALIGN16_END rdft_wk1r[32]; |
| 36 | ALIGN16_BEG float ALIGN16_END rdft_wk2r[32]; |
| 37 | ALIGN16_BEG float ALIGN16_END rdft_wk3r[32]; |
| 38 | ALIGN16_BEG float ALIGN16_END rdft_wk1i[32]; |
| 39 | ALIGN16_BEG float ALIGN16_END rdft_wk2i[32]; |
| 40 | ALIGN16_BEG float ALIGN16_END rdft_wk3i[32]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 41 | ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 42 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 43 | static int ip[16]; |
| 44 | |
| 45 | static void bitrv2_32or128(int n, int *ip, float *a) { |
| 46 | // n is 32 or 128 |
| 47 | int j, j1, k, k1, m, m2; |
| 48 | float xr, xi, yr, yi; |
| 49 | |
| 50 | ip[0] = 0; |
| 51 | { |
| 52 | int l = n; |
| 53 | m = 1; |
| 54 | while ((m << 3) < l) { |
| 55 | l >>= 1; |
| 56 | for (j = 0; j < m; j++) { |
| 57 | ip[m + j] = ip[j] + l; |
| 58 | } |
| 59 | m <<= 1; |
| 60 | } |
| 61 | } |
| 62 | m2 = 2 * m; |
| 63 | for (k = 0; k < m; k++) { |
| 64 | for (j = 0; j < k; j++) { |
| 65 | j1 = 2 * j + ip[k]; |
| 66 | k1 = 2 * k + ip[j]; |
| 67 | xr = a[j1]; |
| 68 | xi = a[j1 + 1]; |
| 69 | yr = a[k1]; |
| 70 | yi = a[k1 + 1]; |
| 71 | a[j1] = yr; |
| 72 | a[j1 + 1] = yi; |
| 73 | a[k1] = xr; |
| 74 | a[k1 + 1] = xi; |
| 75 | j1 += m2; |
| 76 | k1 += 2 * m2; |
| 77 | xr = a[j1]; |
| 78 | xi = a[j1 + 1]; |
| 79 | yr = a[k1]; |
| 80 | yi = a[k1 + 1]; |
| 81 | a[j1] = yr; |
| 82 | a[j1 + 1] = yi; |
| 83 | a[k1] = xr; |
| 84 | a[k1 + 1] = xi; |
| 85 | j1 += m2; |
| 86 | k1 -= m2; |
| 87 | xr = a[j1]; |
| 88 | xi = a[j1 + 1]; |
| 89 | yr = a[k1]; |
| 90 | yi = a[k1 + 1]; |
| 91 | a[j1] = yr; |
| 92 | a[j1 + 1] = yi; |
| 93 | a[k1] = xr; |
| 94 | a[k1 + 1] = xi; |
| 95 | j1 += m2; |
| 96 | k1 += 2 * m2; |
| 97 | xr = a[j1]; |
| 98 | xi = a[j1 + 1]; |
| 99 | yr = a[k1]; |
| 100 | yi = a[k1 + 1]; |
| 101 | a[j1] = yr; |
| 102 | a[j1 + 1] = yi; |
| 103 | a[k1] = xr; |
| 104 | a[k1 + 1] = xi; |
| 105 | } |
| 106 | j1 = 2 * k + m2 + ip[k]; |
| 107 | k1 = j1 + m2; |
| 108 | xr = a[j1]; |
| 109 | xi = a[j1 + 1]; |
| 110 | yr = a[k1]; |
| 111 | yi = a[k1 + 1]; |
| 112 | a[j1] = yr; |
| 113 | a[j1 + 1] = yi; |
| 114 | a[k1] = xr; |
| 115 | a[k1 + 1] = xi; |
| 116 | } |
| 117 | } |
| 118 | |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 119 | static void makewt_32(void) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 120 | const int nw = 32; |
| 121 | int j, nwh; |
| 122 | float delta, x, y; |
| 123 | |
| 124 | ip[0] = nw; |
| 125 | ip[1] = 1; |
| 126 | nwh = nw >> 1; |
| 127 | delta = atanf(1.0f) / nwh; |
| 128 | rdft_w[0] = 1; |
| 129 | rdft_w[1] = 0; |
| 130 | rdft_w[nwh] = cosf(delta * nwh); |
| 131 | rdft_w[nwh + 1] = rdft_w[nwh]; |
| 132 | for (j = 2; j < nwh; j += 2) { |
| 133 | x = cosf(delta * j); |
| 134 | y = sinf(delta * j); |
| 135 | rdft_w[j] = x; |
| 136 | rdft_w[j + 1] = y; |
| 137 | rdft_w[nw - j] = y; |
| 138 | rdft_w[nw - j + 1] = x; |
| 139 | } |
| 140 | bitrv2_32or128(nw, ip + 2, rdft_w); |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 141 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 142 | // pre-calculate constants used by cft1st_128 and cftmdl_128... |
| 143 | cftmdl_wk1r[0] = rdft_w[2]; |
| 144 | cftmdl_wk1r[1] = rdft_w[2]; |
| 145 | cftmdl_wk1r[2] = rdft_w[2]; |
| 146 | cftmdl_wk1r[3] = -rdft_w[2]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 147 | { |
| 148 | int k1; |
| 149 | |
| 150 | for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) { |
| 151 | const int k2 = 2 * k1; |
| 152 | const float wk2r = rdft_w[k1 + 0]; |
| 153 | const float wk2i = rdft_w[k1 + 1]; |
| 154 | float wk1r, wk1i; |
| 155 | // ... scalar version. |
| 156 | wk1r = rdft_w[k2 + 0]; |
| 157 | wk1i = rdft_w[k2 + 1]; |
| 158 | rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i; |
| 159 | rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i; |
| 160 | wk1r = rdft_w[k2 + 2]; |
| 161 | wk1i = rdft_w[k2 + 3]; |
| 162 | rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i; |
| 163 | rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i; |
| 164 | // ... vector version. |
| 165 | rdft_wk1r[k2 + 0] = rdft_w[k2 + 0]; |
| 166 | rdft_wk1r[k2 + 1] = rdft_w[k2 + 0]; |
| 167 | rdft_wk1r[k2 + 2] = rdft_w[k2 + 2]; |
| 168 | rdft_wk1r[k2 + 3] = rdft_w[k2 + 2]; |
| 169 | rdft_wk2r[k2 + 0] = rdft_w[k1 + 0]; |
| 170 | rdft_wk2r[k2 + 1] = rdft_w[k1 + 0]; |
| 171 | rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1]; |
| 172 | rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1]; |
| 173 | rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0]; |
| 174 | rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0]; |
| 175 | rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0]; |
| 176 | rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0]; |
| 177 | rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1]; |
| 178 | rdft_wk1i[k2 + 1] = rdft_w[k2 + 1]; |
| 179 | rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3]; |
| 180 | rdft_wk1i[k2 + 3] = rdft_w[k2 + 3]; |
| 181 | rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1]; |
| 182 | rdft_wk2i[k2 + 1] = rdft_w[k1 + 1]; |
| 183 | rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0]; |
| 184 | rdft_wk2i[k2 + 3] = rdft_w[k1 + 0]; |
| 185 | rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1]; |
| 186 | rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1]; |
| 187 | rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1]; |
| 188 | rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1]; |
| 189 | } |
| 190 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 191 | } |
| 192 | |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 193 | static void makect_32(void) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 194 | float *c = rdft_w + 32; |
| 195 | const int nc = 32; |
| 196 | int j, nch; |
| 197 | float delta; |
| 198 | |
| 199 | ip[1] = nc; |
| 200 | nch = nc >> 1; |
| 201 | delta = atanf(1.0f) / nch; |
| 202 | c[0] = cosf(delta * nch); |
| 203 | c[nch] = 0.5f * c[0]; |
| 204 | for (j = 1; j < nch; j++) { |
| 205 | c[j] = 0.5f * cosf(delta * j); |
| 206 | c[nc - j] = 0.5f * sinf(delta * j); |
| 207 | } |
| 208 | } |
| 209 | |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 210 | static void cft1st_128_C(float *a) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 211 | const int n = 128; |
| 212 | int j, k1, k2; |
| 213 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 214 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 215 | |
| 216 | x0r = a[0] + a[2]; |
| 217 | x0i = a[1] + a[3]; |
| 218 | x1r = a[0] - a[2]; |
| 219 | x1i = a[1] - a[3]; |
| 220 | x2r = a[4] + a[6]; |
| 221 | x2i = a[5] + a[7]; |
| 222 | x3r = a[4] - a[6]; |
| 223 | x3i = a[5] - a[7]; |
| 224 | a[0] = x0r + x2r; |
| 225 | a[1] = x0i + x2i; |
| 226 | a[4] = x0r - x2r; |
| 227 | a[5] = x0i - x2i; |
| 228 | a[2] = x1r - x3i; |
| 229 | a[3] = x1i + x3r; |
| 230 | a[6] = x1r + x3i; |
| 231 | a[7] = x1i - x3r; |
| 232 | wk1r = rdft_w[2]; |
| 233 | x0r = a[8] + a[10]; |
| 234 | x0i = a[9] + a[11]; |
| 235 | x1r = a[8] - a[10]; |
| 236 | x1i = a[9] - a[11]; |
| 237 | x2r = a[12] + a[14]; |
| 238 | x2i = a[13] + a[15]; |
| 239 | x3r = a[12] - a[14]; |
| 240 | x3i = a[13] - a[15]; |
| 241 | a[8] = x0r + x2r; |
| 242 | a[9] = x0i + x2i; |
| 243 | a[12] = x2i - x0i; |
| 244 | a[13] = x0r - x2r; |
| 245 | x0r = x1r - x3i; |
| 246 | x0i = x1i + x3r; |
| 247 | a[10] = wk1r * (x0r - x0i); |
| 248 | a[11] = wk1r * (x0r + x0i); |
| 249 | x0r = x3i + x1r; |
| 250 | x0i = x3r - x1i; |
| 251 | a[14] = wk1r * (x0i - x0r); |
| 252 | a[15] = wk1r * (x0i + x0r); |
| 253 | k1 = 0; |
| 254 | for (j = 16; j < n; j += 16) { |
| 255 | k1 += 2; |
| 256 | k2 = 2 * k1; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 257 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 258 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 259 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 260 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 261 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 262 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 263 | x0r = a[j + 0] + a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 264 | x0i = a[j + 1] + a[j + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 265 | x1r = a[j + 0] - a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 266 | x1i = a[j + 1] - a[j + 3]; |
| 267 | x2r = a[j + 4] + a[j + 6]; |
| 268 | x2i = a[j + 5] + a[j + 7]; |
| 269 | x3r = a[j + 4] - a[j + 6]; |
| 270 | x3i = a[j + 5] - a[j + 7]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 271 | a[j + 0] = x0r + x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 272 | a[j + 1] = x0i + x2i; |
| 273 | x0r -= x2r; |
| 274 | x0i -= x2i; |
| 275 | a[j + 4] = wk2r * x0r - wk2i * x0i; |
| 276 | a[j + 5] = wk2r * x0i + wk2i * x0r; |
| 277 | x0r = x1r - x3i; |
| 278 | x0i = x1i + x3r; |
| 279 | a[j + 2] = wk1r * x0r - wk1i * x0i; |
| 280 | a[j + 3] = wk1r * x0i + wk1i * x0r; |
| 281 | x0r = x1r + x3i; |
| 282 | x0i = x1i - x3r; |
| 283 | a[j + 6] = wk3r * x0r - wk3i * x0i; |
| 284 | a[j + 7] = wk3r * x0i + wk3i * x0r; |
| 285 | wk1r = rdft_w[k2 + 2]; |
| 286 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 287 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 288 | wk3i = rdft_wk3ri_second[k1 + 1]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 289 | x0r = a[j + 8] + a[j + 10]; |
| 290 | x0i = a[j + 9] + a[j + 11]; |
| 291 | x1r = a[j + 8] - a[j + 10]; |
| 292 | x1i = a[j + 9] - a[j + 11]; |
| 293 | x2r = a[j + 12] + a[j + 14]; |
| 294 | x2i = a[j + 13] + a[j + 15]; |
| 295 | x3r = a[j + 12] - a[j + 14]; |
| 296 | x3i = a[j + 13] - a[j + 15]; |
| 297 | a[j + 8] = x0r + x2r; |
| 298 | a[j + 9] = x0i + x2i; |
| 299 | x0r -= x2r; |
| 300 | x0i -= x2i; |
| 301 | a[j + 12] = -wk2i * x0r - wk2r * x0i; |
| 302 | a[j + 13] = -wk2i * x0i + wk2r * x0r; |
| 303 | x0r = x1r - x3i; |
| 304 | x0i = x1i + x3r; |
| 305 | a[j + 10] = wk1r * x0r - wk1i * x0i; |
| 306 | a[j + 11] = wk1r * x0i + wk1i * x0r; |
| 307 | x0r = x1r + x3i; |
| 308 | x0i = x1i - x3r; |
| 309 | a[j + 14] = wk3r * x0r - wk3i * x0i; |
| 310 | a[j + 15] = wk3r * x0i + wk3i * x0r; |
| 311 | } |
| 312 | } |
| 313 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 314 | static void cftmdl_128_C(float *a) { |
| 315 | const int l = 8; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 316 | const int n = 128; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 317 | const int m = 32; |
| 318 | int j0, j1, j2, j3, k, k1, k2, m2; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 319 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 320 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 321 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 322 | for (j0 = 0; j0 < l; j0 += 2) { |
| 323 | j1 = j0 + 8; |
| 324 | j2 = j0 + 16; |
| 325 | j3 = j0 + 24; |
| 326 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 327 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 328 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 329 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 330 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 331 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 332 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 333 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 334 | a[j0 + 0] = x0r + x2r; |
| 335 | a[j0 + 1] = x0i + x2i; |
| 336 | a[j2 + 0] = x0r - x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 337 | a[j2 + 1] = x0i - x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 338 | a[j1 + 0] = x1r - x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 339 | a[j1 + 1] = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 340 | a[j3 + 0] = x1r + x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 341 | a[j3 + 1] = x1i - x3r; |
| 342 | } |
| 343 | wk1r = rdft_w[2]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 344 | for (j0 = m; j0 < l + m; j0 += 2) { |
| 345 | j1 = j0 + 8; |
| 346 | j2 = j0 + 16; |
| 347 | j3 = j0 + 24; |
| 348 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 349 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 350 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 351 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 352 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 353 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 354 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 355 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 356 | a[j0 + 0] = x0r + x2r; |
| 357 | a[j0 + 1] = x0i + x2i; |
| 358 | a[j2 + 0] = x2i - x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 359 | a[j2 + 1] = x0r - x2r; |
| 360 | x0r = x1r - x3i; |
| 361 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 362 | a[j1 + 0] = wk1r * (x0r - x0i); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 363 | a[j1 + 1] = wk1r * (x0r + x0i); |
| 364 | x0r = x3i + x1r; |
| 365 | x0i = x3r - x1i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 366 | a[j3 + 0] = wk1r * (x0i - x0r); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 367 | a[j3 + 1] = wk1r * (x0i + x0r); |
| 368 | } |
| 369 | k1 = 0; |
| 370 | m2 = 2 * m; |
| 371 | for (k = m2; k < n; k += m2) { |
| 372 | k1 += 2; |
| 373 | k2 = 2 * k1; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 374 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 375 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 376 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 377 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 378 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 379 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 380 | for (j0 = k; j0 < l + k; j0 += 2) { |
| 381 | j1 = j0 + 8; |
| 382 | j2 = j0 + 16; |
| 383 | j3 = j0 + 24; |
| 384 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 385 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 386 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 387 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 388 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 389 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 390 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 391 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 392 | a[j0 + 0] = x0r + x2r; |
| 393 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 394 | x0r -= x2r; |
| 395 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 396 | a[j2 + 0] = wk2r * x0r - wk2i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 397 | a[j2 + 1] = wk2r * x0i + wk2i * x0r; |
| 398 | x0r = x1r - x3i; |
| 399 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 400 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 401 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 402 | x0r = x1r + x3i; |
| 403 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 404 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 405 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 406 | } |
| 407 | wk1r = rdft_w[k2 + 2]; |
| 408 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 409 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 410 | wk3i = rdft_wk3ri_second[k1 + 1]; |
| 411 | for (j0 = k + m; j0 < l + (k + m); j0 += 2) { |
| 412 | j1 = j0 + 8; |
| 413 | j2 = j0 + 16; |
| 414 | j3 = j0 + 24; |
| 415 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 416 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 417 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 418 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 419 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 420 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 421 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 422 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 423 | a[j0 + 0] = x0r + x2r; |
| 424 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 425 | x0r -= x2r; |
| 426 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 427 | a[j2 + 0] = -wk2i * x0r - wk2r * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 428 | a[j2 + 1] = -wk2i * x0i + wk2r * x0r; |
| 429 | x0r = x1r - x3i; |
| 430 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 431 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 432 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 433 | x0r = x1r + x3i; |
| 434 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 435 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 436 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 437 | } |
| 438 | } |
| 439 | } |
| 440 | |
| 441 | static void cftfsub_128(float *a) { |
| 442 | int j, j1, j2, j3, l; |
| 443 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 444 | |
| 445 | cft1st_128(a); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 446 | cftmdl_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 447 | l = 32; |
| 448 | for (j = 0; j < l; j += 2) { |
| 449 | j1 = j + l; |
| 450 | j2 = j1 + l; |
| 451 | j3 = j2 + l; |
| 452 | x0r = a[j] + a[j1]; |
| 453 | x0i = a[j + 1] + a[j1 + 1]; |
| 454 | x1r = a[j] - a[j1]; |
| 455 | x1i = a[j + 1] - a[j1 + 1]; |
| 456 | x2r = a[j2] + a[j3]; |
| 457 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 458 | x3r = a[j2] - a[j3]; |
| 459 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 460 | a[j] = x0r + x2r; |
| 461 | a[j + 1] = x0i + x2i; |
| 462 | a[j2] = x0r - x2r; |
| 463 | a[j2 + 1] = x0i - x2i; |
| 464 | a[j1] = x1r - x3i; |
| 465 | a[j1 + 1] = x1i + x3r; |
| 466 | a[j3] = x1r + x3i; |
| 467 | a[j3 + 1] = x1i - x3r; |
| 468 | } |
| 469 | } |
| 470 | |
| 471 | static void cftbsub_128(float *a) { |
| 472 | int j, j1, j2, j3, l; |
| 473 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 474 | |
| 475 | cft1st_128(a); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 476 | cftmdl_128(a); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 477 | l = 32; |
| 478 | |
| 479 | for (j = 0; j < l; j += 2) { |
| 480 | j1 = j + l; |
| 481 | j2 = j1 + l; |
| 482 | j3 = j2 + l; |
| 483 | x0r = a[j] + a[j1]; |
| 484 | x0i = -a[j + 1] - a[j1 + 1]; |
| 485 | x1r = a[j] - a[j1]; |
| 486 | x1i = -a[j + 1] + a[j1 + 1]; |
| 487 | x2r = a[j2] + a[j3]; |
| 488 | x2i = a[j2 + 1] + a[j3 + 1]; |
| 489 | x3r = a[j2] - a[j3]; |
| 490 | x3i = a[j2 + 1] - a[j3 + 1]; |
| 491 | a[j] = x0r + x2r; |
| 492 | a[j + 1] = x0i - x2i; |
| 493 | a[j2] = x0r - x2r; |
| 494 | a[j2 + 1] = x0i + x2i; |
| 495 | a[j1] = x1r - x3i; |
| 496 | a[j1 + 1] = x1i - x3r; |
| 497 | a[j3] = x1r + x3i; |
| 498 | a[j3 + 1] = x1i + x3r; |
| 499 | } |
| 500 | } |
| 501 | |
| 502 | static void rftfsub_128_C(float *a) { |
| 503 | const float *c = rdft_w + 32; |
| 504 | int j1, j2, k1, k2; |
| 505 | float wkr, wki, xr, xi, yr, yi; |
| 506 | |
| 507 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 508 | k2 = 128 - j2; |
| 509 | k1 = 32 - j1; |
| 510 | wkr = 0.5f - c[k1]; |
| 511 | wki = c[j1]; |
| 512 | xr = a[j2 + 0] - a[k2 + 0]; |
| 513 | xi = a[j2 + 1] + a[k2 + 1]; |
| 514 | yr = wkr * xr - wki * xi; |
| 515 | yi = wkr * xi + wki * xr; |
| 516 | a[j2 + 0] -= yr; |
| 517 | a[j2 + 1] -= yi; |
| 518 | a[k2 + 0] += yr; |
| 519 | a[k2 + 1] -= yi; |
| 520 | } |
| 521 | } |
| 522 | |
| 523 | static void rftbsub_128_C(float *a) { |
| 524 | const float *c = rdft_w + 32; |
| 525 | int j1, j2, k1, k2; |
| 526 | float wkr, wki, xr, xi, yr, yi; |
| 527 | |
| 528 | a[1] = -a[1]; |
| 529 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 530 | k2 = 128 - j2; |
| 531 | k1 = 32 - j1; |
| 532 | wkr = 0.5f - c[k1]; |
| 533 | wki = c[j1]; |
| 534 | xr = a[j2 + 0] - a[k2 + 0]; |
| 535 | xi = a[j2 + 1] + a[k2 + 1]; |
| 536 | yr = wkr * xr + wki * xi; |
| 537 | yi = wkr * xi - wki * xr; |
| 538 | a[j2 + 0] = a[j2 + 0] - yr; |
| 539 | a[j2 + 1] = yi - a[j2 + 1]; |
| 540 | a[k2 + 0] = yr + a[k2 + 0]; |
| 541 | a[k2 + 1] = yi - a[k2 + 1]; |
| 542 | } |
| 543 | a[65] = -a[65]; |
| 544 | } |
| 545 | |
| 546 | void aec_rdft_forward_128(float *a) { |
| 547 | const int n = 128; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 548 | float xi; |
| 549 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 550 | bitrv2_32or128(n, ip + 2, a); |
| 551 | cftfsub_128(a); |
| 552 | rftfsub_128(a); |
| 553 | xi = a[0] - a[1]; |
| 554 | a[0] += a[1]; |
| 555 | a[1] = xi; |
| 556 | } |
| 557 | |
| 558 | void aec_rdft_inverse_128(float *a) { |
| 559 | const int n = 128; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 560 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 561 | a[1] = 0.5f * (a[0] - a[1]); |
| 562 | a[0] -= a[1]; |
| 563 | rftbsub_128(a); |
| 564 | bitrv2_32or128(n, ip + 2, a); |
| 565 | cftbsub_128(a); |
| 566 | } |
| 567 | |
| 568 | // code path selection |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 569 | rft_sub_128_t cft1st_128; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 570 | rft_sub_128_t cftmdl_128; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 571 | rft_sub_128_t rftfsub_128; |
| 572 | rft_sub_128_t rftbsub_128; |
| 573 | |
| 574 | void aec_rdft_init(void) { |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 575 | cft1st_128 = cft1st_128_C; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 576 | cftmdl_128 = cftmdl_128_C; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 577 | rftfsub_128 = rftfsub_128_C; |
| 578 | rftbsub_128 = rftbsub_128_C; |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame^] | 579 | #if defined(WEBRTC_ARCH_X86_FAMILY) |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 580 | if (WebRtc_GetCPUInfo(kSSE2)) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 581 | aec_rdft_init_sse2(); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 582 | } |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame^] | 583 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 584 | // init library constants. |
| 585 | makewt_32(); |
| 586 | makect_32(); |
| 587 | } |