niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 1 | /* |
| 2 | * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html |
| 3 | * Copyright Takuya OOURA, 1996-2001 |
| 4 | * |
| 5 | * You may use, copy, modify and distribute this code for any purpose (include |
| 6 | * commercial use) and without fee. Please refer to this package when you modify |
| 7 | * this code. |
| 8 | * |
| 9 | * Changes by the WebRTC authors: |
| 10 | * - Trivial type modifications. |
| 11 | * - Minimal code subset to do rdft of length 128. |
| 12 | * - Optimizations because of known length. |
| 13 | * |
| 14 | * All changes are covered by the WebRTC license and IP grant: |
| 15 | * Use of this source code is governed by a BSD-style license |
| 16 | * that can be found in the LICENSE file in the root of the source |
| 17 | * tree. An additional intellectual property rights grant can be found |
| 18 | * in the file PATENTS. All contributing project authors may |
| 19 | * be found in the AUTHORS file in the root of the source tree. |
| 20 | */ |
| 21 | |
pbos@webrtc.org | 7fad4b8 | 2013-05-28 08:11:59 +0000 | [diff] [blame] | 22 | #include "webrtc/modules/audio_processing/aec/aec_rdft.h" |
ajm@google.com | ce7c2a2 | 2011-08-04 01:50:00 +0000 | [diff] [blame] | 23 | |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 24 | #include <math.h> |
| 25 | |
Henrik Kjellander | 98f5351 | 2015-10-28 18:17:40 +0100 | [diff] [blame^] | 26 | #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" |
pbos@webrtc.org | 7fad4b8 | 2013-05-28 08:11:59 +0000 | [diff] [blame] | 27 | #include "webrtc/typedefs.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 28 | |
andrew@webrtc.org | a1ad844 | 2014-08-18 19:02:51 +0000 | [diff] [blame] | 29 | // These tables used to be computed at run-time. For example, refer to: |
| 30 | // https://code.google.com/p/webrtc/source/browse/trunk/webrtc/modules/audio_processing/aec/aec_rdft.c?r=6564 |
| 31 | // to see the initialization code. |
// Precomputed weights shared by the 128-point transform routines below.
// Entries [0, 32) are read two at a time (index, index + 1) by cft1st_128_C
// and cftmdl_128_C. Entries [32, 64) are read through the pointer
// rdft_w + 32 by rftfsub_128_C and rftbsub_128_C.
const float rdft_w[64] = {
    /* [ 0] */ 1.0000000000f, 0.0000000000f,
    /* [ 2] */ 0.7071067691f, 0.7071067691f,
    /* [ 4] */ 0.9238795638f, 0.3826834559f,
    /* [ 6] */ 0.3826834559f, 0.9238795638f,
    /* [ 8] */ 0.9807852507f, 0.1950903237f,
    /* [10] */ 0.5555702448f, 0.8314695954f,
    /* [12] */ 0.8314695954f, 0.5555702448f,
    /* [14] */ 0.1950903237f, 0.9807852507f,
    /* [16] */ 0.9951847196f, 0.0980171412f,
    /* [18] */ 0.6343933344f, 0.7730104327f,
    /* [20] */ 0.8819212914f, 0.4713967443f,
    /* [22] */ 0.2902846634f, 0.9569403529f,
    /* [24] */ 0.9569403529f, 0.2902846634f,
    /* [26] */ 0.4713967443f, 0.8819212914f,
    /* [28] */ 0.7730104327f, 0.6343933344f,
    /* [30] */ 0.0980171412f, 0.9951847196f,
    /* [32] */ 0.7071067691f, 0.4993977249f, 0.4975923598f, 0.4945882559f,
    /* [36] */ 0.4903926253f, 0.4850156307f, 0.4784701765f, 0.4707720280f,
    /* [40] */ 0.4619397819f, 0.4519946277f, 0.4409606457f, 0.4288643003f,
    /* [44] */ 0.4157347977f, 0.4016037583f, 0.3865052164f, 0.3704755902f,
    /* [48] */ 0.3535533845f, 0.3357794881f, 0.3171966672f, 0.2978496552f,
    /* [52] */ 0.2777851224f, 0.2570513785f, 0.2356983721f, 0.2137775421f,
    /* [56] */ 0.1913417280f, 0.1684449315f, 0.1451423317f, 0.1214900985f,
    /* [60] */ 0.0975451618f, 0.0733652338f, 0.0490085706f, 0.0245338380f,
};
// Third-weight pairs for the first half of each group. cft1st_128_C and
// cftmdl_128_C read entry [k1] as wk3r and entry [k1 + 1] as wk3i.
const float rdft_wk3ri_first[16] = {
    /* [ 0] */ 1.000000000f, 0.000000000f,
    /* [ 2] */ 0.382683456f, 0.923879564f,
    /* [ 4] */ 0.831469536f, 0.555570245f,
    /* [ 6] */ -0.195090353f, 0.980785251f,
    /* [ 8] */ 0.956940353f, 0.290284693f,
    /* [10] */ 0.098017156f, 0.995184720f,
    /* [12] */ 0.634393334f, 0.773010492f,
    /* [14] */ -0.471396863f, 0.881921172f,
};
// Third-weight pairs for the second half of each group. cft1st_128_C and
// cftmdl_128_C read entry [k1] as wk3r and entry [k1 + 1] as wk3i.
const float rdft_wk3ri_second[16] = {
    /* [ 0] */ -0.707106769f, 0.707106769f,
    /* [ 2] */ -0.923879564f, -0.382683456f,
    /* [ 4] */ -0.980785251f, 0.195090353f,
    /* [ 6] */ -0.555570245f, -0.831469536f,
    /* [ 8] */ -0.881921172f, 0.471396863f,
    /* [10] */ -0.773010492f, -0.634393334f,
    /* [12] */ -0.995184720f, -0.098017156f,
    /* [14] */ -0.290284693f, -0.956940353f,
};
| 62 | ALIGN16_BEG const float ALIGN16_END rdft_wk1r[32] = { |
| 63 | 1.000000000f, 1.000000000f, 0.707106769f, 0.707106769f, |
| 64 | 0.923879564f, 0.923879564f, 0.382683456f, 0.382683456f, |
| 65 | 0.980785251f, 0.980785251f, 0.555570245f, 0.555570245f, |
| 66 | 0.831469595f, 0.831469595f, 0.195090324f, 0.195090324f, |
| 67 | 0.995184720f, 0.995184720f, 0.634393334f, 0.634393334f, |
| 68 | 0.881921291f, 0.881921291f, 0.290284663f, 0.290284663f, |
| 69 | 0.956940353f, 0.956940353f, 0.471396744f, 0.471396744f, |
| 70 | 0.773010433f, 0.773010433f, 0.098017141f, 0.098017141f, |
| 71 | }; |
| 72 | ALIGN16_BEG const float ALIGN16_END rdft_wk2r[32] = { |
| 73 | 1.000000000f, 1.000000000f, -0.000000000f, -0.000000000f, |
| 74 | 0.707106769f, 0.707106769f, -0.707106769f, -0.707106769f, |
| 75 | 0.923879564f, 0.923879564f, -0.382683456f, -0.382683456f, |
| 76 | 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, |
| 77 | 0.980785251f, 0.980785251f, -0.195090324f, -0.195090324f, |
| 78 | 0.555570245f, 0.555570245f, -0.831469595f, -0.831469595f, |
| 79 | 0.831469595f, 0.831469595f, -0.555570245f, -0.555570245f, |
| 80 | 0.195090324f, 0.195090324f, -0.980785251f, -0.980785251f, |
| 81 | }; |
| 82 | ALIGN16_BEG const float ALIGN16_END rdft_wk3r[32] = { |
| 83 | 1.000000000f, 1.000000000f, -0.707106769f, -0.707106769f, |
| 84 | 0.382683456f, 0.382683456f, -0.923879564f, -0.923879564f, |
| 85 | 0.831469536f, 0.831469536f, -0.980785251f, -0.980785251f, |
| 86 | -0.195090353f, -0.195090353f, -0.555570245f, -0.555570245f, |
| 87 | 0.956940353f, 0.956940353f, -0.881921172f, -0.881921172f, |
| 88 | 0.098017156f, 0.098017156f, -0.773010492f, -0.773010492f, |
| 89 | 0.634393334f, 0.634393334f, -0.995184720f, -0.995184720f, |
| 90 | -0.471396863f, -0.471396863f, -0.290284693f, -0.290284693f, |
| 91 | }; |
| 92 | ALIGN16_BEG const float ALIGN16_END rdft_wk1i[32] = { |
| 93 | -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, |
| 94 | -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, |
| 95 | -0.195090324f, 0.195090324f, -0.831469595f, 0.831469595f, |
| 96 | -0.555570245f, 0.555570245f, -0.980785251f, 0.980785251f, |
| 97 | -0.098017141f, 0.098017141f, -0.773010433f, 0.773010433f, |
| 98 | -0.471396744f, 0.471396744f, -0.956940353f, 0.956940353f, |
| 99 | -0.290284663f, 0.290284663f, -0.881921291f, 0.881921291f, |
| 100 | -0.634393334f, 0.634393334f, -0.995184720f, 0.995184720f, |
| 101 | }; |
| 102 | ALIGN16_BEG const float ALIGN16_END rdft_wk2i[32] = { |
| 103 | -0.000000000f, 0.000000000f, -1.000000000f, 1.000000000f, |
| 104 | -0.707106769f, 0.707106769f, -0.707106769f, 0.707106769f, |
| 105 | -0.382683456f, 0.382683456f, -0.923879564f, 0.923879564f, |
| 106 | -0.923879564f, 0.923879564f, -0.382683456f, 0.382683456f, |
| 107 | -0.195090324f, 0.195090324f, -0.980785251f, 0.980785251f, |
| 108 | -0.831469595f, 0.831469595f, -0.555570245f, 0.555570245f, |
| 109 | -0.555570245f, 0.555570245f, -0.831469595f, 0.831469595f, |
| 110 | -0.980785251f, 0.980785251f, -0.195090324f, 0.195090324f, |
| 111 | }; |
| 112 | ALIGN16_BEG const float ALIGN16_END rdft_wk3i[32] = { |
| 113 | -0.000000000f, 0.000000000f, -0.707106769f, 0.707106769f, |
| 114 | -0.923879564f, 0.923879564f, 0.382683456f, -0.382683456f, |
| 115 | -0.555570245f, 0.555570245f, -0.195090353f, 0.195090353f, |
| 116 | -0.980785251f, 0.980785251f, 0.831469536f, -0.831469536f, |
| 117 | -0.290284693f, 0.290284693f, -0.471396863f, 0.471396863f, |
| 118 | -0.995184720f, 0.995184720f, 0.634393334f, -0.634393334f, |
| 119 | -0.773010492f, 0.773010492f, 0.098017156f, -0.098017156f, |
| 120 | -0.881921172f, 0.881921172f, 0.956940353f, -0.956940353f, |
| 121 | }; |
| 122 | ALIGN16_BEG const float ALIGN16_END cftmdl_wk1r[4] = { |
| 123 | 0.707106769f, 0.707106769f, 0.707106769f, -0.707106769f, |
| 124 | }; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 125 | |
// Exchanges the two-float (re, im) pair that starts at a[p] with the pair
// that starts at a[q]. The two pairs must not overlap.
static void bitrv2_swap_pair(float* a, unsigned int p, unsigned int q) {
  const float re = a[p + 0];
  const float im = a[p + 1];
  a[p + 0] = a[q + 0];
  a[p + 1] = a[q + 1];
  a[q + 0] = re;
  a[q + 1] = im;
}

// Portable C implementation of the in-place reordering step of the 128-point
// transform. |a| holds 128 floats viewed as 64 interleaved (re, im) pairs;
// pairs are exchanged so that each pair ends up at the position whose 6-bit
// pair index is the bit-reversal of its original index. Pairs whose index is
// its own reversal are left untouched, so calling the function twice restores
// the original order.
static void bitrv2_128_C(float* a) {
  /*
    Variants that were tried and turned out to be no faster:
    (a) Keeping the swap indexes in a look-up table (the index arithmetic is
        effectively free while waiting on memory/L1).
    (b) Moving two consecutive floats at once through a 64 bit integer
        (execution is memory/L1 bound).
    (c) Mixing floats and 64 bit integers to maximize register utilization
        (execution is memory/L1 bound).
    (d) Replacing ip[i] by ((k<<31)>>25) + ((k >> 1)<<5).
    (e) Hard-coding the offsets to eliminate index calculations completely.
  */
  static const int ip[4] = {0, 64, 32, 96};
  unsigned int row, col;

  for (row = 0; row < 4; row++) {
    unsigned int lo, hi;

    // Off-diagonal exchanges: four pair swaps per (col, row) combination.
    for (col = 0; col < row; col++) {
      lo = 2 * col + ip[row];
      hi = 2 * row + ip[col];
      bitrv2_swap_pair(a, lo, hi);
      bitrv2_swap_pair(a, lo + 8, hi + 16);
      bitrv2_swap_pair(a, lo + 16, hi + 8);
      bitrv2_swap_pair(a, lo + 24, hi + 24);
    }

    // Diagonal exchange: one pair swap per row.
    lo = 2 * row + 8 + ip[row];
    hi = lo + 8;
    bitrv2_swap_pair(a, lo, hi);
  }
}
| 199 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 200 | static void cft1st_128_C(float* a) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 201 | const int n = 128; |
| 202 | int j, k1, k2; |
| 203 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 204 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 205 | |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 206 | // The processing of the first set of elements was simplified in C to avoid |
| 207 | // some operations (multiplication by zero or one, addition of two elements |
| 208 | // multiplied by the same weight, ...). |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 209 | x0r = a[0] + a[2]; |
| 210 | x0i = a[1] + a[3]; |
| 211 | x1r = a[0] - a[2]; |
| 212 | x1i = a[1] - a[3]; |
| 213 | x2r = a[4] + a[6]; |
| 214 | x2i = a[5] + a[7]; |
| 215 | x3r = a[4] - a[6]; |
| 216 | x3i = a[5] - a[7]; |
| 217 | a[0] = x0r + x2r; |
| 218 | a[1] = x0i + x2i; |
| 219 | a[4] = x0r - x2r; |
| 220 | a[5] = x0i - x2i; |
| 221 | a[2] = x1r - x3i; |
| 222 | a[3] = x1i + x3r; |
| 223 | a[6] = x1r + x3i; |
| 224 | a[7] = x1i - x3r; |
| 225 | wk1r = rdft_w[2]; |
| 226 | x0r = a[8] + a[10]; |
| 227 | x0i = a[9] + a[11]; |
| 228 | x1r = a[8] - a[10]; |
| 229 | x1i = a[9] - a[11]; |
| 230 | x2r = a[12] + a[14]; |
| 231 | x2i = a[13] + a[15]; |
| 232 | x3r = a[12] - a[14]; |
| 233 | x3i = a[13] - a[15]; |
| 234 | a[8] = x0r + x2r; |
| 235 | a[9] = x0i + x2i; |
| 236 | a[12] = x2i - x0i; |
| 237 | a[13] = x0r - x2r; |
| 238 | x0r = x1r - x3i; |
| 239 | x0i = x1i + x3r; |
| 240 | a[10] = wk1r * (x0r - x0i); |
| 241 | a[11] = wk1r * (x0r + x0i); |
| 242 | x0r = x3i + x1r; |
| 243 | x0i = x3r - x1i; |
| 244 | a[14] = wk1r * (x0i - x0r); |
| 245 | a[15] = wk1r * (x0i + x0r); |
| 246 | k1 = 0; |
| 247 | for (j = 16; j < n; j += 16) { |
| 248 | k1 += 2; |
| 249 | k2 = 2 * k1; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 250 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 251 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 252 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 253 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 254 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 255 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 256 | x0r = a[j + 0] + a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 257 | x0i = a[j + 1] + a[j + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 258 | x1r = a[j + 0] - a[j + 2]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 259 | x1i = a[j + 1] - a[j + 3]; |
| 260 | x2r = a[j + 4] + a[j + 6]; |
| 261 | x2i = a[j + 5] + a[j + 7]; |
| 262 | x3r = a[j + 4] - a[j + 6]; |
| 263 | x3i = a[j + 5] - a[j + 7]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 264 | a[j + 0] = x0r + x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 265 | a[j + 1] = x0i + x2i; |
| 266 | x0r -= x2r; |
| 267 | x0i -= x2i; |
| 268 | a[j + 4] = wk2r * x0r - wk2i * x0i; |
| 269 | a[j + 5] = wk2r * x0i + wk2i * x0r; |
| 270 | x0r = x1r - x3i; |
| 271 | x0i = x1i + x3r; |
| 272 | a[j + 2] = wk1r * x0r - wk1i * x0i; |
| 273 | a[j + 3] = wk1r * x0i + wk1i * x0r; |
| 274 | x0r = x1r + x3i; |
| 275 | x0i = x1i - x3r; |
| 276 | a[j + 6] = wk3r * x0r - wk3i * x0i; |
| 277 | a[j + 7] = wk3r * x0i + wk3i * x0r; |
| 278 | wk1r = rdft_w[k2 + 2]; |
| 279 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 280 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 281 | wk3i = rdft_wk3ri_second[k1 + 1]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 282 | x0r = a[j + 8] + a[j + 10]; |
| 283 | x0i = a[j + 9] + a[j + 11]; |
| 284 | x1r = a[j + 8] - a[j + 10]; |
| 285 | x1i = a[j + 9] - a[j + 11]; |
| 286 | x2r = a[j + 12] + a[j + 14]; |
| 287 | x2i = a[j + 13] + a[j + 15]; |
| 288 | x3r = a[j + 12] - a[j + 14]; |
| 289 | x3i = a[j + 13] - a[j + 15]; |
| 290 | a[j + 8] = x0r + x2r; |
| 291 | a[j + 9] = x0i + x2i; |
| 292 | x0r -= x2r; |
| 293 | x0i -= x2i; |
| 294 | a[j + 12] = -wk2i * x0r - wk2r * x0i; |
| 295 | a[j + 13] = -wk2i * x0i + wk2r * x0r; |
| 296 | x0r = x1r - x3i; |
| 297 | x0i = x1i + x3r; |
| 298 | a[j + 10] = wk1r * x0r - wk1i * x0i; |
| 299 | a[j + 11] = wk1r * x0i + wk1i * x0r; |
| 300 | x0r = x1r + x3i; |
| 301 | x0i = x1i - x3r; |
| 302 | a[j + 14] = wk3r * x0r - wk3i * x0i; |
| 303 | a[j + 15] = wk3r * x0i + wk3i * x0r; |
| 304 | } |
| 305 | } |
| 306 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 307 | static void cftmdl_128_C(float* a) { |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 308 | const int l = 8; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 309 | const int n = 128; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 310 | const int m = 32; |
| 311 | int j0, j1, j2, j3, k, k1, k2, m2; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 312 | float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; |
| 313 | float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; |
| 314 | |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 315 | for (j0 = 0; j0 < l; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 316 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 317 | j2 = j0 + 16; |
| 318 | j3 = j0 + 24; |
| 319 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 320 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 321 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 322 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 323 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 324 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 325 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 326 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 327 | a[j0 + 0] = x0r + x2r; |
| 328 | a[j0 + 1] = x0i + x2i; |
| 329 | a[j2 + 0] = x0r - x2r; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 330 | a[j2 + 1] = x0i - x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 331 | a[j1 + 0] = x1r - x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 332 | a[j1 + 1] = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 333 | a[j3 + 0] = x1r + x3i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 334 | a[j3 + 1] = x1i - x3r; |
| 335 | } |
| 336 | wk1r = rdft_w[2]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 337 | for (j0 = m; j0 < l + m; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 338 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 339 | j2 = j0 + 16; |
| 340 | j3 = j0 + 24; |
| 341 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 342 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 343 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 344 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 345 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 346 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 347 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 348 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 349 | a[j0 + 0] = x0r + x2r; |
| 350 | a[j0 + 1] = x0i + x2i; |
| 351 | a[j2 + 0] = x2i - x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 352 | a[j2 + 1] = x0r - x2r; |
| 353 | x0r = x1r - x3i; |
| 354 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 355 | a[j1 + 0] = wk1r * (x0r - x0i); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 356 | a[j1 + 1] = wk1r * (x0r + x0i); |
| 357 | x0r = x3i + x1r; |
| 358 | x0i = x3r - x1i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 359 | a[j3 + 0] = wk1r * (x0i - x0r); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 360 | a[j3 + 1] = wk1r * (x0i + x0r); |
| 361 | } |
| 362 | k1 = 0; |
| 363 | m2 = 2 * m; |
| 364 | for (k = m2; k < n; k += m2) { |
| 365 | k1 += 2; |
| 366 | k2 = 2 * k1; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 367 | wk2r = rdft_w[k1 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 368 | wk2i = rdft_w[k1 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 369 | wk1r = rdft_w[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 370 | wk1i = rdft_w[k2 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 371 | wk3r = rdft_wk3ri_first[k1 + 0]; |
| 372 | wk3i = rdft_wk3ri_first[k1 + 1]; |
| 373 | for (j0 = k; j0 < l + k; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 374 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 375 | j2 = j0 + 16; |
| 376 | j3 = j0 + 24; |
| 377 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 378 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 379 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 380 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 381 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 382 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 383 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 384 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 385 | a[j0 + 0] = x0r + x2r; |
| 386 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 387 | x0r -= x2r; |
| 388 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 389 | a[j2 + 0] = wk2r * x0r - wk2i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 390 | a[j2 + 1] = wk2r * x0i + wk2i * x0r; |
| 391 | x0r = x1r - x3i; |
| 392 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 393 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 394 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 395 | x0r = x1r + x3i; |
| 396 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 397 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 398 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 399 | } |
| 400 | wk1r = rdft_w[k2 + 2]; |
| 401 | wk1i = rdft_w[k2 + 3]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 402 | wk3r = rdft_wk3ri_second[k1 + 0]; |
| 403 | wk3i = rdft_wk3ri_second[k1 + 1]; |
| 404 | for (j0 = k + m; j0 < l + (k + m); j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 405 | j1 = j0 + 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 406 | j2 = j0 + 16; |
| 407 | j3 = j0 + 24; |
| 408 | x0r = a[j0 + 0] + a[j1 + 0]; |
| 409 | x0i = a[j0 + 1] + a[j1 + 1]; |
| 410 | x1r = a[j0 + 0] - a[j1 + 0]; |
| 411 | x1i = a[j0 + 1] - a[j1 + 1]; |
| 412 | x2r = a[j2 + 0] + a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 413 | x2i = a[j2 + 1] + a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 414 | x3r = a[j2 + 0] - a[j3 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 415 | x3i = a[j2 + 1] - a[j3 + 1]; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 416 | a[j0 + 0] = x0r + x2r; |
| 417 | a[j0 + 1] = x0i + x2i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 418 | x0r -= x2r; |
| 419 | x0i -= x2i; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 420 | a[j2 + 0] = -wk2i * x0r - wk2r * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 421 | a[j2 + 1] = -wk2i * x0i + wk2r * x0r; |
| 422 | x0r = x1r - x3i; |
| 423 | x0i = x1i + x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 424 | a[j1 + 0] = wk1r * x0r - wk1i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 425 | a[j1 + 1] = wk1r * x0i + wk1i * x0r; |
| 426 | x0r = x1r + x3i; |
| 427 | x0i = x1i - x3r; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 428 | a[j3 + 0] = wk3r * x0r - wk3i * x0i; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 429 | a[j3 + 1] = wk3r * x0i + wk3i * x0r; |
| 430 | } |
| 431 | } |
| 432 | } |
| 433 | |
// Portable C implementation of the forward complex butterfly stage for the
// 128-float buffer |a| (interleaved re/im pairs, updated in place). Runs the
// currently selected cft1st_128 and cftmdl_128 passes and then a final
// unweighted 4-point butterfly across the four 32-float quarters of |a|.
// The cft1st_128 / cftmdl_128 pointers are assigned in aec_rdft_init(), which
// therefore has to run before this function is used.
static void cftfsub_128_C(float* a) {
  const int quarter = 32;
  int i;

  cft1st_128(a);
  cftmdl_128(a);

  for (i = 0; i < quarter; i += 2) {
    float* const p0 = a + i;
    float* const p1 = p0 + quarter;
    float* const p2 = p1 + quarter;
    float* const p3 = p2 + quarter;
    const float s0r = p0[0] + p1[0];
    const float s0i = p0[1] + p1[1];
    const float d0r = p0[0] - p1[0];
    const float d0i = p0[1] - p1[1];
    const float s1r = p2[0] + p3[0];
    const float s1i = p2[1] + p3[1];
    const float d1r = p2[0] - p3[0];
    const float d1i = p2[1] - p3[1];
    p0[0] = s0r + s1r;
    p0[1] = s0i + s1i;
    p2[0] = s0r - s1r;
    p2[1] = s0i - s1i;
    p1[0] = d0r - d1i;
    p1[1] = d0i + d1r;
    p3[0] = d0r + d1i;
    p3[1] = d0i - d1r;
  }
}
| 463 | |
// Portable C implementation of the backward complex butterfly stage for the
// 128-float buffer |a| (interleaved re/im pairs, updated in place). Runs the
// currently selected cft1st_128 and cftmdl_128 passes and then a final
// unweighted 4-point butterfly across the four 32-float quarters of |a|.
// Compared with cftfsub_128_C, the imaginary parts of the first two quarters
// enter negated and the signs of the imaginary outputs differ.
// The cft1st_128 / cftmdl_128 pointers are assigned in aec_rdft_init(), which
// therefore has to run before this function is used.
static void cftbsub_128_C(float* a) {
  const int quarter = 32;
  int i;

  cft1st_128(a);
  cftmdl_128(a);

  for (i = 0; i < quarter; i += 2) {
    float* const p0 = a + i;
    float* const p1 = p0 + quarter;
    float* const p2 = p1 + quarter;
    float* const p3 = p2 + quarter;
    const float s0r = p0[0] + p1[0];
    const float s0i = -p0[1] - p1[1];
    const float d0r = p0[0] - p1[0];
    const float d0i = -p0[1] + p1[1];
    const float s1r = p2[0] + p3[0];
    const float s1i = p2[1] + p3[1];
    const float d1r = p2[0] - p3[0];
    const float d1i = p2[1] - p3[1];
    p0[0] = s0r + s1r;
    p0[1] = s0i - s1i;
    p2[0] = s0r - s1r;
    p2[1] = s0i + s1i;
    p1[0] = d0r - d1i;
    p1[1] = d0i - d1r;
    p3[0] = d0r + d1i;
    p3[1] = d0i + d1r;
  }
}
| 494 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 495 | static void rftfsub_128_C(float* a) { |
| 496 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 497 | int j1, j2, k1, k2; |
| 498 | float wkr, wki, xr, xi, yr, yi; |
| 499 | |
| 500 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 501 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 502 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 503 | wkr = 0.5f - c[k1]; |
| 504 | wki = c[j1]; |
| 505 | xr = a[j2 + 0] - a[k2 + 0]; |
| 506 | xi = a[j2 + 1] + a[k2 + 1]; |
| 507 | yr = wkr * xr - wki * xi; |
| 508 | yi = wkr * xi + wki * xr; |
| 509 | a[j2 + 0] -= yr; |
| 510 | a[j2 + 1] -= yi; |
| 511 | a[k2 + 0] += yr; |
| 512 | a[k2 + 1] -= yi; |
| 513 | } |
| 514 | } |
| 515 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 516 | static void rftbsub_128_C(float* a) { |
| 517 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 518 | int j1, j2, k1, k2; |
| 519 | float wkr, wki, xr, xi, yr, yi; |
| 520 | |
| 521 | a[1] = -a[1]; |
| 522 | for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { |
| 523 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 524 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 525 | wkr = 0.5f - c[k1]; |
| 526 | wki = c[j1]; |
| 527 | xr = a[j2 + 0] - a[k2 + 0]; |
| 528 | xi = a[j2 + 1] + a[k2 + 1]; |
| 529 | yr = wkr * xr + wki * xi; |
| 530 | yi = wkr * xi - wki * xr; |
| 531 | a[j2 + 0] = a[j2 + 0] - yr; |
| 532 | a[j2 + 1] = yi - a[j2 + 1]; |
| 533 | a[k2 + 0] = yr + a[k2 + 0]; |
| 534 | a[k2 + 1] = yi - a[k2 + 1]; |
| 535 | } |
| 536 | a[65] = -a[65]; |
| 537 | } |
| 538 | |
// Forward real DFT of length 128, computed in place on the 128 floats in |a|.
// Runs the selected reordering, complex-butterfly and real post-processing
// routines, then replaces a[0] with a[0] + a[1] and a[1] with a[0] - a[1]
// (both computed from the values before the replacement).
// aec_rdft_init() must have been called first: it is what assigns the
// bitrv2_128 / cftfsub_128 / rftfsub_128 function pointers used here.
void aec_rdft_forward_128(float* a) {
  float first, second;

  bitrv2_128(a);
  cftfsub_128(a);
  rftfsub_128(a);

  first = a[0];
  second = a[1];
  a[0] = first + second;
  a[1] = first - second;
}
| 548 | |
// Inverse real DFT of length 128, computed in place on the 128 floats in |a|.
// First sets a[1] to half of (a[0] - a[1]) and subtracts that new a[1] from
// a[0], then runs the selected real pre-processing, reordering and backward
// complex-butterfly routines. No scaling is applied in this function beyond
// the 0.5 factor on the first two entries.
// aec_rdft_init() must have been called first: it is what assigns the
// rftbsub_128 / bitrv2_128 / cftbsub_128 function pointers used here.
void aec_rdft_inverse_128(float* a) {
  const float half_diff = 0.5f * (a[0] - a[1]);

  a[1] = half_diff;
  a[0] = a[0] - half_diff;

  rftbsub_128(a);
  bitrv2_128(a);
  cftbsub_128(a);
}
| 556 | |
| 557 | // code path selection |
pbos@webrtc.org | f832a6d | 2014-12-18 09:56:09 +0000 | [diff] [blame] | 558 | RftSub128 cft1st_128; |
| 559 | RftSub128 cftmdl_128; |
| 560 | RftSub128 rftfsub_128; |
| 561 | RftSub128 rftbsub_128; |
| 562 | RftSub128 cftfsub_128; |
| 563 | RftSub128 cftbsub_128; |
| 564 | RftSub128 bitrv2_128; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 565 | |
| 566 | void aec_rdft_init(void) { |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 567 | cft1st_128 = cft1st_128_C; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 568 | cftmdl_128 = cftmdl_128_C; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 569 | rftfsub_128 = rftfsub_128_C; |
| 570 | rftbsub_128 = rftbsub_128_C; |
andrew@webrtc.org | c0907ef | 2014-02-21 00:13:31 +0000 | [diff] [blame] | 571 | cftfsub_128 = cftfsub_128_C; |
| 572 | cftbsub_128 = cftbsub_128_C; |
| 573 | bitrv2_128 = bitrv2_128_C; |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame] | 574 | #if defined(WEBRTC_ARCH_X86_FAMILY) |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 575 | if (WebRtc_GetCPUInfo(kSSE2)) { |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 576 | aec_rdft_init_sse2(); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 577 | } |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame] | 578 | #endif |
andrew@webrtc.org | c0907ef | 2014-02-21 00:13:31 +0000 | [diff] [blame] | 579 | #if defined(MIPS_FPU_LE) |
| 580 | aec_rdft_init_mips(); |
| 581 | #endif |
Andrew MacDonald | cb7f8ce | 2015-05-19 22:20:17 -0700 | [diff] [blame] | 582 | #if defined(WEBRTC_HAS_NEON) |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 583 | aec_rdft_init_neon(); |
Andrew MacDonald | cb7f8ce | 2015-05-19 22:20:17 -0700 | [diff] [blame] | 584 | #elif defined(WEBRTC_DETECT_NEON) |
bjornv@webrtc.org | 976c0f3 | 2015-03-02 16:25:08 +0000 | [diff] [blame] | 585 | if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { |
| 586 | aec_rdft_init_neon(); |
| 587 | } |
bjornv@webrtc.org | cd9b90a | 2014-06-30 12:05:18 +0000 | [diff] [blame] | 588 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 589 | } |