/*
 *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
| 10 | |
Jiawei Ou | d3c642b | 2018-01-09 09:15:37 -0800 | [diff] [blame] | 11 | #include "modules/audio_processing/utility/ooura_fft.h" |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 12 | |
andrew@webrtc.org | c8d012f | 2012-01-13 19:43:09 +0000 | [diff] [blame] | 13 | #include <emmintrin.h> |
| 14 | |
Mirko Bonadei | 92ea95e | 2017-09-15 06:47:31 +0200 | [diff] [blame] | 15 | #include "modules/audio_processing/utility/ooura_fft_tables_common.h" |
| 16 | #include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h" |
Niels Möller | a12c42a | 2018-07-25 16:05:48 +0200 | [diff] [blame^] | 17 | #include "rtc_base/system/arch.h" |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 18 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 19 | namespace webrtc { |
| 20 | |
| 21 | #if defined(WEBRTC_ARCH_X86_FAMILY) |
| 22 | |
namespace {
// Compatibility shims for the SSE2 cast intrinsics.
//
// MSVC releases older than Visual Studio 2008 (_MSC_VER < 1500) do not
// provide _mm_castsi128_ps / _mm_castps_si128, so bit-preserving stand-ins
// are defined here for those compilers only. On every other toolchain this
// namespace is intentionally empty and the compiler's own intrinsics are used.
// TODO(andrew): move to a common file.
#if defined(_MSC_VER) && _MSC_VER < 1500
// Reinterprets the 128 bits of an integer vector as four packed floats.
// No value conversion takes place.
static __inline __m128 _mm_castsi128_ps(__m128i a) {
  return *reinterpret_cast<__m128*>(&a);
}
// Reinterprets the 128 bits of a packed-float vector as an integer vector.
// No value conversion takes place.
static __inline __m128i _mm_castps_si128(__m128 a) {
  return *reinterpret_cast<__m128i*>(&a);
}
#endif

}  // namespace
| 36 | |
| 37 | void cft1st_128_SSE2(float* a) { |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 38 | const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); |
| 39 | int j, k2; |
| 40 | |
| 41 | for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 42 | __m128 a00v = _mm_loadu_ps(&a[j + 0]); |
| 43 | __m128 a04v = _mm_loadu_ps(&a[j + 4]); |
| 44 | __m128 a08v = _mm_loadu_ps(&a[j + 8]); |
| 45 | __m128 a12v = _mm_loadu_ps(&a[j + 12]); |
| 46 | __m128 a01v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(1, 0, 1, 0)); |
| 47 | __m128 a23v = _mm_shuffle_ps(a00v, a08v, _MM_SHUFFLE(3, 2, 3, 2)); |
| 48 | __m128 a45v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(1, 0, 1, 0)); |
| 49 | __m128 a67v = _mm_shuffle_ps(a04v, a12v, _MM_SHUFFLE(3, 2, 3, 2)); |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 50 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 51 | const __m128 wk1rv = _mm_load_ps(&rdft_wk1r[k2]); |
| 52 | const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2]); |
| 53 | const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2]); |
| 54 | const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2]); |
| 55 | const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2]); |
| 56 | const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2]); |
| 57 | __m128 x0v = _mm_add_ps(a01v, a23v); |
| 58 | const __m128 x1v = _mm_sub_ps(a01v, a23v); |
| 59 | const __m128 x2v = _mm_add_ps(a45v, a67v); |
| 60 | const __m128 x3v = _mm_sub_ps(a45v, a67v); |
| 61 | __m128 x0w; |
| 62 | a01v = _mm_add_ps(x0v, x2v); |
| 63 | x0v = _mm_sub_ps(x0v, x2v); |
| 64 | x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 65 | { |
| 66 | const __m128 a45_0v = _mm_mul_ps(wk2rv, x0v); |
| 67 | const __m128 a45_1v = _mm_mul_ps(wk2iv, x0w); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 68 | a45v = _mm_add_ps(a45_0v, a45_1v); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 69 | } |
| 70 | { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 71 | __m128 a23_0v, a23_1v; |
| 72 | const __m128 x3w = _mm_shuffle_ps(x3v, x3v, _MM_SHUFFLE(2, 3, 0, 1)); |
| 73 | const __m128 x3s = _mm_mul_ps(mm_swap_sign, x3w); |
| 74 | x0v = _mm_add_ps(x1v, x3s); |
| 75 | x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); |
| 76 | a23_0v = _mm_mul_ps(wk1rv, x0v); |
| 77 | a23_1v = _mm_mul_ps(wk1iv, x0w); |
| 78 | a23v = _mm_add_ps(a23_0v, a23_1v); |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 79 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 80 | x0v = _mm_sub_ps(x1v, x3s); |
| 81 | x0w = _mm_shuffle_ps(x0v, x0v, _MM_SHUFFLE(2, 3, 0, 1)); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 82 | } |
| 83 | { |
| 84 | const __m128 a67_0v = _mm_mul_ps(wk3rv, x0v); |
| 85 | const __m128 a67_1v = _mm_mul_ps(wk3iv, x0w); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 86 | a67v = _mm_add_ps(a67_0v, a67_1v); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 87 | } |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 88 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 89 | a00v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(1, 0, 1, 0)); |
| 90 | a04v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(1, 0, 1, 0)); |
| 91 | a08v = _mm_shuffle_ps(a01v, a23v, _MM_SHUFFLE(3, 2, 3, 2)); |
| 92 | a12v = _mm_shuffle_ps(a45v, a67v, _MM_SHUFFLE(3, 2, 3, 2)); |
| 93 | _mm_storeu_ps(&a[j + 0], a00v); |
| 94 | _mm_storeu_ps(&a[j + 4], a04v); |
| 95 | _mm_storeu_ps(&a[j + 8], a08v); |
cduvivier@google.com | 0e07d82 | 2011-07-25 23:54:20 +0000 | [diff] [blame] | 96 | _mm_storeu_ps(&a[j + 12], a12v); |
| 97 | } |
| 98 | } |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 99 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 100 | void cftmdl_128_SSE2(float* a) { |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 101 | const int l = 8; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 102 | const __m128 mm_swap_sign = _mm_load_ps(k_swap_sign); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 103 | int j0; |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 104 | |
| 105 | __m128 wk1rv = _mm_load_ps(cftmdl_wk1r); |
| 106 | for (j0 = 0; j0 < l; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 107 | const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); |
| 108 | const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 109 | const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); |
| 110 | const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 111 | const __m128 a_00_32 = |
| 112 | _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), |
| 113 | _MM_SHUFFLE(1, 0, 1, 0)); |
| 114 | const __m128 a_08_40 = |
| 115 | _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), |
| 116 | _MM_SHUFFLE(1, 0, 1, 0)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 117 | __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); |
| 118 | const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 119 | |
| 120 | const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); |
| 121 | const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); |
| 122 | const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); |
| 123 | const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 124 | const __m128 a_16_48 = |
| 125 | _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), |
| 126 | _MM_SHUFFLE(1, 0, 1, 0)); |
| 127 | const __m128 a_24_56 = |
| 128 | _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), |
| 129 | _MM_SHUFFLE(1, 0, 1, 0)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 130 | const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); |
| 131 | const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 132 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 133 | const __m128 xx0 = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); |
| 134 | const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 135 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 136 | const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( |
| 137 | _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); |
| 138 | const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); |
| 139 | const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); |
| 140 | const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 141 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 142 | const __m128 yy0 = |
| 143 | _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(2, 2, 2, 2)); |
| 144 | const __m128 yy1 = |
| 145 | _mm_shuffle_ps(x1_x3_add, x1_x3_sub, _MM_SHUFFLE(3, 3, 3, 3)); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 146 | const __m128 yy2 = _mm_mul_ps(mm_swap_sign, yy1); |
| 147 | const __m128 yy3 = _mm_add_ps(yy0, yy2); |
| 148 | const __m128 yy4 = _mm_mul_ps(wk1rv, yy3); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 149 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 150 | _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx0)); |
| 151 | _mm_storel_epi64( |
| 152 | (__m128i*)&a[j0 + 32], |
| 153 | _mm_shuffle_epi32(_mm_castps_si128(xx0), _MM_SHUFFLE(3, 2, 3, 2))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 154 | |
| 155 | _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx1)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 156 | _mm_storel_epi64( |
| 157 | (__m128i*)&a[j0 + 48], |
| 158 | _mm_shuffle_epi32(_mm_castps_si128(xx1), _MM_SHUFFLE(2, 3, 2, 3))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 159 | a[j0 + 48] = -a[j0 + 48]; |
| 160 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 161 | _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(x1_x3_add)); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 162 | _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(x1_x3_sub)); |
| 163 | |
| 164 | _mm_storel_epi64((__m128i*)&a[j0 + 40], _mm_castps_si128(yy4)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 165 | _mm_storel_epi64( |
| 166 | (__m128i*)&a[j0 + 56], |
| 167 | _mm_shuffle_epi32(_mm_castps_si128(yy4), _MM_SHUFFLE(2, 3, 2, 3))); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 168 | } |
| 169 | |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 170 | { |
| 171 | int k = 64; |
| 172 | int k1 = 2; |
| 173 | int k2 = 2 * k1; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 174 | const __m128 wk2rv = _mm_load_ps(&rdft_wk2r[k2 + 0]); |
| 175 | const __m128 wk2iv = _mm_load_ps(&rdft_wk2i[k2 + 0]); |
| 176 | const __m128 wk1iv = _mm_load_ps(&rdft_wk1i[k2 + 0]); |
| 177 | const __m128 wk3rv = _mm_load_ps(&rdft_wk3r[k2 + 0]); |
| 178 | const __m128 wk3iv = _mm_load_ps(&rdft_wk3i[k2 + 0]); |
| 179 | wk1rv = _mm_load_ps(&rdft_wk1r[k2 + 0]); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 180 | for (j0 = k; j0 < l + k; j0 += 2) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 181 | const __m128i a_00 = _mm_loadl_epi64((__m128i*)&a[j0 + 0]); |
| 182 | const __m128i a_08 = _mm_loadl_epi64((__m128i*)&a[j0 + 8]); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 183 | const __m128i a_32 = _mm_loadl_epi64((__m128i*)&a[j0 + 32]); |
| 184 | const __m128i a_40 = _mm_loadl_epi64((__m128i*)&a[j0 + 40]); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 185 | const __m128 a_00_32 = |
| 186 | _mm_shuffle_ps(_mm_castsi128_ps(a_00), _mm_castsi128_ps(a_32), |
| 187 | _MM_SHUFFLE(1, 0, 1, 0)); |
| 188 | const __m128 a_08_40 = |
| 189 | _mm_shuffle_ps(_mm_castsi128_ps(a_08), _mm_castsi128_ps(a_40), |
| 190 | _MM_SHUFFLE(1, 0, 1, 0)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 191 | __m128 x0r0_0i0_0r1_x0i1 = _mm_add_ps(a_00_32, a_08_40); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 192 | const __m128 x1r0_1i0_1r1_x1i1 = _mm_sub_ps(a_00_32, a_08_40); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 193 | |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 194 | const __m128i a_16 = _mm_loadl_epi64((__m128i*)&a[j0 + 16]); |
| 195 | const __m128i a_24 = _mm_loadl_epi64((__m128i*)&a[j0 + 24]); |
| 196 | const __m128i a_48 = _mm_loadl_epi64((__m128i*)&a[j0 + 48]); |
| 197 | const __m128i a_56 = _mm_loadl_epi64((__m128i*)&a[j0 + 56]); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 198 | const __m128 a_16_48 = |
| 199 | _mm_shuffle_ps(_mm_castsi128_ps(a_16), _mm_castsi128_ps(a_48), |
| 200 | _MM_SHUFFLE(1, 0, 1, 0)); |
| 201 | const __m128 a_24_56 = |
| 202 | _mm_shuffle_ps(_mm_castsi128_ps(a_24), _mm_castsi128_ps(a_56), |
| 203 | _MM_SHUFFLE(1, 0, 1, 0)); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 204 | const __m128 x2r0_2i0_2r1_x2i1 = _mm_add_ps(a_16_48, a_24_56); |
| 205 | const __m128 x3r0_3i0_3r1_x3i1 = _mm_sub_ps(a_16_48, a_24_56); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 206 | |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 207 | const __m128 xx = _mm_add_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); |
| 208 | const __m128 xx1 = _mm_sub_ps(x0r0_0i0_0r1_x0i1, x2r0_2i0_2r1_x2i1); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 209 | const __m128 xx2 = _mm_mul_ps(xx1, wk2rv); |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 210 | const __m128 xx3 = _mm_mul_ps( |
| 211 | wk2iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(xx1), |
| 212 | _MM_SHUFFLE(2, 3, 0, 1)))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 213 | const __m128 xx4 = _mm_add_ps(xx2, xx3); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 214 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 215 | const __m128 x3i0_3r0_3i1_x3r1 = _mm_castsi128_ps(_mm_shuffle_epi32( |
| 216 | _mm_castps_si128(x3r0_3i0_3r1_x3i1), _MM_SHUFFLE(2, 3, 0, 1))); |
| 217 | const __m128 x3_swapped = _mm_mul_ps(mm_swap_sign, x3i0_3r0_3i1_x3r1); |
| 218 | const __m128 x1_x3_add = _mm_add_ps(x1r0_1i0_1r1_x1i1, x3_swapped); |
| 219 | const __m128 x1_x3_sub = _mm_sub_ps(x1r0_1i0_1r1_x1i1, x3_swapped); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 220 | |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 221 | const __m128 xx10 = _mm_mul_ps(x1_x3_add, wk1rv); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 222 | const __m128 xx11 = _mm_mul_ps( |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 223 | wk1iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_add), |
| 224 | _MM_SHUFFLE(2, 3, 0, 1)))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 225 | const __m128 xx12 = _mm_add_ps(xx10, xx11); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 226 | |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 227 | const __m128 xx20 = _mm_mul_ps(x1_x3_sub, wk3rv); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 228 | const __m128 xx21 = _mm_mul_ps( |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 229 | wk3iv, _mm_castsi128_ps(_mm_shuffle_epi32(_mm_castps_si128(x1_x3_sub), |
| 230 | _MM_SHUFFLE(2, 3, 0, 1)))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 231 | const __m128 xx22 = _mm_add_ps(xx20, xx21); |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 232 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 233 | _mm_storel_epi64((__m128i*)&a[j0 + 0], _mm_castps_si128(xx)); |
| 234 | _mm_storel_epi64( |
| 235 | (__m128i*)&a[j0 + 32], |
| 236 | _mm_shuffle_epi32(_mm_castps_si128(xx), _MM_SHUFFLE(3, 2, 3, 2))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 237 | |
| 238 | _mm_storel_epi64((__m128i*)&a[j0 + 16], _mm_castps_si128(xx4)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 239 | _mm_storel_epi64( |
| 240 | (__m128i*)&a[j0 + 48], |
| 241 | _mm_shuffle_epi32(_mm_castps_si128(xx4), _MM_SHUFFLE(3, 2, 3, 2))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 242 | |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 243 | _mm_storel_epi64((__m128i*)&a[j0 + 8], _mm_castps_si128(xx12)); |
| 244 | _mm_storel_epi64( |
| 245 | (__m128i*)&a[j0 + 40], |
| 246 | _mm_shuffle_epi32(_mm_castps_si128(xx12), _MM_SHUFFLE(3, 2, 3, 2))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 247 | |
| 248 | _mm_storel_epi64((__m128i*)&a[j0 + 24], _mm_castps_si128(xx22)); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 249 | _mm_storel_epi64( |
| 250 | (__m128i*)&a[j0 + 56], |
| 251 | _mm_shuffle_epi32(_mm_castps_si128(xx22), _MM_SHUFFLE(3, 2, 3, 2))); |
andrew@webrtc.org | 7b7c045 | 2011-09-16 22:51:57 +0000 | [diff] [blame] | 252 | } |
cduvivier@google.com | 288c869 | 2011-08-22 21:55:33 +0000 | [diff] [blame] | 253 | } |
| 254 | } |
| 255 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 256 | void rftfsub_128_SSE2(float* a) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 257 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 258 | int j1, j2, k1, k2; |
| 259 | float wkr, wki, xr, xi, yr, yi; |
| 260 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 261 | static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, |
| 262 | 0.5f}; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 263 | const __m128 mm_half = _mm_load_ps(k_half); |
| 264 | |
| 265 | // Vectorized code (four at once). |
| 266 | // Note: commented number are indexes for the first iteration of the loop. |
| 267 | for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { |
| 268 | // Load 'wk'. |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 269 | const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, |
| 270 | const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, |
| 271 | const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 272 | const __m128 wkr_ = |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 273 | _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, |
| 274 | const __m128 wki_ = c_j1; // 1, 2, 3, 4, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 275 | // Load and shuffle 'a'. |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 276 | const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, |
| 277 | const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 278 | const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, |
| 279 | const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 280 | const __m128 a_j2_p0 = _mm_shuffle_ps( |
| 281 | a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, |
| 282 | const __m128 a_j2_p1 = _mm_shuffle_ps( |
| 283 | a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, |
| 284 | const __m128 a_k2_p0 = _mm_shuffle_ps( |
| 285 | a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, |
| 286 | const __m128 a_k2_p1 = _mm_shuffle_ps( |
| 287 | a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 288 | // Calculate 'x'. |
| 289 | const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 290 | // 2-126, 4-124, 6-122, 8-120, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 291 | const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 292 | // 3-127, 5-125, 7-123, 9-121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 293 | // Calculate product into 'y'. |
| 294 | // yr = wkr * xr - wki * xi; |
| 295 | // yi = wkr * xi + wki * xr; |
| 296 | const __m128 a_ = _mm_mul_ps(wkr_, xr_); |
| 297 | const __m128 b_ = _mm_mul_ps(wki_, xi_); |
| 298 | const __m128 c_ = _mm_mul_ps(wkr_, xi_); |
| 299 | const __m128 d_ = _mm_mul_ps(wki_, xr_); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 300 | const __m128 yr_ = _mm_sub_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, |
| 301 | const __m128 yi_ = _mm_add_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, |
| 302 | // Update 'a'. |
| 303 | // a[j2 + 0] -= yr; |
| 304 | // a[j2 + 1] -= yi; |
| 305 | // a[k2 + 0] += yr; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 306 | // a[k2 + 1] -= yi; |
| 307 | const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, |
| 308 | const __m128 a_j2_p1n = _mm_sub_ps(a_j2_p1, yi_); // 3, 5, 7, 9, |
| 309 | const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, |
| 310 | const __m128 a_k2_p1n = _mm_sub_ps(a_k2_p1, yi_); // 127, 125, 123, 121, |
| 311 | // Shuffle in right order and store. |
| 312 | const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 313 | // 2, 3, 4, 5, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 314 | const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 315 | // 6, 7, 8, 9, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 316 | const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 317 | // 122, 123, 120, 121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 318 | const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 319 | // 126, 127, 124, 125, |
| 320 | const __m128 a_k2_0n = _mm_shuffle_ps( |
| 321 | a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, |
| 322 | const __m128 a_k2_4n = _mm_shuffle_ps( |
| 323 | a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, |
| 324 | _mm_storeu_ps(&a[0 + j2], a_j2_0n); |
| 325 | _mm_storeu_ps(&a[4 + j2], a_j2_4n); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 326 | _mm_storeu_ps(&a[122 - j2], a_k2_0n); |
| 327 | _mm_storeu_ps(&a[126 - j2], a_k2_4n); |
| 328 | } |
| 329 | // Scalar code for the remaining items. |
| 330 | for (; j2 < 64; j1 += 1, j2 += 2) { |
| 331 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 332 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 333 | wkr = 0.5f - c[k1]; |
| 334 | wki = c[j1]; |
| 335 | xr = a[j2 + 0] - a[k2 + 0]; |
| 336 | xi = a[j2 + 1] + a[k2 + 1]; |
| 337 | yr = wkr * xr - wki * xi; |
| 338 | yi = wkr * xi + wki * xr; |
| 339 | a[j2 + 0] -= yr; |
| 340 | a[j2 + 1] -= yi; |
| 341 | a[k2 + 0] += yr; |
| 342 | a[k2 + 1] -= yi; |
| 343 | } |
| 344 | } |
| 345 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 346 | void rftbsub_128_SSE2(float* a) { |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 347 | const float* c = rdft_w + 32; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 348 | int j1, j2, k1, k2; |
| 349 | float wkr, wki, xr, xi, yr, yi; |
| 350 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 351 | static const ALIGN16_BEG float ALIGN16_END k_half[4] = {0.5f, 0.5f, 0.5f, |
| 352 | 0.5f}; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 353 | const __m128 mm_half = _mm_load_ps(k_half); |
| 354 | |
| 355 | a[1] = -a[1]; |
| 356 | // Vectorized code (four at once). |
| 357 | // Note: commented number are indexes for the first iteration of the loop. |
| 358 | for (j1 = 1, j2 = 2; j2 + 7 < 64; j1 += 4, j2 += 8) { |
| 359 | // Load 'wk'. |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 360 | const __m128 c_j1 = _mm_loadu_ps(&c[j1]); // 1, 2, 3, 4, |
| 361 | const __m128 c_k1 = _mm_loadu_ps(&c[29 - j1]); // 28, 29, 30, 31, |
| 362 | const __m128 wkrt = _mm_sub_ps(mm_half, c_k1); // 28, 29, 30, 31, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 363 | const __m128 wkr_ = |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 364 | _mm_shuffle_ps(wkrt, wkrt, _MM_SHUFFLE(0, 1, 2, 3)); // 31, 30, 29, 28, |
| 365 | const __m128 wki_ = c_j1; // 1, 2, 3, 4, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 366 | // Load and shuffle 'a'. |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 367 | const __m128 a_j2_0 = _mm_loadu_ps(&a[0 + j2]); // 2, 3, 4, 5, |
| 368 | const __m128 a_j2_4 = _mm_loadu_ps(&a[4 + j2]); // 6, 7, 8, 9, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 369 | const __m128 a_k2_0 = _mm_loadu_ps(&a[122 - j2]); // 120, 121, 122, 123, |
| 370 | const __m128 a_k2_4 = _mm_loadu_ps(&a[126 - j2]); // 124, 125, 126, 127, |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 371 | const __m128 a_j2_p0 = _mm_shuffle_ps( |
| 372 | a_j2_0, a_j2_4, _MM_SHUFFLE(2, 0, 2, 0)); // 2, 4, 6, 8, |
| 373 | const __m128 a_j2_p1 = _mm_shuffle_ps( |
| 374 | a_j2_0, a_j2_4, _MM_SHUFFLE(3, 1, 3, 1)); // 3, 5, 7, 9, |
| 375 | const __m128 a_k2_p0 = _mm_shuffle_ps( |
| 376 | a_k2_4, a_k2_0, _MM_SHUFFLE(0, 2, 0, 2)); // 126, 124, 122, 120, |
| 377 | const __m128 a_k2_p1 = _mm_shuffle_ps( |
| 378 | a_k2_4, a_k2_0, _MM_SHUFFLE(1, 3, 1, 3)); // 127, 125, 123, 121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 379 | // Calculate 'x'. |
| 380 | const __m128 xr_ = _mm_sub_ps(a_j2_p0, a_k2_p0); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 381 | // 2-126, 4-124, 6-122, 8-120, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 382 | const __m128 xi_ = _mm_add_ps(a_j2_p1, a_k2_p1); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 383 | // 3-127, 5-125, 7-123, 9-121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 384 | // Calculate product into 'y'. |
| 385 | // yr = wkr * xr + wki * xi; |
| 386 | // yi = wkr * xi - wki * xr; |
| 387 | const __m128 a_ = _mm_mul_ps(wkr_, xr_); |
| 388 | const __m128 b_ = _mm_mul_ps(wki_, xi_); |
| 389 | const __m128 c_ = _mm_mul_ps(wkr_, xi_); |
| 390 | const __m128 d_ = _mm_mul_ps(wki_, xr_); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 391 | const __m128 yr_ = _mm_add_ps(a_, b_); // 2-126, 4-124, 6-122, 8-120, |
| 392 | const __m128 yi_ = _mm_sub_ps(c_, d_); // 3-127, 5-125, 7-123, 9-121, |
| 393 | // Update 'a'. |
| 394 | // a[j2 + 0] = a[j2 + 0] - yr; |
| 395 | // a[j2 + 1] = yi - a[j2 + 1]; |
| 396 | // a[k2 + 0] = yr + a[k2 + 0]; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 397 | // a[k2 + 1] = yi - a[k2 + 1]; |
| 398 | const __m128 a_j2_p0n = _mm_sub_ps(a_j2_p0, yr_); // 2, 4, 6, 8, |
| 399 | const __m128 a_j2_p1n = _mm_sub_ps(yi_, a_j2_p1); // 3, 5, 7, 9, |
| 400 | const __m128 a_k2_p0n = _mm_add_ps(a_k2_p0, yr_); // 126, 124, 122, 120, |
| 401 | const __m128 a_k2_p1n = _mm_sub_ps(yi_, a_k2_p1); // 127, 125, 123, 121, |
| 402 | // Shuffle in right order and store. |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 403 | const __m128 a_j2_0n = _mm_unpacklo_ps(a_j2_p0n, a_j2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 404 | // 2, 3, 4, 5, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 405 | const __m128 a_j2_4n = _mm_unpackhi_ps(a_j2_p0n, a_j2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 406 | // 6, 7, 8, 9, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 407 | const __m128 a_k2_0nt = _mm_unpackhi_ps(a_k2_p0n, a_k2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 408 | // 122, 123, 120, 121, |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 409 | const __m128 a_k2_4nt = _mm_unpacklo_ps(a_k2_p0n, a_k2_p1n); |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 410 | // 126, 127, 124, 125, |
| 411 | const __m128 a_k2_0n = _mm_shuffle_ps( |
| 412 | a_k2_0nt, a_k2_0nt, _MM_SHUFFLE(1, 0, 3, 2)); // 120, 121, 122, 123, |
| 413 | const __m128 a_k2_4n = _mm_shuffle_ps( |
| 414 | a_k2_4nt, a_k2_4nt, _MM_SHUFFLE(1, 0, 3, 2)); // 124, 125, 126, 127, |
| 415 | _mm_storeu_ps(&a[0 + j2], a_j2_0n); |
| 416 | _mm_storeu_ps(&a[4 + j2], a_j2_4n); |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 417 | _mm_storeu_ps(&a[122 - j2], a_k2_0n); |
| 418 | _mm_storeu_ps(&a[126 - j2], a_k2_4n); |
| 419 | } |
| 420 | // Scalar code for the remaining items. |
| 421 | for (; j2 < 64; j1 += 1, j2 += 2) { |
| 422 | k2 = 128 - j2; |
andrew@webrtc.org | 13b2d46 | 2013-10-08 23:41:42 +0000 | [diff] [blame] | 423 | k1 = 32 - j1; |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 424 | wkr = 0.5f - c[k1]; |
| 425 | wki = c[j1]; |
| 426 | xr = a[j2 + 0] - a[k2 + 0]; |
| 427 | xi = a[j2 + 1] + a[k2 + 1]; |
| 428 | yr = wkr * xr + wki * xi; |
| 429 | yi = wkr * xi - wki * xr; |
| 430 | a[j2 + 0] = a[j2 + 0] - yr; |
| 431 | a[j2 + 1] = yi - a[j2 + 1]; |
| 432 | a[k2 + 0] = yr + a[k2 + 0]; |
| 433 | a[k2 + 1] = yi - a[k2 + 1]; |
| 434 | } |
| 435 | a[65] = -a[65]; |
| 436 | } |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 437 | #endif |
niklase@google.com | 470e71d | 2011-07-07 08:21:25 +0000 | [diff] [blame] | 438 | |
peah | 81b9291 | 2016-10-06 06:46:20 -0700 | [diff] [blame] | 439 | } // namespace webrtc |