Blame - modules/audio_processing/utility/ooura_fft_neon.cc - webrtc.googlesource.com/src

blob: 401387a643bcce2cbde8bc9968dce6a8d757fa7b [file] [log] [blame]

bjornv@webrtc.org	cd9b90a	2014-06-30 12:05:18 +0000	[diff] [blame]	1	/*
				2	* Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
				3	*
				4	* Use of this source code is governed by a BSD-style license
				5	* that can be found in the LICENSE file in the root of the source
				6	* tree. An additional intellectual property rights grant can be found
				7	* in the file PATENTS. All contributing project authors may
				8	* be found in the AUTHORS file in the root of the source tree.
				9	*/
				10
				11	/*
				12	* The rdft AEC algorithm, neon version of speed-critical functions.
				13	*
				14	* Based on the sse2 version.
				15	*/
				16
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	17	#include "modules/audio_processing/utility/ooura_fft.h"
bjornv@webrtc.org	cd9b90a	2014-06-30 12:05:18 +0000	[diff] [blame]	18
				19	#include <arm_neon.h>
				20
Mirko Bonadei	92ea95e	2017-09-15 06:47:31 +0200	[diff] [blame]	21	#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
				22	#include "modules/audio_processing/utility/ooura_fft_tables_neon_sse2.h"
bjornv@webrtc.org	cd9b90a	2014-06-30 12:05:18 +0000	[diff] [blame]	23
peah	81b9291	2016-10-06 06:46:20 -0700	[diff] [blame]	24	namespace webrtc {
				25
				26	#if defined(WEBRTC_HAS_NEON)
				27	void cft1st_128_neon(float* a) {
bjornv@webrtc.org	cd9b90a	2014-06-30 12:05:18 +0000	[diff] [blame]	28	const float32x4_t vec_swap_sign = vld1q_f32((float32_t*)k_swap_sign);
				29	int j, k2;
				30
				31	for (k2 = 0, j = 0; j < 128; j += 16, k2 += 4) {
				32	float32x4_t a00v = vld1q_f32(&a[j + 0]);
				33	float32x4_t a04v = vld1q_f32(&a[j + 4]);
				34	float32x4_t a08v = vld1q_f32(&a[j + 8]);
				35	float32x4_t a12v = vld1q_f32(&a[j + 12]);
				36	float32x4_t a01v = vcombine_f32(vget_low_f32(a00v), vget_low_f32(a08v));
				37	float32x4_t a23v = vcombine_f32(vget_high_f32(a00v), vget_high_f32(a08v));
				38	float32x4_t a45v = vcombine_f32(vget_low_f32(a04v), vget_low_f32(a12v));
				39	float32x4_t a67v = vcombine_f32(vget_high_f32(a04v), vget_high_f32(a12v));
				40	const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2]);
				41	const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2]);
				42	const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2]);
				43	const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2]);
				44	const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2]);
				45	const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2]);
				46	float32x4_t x0v = vaddq_f32(a01v, a23v);
				47	const float32x4_t x1v = vsubq_f32(a01v, a23v);
				48	const float32x4_t x2v = vaddq_f32(a45v, a67v);
				49	const float32x4_t x3v = vsubq_f32(a45v, a67v);
				50	const float32x4_t x3w = vrev64q_f32(x3v);
				51	float32x4_t x0w;
				52	a01v = vaddq_f32(x0v, x2v);
				53	x0v = vsubq_f32(x0v, x2v);
				54	x0w = vrev64q_f32(x0v);
				55	a45v = vmulq_f32(wk2rv, x0v);
				56	a45v = vmlaq_f32(a45v, wk2iv, x0w);
				57	x0v = vmlaq_f32(x1v, x3w, vec_swap_sign);
				58	x0w = vrev64q_f32(x0v);
				59	a23v = vmulq_f32(wk1rv, x0v);
				60	a23v = vmlaq_f32(a23v, wk1iv, x0w);
				61	x0v = vmlsq_f32(x1v, x3w, vec_swap_sign);
				62	x0w = vrev64q_f32(x0v);
				63	a67v = vmulq_f32(wk3rv, x0v);
				64	a67v = vmlaq_f32(a67v, wk3iv, x0w);
				65	a00v = vcombine_f32(vget_low_f32(a01v), vget_low_f32(a23v));
				66	a04v = vcombine_f32(vget_low_f32(a45v), vget_low_f32(a67v));
				67	a08v = vcombine_f32(vget_high_f32(a01v), vget_high_f32(a23v));
				68	a12v = vcombine_f32(vget_high_f32(a45v), vget_high_f32(a67v));
				69	vst1q_f32(&a[j + 0], a00v);
				70	vst1q_f32(&a[j + 4], a04v);
				71	vst1q_f32(&a[j + 8], a08v);
				72	vst1q_f32(&a[j + 12], a12v);
				73	}
				74	}
				75
// NEON version of the `cftmdl` stage for a 128-float buffer (l = 8).
//
// Works in place in two passes:
//   1. a[0..63], weighted by the cftmdl_wk1r table;
//   2. a[64..127], weighted by rdft_wk{1,2,3}{r,i} starting at index 4.
// Each pass runs four iterations. An iteration reads one pair of adjacent
// floats at each of the offsets j + {0, 8, 16, 24, 32, 40, 48, 56}, and every
// load of the iteration happens before its first store.
//
// NOTE(review): presumably each float pair is one (re, im) complex value as in
// the scalar Ooura FFT; confirm against the generic implementation.
void cftmdl_128_neon(float* a) {
  const int l = 8;
  // Keep the table const: vld1q_f32 only needs a pointer-to-const.
  // NOTE(review): assumed to be an alternating-sign mask; the values live in
  // the tables header, which is not visible from this file.
  const float32x4_t vec_swap_sign =
      vld1q_f32(reinterpret_cast<const float32_t*>(k_swap_sign));

  // Pass 1: a[0..63].
  {
    const float32x4_t wk1rv = vld1q_f32(cftmdl_wk1r);

    for (int j = 0; j < l; j += 2) {
      // Low half of each vector comes from a[j + n], high half from
      // a[j + n + 32].
      const float32x4_t a_00_32 =
          vcombine_f32(vld1_f32(&a[j + 0]), vld1_f32(&a[j + 32]));
      const float32x4_t a_08_40 =
          vcombine_f32(vld1_f32(&a[j + 8]), vld1_f32(&a[j + 40]));
      const float32x4_t a_16_48 =
          vcombine_f32(vld1_f32(&a[j + 16]), vld1_f32(&a[j + 48]));
      const float32x4_t a_24_56 =
          vcombine_f32(vld1_f32(&a[j + 24]), vld1_f32(&a[j + 56]));

      const float32x4_t x0 = vaddq_f32(a_00_32, a_08_40);
      const float32x4_t x1 = vsubq_f32(a_00_32, a_08_40);
      const float32x4_t x2 = vaddq_f32(a_16_48, a_24_56);
      const float32x4_t x3 = vsubq_f32(a_16_48, a_24_56);

      const float32x4_t sum02 = vaddq_f32(x0, x2);
      const float32x4_t diff02 = vsubq_f32(x0, x2);
      const float32x4_t x3_swapped = vrev64q_f32(x3);
      const float32x4_t x1_plus = vmlaq_f32(x1, vec_swap_sign, x3_swapped);
      const float32x4_t x1_minus = vmlsq_f32(x1, vec_swap_sign, x3_swapped);

      // Broadcast each float of the high halves, then mix them with the sign
      // mask before applying the cftmdl_wk1r weights.
      const float32x2_t plus_hi = vget_high_f32(x1_plus);
      const float32x2_t minus_hi = vget_high_f32(x1_minus);
      const float32x4_t first_as =
          vcombine_f32(vdup_lane_f32(plus_hi, 0), vdup_lane_f32(minus_hi, 0));
      const float32x4_t second_as =
          vcombine_f32(vdup_lane_f32(plus_hi, 1), vdup_lane_f32(minus_hi, 1));
      const float32x4_t mixed = vmlaq_f32(first_as, vec_swap_sign, second_as);
      const float32x4_t weighted = vmulq_f32(wk1rv, mixed);

      vst1_f32(&a[j + 0], vget_low_f32(sum02));
      vst1_f32(&a[j + 32], vget_high_f32(sum02));
      vst1_f32(&a[j + 16], vget_low_f32(diff02));
      // The j + 48 output is the pair-swapped high half of diff02 ...
      vst1_f32(&a[j + 48], vget_high_f32(vrev64q_f32(diff02)));
      // ... with only its first float negated.
      a[j + 48] = -a[j + 48];

      vst1_f32(&a[j + 8], vget_low_f32(x1_plus));
      vst1_f32(&a[j + 24], vget_low_f32(x1_minus));
      vst1_f32(&a[j + 40], vget_low_f32(weighted));
      vst1_f32(&a[j + 56], vget_high_f32(vrev64q_f32(weighted)));
    }
  }

  // Pass 2: a[64..127].
  {
    const int k = 64;
    const int k1 = 2;
    const int k2 = 2 * k1;
    const float32x4_t wk1rv = vld1q_f32(&rdft_wk1r[k2 + 0]);
    const float32x4_t wk1iv = vld1q_f32(&rdft_wk1i[k2 + 0]);
    const float32x4_t wk2rv = vld1q_f32(&rdft_wk2r[k2 + 0]);
    const float32x4_t wk2iv = vld1q_f32(&rdft_wk2i[k2 + 0]);
    const float32x4_t wk3rv = vld1q_f32(&rdft_wk3r[k2 + 0]);
    const float32x4_t wk3iv = vld1q_f32(&rdft_wk3i[k2 + 0]);

    for (int j = k; j < l + k; j += 2) {
      const float32x4_t a_00_32 =
          vcombine_f32(vld1_f32(&a[j + 0]), vld1_f32(&a[j + 32]));
      const float32x4_t a_08_40 =
          vcombine_f32(vld1_f32(&a[j + 8]), vld1_f32(&a[j + 40]));
      const float32x4_t a_16_48 =
          vcombine_f32(vld1_f32(&a[j + 16]), vld1_f32(&a[j + 48]));
      const float32x4_t a_24_56 =
          vcombine_f32(vld1_f32(&a[j + 24]), vld1_f32(&a[j + 56]));

      const float32x4_t x0 = vaddq_f32(a_00_32, a_08_40);
      const float32x4_t x1 = vsubq_f32(a_00_32, a_08_40);
      const float32x4_t x2 = vaddq_f32(a_16_48, a_24_56);
      const float32x4_t x3 = vsubq_f32(a_16_48, a_24_56);

      const float32x4_t sum02 = vaddq_f32(x0, x2);
      const float32x4_t diff02 = vsubq_f32(x0, x2);
      const float32x4_t x3_swapped = vrev64q_f32(x3);
      const float32x4_t x1_plus = vmlaq_f32(x1, vec_swap_sign, x3_swapped);
      const float32x4_t x1_minus = vmlsq_f32(x1, vec_swap_sign, x3_swapped);

      // Weighted outputs all have the shape wkr * v + wki * pair_swap(v).
      const float32x4_t out_16_48 =
          vmlaq_f32(vmulq_f32(wk2rv, diff02), wk2iv, vrev64q_f32(diff02));
      const float32x4_t out_08_40 =
          vmlaq_f32(vmulq_f32(wk1rv, x1_plus), wk1iv, vrev64q_f32(x1_plus));
      const float32x4_t out_24_56 =
          vmlaq_f32(vmulq_f32(wk3rv, x1_minus), wk3iv, vrev64q_f32(x1_minus));

      vst1_f32(&a[j + 0], vget_low_f32(sum02));
      vst1_f32(&a[j + 32], vget_high_f32(sum02));
      vst1_f32(&a[j + 16], vget_low_f32(out_16_48));
      vst1_f32(&a[j + 48], vget_high_f32(out_16_48));
      vst1_f32(&a[j + 8], vget_low_f32(out_08_40));
      vst1_f32(&a[j + 40], vget_high_f32(out_08_40));
      vst1_f32(&a[j + 24], vget_low_f32(out_24_56));
      vst1_f32(&a[j + 56], vget_high_f32(out_24_56));
    }
  }
}
				182
// Returns `in` with its four lanes in reverse order: A B C D -> D C B A.
__inline static float32x4_t reverse_order_f32x4(float32x4_t in) {
  // Exchange the two lanes inside each 64-bit half: A B C D -> B A D C.
  const float32x4_t pairs_swapped = vrev64q_f32(in);
  // Exchange the two halves: B A D C -> D C B A.
  return vcombine_f32(vget_high_f32(pairs_swapped),
                      vget_low_f32(pairs_swapped));
}
				189
// NEON version of `rftfsub` for a 128-float buffer.
//
// For j2 = 2, 4, ..., 62 (with j1 = j2 / 2 and k2 = 128 - j2) this applies, in
// place:
//   wkr = 0.5 - c[32 - j1];       wki = c[j1];
//   xr  = a[j2] - a[k2];          xi  = a[j2 + 1] + a[k2 + 1];
//   yr  = wkr * xr - wki * xi;    yi  = wkr * xi + wki * xr;
//   a[j2]     -= yr;              a[j2 + 1] -= yi;
//   a[k2]     += yr;              a[k2 + 1] -= yi;
// where c = rdft_w + 32. The vector loop covers j2 = 2..56 four values at a
// time; the scalar loop finishes j2 = 58, 60, 62.
void rftfsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  const float32x4_t half = vdupq_n_f32(0.5f);
  int j1 = 1;
  int j2 = 2;

  // Four (j2, k2) pairs per iteration. Index annotations below are for the
  // first iteration (j1 = 1, j2 = 2).
  while (j2 + 7 < 64) {
    // Weights: wki = c[1, 2, 3, 4]; wkr = 0.5 - c[31, 30, 29, 28].
    const float32x4_t wki = vld1q_f32(&c[j1]);
    const float32x4_t wkr =
        reverse_order_f32x4(vsubq_f32(half, vld1q_f32(&c[29 - j1])));

    // De-interleaving loads: val[0] gets the even offsets, val[1] the odd.
    // front: a[2, 4, 6, 8] / a[3, 5, 7, 9].
    float32x4x2_t front = vld2q_f32(&a[0 + j2]);
    // back: a[120, 122, 124, 126] / a[121, 123, 125, 127].
    const float32x4x2_t back = vld2q_f32(&a[122 - j2]);
    // Reverse so lane i of `back_*` is the partner of lane i of `front`:
    // a[126, 124, 122, 120] and a[127, 125, 123, 121].
    const float32x4_t back_even = reverse_order_f32x4(back.val[0]);
    const float32x4_t back_odd = reverse_order_f32x4(back.val[1]);

    const float32x4_t xr = vsubq_f32(front.val[0], back_even);
    const float32x4_t xi = vaddq_f32(front.val[1], back_odd);

    // yr = wkr * xr - wki * xi;  yi = wkr * xi + wki * xr.
    const float32x4_t yr = vsubq_f32(vmulq_f32(wkr, xr), vmulq_f32(wki, xi));
    const float32x4_t yi = vaddq_f32(vmulq_f32(wkr, xi), vmulq_f32(wki, xr));

    // a[k2] += yr;  a[k2 + 1] -= yi  (still in reversed lane order).
    const float32x4_t back_even_new = vaddq_f32(back_even, yr);
    const float32x4_t back_odd_new = vsubq_f32(back_odd, yi);
    // Re-interleave and restore memory order:
    // val[0] = a[124, 125, 126, 127], val[1] = a[120, 121, 122, 123].
    const float32x4x2_t back_out =
        vzipq_f32(vrev64q_f32(back_even_new), vrev64q_f32(back_odd_new));

    // a[j2] -= yr;  a[j2 + 1] -= yi.
    front.val[0] = vsubq_f32(front.val[0], yr);
    front.val[1] = vsubq_f32(front.val[1], yi);
    vst2q_f32(&a[0 + j2], front);

    vst1q_f32(&a[122 - j2], back_out.val[1]);
    vst1q_f32(&a[126 - j2], back_out.val[0]);

    j1 += 4;
    j2 += 8;
  }

  // Scalar tail for the pairs the vector loop could not cover.
  while (j2 < 64) {
    const int k2 = 128 - j2;
    const int k1 = 32 - j1;
    const float wkr = 0.5f - c[k1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr - wki * xi;
    const float yi = wkr * xi + wki * xr;
    a[j2 + 0] -= yr;
    a[j2 + 1] -= yi;
    a[k2 + 0] += yr;
    a[k2 + 1] -= yi;

    j1 += 1;
    j2 += 2;
  }
}
				268
// NEON version of `rftbsub` for a 128-float buffer.
//
// Negates a[1] first and a[65] last. In between, for j2 = 2, 4, ..., 62 (with
// j1 = j2 / 2 and k2 = 128 - j2) it applies, in place:
//   wkr = 0.5 - c[32 - j1];       wki = c[j1];
//   xr  = a[j2] - a[k2];          xi  = a[j2 + 1] + a[k2 + 1];
//   yr  = wkr * xr + wki * xi;    yi  = wkr * xi - wki * xr;
//   a[j2]     = a[j2] - yr;       a[j2 + 1] = yi - a[j2 + 1];
//   a[k2]     = yr + a[k2];       a[k2 + 1] = yi - a[k2 + 1];
// where c = rdft_w + 32. The vector loop covers j2 = 2..56 four values at a
// time; the scalar loop finishes j2 = 58, 60, 62.
void rftbsub_128_neon(float* a) {
  const float* c = rdft_w + 32;
  const float32x4_t half = vdupq_n_f32(0.5f);
  int j1 = 1;
  int j2 = 2;

  a[1] = -a[1];

  // Four (j2, k2) pairs per iteration. Index annotations below are for the
  // first iteration (j1 = 1, j2 = 2).
  while (j2 + 7 < 64) {
    // Weights: wki = c[1, 2, 3, 4]; wkr = 0.5 - c[31, 30, 29, 28].
    const float32x4_t wki = vld1q_f32(&c[j1]);
    const float32x4_t wkr =
        reverse_order_f32x4(vsubq_f32(half, vld1q_f32(&c[29 - j1])));

    // De-interleaving loads: val[0] gets the even offsets, val[1] the odd.
    // front: a[2, 4, 6, 8] / a[3, 5, 7, 9].
    float32x4x2_t front = vld2q_f32(&a[0 + j2]);
    // back: a[120, 122, 124, 126] / a[121, 123, 125, 127].
    const float32x4x2_t back = vld2q_f32(&a[122 - j2]);
    // Reverse so lane i of `back_*` is the partner of lane i of `front`:
    // a[126, 124, 122, 120] and a[127, 125, 123, 121].
    const float32x4_t back_even = reverse_order_f32x4(back.val[0]);
    const float32x4_t back_odd = reverse_order_f32x4(back.val[1]);

    const float32x4_t xr = vsubq_f32(front.val[0], back_even);
    const float32x4_t xi = vaddq_f32(front.val[1], back_odd);

    // yr = wkr * xr + wki * xi;  yi = wkr * xi - wki * xr.
    const float32x4_t yr = vaddq_f32(vmulq_f32(wkr, xr), vmulq_f32(wki, xi));
    const float32x4_t yi = vsubq_f32(vmulq_f32(wkr, xi), vmulq_f32(wki, xr));

    // a[k2] = yr + a[k2];  a[k2 + 1] = yi - a[k2 + 1]  (reversed lane order).
    const float32x4_t back_even_new = vaddq_f32(back_even, yr);
    const float32x4_t back_odd_new = vsubq_f32(yi, back_odd);
    // Re-interleave and restore memory order:
    // val[0] = a[124, 125, 126, 127], val[1] = a[120, 121, 122, 123].
    const float32x4x2_t back_out =
        vzipq_f32(vrev64q_f32(back_even_new), vrev64q_f32(back_odd_new));

    // a[j2] = a[j2] - yr;  a[j2 + 1] = yi - a[j2 + 1].
    front.val[0] = vsubq_f32(front.val[0], yr);
    front.val[1] = vsubq_f32(yi, front.val[1]);
    vst2q_f32(&a[0 + j2], front);

    vst1q_f32(&a[122 - j2], back_out.val[1]);
    vst1q_f32(&a[126 - j2], back_out.val[0]);

    j1 += 4;
    j2 += 8;
  }

  // Scalar tail for the pairs the vector loop could not cover.
  while (j2 < 64) {
    const int k2 = 128 - j2;
    const int k1 = 32 - j1;
    const float wkr = 0.5f - c[k1];
    const float wki = c[j1];
    const float xr = a[j2 + 0] - a[k2 + 0];
    const float xi = a[j2 + 1] + a[k2 + 1];
    const float yr = wkr * xr + wki * xi;
    const float yi = wkr * xi - wki * xr;
    a[j2 + 0] = a[j2 + 0] - yr;
    a[j2 + 1] = yi - a[j2 + 1];
    a[k2 + 0] = yr + a[k2 + 0];
    a[k2 + 1] = yi - a[k2 + 1];

    j1 += 1;
    j2 += 2;
  }

  a[65] = -a[65];
}
peah	81b9291	2016-10-06 06:46:20 -0700	[diff] [blame]	350	#endif
bjornv@webrtc.org	ac800c8	2014-07-10 07:53:13 +0000	[diff] [blame]	351
peah	81b9291	2016-10-06 06:46:20 -0700	[diff] [blame]	352	} // namespace webrtc