/*
 *  Copyright (c) 2014 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */
10
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020011#include "modules/audio_processing/utility/ooura_fft.h"
peah81b92912016-10-06 06:46:20 -070012
Mirko Bonadei92ea95e2017-09-15 06:47:31 +020013#include "modules/audio_processing/utility/ooura_fft_tables_common.h"
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +000014
peah81b92912016-10-06 06:46:20 -070015namespace webrtc {
16
17#if defined(MIPS_FPU_LE)
// Exchanges the two interleaved complex values (real part followed by
// imaginary part) that start at float offsets |i| and |j| of |a|.
static inline void bitrv2_swap_complex_mips(float* a, int i, int j) {
  const float re = a[i];
  const float im = a[i + 1];
  a[i] = a[j];
  a[i + 1] = a[j + 1];
  a[j] = re;
  a[j + 1] = im;
}

// Reorders the 128-float buffer |a| in place, treating it as 64 interleaved
// complex values (a[2k] = real, a[2k + 1] = imaginary).
//
// Each call below swaps one pair of complex elements; the arguments are float
// offsets, i.e. twice the complex index. The 28 pairs are disjoint and each
// pair consists of two 6-bit complex indices that are bit-reversals of one
// another, so together they apply the full 64-point bit-reversal permutation
// (the remaining 8 indices are their own reversal and stay put).
//
// |a| must point to at least 128 floats.
void bitrv2_128_mips(float* a) {
  // n is 128
  bitrv2_swap_complex_mips(a, 8, 16);
  bitrv2_swap_complex_mips(a, 64, 2);
  bitrv2_swap_complex_mips(a, 72, 18);
  bitrv2_swap_complex_mips(a, 80, 10);
  bitrv2_swap_complex_mips(a, 88, 26);
  bitrv2_swap_complex_mips(a, 74, 82);
  bitrv2_swap_complex_mips(a, 32, 4);
  bitrv2_swap_complex_mips(a, 40, 20);
  bitrv2_swap_complex_mips(a, 48, 12);
  bitrv2_swap_complex_mips(a, 56, 28);
  bitrv2_swap_complex_mips(a, 34, 68);
  bitrv2_swap_complex_mips(a, 42, 84);
  bitrv2_swap_complex_mips(a, 50, 76);
  bitrv2_swap_complex_mips(a, 58, 92);
  bitrv2_swap_complex_mips(a, 44, 52);
  bitrv2_swap_complex_mips(a, 96, 6);
  bitrv2_swap_complex_mips(a, 104, 22);
  bitrv2_swap_complex_mips(a, 112, 14);
  bitrv2_swap_complex_mips(a, 120, 30);
  bitrv2_swap_complex_mips(a, 98, 70);
  bitrv2_swap_complex_mips(a, 106, 86);
  bitrv2_swap_complex_mips(a, 114, 78);
  bitrv2_swap_complex_mips(a, 122, 94);
  bitrv2_swap_complex_mips(a, 100, 38);
  bitrv2_swap_complex_mips(a, 108, 54);
  bitrv2_swap_complex_mips(a, 116, 46);
  bitrv2_swap_complex_mips(a, 124, 62);
  bitrv2_swap_complex_mips(a, 110, 118);
}
274
peah81b92912016-10-06 06:46:20 -0700275void cft1st_128_mips(float* a) {
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000276 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
277 int a_ptr, p1_rdft, p2_rdft, count;
andrew@webrtc.orgdb75a662014-09-30 15:17:50 +0000278 const float* first = rdft_wk3ri_first;
279 const float* second = rdft_wk3ri_second;
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000280
Yves Gerey665174f2018-06-19 15:03:05 +0200281 __asm __volatile(
282 ".set push \n\t"
283 ".set noreorder \n\t"
284 // first 8
285 "lwc1 %[f0], 0(%[a]) \n\t"
286 "lwc1 %[f1], 4(%[a]) \n\t"
287 "lwc1 %[f2], 8(%[a]) \n\t"
288 "lwc1 %[f3], 12(%[a]) \n\t"
289 "lwc1 %[f4], 16(%[a]) \n\t"
290 "lwc1 %[f5], 20(%[a]) \n\t"
291 "lwc1 %[f6], 24(%[a]) \n\t"
292 "lwc1 %[f7], 28(%[a]) \n\t"
293 "add.s %[f8], %[f0], %[f2] \n\t"
294 "sub.s %[f0], %[f0], %[f2] \n\t"
295 "add.s %[f2], %[f4], %[f6] \n\t"
296 "sub.s %[f4], %[f4], %[f6] \n\t"
297 "add.s %[f6], %[f1], %[f3] \n\t"
298 "sub.s %[f1], %[f1], %[f3] \n\t"
299 "add.s %[f3], %[f5], %[f7] \n\t"
300 "sub.s %[f5], %[f5], %[f7] \n\t"
301 "add.s %[f7], %[f8], %[f2] \n\t"
302 "sub.s %[f8], %[f8], %[f2] \n\t"
303 "sub.s %[f2], %[f1], %[f4] \n\t"
304 "add.s %[f1], %[f1], %[f4] \n\t"
305 "add.s %[f4], %[f6], %[f3] \n\t"
306 "sub.s %[f6], %[f6], %[f3] \n\t"
307 "sub.s %[f3], %[f0], %[f5] \n\t"
308 "add.s %[f0], %[f0], %[f5] \n\t"
309 "swc1 %[f7], 0(%[a]) \n\t"
310 "swc1 %[f8], 16(%[a]) \n\t"
311 "swc1 %[f2], 28(%[a]) \n\t"
312 "swc1 %[f1], 12(%[a]) \n\t"
313 "swc1 %[f4], 4(%[a]) \n\t"
314 "swc1 %[f6], 20(%[a]) \n\t"
315 "swc1 %[f3], 8(%[a]) \n\t"
316 "swc1 %[f0], 24(%[a]) \n\t"
317 // second 8
318 "lwc1 %[f0], 32(%[a]) \n\t"
319 "lwc1 %[f1], 36(%[a]) \n\t"
320 "lwc1 %[f2], 40(%[a]) \n\t"
321 "lwc1 %[f3], 44(%[a]) \n\t"
322 "lwc1 %[f4], 48(%[a]) \n\t"
323 "lwc1 %[f5], 52(%[a]) \n\t"
324 "lwc1 %[f6], 56(%[a]) \n\t"
325 "lwc1 %[f7], 60(%[a]) \n\t"
326 "add.s %[f8], %[f4], %[f6] \n\t"
327 "sub.s %[f4], %[f4], %[f6] \n\t"
328 "add.s %[f6], %[f1], %[f3] \n\t"
329 "sub.s %[f1], %[f1], %[f3] \n\t"
330 "add.s %[f3], %[f0], %[f2] \n\t"
331 "sub.s %[f0], %[f0], %[f2] \n\t"
332 "add.s %[f2], %[f5], %[f7] \n\t"
333 "sub.s %[f5], %[f5], %[f7] \n\t"
334 "add.s %[f7], %[f4], %[f1] \n\t"
335 "sub.s %[f4], %[f4], %[f1] \n\t"
336 "add.s %[f1], %[f3], %[f8] \n\t"
337 "sub.s %[f3], %[f3], %[f8] \n\t"
338 "sub.s %[f8], %[f0], %[f5] \n\t"
339 "add.s %[f0], %[f0], %[f5] \n\t"
340 "add.s %[f5], %[f6], %[f2] \n\t"
341 "sub.s %[f6], %[f2], %[f6] \n\t"
342 "lwc1 %[f9], 8(%[rdft_w]) \n\t"
343 "sub.s %[f2], %[f8], %[f7] \n\t"
344 "add.s %[f8], %[f8], %[f7] \n\t"
345 "sub.s %[f7], %[f4], %[f0] \n\t"
346 "add.s %[f4], %[f4], %[f0] \n\t"
347 // prepare for loop
348 "addiu %[a_ptr], %[a], 64 \n\t"
349 "addiu %[p1_rdft], %[rdft_w], 8 \n\t"
350 "addiu %[p2_rdft], %[rdft_w], 16 \n\t"
351 "addiu %[count], $zero, 7 \n\t"
352 // finish second 8
353 "mul.s %[f2], %[f9], %[f2] \n\t"
354 "mul.s %[f8], %[f9], %[f8] \n\t"
355 "mul.s %[f7], %[f9], %[f7] \n\t"
356 "mul.s %[f4], %[f9], %[f4] \n\t"
357 "swc1 %[f1], 32(%[a]) \n\t"
358 "swc1 %[f3], 52(%[a]) \n\t"
359 "swc1 %[f5], 36(%[a]) \n\t"
360 "swc1 %[f6], 48(%[a]) \n\t"
361 "swc1 %[f2], 40(%[a]) \n\t"
362 "swc1 %[f8], 44(%[a]) \n\t"
363 "swc1 %[f7], 56(%[a]) \n\t"
364 "swc1 %[f4], 60(%[a]) \n\t"
365 // loop
366 "1: \n\t"
367 "lwc1 %[f0], 0(%[a_ptr]) \n\t"
368 "lwc1 %[f1], 4(%[a_ptr]) \n\t"
369 "lwc1 %[f2], 8(%[a_ptr]) \n\t"
370 "lwc1 %[f3], 12(%[a_ptr]) \n\t"
371 "lwc1 %[f4], 16(%[a_ptr]) \n\t"
372 "lwc1 %[f5], 20(%[a_ptr]) \n\t"
373 "lwc1 %[f6], 24(%[a_ptr]) \n\t"
374 "lwc1 %[f7], 28(%[a_ptr]) \n\t"
375 "add.s %[f8], %[f0], %[f2] \n\t"
376 "sub.s %[f0], %[f0], %[f2] \n\t"
377 "add.s %[f2], %[f4], %[f6] \n\t"
378 "sub.s %[f4], %[f4], %[f6] \n\t"
379 "add.s %[f6], %[f1], %[f3] \n\t"
380 "sub.s %[f1], %[f1], %[f3] \n\t"
381 "add.s %[f3], %[f5], %[f7] \n\t"
382 "sub.s %[f5], %[f5], %[f7] \n\t"
383 "lwc1 %[f10], 4(%[p1_rdft]) \n\t"
384 "lwc1 %[f11], 0(%[p2_rdft]) \n\t"
385 "lwc1 %[f12], 4(%[p2_rdft]) \n\t"
386 "lwc1 %[f13], 8(%[first]) \n\t"
387 "lwc1 %[f14], 12(%[first]) \n\t"
388 "add.s %[f7], %[f8], %[f2] \n\t"
389 "sub.s %[f8], %[f8], %[f2] \n\t"
390 "add.s %[f2], %[f6], %[f3] \n\t"
391 "sub.s %[f6], %[f6], %[f3] \n\t"
392 "add.s %[f3], %[f0], %[f5] \n\t"
393 "sub.s %[f0], %[f0], %[f5] \n\t"
394 "add.s %[f5], %[f1], %[f4] \n\t"
395 "sub.s %[f1], %[f1], %[f4] \n\t"
396 "swc1 %[f7], 0(%[a_ptr]) \n\t"
397 "swc1 %[f2], 4(%[a_ptr]) \n\t"
398 "mul.s %[f4], %[f9], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000399#if defined(MIPS32_R2_LE)
Yves Gerey665174f2018-06-19 15:03:05 +0200400 "mul.s %[f8], %[f10], %[f8] \n\t"
401 "mul.s %[f7], %[f11], %[f0] \n\t"
402 "mul.s %[f0], %[f12], %[f0] \n\t"
403 "mul.s %[f2], %[f13], %[f3] \n\t"
404 "mul.s %[f3], %[f14], %[f3] \n\t"
405 "nmsub.s %[f4], %[f4], %[f10], %[f6] \n\t"
406 "madd.s %[f8], %[f8], %[f9], %[f6] \n\t"
407 "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
408 "madd.s %[f0], %[f0], %[f11], %[f5] \n\t"
409 "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
410 "madd.s %[f3], %[f3], %[f13], %[f1] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000411#else
Yves Gerey665174f2018-06-19 15:03:05 +0200412 "mul.s %[f7], %[f10], %[f6] \n\t"
413 "mul.s %[f6], %[f9], %[f6] \n\t"
414 "mul.s %[f8], %[f10], %[f8] \n\t"
415 "mul.s %[f2], %[f11], %[f0] \n\t"
416 "mul.s %[f11], %[f11], %[f5] \n\t"
417 "mul.s %[f5], %[f12], %[f5] \n\t"
418 "mul.s %[f0], %[f12], %[f0] \n\t"
419 "mul.s %[f12], %[f13], %[f3] \n\t"
420 "mul.s %[f13], %[f13], %[f1] \n\t"
421 "mul.s %[f1], %[f14], %[f1] \n\t"
422 "mul.s %[f3], %[f14], %[f3] \n\t"
423 "sub.s %[f4], %[f4], %[f7] \n\t"
424 "add.s %[f8], %[f6], %[f8] \n\t"
425 "sub.s %[f7], %[f2], %[f5] \n\t"
426 "add.s %[f0], %[f11], %[f0] \n\t"
427 "sub.s %[f2], %[f12], %[f1] \n\t"
428 "add.s %[f3], %[f13], %[f3] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000429#endif
Yves Gerey665174f2018-06-19 15:03:05 +0200430 "swc1 %[f4], 16(%[a_ptr]) \n\t"
431 "swc1 %[f8], 20(%[a_ptr]) \n\t"
432 "swc1 %[f7], 8(%[a_ptr]) \n\t"
433 "swc1 %[f0], 12(%[a_ptr]) \n\t"
434 "swc1 %[f2], 24(%[a_ptr]) \n\t"
435 "swc1 %[f3], 28(%[a_ptr]) \n\t"
436 "lwc1 %[f0], 32(%[a_ptr]) \n\t"
437 "lwc1 %[f1], 36(%[a_ptr]) \n\t"
438 "lwc1 %[f2], 40(%[a_ptr]) \n\t"
439 "lwc1 %[f3], 44(%[a_ptr]) \n\t"
440 "lwc1 %[f4], 48(%[a_ptr]) \n\t"
441 "lwc1 %[f5], 52(%[a_ptr]) \n\t"
442 "lwc1 %[f6], 56(%[a_ptr]) \n\t"
443 "lwc1 %[f7], 60(%[a_ptr]) \n\t"
444 "add.s %[f8], %[f0], %[f2] \n\t"
445 "sub.s %[f0], %[f0], %[f2] \n\t"
446 "add.s %[f2], %[f4], %[f6] \n\t"
447 "sub.s %[f4], %[f4], %[f6] \n\t"
448 "add.s %[f6], %[f1], %[f3] \n\t"
449 "sub.s %[f1], %[f1], %[f3] \n\t"
450 "add.s %[f3], %[f5], %[f7] \n\t"
451 "sub.s %[f5], %[f5], %[f7] \n\t"
452 "lwc1 %[f11], 8(%[p2_rdft]) \n\t"
453 "lwc1 %[f12], 12(%[p2_rdft]) \n\t"
454 "lwc1 %[f13], 8(%[second]) \n\t"
455 "lwc1 %[f14], 12(%[second]) \n\t"
456 "add.s %[f7], %[f8], %[f2] \n\t"
457 "sub.s %[f8], %[f2], %[f8] \n\t"
458 "add.s %[f2], %[f6], %[f3] \n\t"
459 "sub.s %[f6], %[f3], %[f6] \n\t"
460 "add.s %[f3], %[f0], %[f5] \n\t"
461 "sub.s %[f0], %[f0], %[f5] \n\t"
462 "add.s %[f5], %[f1], %[f4] \n\t"
463 "sub.s %[f1], %[f1], %[f4] \n\t"
464 "swc1 %[f7], 32(%[a_ptr]) \n\t"
465 "swc1 %[f2], 36(%[a_ptr]) \n\t"
466 "mul.s %[f4], %[f10], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000467#if defined(MIPS32_R2_LE)
Yves Gerey665174f2018-06-19 15:03:05 +0200468 "mul.s %[f10], %[f10], %[f6] \n\t"
469 "mul.s %[f7], %[f11], %[f0] \n\t"
470 "mul.s %[f11], %[f11], %[f5] \n\t"
471 "mul.s %[f2], %[f13], %[f3] \n\t"
472 "mul.s %[f13], %[f13], %[f1] \n\t"
473 "madd.s %[f4], %[f4], %[f9], %[f6] \n\t"
474 "nmsub.s %[f10], %[f10], %[f9], %[f8] \n\t"
475 "nmsub.s %[f7], %[f7], %[f12], %[f5] \n\t"
476 "madd.s %[f11], %[f11], %[f12], %[f0] \n\t"
477 "nmsub.s %[f2], %[f2], %[f14], %[f1] \n\t"
478 "madd.s %[f13], %[f13], %[f14], %[f3] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000479#else
Yves Gerey665174f2018-06-19 15:03:05 +0200480 "mul.s %[f2], %[f9], %[f6] \n\t"
481 "mul.s %[f10], %[f10], %[f6] \n\t"
482 "mul.s %[f9], %[f9], %[f8] \n\t"
483 "mul.s %[f7], %[f11], %[f0] \n\t"
484 "mul.s %[f8], %[f12], %[f5] \n\t"
485 "mul.s %[f11], %[f11], %[f5] \n\t"
486 "mul.s %[f12], %[f12], %[f0] \n\t"
487 "mul.s %[f5], %[f13], %[f3] \n\t"
488 "mul.s %[f0], %[f14], %[f1] \n\t"
489 "mul.s %[f13], %[f13], %[f1] \n\t"
490 "mul.s %[f14], %[f14], %[f3] \n\t"
491 "add.s %[f4], %[f4], %[f2] \n\t"
492 "sub.s %[f10], %[f10], %[f9] \n\t"
493 "sub.s %[f7], %[f7], %[f8] \n\t"
494 "add.s %[f11], %[f11], %[f12] \n\t"
495 "sub.s %[f2], %[f5], %[f0] \n\t"
496 "add.s %[f13], %[f13], %[f14] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000497#endif
Yves Gerey665174f2018-06-19 15:03:05 +0200498 "swc1 %[f4], 48(%[a_ptr]) \n\t"
499 "swc1 %[f10], 52(%[a_ptr]) \n\t"
500 "swc1 %[f7], 40(%[a_ptr]) \n\t"
501 "swc1 %[f11], 44(%[a_ptr]) \n\t"
502 "swc1 %[f2], 56(%[a_ptr]) \n\t"
503 "swc1 %[f13], 60(%[a_ptr]) \n\t"
504 "addiu %[count], %[count], -1 \n\t"
505 "lwc1 %[f9], 8(%[p1_rdft]) \n\t"
506 "addiu %[a_ptr], %[a_ptr], 64 \n\t"
507 "addiu %[p1_rdft], %[p1_rdft], 8 \n\t"
508 "addiu %[p2_rdft], %[p2_rdft], 16 \n\t"
509 "addiu %[first], %[first], 8 \n\t"
510 "bgtz %[count], 1b \n\t"
511 " addiu %[second], %[second], 8 \n\t"
512 ".set pop \n\t"
513 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
514 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
515 [f8] "=&f"(f8), [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11),
516 [f12] "=&f"(f12), [f13] "=&f"(f13), [f14] "=&f"(f14),
517 [a_ptr] "=&r"(a_ptr), [p1_rdft] "=&r"(p1_rdft), [first] "+r"(first),
518 [p2_rdft] "=&r"(p2_rdft), [count] "=&r"(count), [second] "+r"(second)
519 : [a] "r"(a), [rdft_w] "r"(rdft_w)
520 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000521}
522
peah81b92912016-10-06 06:46:20 -0700523void cftmdl_128_mips(float* a) {
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000524 float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14;
525 int tmp_a, count;
Yves Gerey665174f2018-06-19 15:03:05 +0200526 __asm __volatile(
527 ".set push \n\t"
528 ".set noreorder \n\t"
529 "addiu %[tmp_a], %[a], 0 \n\t"
530 "addiu %[count], $zero, 4 \n\t"
531 "1: \n\t"
532 "addiu %[count], %[count], -1 \n\t"
533 "lwc1 %[f0], 0(%[tmp_a]) \n\t"
534 "lwc1 %[f2], 32(%[tmp_a]) \n\t"
535 "lwc1 %[f4], 64(%[tmp_a]) \n\t"
536 "lwc1 %[f6], 96(%[tmp_a]) \n\t"
537 "lwc1 %[f1], 4(%[tmp_a]) \n\t"
538 "lwc1 %[f3], 36(%[tmp_a]) \n\t"
539 "lwc1 %[f5], 68(%[tmp_a]) \n\t"
540 "lwc1 %[f7], 100(%[tmp_a]) \n\t"
541 "add.s %[f8], %[f0], %[f2] \n\t"
542 "sub.s %[f0], %[f0], %[f2] \n\t"
543 "add.s %[f2], %[f4], %[f6] \n\t"
544 "sub.s %[f4], %[f4], %[f6] \n\t"
545 "add.s %[f6], %[f1], %[f3] \n\t"
546 "sub.s %[f1], %[f1], %[f3] \n\t"
547 "add.s %[f3], %[f5], %[f7] \n\t"
548 "sub.s %[f5], %[f5], %[f7] \n\t"
549 "add.s %[f7], %[f8], %[f2] \n\t"
550 "sub.s %[f8], %[f8], %[f2] \n\t"
551 "add.s %[f2], %[f1], %[f4] \n\t"
552 "sub.s %[f1], %[f1], %[f4] \n\t"
553 "add.s %[f4], %[f6], %[f3] \n\t"
554 "sub.s %[f6], %[f6], %[f3] \n\t"
555 "sub.s %[f3], %[f0], %[f5] \n\t"
556 "add.s %[f0], %[f0], %[f5] \n\t"
557 "swc1 %[f7], 0(%[tmp_a]) \n\t"
558 "swc1 %[f8], 64(%[tmp_a]) \n\t"
559 "swc1 %[f2], 36(%[tmp_a]) \n\t"
560 "swc1 %[f1], 100(%[tmp_a]) \n\t"
561 "swc1 %[f4], 4(%[tmp_a]) \n\t"
562 "swc1 %[f6], 68(%[tmp_a]) \n\t"
563 "swc1 %[f3], 32(%[tmp_a]) \n\t"
564 "swc1 %[f0], 96(%[tmp_a]) \n\t"
565 "bgtz %[count], 1b \n\t"
566 " addiu %[tmp_a], %[tmp_a], 8 \n\t"
567 ".set pop \n\t"
568 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
569 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
570 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
571 : [a] "r"(a)
572 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000573 f9 = rdft_w[2];
Yves Gerey665174f2018-06-19 15:03:05 +0200574 __asm __volatile(
575 ".set push \n\t"
576 ".set noreorder \n\t"
577 "addiu %[tmp_a], %[a], 128 \n\t"
578 "addiu %[count], $zero, 4 \n\t"
579 "1: \n\t"
580 "addiu %[count], %[count], -1 \n\t"
581 "lwc1 %[f0], 0(%[tmp_a]) \n\t"
582 "lwc1 %[f2], 32(%[tmp_a]) \n\t"
583 "lwc1 %[f5], 68(%[tmp_a]) \n\t"
584 "lwc1 %[f7], 100(%[tmp_a]) \n\t"
585 "lwc1 %[f1], 4(%[tmp_a]) \n\t"
586 "lwc1 %[f3], 36(%[tmp_a]) \n\t"
587 "lwc1 %[f4], 64(%[tmp_a]) \n\t"
588 "lwc1 %[f6], 96(%[tmp_a]) \n\t"
589 "sub.s %[f8], %[f0], %[f2] \n\t"
590 "add.s %[f0], %[f0], %[f2] \n\t"
591 "sub.s %[f2], %[f5], %[f7] \n\t"
592 "add.s %[f5], %[f5], %[f7] \n\t"
593 "sub.s %[f7], %[f1], %[f3] \n\t"
594 "add.s %[f1], %[f1], %[f3] \n\t"
595 "sub.s %[f3], %[f4], %[f6] \n\t"
596 "add.s %[f4], %[f4], %[f6] \n\t"
597 "sub.s %[f6], %[f8], %[f2] \n\t"
598 "add.s %[f8], %[f8], %[f2] \n\t"
599 "add.s %[f2], %[f5], %[f1] \n\t"
600 "sub.s %[f5], %[f5], %[f1] \n\t"
601 "add.s %[f1], %[f3], %[f7] \n\t"
602 "sub.s %[f3], %[f3], %[f7] \n\t"
603 "add.s %[f7], %[f0], %[f4] \n\t"
604 "sub.s %[f0], %[f0], %[f4] \n\t"
605 "sub.s %[f4], %[f6], %[f1] \n\t"
606 "add.s %[f6], %[f6], %[f1] \n\t"
607 "sub.s %[f1], %[f3], %[f8] \n\t"
608 "add.s %[f3], %[f3], %[f8] \n\t"
609 "mul.s %[f4], %[f4], %[f9] \n\t"
610 "mul.s %[f6], %[f6], %[f9] \n\t"
611 "mul.s %[f1], %[f1], %[f9] \n\t"
612 "mul.s %[f3], %[f3], %[f9] \n\t"
613 "swc1 %[f7], 0(%[tmp_a]) \n\t"
614 "swc1 %[f2], 4(%[tmp_a]) \n\t"
615 "swc1 %[f5], 64(%[tmp_a]) \n\t"
616 "swc1 %[f0], 68(%[tmp_a]) \n\t"
617 "swc1 %[f4], 32(%[tmp_a]) \n\t"
618 "swc1 %[f6], 36(%[tmp_a]) \n\t"
619 "swc1 %[f1], 96(%[tmp_a]) \n\t"
620 "swc1 %[f3], 100(%[tmp_a]) \n\t"
621 "bgtz %[count], 1b \n\t"
622 " addiu %[tmp_a], %[tmp_a], 8 \n\t"
623 ".set pop \n\t"
624 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
625 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
626 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
627 : [a] "r"(a), [f9] "f"(f9)
628 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000629 f10 = rdft_w[3];
630 f11 = rdft_w[4];
631 f12 = rdft_w[5];
632 f13 = rdft_wk3ri_first[2];
633 f14 = rdft_wk3ri_first[3];
634
Yves Gerey665174f2018-06-19 15:03:05 +0200635 __asm __volatile(
636 ".set push \n\t"
637 ".set noreorder \n\t"
638 "addiu %[tmp_a], %[a], 256 \n\t"
639 "addiu %[count], $zero, 4 \n\t"
640 "1: \n\t"
641 "addiu %[count], %[count], -1 \n\t"
642 "lwc1 %[f0], 0(%[tmp_a]) \n\t"
643 "lwc1 %[f2], 32(%[tmp_a]) \n\t"
644 "lwc1 %[f4], 64(%[tmp_a]) \n\t"
645 "lwc1 %[f6], 96(%[tmp_a]) \n\t"
646 "lwc1 %[f1], 4(%[tmp_a]) \n\t"
647 "lwc1 %[f3], 36(%[tmp_a]) \n\t"
648 "lwc1 %[f5], 68(%[tmp_a]) \n\t"
649 "lwc1 %[f7], 100(%[tmp_a]) \n\t"
650 "add.s %[f8], %[f0], %[f2] \n\t"
651 "sub.s %[f0], %[f0], %[f2] \n\t"
652 "add.s %[f2], %[f4], %[f6] \n\t"
653 "sub.s %[f4], %[f4], %[f6] \n\t"
654 "add.s %[f6], %[f1], %[f3] \n\t"
655 "sub.s %[f1], %[f1], %[f3] \n\t"
656 "add.s %[f3], %[f5], %[f7] \n\t"
657 "sub.s %[f5], %[f5], %[f7] \n\t"
658 "sub.s %[f7], %[f8], %[f2] \n\t"
659 "add.s %[f8], %[f8], %[f2] \n\t"
660 "add.s %[f2], %[f1], %[f4] \n\t"
661 "sub.s %[f1], %[f1], %[f4] \n\t"
662 "sub.s %[f4], %[f6], %[f3] \n\t"
663 "add.s %[f6], %[f6], %[f3] \n\t"
664 "sub.s %[f3], %[f0], %[f5] \n\t"
665 "add.s %[f0], %[f0], %[f5] \n\t"
666 "swc1 %[f8], 0(%[tmp_a]) \n\t"
667 "swc1 %[f6], 4(%[tmp_a]) \n\t"
668 "mul.s %[f5], %[f9], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000669#if defined(MIPS32_R2_LE)
Yves Gerey665174f2018-06-19 15:03:05 +0200670 "mul.s %[f7], %[f10], %[f7] \n\t"
671 "mul.s %[f8], %[f11], %[f3] \n\t"
672 "mul.s %[f3], %[f12], %[f3] \n\t"
673 "mul.s %[f6], %[f13], %[f0] \n\t"
674 "mul.s %[f0], %[f14], %[f0] \n\t"
675 "nmsub.s %[f5], %[f5], %[f10], %[f4] \n\t"
676 "madd.s %[f7], %[f7], %[f9], %[f4] \n\t"
677 "nmsub.s %[f8], %[f8], %[f12], %[f2] \n\t"
678 "madd.s %[f3], %[f3], %[f11], %[f2] \n\t"
679 "nmsub.s %[f6], %[f6], %[f14], %[f1] \n\t"
680 "madd.s %[f0], %[f0], %[f13], %[f1] \n\t"
681 "swc1 %[f5], 64(%[tmp_a]) \n\t"
682 "swc1 %[f7], 68(%[tmp_a]) \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000683#else
Yves Gerey665174f2018-06-19 15:03:05 +0200684 "mul.s %[f8], %[f10], %[f4] \n\t"
685 "mul.s %[f4], %[f9], %[f4] \n\t"
686 "mul.s %[f7], %[f10], %[f7] \n\t"
687 "mul.s %[f6], %[f11], %[f3] \n\t"
688 "mul.s %[f3], %[f12], %[f3] \n\t"
689 "sub.s %[f5], %[f5], %[f8] \n\t"
690 "mul.s %[f8], %[f12], %[f2] \n\t"
691 "mul.s %[f2], %[f11], %[f2] \n\t"
692 "add.s %[f7], %[f4], %[f7] \n\t"
693 "mul.s %[f4], %[f13], %[f0] \n\t"
694 "mul.s %[f0], %[f14], %[f0] \n\t"
695 "sub.s %[f8], %[f6], %[f8] \n\t"
696 "mul.s %[f6], %[f14], %[f1] \n\t"
697 "mul.s %[f1], %[f13], %[f1] \n\t"
698 "add.s %[f3], %[f2], %[f3] \n\t"
699 "swc1 %[f5], 64(%[tmp_a]) \n\t"
700 "swc1 %[f7], 68(%[tmp_a]) \n\t"
701 "sub.s %[f6], %[f4], %[f6] \n\t"
702 "add.s %[f0], %[f1], %[f0] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000703#endif
Yves Gerey665174f2018-06-19 15:03:05 +0200704 "swc1 %[f8], 32(%[tmp_a]) \n\t"
705 "swc1 %[f3], 36(%[tmp_a]) \n\t"
706 "swc1 %[f6], 96(%[tmp_a]) \n\t"
707 "swc1 %[f0], 100(%[tmp_a]) \n\t"
708 "bgtz %[count], 1b \n\t"
709 " addiu %[tmp_a], %[tmp_a], 8 \n\t"
710 ".set pop \n\t"
711 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
712 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
713 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
714 : [a] "r"(a), [f9] "f"(f9), [f10] "f"(f10), [f11] "f"(f11),
715 [f12] "f"(f12), [f13] "f"(f13), [f14] "f"(f14)
716 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000717 f11 = rdft_w[6];
718 f12 = rdft_w[7];
719 f13 = rdft_wk3ri_second[2];
720 f14 = rdft_wk3ri_second[3];
Yves Gerey665174f2018-06-19 15:03:05 +0200721 __asm __volatile(
722 ".set push "
723 "\n\t"
724 ".set noreorder "
725 "\n\t"
726 "addiu %[tmp_a], %[a], 384 "
727 "\n\t"
728 "addiu %[count], $zero, 4 "
729 "\n\t"
730 "1: "
731 "\n\t"
732 "addiu %[count], %[count], -1 "
733 "\n\t"
734 "lwc1 %[f0], 0(%[tmp_a]) "
735 "\n\t"
736 "lwc1 %[f1], 4(%[tmp_a]) "
737 "\n\t"
738 "lwc1 %[f2], 32(%[tmp_a]) "
739 "\n\t"
740 "lwc1 %[f3], 36(%[tmp_a]) "
741 "\n\t"
742 "lwc1 %[f4], 64(%[tmp_a]) "
743 "\n\t"
744 "lwc1 %[f5], 68(%[tmp_a]) "
745 "\n\t"
746 "lwc1 %[f6], 96(%[tmp_a]) "
747 "\n\t"
748 "lwc1 %[f7], 100(%[tmp_a]) "
749 "\n\t"
750 "add.s %[f8], %[f0], %[f2] "
751 "\n\t"
752 "sub.s %[f0], %[f0], %[f2] "
753 "\n\t"
754 "add.s %[f2], %[f4], %[f6] "
755 "\n\t"
756 "sub.s %[f4], %[f4], %[f6] "
757 "\n\t"
758 "add.s %[f6], %[f1], %[f3] "
759 "\n\t"
760 "sub.s %[f1], %[f1], %[f3] "
761 "\n\t"
762 "add.s %[f3], %[f5], %[f7] "
763 "\n\t"
764 "sub.s %[f5], %[f5], %[f7] "
765 "\n\t"
766 "sub.s %[f7], %[f2], %[f8] "
767 "\n\t"
768 "add.s %[f2], %[f2], %[f8] "
769 "\n\t"
770 "add.s %[f8], %[f1], %[f4] "
771 "\n\t"
772 "sub.s %[f1], %[f1], %[f4] "
773 "\n\t"
774 "sub.s %[f4], %[f3], %[f6] "
775 "\n\t"
776 "add.s %[f3], %[f3], %[f6] "
777 "\n\t"
778 "sub.s %[f6], %[f0], %[f5] "
779 "\n\t"
780 "add.s %[f0], %[f0], %[f5] "
781 "\n\t"
782 "swc1 %[f2], 0(%[tmp_a]) "
783 "\n\t"
784 "swc1 %[f3], 4(%[tmp_a]) "
785 "\n\t"
786 "mul.s %[f5], %[f10], %[f7] "
787 "\n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000788#if defined(MIPS32_R2_LE)
Yves Gerey665174f2018-06-19 15:03:05 +0200789 "mul.s %[f7], %[f9], %[f7] "
790 "\n\t"
791 "mul.s %[f2], %[f12], %[f8] "
792 "\n\t"
793 "mul.s %[f8], %[f11], %[f8] "
794 "\n\t"
795 "mul.s %[f3], %[f14], %[f1] "
796 "\n\t"
797 "mul.s %[f1], %[f13], %[f1] "
798 "\n\t"
799 "madd.s %[f5], %[f5], %[f9], %[f4] "
800 "\n\t"
801 "msub.s %[f7], %[f7], %[f10], %[f4] "
802 "\n\t"
803 "msub.s %[f2], %[f2], %[f11], %[f6] "
804 "\n\t"
805 "madd.s %[f8], %[f8], %[f12], %[f6] "
806 "\n\t"
807 "msub.s %[f3], %[f3], %[f13], %[f0] "
808 "\n\t"
809 "madd.s %[f1], %[f1], %[f14], %[f0] "
810 "\n\t"
811 "swc1 %[f5], 64(%[tmp_a]) "
812 "\n\t"
813 "swc1 %[f7], 68(%[tmp_a]) "
814 "\n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000815#else
Yves Gerey665174f2018-06-19 15:03:05 +0200816 "mul.s %[f2], %[f9], %[f4] "
817 "\n\t"
818 "mul.s %[f4], %[f10], %[f4] "
819 "\n\t"
820 "mul.s %[f7], %[f9], %[f7] "
821 "\n\t"
822 "mul.s %[f3], %[f11], %[f6] "
823 "\n\t"
824 "mul.s %[f6], %[f12], %[f6] "
825 "\n\t"
826 "add.s %[f5], %[f5], %[f2] "
827 "\n\t"
828 "sub.s %[f7], %[f4], %[f7] "
829 "\n\t"
830 "mul.s %[f2], %[f12], %[f8] "
831 "\n\t"
832 "mul.s %[f8], %[f11], %[f8] "
833 "\n\t"
834 "mul.s %[f4], %[f14], %[f1] "
835 "\n\t"
836 "mul.s %[f1], %[f13], %[f1] "
837 "\n\t"
838 "sub.s %[f2], %[f3], %[f2] "
839 "\n\t"
840 "mul.s %[f3], %[f13], %[f0] "
841 "\n\t"
842 "mul.s %[f0], %[f14], %[f0] "
843 "\n\t"
844 "add.s %[f8], %[f8], %[f6] "
845 "\n\t"
846 "swc1 %[f5], 64(%[tmp_a]) "
847 "\n\t"
848 "swc1 %[f7], 68(%[tmp_a]) "
849 "\n\t"
850 "sub.s %[f3], %[f3], %[f4] "
851 "\n\t"
852 "add.s %[f1], %[f1], %[f0] "
853 "\n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000854#endif
Yves Gerey665174f2018-06-19 15:03:05 +0200855 "swc1 %[f2], 32(%[tmp_a]) "
856 "\n\t"
857 "swc1 %[f8], 36(%[tmp_a]) "
858 "\n\t"
859 "swc1 %[f3], 96(%[tmp_a]) "
860 "\n\t"
861 "swc1 %[f1], 100(%[tmp_a]) "
862 "\n\t"
863 "bgtz %[count], 1b "
864 "\n\t"
865 " addiu %[tmp_a], %[tmp_a], 8 "
866 "\n\t"
867 ".set pop "
868 "\n\t"
869 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
870 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
871 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
872 : [a] "r"(a), [f9] "f"(f9), [f10] "f"(f10), [f11] "f"(f11),
873 [f12] "f"(f12), [f13] "f"(f13), [f14] "f"(f14)
874 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000875}
876
peah81b92912016-10-06 06:46:20 -0700877void cftfsub_128_mips(float* a) {
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000878 float f0, f1, f2, f3, f4, f5, f6, f7, f8;
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +0000879 int tmp_a, count;
880
peah81b92912016-10-06 06:46:20 -0700881 cft1st_128_mips(a);
882 cftmdl_128_mips(a);
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +0000883
Yves Gerey665174f2018-06-19 15:03:05 +0200884 __asm __volatile(
885 ".set push \n\t"
886 ".set noreorder \n\t"
887 "addiu %[tmp_a], %[a], 0 \n\t"
888 "addiu %[count], $zero, 16 \n\t"
889 "1: \n\t"
890 "addiu %[count], %[count], -1 \n\t"
891 "lwc1 %[f0], 0(%[tmp_a]) \n\t"
892 "lwc1 %[f2], 128(%[tmp_a]) \n\t"
893 "lwc1 %[f4], 256(%[tmp_a]) \n\t"
894 "lwc1 %[f6], 384(%[tmp_a]) \n\t"
895 "lwc1 %[f1], 4(%[tmp_a]) \n\t"
896 "lwc1 %[f3], 132(%[tmp_a]) \n\t"
897 "lwc1 %[f5], 260(%[tmp_a]) \n\t"
898 "lwc1 %[f7], 388(%[tmp_a]) \n\t"
899 "add.s %[f8], %[f0], %[f2] \n\t"
900 "sub.s %[f0], %[f0], %[f2] \n\t"
901 "add.s %[f2], %[f4], %[f6] \n\t"
902 "sub.s %[f4], %[f4], %[f6] \n\t"
903 "add.s %[f6], %[f1], %[f3] \n\t"
904 "sub.s %[f1], %[f1], %[f3] \n\t"
905 "add.s %[f3], %[f5], %[f7] \n\t"
906 "sub.s %[f5], %[f5], %[f7] \n\t"
907 "add.s %[f7], %[f8], %[f2] \n\t"
908 "sub.s %[f8], %[f8], %[f2] \n\t"
909 "add.s %[f2], %[f1], %[f4] \n\t"
910 "sub.s %[f1], %[f1], %[f4] \n\t"
911 "add.s %[f4], %[f6], %[f3] \n\t"
912 "sub.s %[f6], %[f6], %[f3] \n\t"
913 "sub.s %[f3], %[f0], %[f5] \n\t"
914 "add.s %[f0], %[f0], %[f5] \n\t"
915 "swc1 %[f7], 0(%[tmp_a]) \n\t"
916 "swc1 %[f8], 256(%[tmp_a]) \n\t"
917 "swc1 %[f2], 132(%[tmp_a]) \n\t"
918 "swc1 %[f1], 388(%[tmp_a]) \n\t"
919 "swc1 %[f4], 4(%[tmp_a]) \n\t"
920 "swc1 %[f6], 260(%[tmp_a]) \n\t"
921 "swc1 %[f3], 128(%[tmp_a]) \n\t"
922 "swc1 %[f0], 384(%[tmp_a]) \n\t"
923 "bgtz %[count], 1b \n\t"
924 " addiu %[tmp_a], %[tmp_a], 8 \n\t"
925 ".set pop \n\t"
926 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
927 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
928 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
929 : [a] "r"(a)
930 : "memory");
andrew@webrtc.orgc0907ef2014-02-21 00:13:31 +0000931}
932
peah81b92912016-10-06 06:46:20 -0700933void cftbsub_128_mips(float* a) {
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000934 float f0, f1, f2, f3, f4, f5, f6, f7, f8;
935 int tmp_a, count;
936
peah81b92912016-10-06 06:46:20 -0700937 cft1st_128_mips(a);
938 cftmdl_128_mips(a);
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000939
Yves Gerey665174f2018-06-19 15:03:05 +0200940 __asm __volatile(
941 ".set push \n\t"
942 ".set noreorder \n\t"
943 "addiu %[tmp_a], %[a], 0 \n\t"
944 "addiu %[count], $zero, 16 \n\t"
945 "1: \n\t"
946 "addiu %[count], %[count], -1 \n\t"
947 "lwc1 %[f0], 0(%[tmp_a]) \n\t"
948 "lwc1 %[f2], 128(%[tmp_a]) \n\t"
949 "lwc1 %[f4], 256(%[tmp_a]) \n\t"
950 "lwc1 %[f6], 384(%[tmp_a]) \n\t"
951 "lwc1 %[f1], 4(%[tmp_a]) \n\t"
952 "lwc1 %[f3], 132(%[tmp_a]) \n\t"
953 "lwc1 %[f5], 260(%[tmp_a]) \n\t"
954 "lwc1 %[f7], 388(%[tmp_a]) \n\t"
955 "add.s %[f8], %[f0], %[f2] \n\t"
956 "sub.s %[f0], %[f0], %[f2] \n\t"
957 "add.s %[f2], %[f4], %[f6] \n\t"
958 "sub.s %[f4], %[f4], %[f6] \n\t"
959 "add.s %[f6], %[f1], %[f3] \n\t"
960 "sub.s %[f1], %[f3], %[f1] \n\t"
961 "add.s %[f3], %[f5], %[f7] \n\t"
962 "sub.s %[f5], %[f5], %[f7] \n\t"
963 "add.s %[f7], %[f8], %[f2] \n\t"
964 "sub.s %[f8], %[f8], %[f2] \n\t"
965 "sub.s %[f2], %[f1], %[f4] \n\t"
966 "add.s %[f1], %[f1], %[f4] \n\t"
967 "add.s %[f4], %[f3], %[f6] \n\t"
968 "sub.s %[f6], %[f3], %[f6] \n\t"
969 "sub.s %[f3], %[f0], %[f5] \n\t"
970 "add.s %[f0], %[f0], %[f5] \n\t"
971 "neg.s %[f4], %[f4] \n\t"
972 "swc1 %[f7], 0(%[tmp_a]) \n\t"
973 "swc1 %[f8], 256(%[tmp_a]) \n\t"
974 "swc1 %[f2], 132(%[tmp_a]) \n\t"
975 "swc1 %[f1], 388(%[tmp_a]) \n\t"
976 "swc1 %[f6], 260(%[tmp_a]) \n\t"
977 "swc1 %[f3], 128(%[tmp_a]) \n\t"
978 "swc1 %[f0], 384(%[tmp_a]) \n\t"
979 "swc1 %[f4], 4(%[tmp_a]) \n\t"
980 "bgtz %[count], 1b \n\t"
981 " addiu %[tmp_a], %[tmp_a], 8 \n\t"
982 ".set pop \n\t"
983 : [f0] "=&f"(f0), [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3),
984 [f4] "=&f"(f4), [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7),
985 [f8] "=&f"(f8), [tmp_a] "=&r"(tmp_a), [count] "=&r"(count)
986 : [a] "r"(a)
987 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000988}
989
// In-place update of a 128-float buffer using MIPS FPU inline assembly.
//
// Reads coefficients from c = rdft_w + 32 and, for every even j in [2, 62]
// with k = 128 - j, performs (as spelled out by the non-MIPS32_R2_LE branch):
//   wkr = 0.5f - c[32 - j/2];   wki = c[j/2];
//   xr  = a[j]     - a[k];
//   xi  = a[j + 1] + a[k + 1];
//   yr  = wkr * xr - wki * xi;
//   yi  = wkr * xi + wki * xr;
//   a[j]     -= yr;   a[j + 1] -= yi;
//   a[k]     += yr;   a[k + 1] -= yi;
// Elements a[0], a[1], a[64] and a[65] are not accessed by this function.
//
// Layout of the assembly: one pair (j = 2) is handled before the loop, then
// the loop runs 15 times and handles two pairs per pass (31 pairs total).
//
// a: buffer of at least 128 floats; modified in place.
peah81b92912016-10-06 06:46:20 -0700990void rftfsub_128_mips(float* a) {
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000991 const float* c = rdft_w + 32;
992 const float f0 = 0.5f;
  // a1 walks forward from a[2]; a2 walks backward from a[126].
993 float* a1 = &a[2];
994 float* a2 = &a[126];
  // c1 walks forward from c[1] (wki); c2 walks backward from c[31] (used as
  // 0.5f - *c2 to form wkr).
995 const float* c1 = &c[1];
996 const float* c2 = &c[31];
  // f1..f15 are scratch FP registers for the asm (declared as early-clobber
  // outputs below); their values after the asm are not used.
Yves Gerey665174f2018-06-19 15:03:05 +0200997 float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
andrew@webrtc.orga22485e2014-08-29 17:51:28 +0000998 int count;
999
Yves Gerey665174f2018-06-19 15:03:05 +02001000 __asm __volatile(
1001 ".set push \n\t"
      // noreorder: the assembler must not reschedule; the instruction after
      // the final bgtz therefore sits in its branch delay slot.
1002 ".set noreorder \n\t"
      // ---- Prologue: single pair at a1 = &a[2], a2 = &a[126]. ----
      // Byte offset 0 is the first float of a pair, offset 4 the second.
1003 "lwc1 %[f6], 0(%[c2]) \n\t"
1004 "lwc1 %[f1], 0(%[a1]) \n\t"
1005 "lwc1 %[f2], 0(%[a2]) \n\t"
1006 "lwc1 %[f3], 4(%[a1]) \n\t"
1007 "lwc1 %[f4], 4(%[a2]) \n\t"
1008 "lwc1 %[f5], 0(%[c1]) \n\t"
      // f6 = wkr = 0.5 - *c2;  f7 = xr;  f8 = xi;  f5 = wki.
1009 "sub.s %[f6], %[f0], %[f6] \n\t"
1010 "sub.s %[f7], %[f1], %[f2] \n\t"
1011 "add.s %[f8], %[f3], %[f4] \n\t"
      // Loop trip count for the two-pairs-per-pass loop below.
1012 "addiu %[count], $zero, 15 \n\t"
      // f9 = wkr * xr;  f6 = wkr * xi.
1013 "mul.s %[f9], %[f6], %[f7] \n\t"
1014 "mul.s %[f6], %[f6], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001015#if !defined(MIPS32_R2_LE)
      // f9 = yr = wkr*xr - wki*xi;  f6 = yi = wkr*xi + wki*xr.
Yves Gerey665174f2018-06-19 15:03:05 +02001016 "mul.s %[f8], %[f5], %[f8] \n\t"
1017 "mul.s %[f5], %[f5], %[f7] \n\t"
1018 "sub.s %[f9], %[f9], %[f8] \n\t"
1019 "add.s %[f6], %[f6], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001020#else
      // R2 build: nmsub.s / madd.s take the place of the mul+sub / mul+add
      // sequence above (presumably yielding the same yr / yi - confirm
      // against the MIPS32 FPU instruction reference).
Yves Gerey665174f2018-06-19 15:03:05 +02001021 "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
1022 "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001023#endif
      // a1[0] -= yr;  a2[0] += yr;  a1[1] -= yi;  a2[1] -= yi.
Yves Gerey665174f2018-06-19 15:03:05 +02001024 "sub.s %[f1], %[f1], %[f9] \n\t"
1025 "add.s %[f2], %[f2], %[f9] \n\t"
1026 "sub.s %[f3], %[f3], %[f6] \n\t"
1027 "sub.s %[f4], %[f4], %[f6] \n\t"
1028 "swc1 %[f1], 0(%[a1]) \n\t"
1029 "swc1 %[f2], 0(%[a2]) \n\t"
1030 "swc1 %[f3], 4(%[a1]) \n\t"
1031 "swc1 %[f4], 4(%[a2]) \n\t"
      // Step every pointer by one pair / one coefficient.
1032 "addiu %[a1], %[a1], 8 \n\t"
1033 "addiu %[a2], %[a2], -8 \n\t"
1034 "addiu %[c1], %[c1], 4 \n\t"
1035 "addiu %[c2], %[c2], -4 \n\t"
      // ---- Main loop: two pairs per pass, 15 passes. ----
1036 "1: \n\t"
      // First pair of the pass: same register roles as in the prologue.
1037 "lwc1 %[f6], 0(%[c2]) \n\t"
1038 "lwc1 %[f1], 0(%[a1]) \n\t"
1039 "lwc1 %[f2], 0(%[a2]) \n\t"
1040 "lwc1 %[f3], 4(%[a1]) \n\t"
1041 "lwc1 %[f4], 4(%[a2]) \n\t"
1042 "lwc1 %[f5], 0(%[c1]) \n\t"
1043 "sub.s %[f6], %[f0], %[f6] \n\t"
1044 "sub.s %[f7], %[f1], %[f2] \n\t"
1045 "add.s %[f8], %[f3], %[f4] \n\t"
      // Loads for the second pair are interleaved with the first pair's
      // arithmetic: f10 <- c2[-1], f11 <- a1[2], f12 <- a2[-2].
1046 "lwc1 %[f10], -4(%[c2]) \n\t"
1047 "lwc1 %[f11], 8(%[a1]) \n\t"
1048 "lwc1 %[f12], -8(%[a2]) \n\t"
1049 "mul.s %[f9], %[f6], %[f7] \n\t"
1050 "mul.s %[f6], %[f6], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001051#if !defined(MIPS32_R2_LE)
      // First pair: f9 = yr, f6 = yi.  Also loads f13 <- a1[3],
      // f14 <- a2[-1], f15 <- c1[1] (wki of the second pair).
Yves Gerey665174f2018-06-19 15:03:05 +02001052 "mul.s %[f8], %[f5], %[f8] \n\t"
1053 "mul.s %[f5], %[f5], %[f7] \n\t"
1054 "lwc1 %[f13], 12(%[a1]) \n\t"
1055 "lwc1 %[f14], -4(%[a2]) \n\t"
1056 "lwc1 %[f15], 4(%[c1]) \n\t"
1057 "sub.s %[f9], %[f9], %[f8] \n\t"
1058 "add.s %[f6], %[f6], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001059#else
      // R2 build: same loads, with nmsub.s / madd.s forming f9 / f6.
Yves Gerey665174f2018-06-19 15:03:05 +02001060 "lwc1 %[f13], 12(%[a1]) \n\t"
1061 "lwc1 %[f14], -4(%[a2]) \n\t"
1062 "lwc1 %[f15], 4(%[c1]) \n\t"
1063 "nmsub.s %[f9], %[f9], %[f5], %[f8] \n\t"
1064 "madd.s %[f6], %[f6], %[f5], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001065#endif
      // Second pair: f10 = wkr', f5 = xr', f7 = xi' (f5/f7 are reused now
      // that the first pair's wki and xr are no longer needed).
Yves Gerey665174f2018-06-19 15:03:05 +02001066 "sub.s %[f10], %[f0], %[f10] \n\t"
1067 "sub.s %[f5], %[f11], %[f12] \n\t"
1068 "add.s %[f7], %[f13], %[f14] \n\t"
      // First pair results: a1[0] - yr, a2[0] + yr, a1[1] - yi.
1069 "sub.s %[f1], %[f1], %[f9] \n\t"
1070 "add.s %[f2], %[f2], %[f9] \n\t"
1071 "sub.s %[f3], %[f3], %[f6] \n\t"
      // f8 = wkr' * xr';  f10 = wkr' * xi'.
1072 "mul.s %[f8], %[f10], %[f5] \n\t"
1073 "mul.s %[f10], %[f10], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001074#if !defined(MIPS32_R2_LE)
      // f9 is free again (first pair's yr already consumed above).
      // f8 = yr' = wkr'*xr' - wki'*xi';  f10 = yi' = wkr'*xi' + wki'*xr'.
      // f4 = a2[1] - yi finishes the first pair.
Yves Gerey665174f2018-06-19 15:03:05 +02001075 "mul.s %[f9], %[f15], %[f7] \n\t"
1076 "mul.s %[f15], %[f15], %[f5] \n\t"
1077 "sub.s %[f4], %[f4], %[f6] \n\t"
1078 "swc1 %[f1], 0(%[a1]) \n\t"
1079 "swc1 %[f2], 0(%[a2]) \n\t"
1080 "sub.s %[f8], %[f8], %[f9] \n\t"
1081 "add.s %[f10], %[f10], %[f15] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001082#else
      // R2 build: same stores and f4 update, with nmsub.s / madd.s forming
      // f8 / f10.
Yves Gerey665174f2018-06-19 15:03:05 +02001083 "swc1 %[f1], 0(%[a1]) \n\t"
1084 "swc1 %[f2], 0(%[a2]) \n\t"
1085 "sub.s %[f4], %[f4], %[f6] \n\t"
1086 "nmsub.s %[f8], %[f8], %[f15], %[f7] \n\t"
1087 "madd.s %[f10], %[f10], %[f15], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001088#endif
      // Store the second floats of the first pair.
Yves Gerey665174f2018-06-19 15:03:05 +02001089 "swc1 %[f3], 4(%[a1]) \n\t"
1090 "swc1 %[f4], 4(%[a2]) \n\t"
      // Second pair results: a1[2] - yr', a2[-2] + yr', a1[3] - yi',
      // a2[-1] - yi'.
1091 "sub.s %[f11], %[f11], %[f8] \n\t"
1092 "add.s %[f12], %[f12], %[f8] \n\t"
1093 "sub.s %[f13], %[f13], %[f10] \n\t"
1094 "sub.s %[f14], %[f14], %[f10] \n\t"
      // Two coefficients consumed from each end per pass.
1095 "addiu %[c2], %[c2], -8 \n\t"
1096 "addiu %[c1], %[c1], 8 \n\t"
1097 "swc1 %[f11], 8(%[a1]) \n\t"
1098 "swc1 %[f12], -8(%[a2]) \n\t"
1099 "swc1 %[f13], 12(%[a1]) \n\t"
1100 "swc1 %[f14], -4(%[a2]) \n\t"
      // Advance a1 by two pairs, decrement the counter, loop while > 0.
1101 "addiu %[a1], %[a1], 16 \n\t"
1102 "addiu %[count], %[count], -1 \n\t"
1103 "bgtz %[count], 1b \n\t"
      // Branch delay slot: a2 moves back by two pairs on every pass.
1104 " addiu %[a2], %[a2], -16 \n\t"
1105 ".set pop \n\t"
      // Pointers are read-write operands; FP temporaries and count are
      // early-clobber outputs; f0 (0.5f) is the only pure input. The
      // "memory" clobber covers the loads/stores through a1/a2/c1/c2.
1106 : [a1] "+r"(a1), [a2] "+r"(a2), [c1] "+r"(c1), [c2] "+r"(c2),
1107 [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), [f4] "=&f"(f4),
1108 [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), [f8] "=&f"(f8),
1109 [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11), [f12] "=&f"(f12),
1110 [f13] "=&f"(f13), [f14] "=&f"(f14), [f15] "=&f"(f15),
1111 [count] "=&r"(count)
1112 : [f0] "f"(f0)
1113 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001114}
1115
// In-place update of a 128-float buffer using MIPS FPU inline assembly.
//
// First negates a[1] and a[65]. Then reads coefficients from
// c = rdft_w + 32 and, for every even j in [2, 62] with k = 128 - j,
// performs (as spelled out by the non-MIPS32_R2_LE branch):
//   wkr = 0.5f - c[32 - j/2];   wki = c[j/2];
//   xr  = a[j]     - a[k];
//   xi  = a[j + 1] + a[k + 1];
//   yr  = wkr * xr + wki * xi;
//   yi  = wkr * xi - wki * xr;
//   a[j]     = a[j] - yr;       a[j + 1] = yi - a[j + 1];
//   a[k]     = a[k] + yr;       a[k + 1] = yi - a[k + 1];
// Compared with rftfsub_128_mips the signs in yr / yi are swapped and the
// second float of each pair is replaced by (yi - old) rather than (old - yi).
//
// Layout of the assembly: one pair (j = 2) is handled before the loop, then
// the loop runs 15 times and handles two pairs per pass (31 pairs total).
//
// a: buffer of at least 128 floats; modified in place.
peah81b92912016-10-06 06:46:20 -07001116void rftbsub_128_mips(float* a) {
Yves Gerey665174f2018-06-19 15:03:05 +02001117 const float* c = rdft_w + 32;
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001118 const float f0 = 0.5f;
  // a1 walks forward from a[2]; a2 walks backward from a[126].
1119 float* a1 = &a[2];
1120 float* a2 = &a[126];
  // c1 walks forward from c[1] (wki); c2 walks backward from c[31] (used as
  // 0.5f - *c2 to form wkr).
1121 const float* c1 = &c[1];
1122 const float* c2 = &c[31];
  // f1..f15 are scratch FP registers for the asm (declared as early-clobber
  // outputs below); their values after the asm are not used.
Yves Gerey665174f2018-06-19 15:03:05 +02001123 float f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13, f14, f15;
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001124 int count;
1125
  // Sign flips done in C; the asm below never touches a[1] or a[65].
1126 a[1] = -a[1];
1127 a[65] = -a[65];
1128
Yves Gerey665174f2018-06-19 15:03:05 +02001129 __asm __volatile(
1130 ".set push \n\t"
      // noreorder: the assembler must not reschedule; the instruction after
      // the final bgtz therefore sits in its branch delay slot.
1131 ".set noreorder \n\t"
      // ---- Prologue: single pair at a1 = &a[2], a2 = &a[126]. ----
      // Byte offset 0 is the first float of a pair, offset 4 the second.
1132 "lwc1 %[f6], 0(%[c2]) \n\t"
1133 "lwc1 %[f1], 0(%[a1]) \n\t"
1134 "lwc1 %[f2], 0(%[a2]) \n\t"
1135 "lwc1 %[f3], 4(%[a1]) \n\t"
1136 "lwc1 %[f4], 4(%[a2]) \n\t"
1137 "lwc1 %[f5], 0(%[c1]) \n\t"
      // f6 = wkr = 0.5 - *c2;  f7 = xr;  f8 = xi;  f5 = wki.
1138 "sub.s %[f6], %[f0], %[f6] \n\t"
1139 "sub.s %[f7], %[f1], %[f2] \n\t"
1140 "add.s %[f8], %[f3], %[f4] \n\t"
      // Loop trip count for the two-pairs-per-pass loop below.
1141 "addiu %[count], $zero, 15 \n\t"
      // f9 = wkr * xr;  f6 = wkr * xi.
1142 "mul.s %[f9], %[f6], %[f7] \n\t"
1143 "mul.s %[f6], %[f6], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001144#if !defined(MIPS32_R2_LE)
      // f9 = yr = wkr*xr + wki*xi;  f6 = yi = wkr*xi - wki*xr.
Yves Gerey665174f2018-06-19 15:03:05 +02001145 "mul.s %[f8], %[f5], %[f8] \n\t"
1146 "mul.s %[f5], %[f5], %[f7] \n\t"
1147 "add.s %[f9], %[f9], %[f8] \n\t"
1148 "sub.s %[f6], %[f6], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001149#else
      // R2 build: madd.s / nmsub.s take the place of the mul+add / mul+sub
      // sequence above (presumably yielding the same yr / yi - confirm
      // against the MIPS32 FPU instruction reference).
Yves Gerey665174f2018-06-19 15:03:05 +02001150 "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
1151 "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001152#endif
      // a1[0] -= yr;  a2[0] += yr;  a1[1] = yi - a1[1];  a2[1] = yi - a2[1].
Yves Gerey665174f2018-06-19 15:03:05 +02001153 "sub.s %[f1], %[f1], %[f9] \n\t"
1154 "add.s %[f2], %[f2], %[f9] \n\t"
1155 "sub.s %[f3], %[f6], %[f3] \n\t"
1156 "sub.s %[f4], %[f6], %[f4] \n\t"
1157 "swc1 %[f1], 0(%[a1]) \n\t"
1158 "swc1 %[f2], 0(%[a2]) \n\t"
1159 "swc1 %[f3], 4(%[a1]) \n\t"
1160 "swc1 %[f4], 4(%[a2]) \n\t"
      // Step every pointer by one pair / one coefficient.
1161 "addiu %[a1], %[a1], 8 \n\t"
1162 "addiu %[a2], %[a2], -8 \n\t"
1163 "addiu %[c1], %[c1], 4 \n\t"
1164 "addiu %[c2], %[c2], -4 \n\t"
      // ---- Main loop: two pairs per pass, 15 passes. ----
1165 "1: \n\t"
      // First pair of the pass: same register roles as in the prologue.
1166 "lwc1 %[f6], 0(%[c2]) \n\t"
1167 "lwc1 %[f1], 0(%[a1]) \n\t"
1168 "lwc1 %[f2], 0(%[a2]) \n\t"
1169 "lwc1 %[f3], 4(%[a1]) \n\t"
1170 "lwc1 %[f4], 4(%[a2]) \n\t"
1171 "lwc1 %[f5], 0(%[c1]) \n\t"
1172 "sub.s %[f6], %[f0], %[f6] \n\t"
1173 "sub.s %[f7], %[f1], %[f2] \n\t"
1174 "add.s %[f8], %[f3], %[f4] \n\t"
      // Loads for the second pair are interleaved with the first pair's
      // arithmetic: f10 <- c2[-1], f11 <- a1[2], f12 <- a2[-2].
1175 "lwc1 %[f10], -4(%[c2]) \n\t"
1176 "lwc1 %[f11], 8(%[a1]) \n\t"
1177 "lwc1 %[f12], -8(%[a2]) \n\t"
1178 "mul.s %[f9], %[f6], %[f7] \n\t"
1179 "mul.s %[f6], %[f6], %[f8] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001180#if !defined(MIPS32_R2_LE)
      // First pair: f9 = yr, f6 = yi.  Also loads f13 <- a1[3],
      // f14 <- a2[-1], f15 <- c1[1] (wki of the second pair).
Yves Gerey665174f2018-06-19 15:03:05 +02001181 "mul.s %[f8], %[f5], %[f8] \n\t"
1182 "mul.s %[f5], %[f5], %[f7] \n\t"
1183 "lwc1 %[f13], 12(%[a1]) \n\t"
1184 "lwc1 %[f14], -4(%[a2]) \n\t"
1185 "lwc1 %[f15], 4(%[c1]) \n\t"
1186 "add.s %[f9], %[f9], %[f8] \n\t"
1187 "sub.s %[f6], %[f6], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001188#else
      // R2 build: same loads, with madd.s / nmsub.s forming f9 / f6.
Yves Gerey665174f2018-06-19 15:03:05 +02001189 "lwc1 %[f13], 12(%[a1]) \n\t"
1190 "lwc1 %[f14], -4(%[a2]) \n\t"
1191 "lwc1 %[f15], 4(%[c1]) \n\t"
1192 "madd.s %[f9], %[f9], %[f5], %[f8] \n\t"
1193 "nmsub.s %[f6], %[f6], %[f5], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001194#endif
      // Second pair: f10 = wkr', f5 = xr', f7 = xi' (f5/f7 are reused now
      // that the first pair's wki and xr are no longer needed).
Yves Gerey665174f2018-06-19 15:03:05 +02001195 "sub.s %[f10], %[f0], %[f10] \n\t"
1196 "sub.s %[f5], %[f11], %[f12] \n\t"
1197 "add.s %[f7], %[f13], %[f14] \n\t"
      // First pair results: a1[0] - yr, a2[0] + yr, yi - a1[1].
1198 "sub.s %[f1], %[f1], %[f9] \n\t"
1199 "add.s %[f2], %[f2], %[f9] \n\t"
1200 "sub.s %[f3], %[f6], %[f3] \n\t"
      // f8 = wkr' * xr';  f10 = wkr' * xi'.
1201 "mul.s %[f8], %[f10], %[f5] \n\t"
1202 "mul.s %[f10], %[f10], %[f7] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001203#if !defined(MIPS32_R2_LE)
      // f9 is free again (first pair's yr already consumed above).
      // f8 = yr' = wkr'*xr' + wki'*xi';  f10 = yi' = wkr'*xi' - wki'*xr'.
      // f4 = yi - a2[1] finishes the first pair.
Yves Gerey665174f2018-06-19 15:03:05 +02001204 "mul.s %[f9], %[f15], %[f7] \n\t"
1205 "mul.s %[f15], %[f15], %[f5] \n\t"
1206 "sub.s %[f4], %[f6], %[f4] \n\t"
1207 "swc1 %[f1], 0(%[a1]) \n\t"
1208 "swc1 %[f2], 0(%[a2]) \n\t"
1209 "add.s %[f8], %[f8], %[f9] \n\t"
1210 "sub.s %[f10], %[f10], %[f15] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001211#else
      // R2 build: same stores and f4 update, with madd.s / nmsub.s forming
      // f8 / f10.
Yves Gerey665174f2018-06-19 15:03:05 +02001212 "swc1 %[f1], 0(%[a1]) \n\t"
1213 "swc1 %[f2], 0(%[a2]) \n\t"
1214 "sub.s %[f4], %[f6], %[f4] \n\t"
1215 "madd.s %[f8], %[f8], %[f15], %[f7] \n\t"
1216 "nmsub.s %[f10], %[f10], %[f15], %[f5] \n\t"
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001217#endif
      // Store the second floats of the first pair.
Yves Gerey665174f2018-06-19 15:03:05 +02001218 "swc1 %[f3], 4(%[a1]) \n\t"
1219 "swc1 %[f4], 4(%[a2]) \n\t"
      // Second pair results: a1[2] - yr', a2[-2] + yr', yi' - a1[3],
      // yi' - a2[-1].
1220 "sub.s %[f11], %[f11], %[f8] \n\t"
1221 "add.s %[f12], %[f12], %[f8] \n\t"
1222 "sub.s %[f13], %[f10], %[f13] \n\t"
1223 "sub.s %[f14], %[f10], %[f14] \n\t"
      // Two coefficients consumed from each end per pass.
1224 "addiu %[c2], %[c2], -8 \n\t"
1225 "addiu %[c1], %[c1], 8 \n\t"
1226 "swc1 %[f11], 8(%[a1]) \n\t"
1227 "swc1 %[f12], -8(%[a2]) \n\t"
1228 "swc1 %[f13], 12(%[a1]) \n\t"
1229 "swc1 %[f14], -4(%[a2]) \n\t"
      // Advance a1 by two pairs, decrement the counter, loop while > 0.
1230 "addiu %[a1], %[a1], 16 \n\t"
1231 "addiu %[count], %[count], -1 \n\t"
1232 "bgtz %[count], 1b \n\t"
      // Branch delay slot: a2 moves back by two pairs on every pass.
1233 " addiu %[a2], %[a2], -16 \n\t"
1234 ".set pop \n\t"
      // Pointers are read-write operands; FP temporaries and count are
      // early-clobber outputs; f0 (0.5f) is the only pure input. The
      // "memory" clobber covers the loads/stores through a1/a2/c1/c2.
1235 : [a1] "+r"(a1), [a2] "+r"(a2), [c1] "+r"(c1), [c2] "+r"(c2),
1236 [f1] "=&f"(f1), [f2] "=&f"(f2), [f3] "=&f"(f3), [f4] "=&f"(f4),
1237 [f5] "=&f"(f5), [f6] "=&f"(f6), [f7] "=&f"(f7), [f8] "=&f"(f8),
1238 [f9] "=&f"(f9), [f10] "=&f"(f10), [f11] "=&f"(f11), [f12] "=&f"(f12),
1239 [f13] "=&f"(f13), [f14] "=&f"(f14), [f15] "=&f"(f15),
1240 [count] "=&r"(count)
1241 : [f0] "f"(f0)
1242 : "memory");
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001243}
peah81b92912016-10-06 06:46:20 -07001244#endif
andrew@webrtc.orga22485e2014-08-29 17:51:28 +00001245
peah81b92912016-10-06 06:46:20 -07001246} // namespace webrtc