// SwiftShader Software Renderer
//
// Copyright(c) 2005-2011 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "VertexProgram.hpp"
13
14#include "Renderer.hpp"
15#include "VertexShader.hpp"
16#include "Vertex.hpp"
17#include "Half.hpp"
18#include "SamplerCore.hpp"
19#include "Debug.hpp"
20
21extern bool localShaderConstants;
22
23namespace sw
24{
25 VertexProgram::VertexProgram(const VertexProcessor::State &state, const VertexShader *vertexShader) : VertexRoutine(state), vertexShader(vertexShader)
26 {
27 returns = false;
28 ifDepth = 0;
29 loopRepDepth = 0;
30 breakDepth = 0;
31
32 for(int i = 0; i < 2048; i++)
33 {
34 labelBlock[i] = 0;
35 }
36 }
37
38 VertexProgram::~VertexProgram()
39 {
40 for(int i = 0; i < 4; i++)
41 {
42 delete sampler[i];
43 }
44 }
45
46 void VertexProgram::pipeline(Registers &r)
47 {
48 for(int i = 0; i < 4; i++)
49 {
50 sampler[i] = new SamplerCore(r.constants, state.samplerState[i]);
51 }
52
53 if(!state.preTransformed)
54 {
55 shader(r);
56 }
57 else
58 {
59 passThrough(r);
60 }
61 }
62
63 Color4f VertexProgram::readConstant(Registers &r, const Src &src, int offset)
64 {
65 Color4f c;
66
67 int i = src.index + offset;
68 bool relative = src.relative;
69
70 if(!relative)
71 {
72 c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]));
73
74 c.r = c.r.xxxx;
75 c.g = c.g.yyyy;
76 c.b = c.b.zzzz;
77 c.a = c.a.wwww;
78
79 if(localShaderConstants) // Constant may be known at compile time
80 {
81 for(int j = 0; j < vertexShader->getLength(); j++)
82 {
83 const ShaderInstruction &instruction = *vertexShader->getInstruction(j);
84
85 if(instruction.getOpcode() == ShaderOperation::OPCODE_DEF)
86 {
87 if(instruction.getDestinationParameter().index == i)
88 {
89 c.r = Float4(instruction.getSourceParameter(0).value);
90 c.g = Float4(instruction.getSourceParameter(1).value);
91 c.b = Float4(instruction.getSourceParameter(2).value);
92 c.a = Float4(instruction.getSourceParameter(3).value);
93
94 break;
95 }
96 }
97 }
98 }
99 }
100 else if(src.relativeType == Src::PARAMETER_LOOP)
101 {
102 Int loopCounter = r.aL[r.loopDepth];
103
104 c.r = c.g = c.b = c.a = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c[i]) + loopCounter * 16);
105
106 c.r = c.r.xxxx;
107 c.g = c.g.yyyy;
108 c.b = c.b.zzzz;
109 c.a = c.a.wwww;
110 }
111 else
112 {
113 Int index0;
114 Int index1;
115 Int index2;
116 Int index3;
117
118 Float4 a0_;
119
120 switch(src.relativeSwizzle & 0x03)
121 {
122 case 0: a0_ = r.a0.x; break;
123 case 1: a0_ = r.a0.y; break;
124 case 2: a0_ = r.a0.z; break;
125 case 3: a0_ = r.a0.w; break;
126 }
127
128 index0 = i + RoundInt(Float(a0_.x));
129 index1 = i + RoundInt(Float(a0_.y));
130 index2 = i + RoundInt(Float(a0_.z));
131 index3 = i + RoundInt(Float(a0_.w));
132
133 // Clamp to constant register range, c[256] = {0, 0, 0, 0}
134 index0 = IfThenElse(UInt(index0) > UInt(256), Int(256), index0);
135 index1 = IfThenElse(UInt(index1) > UInt(256), Int(256), index1);
136 index2 = IfThenElse(UInt(index2) > UInt(256), Int(256), index2);
137 index3 = IfThenElse(UInt(index3) > UInt(256), Int(256), index3);
138
139 c.x = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index0 * 16, 16);
140 c.y = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index1 * 16, 16);
141 c.z = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index2 * 16, 16);
142 c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,vs.c) + index3 * 16, 16);
143
144 transpose4x4(c.x, c.y, c.z, c.w);
145 }
146
147 return c;
148 }
149
// Generates code for the vertex shader, one instruction at a time.
//
// For every instruction other than declarations and constant definitions:
//  1. the non-void source operands are fetched through reg();
//  2. the opcode is dispatched to the matching arithmetic or control-flow
//     generator, which leaves its result (if any) in 'd';
//  3. if the instruction has a destination register, the result is optionally
//     saturated, merged with the destination's previous contents according to
//     the enable masks and predicate (only when the shader contains dynamic
//     branching), and finally written to the destination register.
void VertexProgram::shader(Registers &r)
{
	// vertexShader->print("VertexShader-%0.16llX.txt", state.shaderHash);

	unsigned short version = vertexShader->getVersion();

	// Start with empty enable and call stacks
	r.enableIndex = 0;
	r.stackIndex = 0;

	for(int i = 0; i < vertexShader->getLength(); i++)
	{
		const ShaderInstruction *instruction = vertexShader->getInstruction(i);
		Op::Opcode opcode = instruction->getOpcode();

		// #ifndef NDEBUG // FIXME: Centralize debug output control
		// vertexShader->printInstruction(i, "debug.txt");
		// #endif

		// Declarations and constant definitions are skipped here
		if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB)
		{
			continue;
		}

		Dst dest = instruction->getDestinationParameter();
		Src src0 = instruction->getSourceParameter(0);
		Src src1 = instruction->getSourceParameter(1);
		Src src2 = instruction->getSourceParameter(2);
		Src src3 = instruction->getSourceParameter(3);

		bool predicate = instruction->isPredicate();
		int size = vertexShader->size(opcode);
		Usage usage = instruction->getUsage();
		unsigned char usageIndex = instruction->getUsageIndex();
		Control control = instruction->getControl();
		bool integer = dest.type == Dst::PARAMETER_ADDR;
		bool pp = dest.partialPrecision;

		Color4f d;
		Color4f s0;
		Color4f s1;
		Color4f s2;
		Color4f s3;

		// Fetch only the source operands the instruction actually uses
		if(src0.type != Src::PARAMETER_VOID) s0 = reg(r, src0);
		if(src1.type != Src::PARAMETER_VOID) s1 = reg(r, src1);
		if(src2.type != Src::PARAMETER_VOID) s2 = reg(r, src2);
		if(src3.type != Src::PARAMETER_VOID) s3 = reg(r, src3);

		// Control-flow and matrix operations receive the raw source parameters
		// (srcN) rather than the fetched values (sN)
		switch(opcode)
		{
		case Op::OPCODE_VS_1_0: break;
		case Op::OPCODE_VS_1_1: break;
		case Op::OPCODE_VS_2_0: break;
		case Op::OPCODE_VS_2_x: break;
		case Op::OPCODE_VS_2_sw: break;
		case Op::OPCODE_VS_3_0: break;
		case Op::OPCODE_VS_3_sw: break;
		case Op::OPCODE_DCL: break;
		case Op::OPCODE_DEF: break;
		case Op::OPCODE_DEFI: break;
		case Op::OPCODE_DEFB: break;
		case Op::OPCODE_NOP: break;
		case Op::OPCODE_ABS: abs(d, s0); break;
		case Op::OPCODE_ADD: add(d, s0, s1); break;
		case Op::OPCODE_CRS: crs(d, s0, s1); break;
		case Op::OPCODE_DP3: dp3(d, s0, s1); break;
		case Op::OPCODE_DP4: dp4(d, s0, s1); break;
		case Op::OPCODE_DST: dst(d, s0, s1); break;
		case Op::OPCODE_EXP: exp(d, s0, pp); break;
		case Op::OPCODE_EXPP: expp(d, s0, version); break;
		case Op::OPCODE_FRC: frc(d, s0); break;
		case Op::OPCODE_LIT: lit(d, s0); break;
		case Op::OPCODE_LOG: log(d, s0, pp); break;
		case Op::OPCODE_LOGP: logp(d, s0, version); break;
		case Op::OPCODE_LRP: lrp(d, s0, s1, s2); break;
		case Op::OPCODE_M3X2: M3X2(r, d, s0, src1); break;
		case Op::OPCODE_M3X3: M3X3(r, d, s0, src1); break;
		case Op::OPCODE_M3X4: M3X4(r, d, s0, src1); break;
		case Op::OPCODE_M4X3: M4X3(r, d, s0, src1); break;
		case Op::OPCODE_M4X4: M4X4(r, d, s0, src1); break;
		case Op::OPCODE_MAD: mad(d, s0, s1, s2); break;
		case Op::OPCODE_MAX: max(d, s0, s1); break;
		case Op::OPCODE_MIN: min(d, s0, s1); break;
		case Op::OPCODE_MOV: mov(d, s0, integer); break;
		case Op::OPCODE_MOVA: mov(d, s0); break;
		case Op::OPCODE_MUL: mul(d, s0, s1); break;
		case Op::OPCODE_NRM: nrm(d, s0, pp); break;
		case Op::OPCODE_POW: pow(d, s0, s1, pp); break;
		case Op::OPCODE_RCP: rcp(d, s0, pp); break;
		case Op::OPCODE_RSQ: rsq(d, s0, pp); break;
		case Op::OPCODE_SGE: sge(d, s0, s1); break;
		case Op::OPCODE_SGN: sgn(d, s0); break;
		case Op::OPCODE_SINCOS: sincos(d, s0, pp); break;
		case Op::OPCODE_SLT: slt(d, s0, s1); break;
		case Op::OPCODE_SUB: sub(d, s0, s1); break;
		case Op::OPCODE_BREAK: BREAK(r); break;
		case Op::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break;
		case Op::OPCODE_BREAKP: BREAKP(r, src0); break;
		case Op::OPCODE_CALL: CALL(r, dest.index); break;
		case Op::OPCODE_CALLNZ: CALLNZ(r, dest.index, src0); break;
		case Op::OPCODE_ELSE: ELSE(r); break;
		case Op::OPCODE_ENDIF: ENDIF(r); break;
		case Op::OPCODE_ENDLOOP: ENDLOOP(r); break;
		case Op::OPCODE_ENDREP: ENDREP(r); break;
		case Op::OPCODE_IF: IF(r, src0); break;
		case Op::OPCODE_IFC: IFC(r, s0, s1, control); break;
		case Op::OPCODE_LABEL: LABEL(dest.index); break;
		case Op::OPCODE_LOOP: LOOP(r, src1); break;
		case Op::OPCODE_REP: REP(r, src0); break;
		case Op::OPCODE_RET: RET(r); break;
		case Op::OPCODE_SETP: setp(d, s0, s1, control); break;
		case Op::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1); break;
		case Op::OPCODE_END: break;
		default:
			ASSERT(false);
		}

		// Only instructions with a real destination register write a result
		if(dest.type != Dst::PARAMETER_VOID && dest.type != Dst::PARAMETER_LABEL)
		{
			if(dest.saturate)
			{
				// Clamp the written components to the [0, 1] range
				if(dest.x) d.r = Max(d.r, Float4(0.0f, 0.0f, 0.0f, 0.0f));
				if(dest.y) d.g = Max(d.g, Float4(0.0f, 0.0f, 0.0f, 0.0f));
				if(dest.z) d.b = Max(d.b, Float4(0.0f, 0.0f, 0.0f, 0.0f));
				if(dest.w) d.a = Max(d.a, Float4(0.0f, 0.0f, 0.0f, 0.0f));

				if(dest.x) d.r = Min(d.r, Float4(1.0f, 1.0f, 1.0f, 1.0f));
				if(dest.y) d.g = Min(d.g, Float4(1.0f, 1.0f, 1.0f, 1.0f));
				if(dest.z) d.b = Min(d.b, Float4(1.0f, 1.0f, 1.0f, 1.0f));
				if(dest.w) d.a = Min(d.a, Float4(1.0f, 1.0f, 1.0f, 1.0f));
			}

			if(vertexShader->containsDynamicBranching())
			{
				// Previous contents of the destination register, kept wherever
				// the enable mask is not set
				Color4f pDst; // FIXME: Rename

				switch(dest.type)
				{
				case Dst::PARAMETER_VOID: break;
				case Dst::PARAMETER_TEMP: pDst = r.r[dest.index]; break;
				case Dst::PARAMETER_ADDR: pDst = r.a0; break;
				case Dst::PARAMETER_RASTOUT:
					switch(dest.index)
					{
					case 0:
						if(dest.x) pDst.x = r.ox[Pos];
						if(dest.y) pDst.y = r.oy[Pos];
						if(dest.z) pDst.z = r.oz[Pos];
						if(dest.w) pDst.w = r.ow[Pos];
						break;
					case 1:
						pDst.x = r.ox[Fog];
						break;
					case 2:
						pDst.x = r.oy[Pts];
						break;
					default:
						ASSERT(false);
					}
					break;
				case Dst::PARAMETER_ATTROUT:
					if(dest.x) pDst.x = r.ox[D0 + dest.index];
					if(dest.y) pDst.y = r.oy[D0 + dest.index];
					if(dest.z) pDst.z = r.oz[D0 + dest.index];
					if(dest.w) pDst.w = r.ow[D0 + dest.index];
					break;
				case Dst::PARAMETER_TEXCRDOUT:
				// case Dst::PARAMETER_OUTPUT:
					if(version < 0x0300)
					{
						if(dest.x) pDst.x = r.ox[T0 + dest.index];
						if(dest.y) pDst.y = r.oy[T0 + dest.index];
						if(dest.z) pDst.z = r.oz[T0 + dest.index];
						if(dest.w) pDst.w = r.ow[T0 + dest.index];
					}
					else
					{
						if(!dest.relative)
						{
							if(dest.x) pDst.x = r.ox[dest.index];
							if(dest.y) pDst.y = r.oy[dest.index];
							if(dest.z) pDst.z = r.oz[dest.index];
							if(dest.w) pDst.w = r.ow[dest.index];
						}
						else
						{
							// Output register indexed by the current loop counter
							Int aL = r.aL[r.loopDepth];

							if(dest.x) pDst.x = r.ox[dest.index + aL];
							if(dest.y) pDst.y = r.oy[dest.index + aL];
							if(dest.z) pDst.z = r.oz[dest.index + aL];
							if(dest.w) pDst.w = r.ow[dest.index + aL];
						}
					}
					break;
				case Dst::PARAMETER_LABEL: break;
				case Dst::PARAMETER_PREDICATE: pDst = r.p0; break;
				case Dst::PARAMETER_INPUT: break;
				default:
					ASSERT(false);
				}

				// Mask of elements enabled by both the innermost conditional
				// and the break state of the enclosing loop
				Int4 enable = r.enableStack[r.enableIndex] & r.enableBreak;

				Int4 xEnable = enable;
				Int4 yEnable = enable;
				Int4 zEnable = enable;
				Int4 wEnable = enable;

				if(predicate)
				{
					// Each destination component is gated by the predicate
					// component chosen by the predicate swizzle
					unsigned char pSwizzle = instruction->getPredicateSwizzle();

					Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
					Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
					Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
					Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];

					if(!instruction->isPredicateNot())
					{
						if(dest.x) xEnable = xEnable & As<Int4>(xPredicate);
						if(dest.y) yEnable = yEnable & As<Int4>(yPredicate);
						if(dest.z) zEnable = zEnable & As<Int4>(zPredicate);
						if(dest.w) wEnable = wEnable & As<Int4>(wPredicate);
					}
					else
					{
						if(dest.x) xEnable = xEnable & ~As<Int4>(xPredicate);
						if(dest.y) yEnable = yEnable & ~As<Int4>(yPredicate);
						if(dest.z) zEnable = zEnable & ~As<Int4>(zPredicate);
						if(dest.w) wEnable = wEnable & ~As<Int4>(wPredicate);
					}
				}

				// Keep the new value where enabled...
				if(dest.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
				if(dest.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
				if(dest.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
				if(dest.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);

				// ...and the previous destination value everywhere else
				if(dest.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
				if(dest.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
				if(dest.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
				if(dest.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
			}

			// Write the components selected by the destination write mask
			switch(dest.type)
			{
			case Dst::PARAMETER_VOID:
				break;
			case Dst::PARAMETER_TEMP:
				if(dest.x) r.r[dest.index].x = d.x;
				if(dest.y) r.r[dest.index].y = d.y;
				if(dest.z) r.r[dest.index].z = d.z;
				if(dest.w) r.r[dest.index].w = d.w;
				break;
			case Dst::PARAMETER_ADDR:
				if(dest.x) r.a0.x = d.x;
				if(dest.y) r.a0.y = d.y;
				if(dest.z) r.a0.z = d.z;
				if(dest.w) r.a0.w = d.w;
				break;
			case Dst::PARAMETER_RASTOUT:
				switch(dest.index)
				{
				case 0:
					if(dest.x) r.ox[Pos] = d.x;
					if(dest.y) r.oy[Pos] = d.y;
					if(dest.z) r.oz[Pos] = d.z;
					if(dest.w) r.ow[Pos] = d.w;
					break;
				case 1:
					r.ox[Fog] = d.x;
					break;
				case 2:
					r.oy[Pts] = d.x;
					break;
				default: ASSERT(false);
				}
				break;
			case Dst::PARAMETER_ATTROUT:
				if(dest.x) r.ox[D0 + dest.index] = d.x;
				if(dest.y) r.oy[D0 + dest.index] = d.y;
				if(dest.z) r.oz[D0 + dest.index] = d.z;
				if(dest.w) r.ow[D0 + dest.index] = d.w;
				break;
			case Dst::PARAMETER_TEXCRDOUT:
			// case Dst::PARAMETER_OUTPUT:
				if(version < 0x0300)
				{
					if(dest.x) r.ox[T0 + dest.index] = d.x;
					if(dest.y) r.oy[T0 + dest.index] = d.y;
					if(dest.z) r.oz[T0 + dest.index] = d.z;
					if(dest.w) r.ow[T0 + dest.index] = d.w;
				}
				else
				{
					if(!dest.relative)
					{
						if(dest.x) r.ox[dest.index] = d.x;
						if(dest.y) r.oy[dest.index] = d.y;
						if(dest.z) r.oz[dest.index] = d.z;
						if(dest.w) r.ow[dest.index] = d.w;
					}
					else
					{
						// Output register indexed by the current loop counter
						Int aL = r.aL[r.loopDepth];

						if(dest.x) r.ox[dest.index + aL] = d.x;
						if(dest.y) r.oy[dest.index + aL] = d.y;
						if(dest.z) r.oz[dest.index + aL] = d.z;
						if(dest.w) r.ow[dest.index + aL] = d.w;
					}
				}
				break;
			case Dst::PARAMETER_LABEL: break;
			case Dst::PARAMETER_PREDICATE: r.p0 = d; break;
			case Dst::PARAMETER_INPUT: break;
			default:
				ASSERT(false);
			}
		}
	}

	// A 'ret' instruction branched to returnBlock; continue generating the
	// code that follows the shader from there
	if(returns)
	{
		Nucleus::setInsertBlock(returnBlock);
	}
}
478
479 void VertexProgram::passThrough(Registers &r)
480 {
481 if(vertexShader)
482 {
483 for(int i = 0; i < 12; i++)
484 {
485 unsigned char usage = vertexShader->output[i][0].usage;
486 unsigned char index = vertexShader->output[i][0].index;
487
488 switch(usage)
489 {
490 case 0xFF:
491 continue;
492 case ShaderOperation::USAGE_PSIZE:
493 r.oy[i] = r.v[i].x;
494 break;
495 case ShaderOperation::USAGE_TEXCOORD:
496 r.ox[i] = r.v[i].x;
497 r.oy[i] = r.v[i].y;
498 r.oz[i] = r.v[i].z;
499 r.ow[i] = r.v[i].w;
500 break;
501 case ShaderOperation::USAGE_POSITION:
502 r.ox[i] = r.v[i].x;
503 r.oy[i] = r.v[i].y;
504 r.oz[i] = r.v[i].z;
505 r.ow[i] = r.v[i].w;
506 break;
507 case ShaderOperation::USAGE_COLOR:
508 r.ox[i] = r.v[i].x;
509 r.oy[i] = r.v[i].y;
510 r.oz[i] = r.v[i].z;
511 r.ow[i] = r.v[i].w;
512 break;
513 case ShaderOperation::USAGE_FOG:
514 r.ox[i] = r.v[i].x;
515 break;
516 default:
517 ASSERT(false);
518 }
519 }
520 }
521 else
522 {
523 r.ox[Pos] = r.v[PositionT].x;
524 r.oy[Pos] = r.v[PositionT].y;
525 r.oz[Pos] = r.v[PositionT].z;
526 r.ow[Pos] = r.v[PositionT].w;
527
528 for(int i = 0; i < 2; i++)
529 {
530 r.ox[D0 + i] = r.v[Color0 + i].x;
531 r.oy[D0 + i] = r.v[Color0 + i].y;
532 r.oz[D0 + i] = r.v[Color0 + i].z;
533 r.ow[D0 + i] = r.v[Color0 + i].w;
534 }
535
536 for(int i = 0; i < 8; i++)
537 {
538 r.ox[T0 + i] = r.v[TexCoord0 + i].x;
539 r.oy[T0 + i] = r.v[TexCoord0 + i].y;
540 r.oz[T0 + i] = r.v[TexCoord0 + i].z;
541 r.ow[T0 + i] = r.v[TexCoord0 + i].w;
542 }
543
544 r.oy[Pts] = r.v[PSize].x;
545 }
546 }
547
548 Color4f VertexProgram::reg(Registers &r, const Src &src, int offset)
549 {
550 int i = src.index + offset;
551
552 Color4f reg;
553
554 if(src.type == Src::PARAMETER_CONST)
555 {
556 reg = readConstant(r, src, offset);
557 }
558
559 switch(src.type)
560 {
561 case Src::PARAMETER_TEMP: reg = r.r[i]; break;
562 case Src::PARAMETER_CONST: break;
563 case Src::PARAMETER_INPUT: reg = r.v[i]; break;
564 case Src::PARAMETER_VOID: return r.r[0]; // Dummy
565 case Src::PARAMETER_FLOATLITERAL: return r.r[0]; // Dummy
566 case Src::PARAMETER_ADDR: reg = r.a0; break;
567 case Src::PARAMETER_CONSTBOOL: return r.r[0]; // Dummy
568 case Src::PARAMETER_CONSTINT: return r.r[0]; // Dummy
569 case Src::PARAMETER_LOOP: return r.r[0]; // Dummy
570 case Src::PARAMETER_PREDICATE: return r.r[0]; // Dummy
571 case Src::PARAMETER_SAMPLER: return r.r[0]; // Dummy
572 default:
573 ASSERT(false);
574 }
575
576 Color4f mod;
577
578 mod.x = reg[(src.swizzle >> 0) & 0x03];
579 mod.y = reg[(src.swizzle >> 2) & 0x03];
580 mod.z = reg[(src.swizzle >> 4) & 0x03];
581 mod.w = reg[(src.swizzle >> 6) & 0x03];
582
583 switch(src.modifier)
584 {
585 case Src::MODIFIER_NONE:
586 break;
587 case Src::MODIFIER_NEGATE:
588 mod.x = -mod.x;
589 mod.y = -mod.y;
590 mod.z = -mod.z;
591 mod.w = -mod.w;
592 break;
593 case Src::MODIFIER_BIAS:
594 ASSERT(false); // NOTE: Unimplemented
595 break;
596 case Src::MODIFIER_BIAS_NEGATE:
597 ASSERT(false); // NOTE: Unimplemented
598 break;
599 case Src::MODIFIER_SIGN:
600 ASSERT(false); // NOTE: Unimplemented
601 break;
602 case Src::MODIFIER_SIGN_NEGATE:
603 ASSERT(false); // NOTE: Unimplemented
604 break;
605 case Src::MODIFIER_COMPLEMENT:
606 ASSERT(false); // NOTE: Unimplemented
607 break;
608 case Src::MODIFIER_X2:
609 ASSERT(false); // NOTE: Unimplemented
610 break;
611 case Src::MODIFIER_X2_NEGATE:
612 ASSERT(false); // NOTE: Unimplemented
613 break;
614 case Src::MODIFIER_DZ:
615 ASSERT(false); // NOTE: Unimplemented
616 break;
617 case Src::MODIFIER_DW:
618 ASSERT(false); // NOTE: Unimplemented
619 break;
620 case Src::MODIFIER_ABS:
621 mod.x = Abs(mod.x);
622 mod.y = Abs(mod.y);
623 mod.z = Abs(mod.z);
624 mod.w = Abs(mod.w);
625 break;
626 case Src::MODIFIER_ABS_NEGATE:
627 mod.x = -Abs(mod.x);
628 mod.y = -Abs(mod.y);
629 mod.z = -Abs(mod.z);
630 mod.w = -Abs(mod.w);
631 break;
632 case Src::MODIFIER_NOT:
633 UNIMPLEMENTED();
634 break;
635 default:
636 ASSERT(false);
637 }
638
639 return mod;
640 }
641
642 void VertexProgram::M3X2(Registers &r, Color4f &dst, Color4f &src0, Src &src1)
643 {
644 Color4f row0 = reg(r, src1, 0);
645 Color4f row1 = reg(r, src1, 1);
646
647 dst.x = dot3(src0, row0);
648 dst.y = dot3(src0, row1);
649 }
650
651 void VertexProgram::M3X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1)
652 {
653 Color4f row0 = reg(r, src1, 0);
654 Color4f row1 = reg(r, src1, 1);
655 Color4f row2 = reg(r, src1, 2);
656
657 dst.x = dot3(src0, row0);
658 dst.y = dot3(src0, row1);
659 dst.z = dot3(src0, row2);
660 }
661
662 void VertexProgram::M3X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1)
663 {
664 Color4f row0 = reg(r, src1, 0);
665 Color4f row1 = reg(r, src1, 1);
666 Color4f row2 = reg(r, src1, 2);
667 Color4f row3 = reg(r, src1, 3);
668
669 dst.x = dot3(src0, row0);
670 dst.y = dot3(src0, row1);
671 dst.z = dot3(src0, row2);
672 dst.w = dot3(src0, row3);
673 }
674
675 void VertexProgram::M4X3(Registers &r, Color4f &dst, Color4f &src0, Src &src1)
676 {
677 Color4f row0 = reg(r, src1, 0);
678 Color4f row1 = reg(r, src1, 1);
679 Color4f row2 = reg(r, src1, 2);
680
681 dst.x = dot4(src0, row0);
682 dst.y = dot4(src0, row1);
683 dst.z = dot4(src0, row2);
684 }
685
686 void VertexProgram::M4X4(Registers &r, Color4f &dst, Color4f &src0, Src &src1)
687 {
688 Color4f row0 = reg(r, src1, 0);
689 Color4f row1 = reg(r, src1, 1);
690 Color4f row2 = reg(r, src1, 2);
691 Color4f row3 = reg(r, src1, 3);
692
693 dst.x = dot4(src0, row0);
694 dst.y = dot4(src0, row1);
695 dst.z = dot4(src0, row2);
696 dst.w = dot4(src0, row3);
697 }
698
699 void VertexProgram::BREAK(Registers &r)
700 {
701 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
702 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
703
704 if(breakDepth == 0)
705 {
706 Nucleus::createBr(endBlock);
707 }
708 else
709 {
710 r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
711 Bool allBreak = SignMask(r.enableBreak) == 0x0;
712
713 branch(allBreak, endBlock, deadBlock);
714 }
715
716 Nucleus::setInsertBlock(deadBlock);
717 }
718
719 void VertexProgram::BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control control)
720 {
721 Int4 condition;
722
723 switch(control)
724 {
725 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
726 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
727 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
728 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
729 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
730 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
731 default:
732 ASSERT(false);
733 }
734
735 condition &= r.enableStack[r.enableIndex];
736
737 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
738 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
739
740 r.enableBreak = r.enableBreak & ~condition;
741 Bool allBreak = SignMask(r.enableBreak) == 0x0;
742
743 branch(allBreak, endBlock, continueBlock);
744 Nucleus::setInsertBlock(continueBlock);
745 }
746
747 void VertexProgram::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC
748 {
749 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
750
751 if(predicateRegister.modifier == Src::MODIFIER_NOT)
752 {
753 condition = ~condition;
754 }
755
756 condition &= r.enableStack[r.enableIndex];
757
758 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
759 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
760
761 r.enableBreak = r.enableBreak & ~condition;
762 Bool allBreak = SignMask(r.enableBreak) == 0x0;
763
764 branch(allBreak, endBlock, continueBlock);
765 Nucleus::setInsertBlock(continueBlock);
766 }
767
768 void VertexProgram::CALL(Registers &r, int labelIndex)
769 {
770 if(!labelBlock[labelIndex])
771 {
772 labelBlock[labelIndex] = Nucleus::createBasicBlock();
773 }
774
775 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
776 callRetBlock.push_back(retBlock);
777
778 r.callStack[r.stackIndex++] = UInt((unsigned int)callRetBlock.size() - 1); // FIXME
779
780 Nucleus::createBr(labelBlock[labelIndex]);
781 Nucleus::setInsertBlock(retBlock);
782 }
783
784 void VertexProgram::CALLNZ(Registers &r, int labelIndex, const Src &src)
785 {
786 if(src.type == Src::PARAMETER_CONSTBOOL)
787 {
788 CALLNZb(r, labelIndex, src);
789 }
790 else if(src.type == Src::PARAMETER_PREDICATE)
791 {
792 CALLNZp(r, labelIndex, src);
793 }
794 else ASSERT(false);
795 }
796
797 void VertexProgram::CALLNZb(Registers &r, int labelIndex, const Src &boolRegister)
798 {
799 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
800
801 if(boolRegister.modifier == Src::MODIFIER_NOT)
802 {
803 condition = !condition;
804 }
805
806 if(!labelBlock[labelIndex])
807 {
808 labelBlock[labelIndex] = Nucleus::createBasicBlock();
809 }
810
811 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
812 callRetBlock.push_back(retBlock);
813
814 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME
815
816 branch(condition, labelBlock[labelIndex], retBlock);
817 Nucleus::setInsertBlock(retBlock);
818 }
819
820 void VertexProgram::CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister)
821 {
822 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
823
824 if(predicateRegister.modifier == Src::MODIFIER_NOT)
825 {
826 condition = ~condition;
827 }
828
829 condition &= r.enableStack[r.enableIndex];
830
831 if(!labelBlock[labelIndex])
832 {
833 labelBlock[labelIndex] = Nucleus::createBasicBlock();
834 }
835
836 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
837 callRetBlock.push_back(retBlock);
838
839 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME
840
841 r.enableIndex++;
842 r.enableStack[r.enableIndex] = condition;
843
844 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
845
846 branch(notAllFalse, labelBlock[labelIndex], retBlock);
847 Nucleus::setInsertBlock(retBlock);
848
849 r.enableIndex--;
850 }
851
852 void VertexProgram::ELSE(Registers &r)
853 {
854 ifDepth--;
855
856 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
857 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
858
859 if(isConditionalIf[ifDepth])
860 {
861 Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
862 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
863
864 branch(notAllFalse, falseBlock, endBlock);
865
866 r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
867 }
868 else
869 {
870 Nucleus::createBr(endBlock);
871 Nucleus::setInsertBlock(falseBlock);
872 }
873
874 ifFalseBlock[ifDepth] = endBlock;
875
876 ifDepth++;
877 }
878
879 void VertexProgram::ENDIF(Registers &r)
880 {
881 ifDepth--;
882
883 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
884
885 Nucleus::createBr(endBlock);
886 Nucleus::setInsertBlock(endBlock);
887
888 if(isConditionalIf[ifDepth])
889 {
890 breakDepth--;
891 r.enableIndex--;
892 }
893 }
894
895 void VertexProgram::ENDREP(Registers &r)
896 {
897 loopRepDepth--;
898
899 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
900 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
901
902 Nucleus::createBr(testBlock);
903 Nucleus::setInsertBlock(endBlock);
904
905 r.loopDepth--;
906 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
907 }
908
909 void VertexProgram::ENDLOOP(Registers &r)
910 {
911 loopRepDepth--;
912
913 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: +=
914
915 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
916 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
917
918 Nucleus::createBr(testBlock);
919 Nucleus::setInsertBlock(endBlock);
920
921 r.loopDepth--;
922 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
923 }
924
925 void VertexProgram::IF(Registers &r, const Src &src)
926 {
927 if(src.type == Src::PARAMETER_CONSTBOOL)
928 {
929 IFb(r, src);
930 }
931 else if(src.type == Src::PARAMETER_PREDICATE)
932 {
933 IFp(r, src);
934 }
935 else ASSERT(false);
936 }
937
938 void VertexProgram::IFb(Registers &r, const Src &boolRegister)
939 {
940 ASSERT(ifDepth < 24 + 4);
941
942 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,vs.b[boolRegister.index])) != Byte(0)); // FIXME
943
944 if(boolRegister.modifier == Src::MODIFIER_NOT)
945 {
946 condition = !condition;
947 }
948
949 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
950 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
951
952 branch(condition, trueBlock, falseBlock);
953
954 isConditionalIf[ifDepth] = false;
955 ifFalseBlock[ifDepth] = falseBlock;
956
957 ifDepth++;
958 }
959
960 void VertexProgram::IFp(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with IFC
961 {
962 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
963
964 if(predicateRegister.modifier == Src::MODIFIER_NOT)
965 {
966 condition = ~condition;
967 }
968
969 condition &= r.enableStack[r.enableIndex];
970
971 r.enableIndex++;
972 r.enableStack[r.enableIndex] = condition;
973
974 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
975 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
976
977 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
978
979 branch(notAllFalse, trueBlock, falseBlock);
980
981 isConditionalIf[ifDepth] = true;
982 ifFalseBlock[ifDepth] = falseBlock;
983
984 ifDepth++;
985 breakDepth++;
986 }
987
988 void VertexProgram::IFC(Registers &r, Color4f &src0, Color4f &src1, Control control)
989 {
990 Int4 condition;
991
992 switch(control)
993 {
994 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
995 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
996 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
997 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
998 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
999 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
1000 default:
1001 ASSERT(false);
1002 }
1003
1004 condition &= r.enableStack[r.enableIndex];
1005
1006 r.enableIndex++;
1007 r.enableStack[r.enableIndex] = condition;
1008
1009 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
1010 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
1011
1012 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
1013
1014 branch(notAllFalse, trueBlock, falseBlock);
1015
1016 isConditionalIf[ifDepth] = true;
1017 ifFalseBlock[ifDepth] = falseBlock;
1018
1019 ifDepth++;
1020 breakDepth++;
1021 }
1022
1023 void VertexProgram::LABEL(int labelIndex)
1024 {
1025 Nucleus::setInsertBlock(labelBlock[labelIndex]);
1026 }
1027
1028 void VertexProgram::LOOP(Registers &r, const Src &integerRegister)
1029 {
1030 r.loopDepth++;
1031
1032 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1033 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][1]));
1034 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][2]));
1035
1036 // FIXME: Compiles to two instructions?
1037 If(r.increment[r.loopDepth] == 0)
1038 {
1039 r.increment[r.loopDepth] = 1;
1040 }
1041
1042 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1043 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1044 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1045
1046 loopRepTestBlock[loopRepDepth] = testBlock;
1047 loopRepEndBlock[loopRepDepth] = endBlock;
1048
1049 // FIXME: jump(testBlock)
1050 Nucleus::createBr(testBlock);
1051 Nucleus::setInsertBlock(testBlock);
1052
1053 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1054 Nucleus::setInsertBlock(loopBlock);
1055
1056 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
1057
1058 loopRepDepth++;
1059 breakDepth = 0;
1060 }
1061
1062 void VertexProgram::REP(Registers &r, const Src &integerRegister)
1063 {
1064 r.loopDepth++;
1065
1066 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,vs.i[integerRegister.index][0]));
1067 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
1068
1069 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
1070 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
1071 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
1072
1073 loopRepTestBlock[loopRepDepth] = testBlock;
1074 loopRepEndBlock[loopRepDepth] = endBlock;
1075
1076 // FIXME: jump(testBlock)
1077 Nucleus::createBr(testBlock);
1078 Nucleus::setInsertBlock(testBlock);
1079
1080 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
1081 Nucleus::setInsertBlock(loopBlock);
1082
1083 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
1084
1085 loopRepDepth++;
1086 breakDepth = 0;
1087 }
1088
1089 void VertexProgram::RET(Registers &r)
1090 {
1091 if(!returns)
1092 {
1093 returnBlock = Nucleus::createBasicBlock();
1094 Nucleus::createBr(returnBlock);
1095
1096 returns = true;
1097 }
1098 else
1099 {
1100 // FIXME: Encapsulate
1101 UInt index = r.callStack[--r.stackIndex];
1102
1103 llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
1104 llvm::Value *value = Nucleus::createLoad(index.address);
1105 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock.size());
1106
1107 for(unsigned int i = 0; i < callRetBlock.size(); i++)
1108 {
1109 Nucleus::addSwitchCase(switchInst, i, callRetBlock[i]);
1110 }
1111
1112 Nucleus::setInsertBlock(unreachableBlock);
1113 Nucleus::createUnreachable();
1114 }
1115 }
1116
1117 void VertexProgram::TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
1118 {
1119 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap[16]) + src1.index * sizeof(Texture);
1120
1121 Color4f tmp;
1122
1123 sampler[src1.index]->sampleTexture(texture, tmp, src0.x, src0.y, src0.z, src0.w, src0, src0, false, false, true);
1124
1125 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
1126 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
1127 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
1128 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
1129 }
1130}