// SwiftShader Software Renderer
//
// Copyright(c) 2005-2011 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelRoutine.hpp"
13
14#include "Renderer.hpp"
15#include "PixelShader.hpp"
16#include "QuadRasterizer.hpp"
17#include "Surface.hpp"
18#include "Primitive.hpp"
19#include "CPUID.hpp"
20#include "SamplerCore.hpp"
21#include "Constants.hpp"
22#include "Debug.hpp"
23
24#include <assert.h>
25
26extern bool localShaderConstants;
27
28namespace sw
29{
30 extern bool complementaryDepthBuffer;
31 extern bool postBlendSRGB;
32 extern bool exactColorRounding;
33
34 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *pixelShader) : Rasterizer(state), pixelShader(pixelShader)
35 {
36 perturbate = false;
37 luminance = false;
38 previousScaling = false;
39
40 returns = false;
41 ifDepth = 0;
42 loopRepDepth = 0;
43 breakDepth = 0;
44
45 for(int i = 0; i < 2048; i++)
46 {
47 labelBlock[i] = 0;
48 }
49 }
50
51 PixelRoutine::~PixelRoutine()
52 {
53 for(int i = 0; i < 16; i++)
54 {
55 delete sampler[i];
56 }
57 }
58
// Builds the code that processes one quad. In order: stencil test, depth
// interpolation, optional early depth test, attribute interpolation, shading
// (pixel shader or fixed-function texture stages), alpha test, optional late
// depth test, depth and color writes, and finally the stencil write.
//
// r:        register set holding constants, primitive data and intermediate values.
// cBuffer:  color buffer pointers handed to rasterOperation().
// zBuffer:  depth buffer pointer handed to depthTest() / writeDepth().
// sBuffer:  stencil buffer pointer handed to stencilTest() / writeStencil().
// cMask:    per-sample coverage masks, indexed by sample q < state.multiSample.
// x, y:     quad position, used for interpolation; x is also passed on for buffer addressing.
59 void PixelRoutine::quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
60 {
61 #if PERF_PROFILE
62 Long pipeTime = Ticks();
63 #endif
64
// One SamplerCore per sampler slot; the destructor deletes them.
// NOTE(review): any previous sampler[i] value is overwritten without being
// freed - presumably quad() runs once per routine; confirm against callers.
65 for(int i = 0; i < 16; i++)
66 {
67 sampler[i] = new SamplerCore(r.constants, state.sampler[i]);
68 }
69
// The depth test is done before shading only when depth is not overridden and
// the alpha test is inactive; otherwise it is deferred until after the alpha test.
70 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
// Shader versions up to 0x0104 use the fixed-point Color4i path (see ps_1_x below).
71 const bool integerPipeline = pixelShaderVersion() <= 0x0104;
72
73 Int zMask[4]; // Depth mask
74 Int sMask[4]; // Stencil mask
75
// Depth and stencil masks start out equal to the coverage mask.
76 for(unsigned int q = 0; q < state.multiSample; q++)
77 {
78 zMask[q] = cMask[q];
79 sMask[q] = cMask[q];
80 }
81
// The stencil test runs first for every sample and updates sMask[q].
82 for(unsigned int q = 0; q < state.multiSample; q++)
83 {
84 stencilTest(r, sBuffer, q, x, sMask[q], cMask[q]);
85 }
86
87 Float4 f;
88
89 Color4i &current = r.ri[0];
90 Color4i &diffuse = r.vi[0];
91 Color4i &specular = r.vi[1];
92
93 Float4 (&z)[4] = r.z;
94 Float4 &rhw = r.rhw;
95 Float4 rhwCentroid;
96
// Quad position plus the primitive's xQuad / yQuad offsets.
97 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16);
98 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16);
99
// Depth is interpolated per sample when depth testing or pixel fog needs it.
100 if(state.depthTestActive || state.pixelFogActive())
101 {
102 for(unsigned int q = 0; q < state.multiSample; q++)
103 {
104 Float4 x = xxxx;
105
// With multisampling, subtract the per-sample X constant for sample q.
106 if(state.multiSample > 1)
107 {
108 x -= *Pointer<Float4>(r.constants + OFFSET(Constants,X) + q * sizeof(float4));
109 }
110
111 z[q] = interpolate(x, r.Dz[q], z[q], r.primitive + OFFSET(Primitive,z), false, false);
112 }
113 }
114
115 Bool depthPass = false;
116
117 if(earlyDepthTest)
118 {
119 for(unsigned int q = 0; q < state.multiSample; q++)
120 {
121 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
122 }
123 }
124
// Shading is skipped when the early depth test ran and no sample passed it.
125 If(depthPass || Bool(!earlyDepthTest))
126 {
127 #if PERF_PROFILE
128 Long interpTime = Ticks();
129 #endif
130
131 // Centroid locations
132 Float4 XXXX = Float4(0.0f);
133 Float4 YYYY = Float4(0.0f);
134
135 if(state.centroid)
136 {
// Accumulate sample offsets and weights from tables indexed by each sample's
// coverage mask, then normalize by the summed weight.
// NOTE(review): the 1.0e-9f seed presumably guards against a zero weight sum - confirm.
137 Float4 WWWW(1.0e-9f);
138
139 for(unsigned int q = 0; q < state.multiSample; q++)
140 {
141 XXXX += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
142 YYYY += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
143 WWWW += *Pointer<Float4>(r.constants + OFFSET(Constants,weight) + 16 * cMask[q]);
144 }
145
146 WWWW = Rcp_pp(WWWW);
147 XXXX *= WWWW;
148 YYYY *= WWWW;
149
150 XXXX += xxxx;
151 YYYY += yyyy;
152 }
153
// rhw is the reciprocal of the interpolated w plane; interpolate() multiplies
// by it when its 'perspective' argument is set.
154 if(state.perspective)
155 {
156 rhw = reciprocal(interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false));
157
158 if(state.centroid)
159 {
160 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,w), false, false));
161 }
162 }
163
// Interpolate every enabled component of each of the 10 interpolants, at the
// quad position or at the centroid location depending on the interpolant's flag.
164 for(int interpolant = 0; interpolant < 10; interpolant++)
165 {
166 for(int component = 0; component < 4; component++)
167 {
168 Array<Float4> *pv;
169
170 switch(component)
171 {
172 case 0: pv = &r.vx; break;
173 case 1: pv = &r.vy; break;
174 case 2: pv = &r.vz; break;
175 case 3: pv = &r.vw; break;
176 }
177
178 Array<Float4> &v = *pv;
179
180 if(state.interpolant[interpolant].component & (1 << component))
181 {
182 if(!state.interpolant[interpolant].centroid)
183 {
184 v[interpolant] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive,V[interpolant][component]), state.interpolant[interpolant].flat & (1 << component), state.perspective);
185 }
186 else
187 {
188 v[interpolant] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,V[interpolant][component]), state.interpolant[interpolant].flat & (1 << component), state.perspective);
189 }
190 }
191 }
192
193 Float4 rcp;
194
// Optional projection: scale the lower components by the reciprocal of the
// y (1), z (2) or w (3) component.
195 switch(state.interpolant[interpolant].project)
196 {
197 case 0:
198 break;
199 case 1:
200 rcp = reciprocal(Float4(r.vy[interpolant]));
201 r.vx[interpolant] = r.vx[interpolant] * rcp;
202 break;
203 case 2:
204 rcp = reciprocal(Float4(r.vz[interpolant]));
205 r.vx[interpolant] = r.vx[interpolant] * rcp;
206 r.vy[interpolant] = r.vy[interpolant] * rcp;
207 break;
208 case 3:
209 rcp = reciprocal(Float4(r.vw[interpolant]));
210 r.vx[interpolant] = r.vx[interpolant] * rcp;
211 r.vy[interpolant] = r.vy[interpolant] * rcp;
212 r.vz[interpolant] = r.vz[interpolant] * rcp;
213 break;
214 }
215 }
216
217 if(state.fog.component)
218 {
219 f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
220 }
221
// Fixed-point path: convert interpolants 0 (diffuse) and 1 (specular) with
// convertFixed12(). Components that are not supplied default to 0x1000 for
// diffuse and 0x0000 for specular.
222 if(integerPipeline)
223 {
224 if(state.color[0].component & 0x1) diffuse.x = convertFixed12(Float4(r.vx[0])); else diffuse.x = Short4(0x1000, 0x1000, 0x1000, 0x1000);
225 if(state.color[0].component & 0x2) diffuse.y = convertFixed12(Float4(r.vy[0])); else diffuse.y = Short4(0x1000, 0x1000, 0x1000, 0x1000);
226 if(state.color[0].component & 0x4) diffuse.z = convertFixed12(Float4(r.vz[0])); else diffuse.z = Short4(0x1000, 0x1000, 0x1000, 0x1000);
227 if(state.color[0].component & 0x8) diffuse.w = convertFixed12(Float4(r.vw[0])); else diffuse.w = Short4(0x1000, 0x1000, 0x1000, 0x1000);
228
229 if(state.color[1].component & 0x1) specular.x = convertFixed12(Float4(r.vx[1])); else specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
230 if(state.color[1].component & 0x2) specular.y = convertFixed12(Float4(r.vy[1])); else specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
231 if(state.color[1].component & 0x4) specular.z = convertFixed12(Float4(r.vz[1])); else specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
232 if(state.color[1].component & 0x8) specular.w = convertFixed12(Float4(r.vw[1])); else specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
233 }
// Shader versions 0x0300 and up: fill in vPos / vFace when the shader declares them.
234 else if(pixelShaderVersion() >= 0x0300)
235 {
236 if(pixelShader->vPosDeclared)
237 {
// Per-pixel coordinates of the quad: x offsets (0, 1, 0, 1), y offsets (0, 0, 1, 1).
238 r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
239 r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
240 }
241
242 if(pixelShader->vFaceDeclared)
243 {
// vFace carries the primitive's area value replicated to all components.
244 Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive,area));
245
246 r.vFace.x = area;
247 r.vFace.y = area;
248 r.vFace.z = area;
249 r.vFace.w = area;
250 }
251 }
252
253 #if PERF_PROFILE
254 r.cycles[PERF_INTERP] += Ticks() - interpTime;
255 #endif
256
257 Bool alphaPass = true;
258
259 if(colorUsed())
260 {
261 #if PERF_PROFILE
262 Long shaderTime = Ticks();
263 #endif
264
// With a pixel shader, dispatch on its version; without one, run the
// fixed-function texture stages.
265 if(pixelShader)
266 {
267 // pixelShader->print("PixelShader-%0.16llX.txt", state.shaderHash);
268
269 if(pixelShader->getVersion() <= 0x0104)
270 {
271 ps_1_x(r, cMask);
272 }
273 else
274 {
275 ps_2_x(r, cMask);
276 }
277 }
278 else
279 {
280 current = diffuse;
281 Color4i temp(0x0000, 0x0000, 0x0000, 0x0000);
282
// Up to 8 texture stages; processing stops at the first disabled stage.
283 for(int stage = 0; stage < 8; stage++)
284 {
285 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
286 {
287 break;
288 }
289
290 Color4i texture;
291
292 if(state.textureStage[stage].usesTexture)
293 {
294 sampleTexture(r, texture, stage, stage);
295 }
296
297 blendTexture(r, current, temp, texture, stage);
298 }
299
300 specularPixel(current, specular);
301 }
302
303 #if PERF_PROFILE
304 r.cycles[PERF_SHADER] += Ticks() - shaderTime;
305 #endif
306
// Clamp the result before the alpha test: fixed-point colors to [0x0000, 0x0FFF],
// floating-point outputs through clampColor().
307 if(integerPipeline)
308 {
309 current.r = Min(current.r, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.r = Max(current.r, Short4(0x0000, 0x0000, 0x0000, 0x0000));
310 current.g = Min(current.g, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.g = Max(current.g, Short4(0x0000, 0x0000, 0x0000, 0x0000));
311 current.b = Min(current.b, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.b = Max(current.b, Short4(0x0000, 0x0000, 0x0000, 0x0000));
312 current.a = Min(current.a, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); current.a = Max(current.a, Short4(0x0000, 0x0000, 0x0000, 0x0000));
313
314 alphaPass = alphaTest(r, cMask, current);
315 }
316 else
317 {
318 clampColor(r.oC);
319
320 alphaPass = alphaTest(r, cMask, r.oC[0]);
321 }
322
// Restrict depth and stencil masks to the current coverage.
// NOTE(review): presumably texkill / alpha test clear bits in cMask - confirm in those routines.
323 if((pixelShader && pixelShader->containsTexkill()) || state.alphaTestActive())
324 {
325 for(unsigned int q = 0; q < state.multiSample; q++)
326 {
327 zMask[q] &= cMask[q];
328 sMask[q] &= cMask[q];
329 }
330 }
331 }
332
333 If(alphaPass)
334 {
// Late depth test, for the case where it could not be done before shading.
335 if(!earlyDepthTest)
336 {
337 for(unsigned int q = 0; q < state.multiSample; q++)
338 {
339 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
340 }
341 }
342
343 #if PERF_PROFILE
344 Long ropTime = Ticks();
345 #endif
346
347 If(depthPass || Bool(earlyDepthTest))
348 {
// Write depth and, if enabled, accumulate the occlusion count for each
// sample selected by the multisample mask.
349 for(unsigned int q = 0; q < state.multiSample; q++)
350 {
351 if(state.multiSampleMask & (1 << q))
352 {
353 writeDepth(r, zBuffer, q, x, z[q], zMask[q]);
354
355 if(state.occlusionEnabled)
356 {
357 r.occlusion += *Pointer<UInt>(r.constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
358 }
359 }
360 }
361
362 if(colorUsed())
363 {
364 #if PERF_PROFILE
365 AddAtomic(Pointer<Long>(&profiler.ropOperations), Long(4));
366 #endif
367
// The fixed-point path writes 'current' to cBuffer[0] only; the floating-point
// path passes all of r.oC and cBuffer.
368 if(integerPipeline)
369 {
370 rasterOperation(current, r, f, cBuffer[0], x, sMask, zMask, cMask);
371 }
372 else
373 {
374 rasterOperation(r.oC, r, f, cBuffer, x, sMask, zMask, cMask);
375 }
376 }
377 }
378
379 #if PERF_PROFILE
380 r.cycles[PERF_ROP] += Ticks() - ropTime;
381 #endif
382 }
383 }
384
// The stencil write sits outside the depth / alpha conditionals above, so it is
// emitted for every selected sample regardless of their outcome.
385 for(unsigned int q = 0; q < state.multiSample; q++)
386 {
387 if(state.multiSampleMask & (1 << q))
388 {
389 writeStencil(r, sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
390 }
391 }
392
393 #if PERF_PROFILE
394 r.cycles[PERF_PIPE] += Ticks() - pipeTime;
395 #endif
396 }
397
398 Float4 PixelRoutine::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
399 {
400 Float4 interpolant = D;
401
402 if(!flat)
403 {
404 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16);
405
406 if(perspective)
407 {
408 interpolant *= rhw;
409 }
410 }
411
412 return interpolant;
413 }
414
415 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
416 {
417 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
418
419 if(!flat)
420 {
421 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
422 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
423
424 if(perspective)
425 {
426 interpolant *= rhw;
427 }
428 }
429
430 return interpolant;
431 }
432
433 void PixelRoutine::stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
434 {
435 if(!state.stencilActive)
436 {
437 return;
438 }
439
440 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
441
442 Pointer<Byte> buffer = sBuffer + 2 * x;
443
444 if(q > 0)
445 {
446 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
447 }
448
449 Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
450 Byte8 valueCCW = value;
451
452 if(!state.noStencilMask)
453 {
454 value &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].testMaskQ));
455 }
456
457 stencilTest(r, value, (Context::StencilCompareMode)state.stencilCompareMode, false);
458
459 if(state.twoSidedStencil)
460 {
461 if(!state.noStencilMaskCCW)
462 {
463 valueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].testMaskQ));
464 }
465
466 stencilTest(r, valueCCW, (Context::StencilCompareMode)state.stencilCompareModeCCW, true);
467
468 value &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
469 valueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
470 value |= valueCCW;
471 }
472
473 sMask = SignMask(value) & cMask;
474 }
475
476 void PixelRoutine::stencilTest(Registers &r, Byte8 &value, Context::StencilCompareMode stencilCompareMode, bool CCW)
477 {
478 Byte8 equal;
479
480 switch(stencilCompareMode)
481 {
482 case Context::STENCIL_ALWAYS:
483 value = Byte8(0xFFFFFFFFFFFFFFFF);
484 break;
485 case Context::STENCIL_NEVER:
486 value = Byte8(0x0000000000000000);
487 break;
488 case Context::STENCIL_LESS: // a < b ~ b > a
489 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
490 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
491 break;
492 case Context::STENCIL_EQUAL:
493 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
494 break;
495 case Context::STENCIL_NOTEQUAL: // a != b ~ !(a == b)
496 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
497 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
498 break;
499 case Context::STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
500 equal = value;
501 equal = CmpEQ(equal, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
502 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
503 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
504 value |= equal;
505 break;
506 case Context::STENCIL_GREATER: // a > b
507 equal = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
508 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
509 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
510 value = equal;
511 break;
512 case Context::STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
513 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
514 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
515 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
516 break;
517 default:
518 ASSERT(false);
519 }
520 }
521
522 Bool PixelRoutine::depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
523 {
524 if(!state.depthTestActive)
525 {
526 return true;
527 }
528
529 Float4 Z = z;
530
531 if(pixelShader && pixelShader->depthOverride())
532 {
533 if(complementaryDepthBuffer)
534 {
535 Z = Float4(1, 1, 1, 1) - r.oDepth;
536 }
537 else
538 {
539 Z = r.oDepth;
540 }
541 }
542
543 Pointer<Byte> buffer;
544 Int pitch;
545
546 if(!state.quadLayoutDepthBuffer)
547 {
548 buffer = zBuffer + 4 * x;
549 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
550 }
551 else
552 {
553 buffer = zBuffer + 8 * x;
554 }
555
556 if(q > 0)
557 {
558 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
559 }
560
561 Float4 zValue;
562
563 if(state.depthCompareMode != Context::DEPTH_NEVER || (state.depthCompareMode != Context::DEPTH_ALWAYS && !state.depthWriteEnable))
564 {
565 if(!state.quadLayoutDepthBuffer)
566 {
567 // FIXME: Properly optimizes?
568 zValue.xy = *Pointer<Float4>(buffer);
569 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
570 }
571 else
572 {
573 zValue = *Pointer<Float4>(buffer, 16);
574 }
575 }
576
577 Int4 zTest;
578
579 switch(state.depthCompareMode)
580 {
581 case Context::DEPTH_ALWAYS:
582 // Optimized
583 break;
584 case Context::DEPTH_NEVER:
585 // Optimized
586 break;
587 case Context::DEPTH_EQUAL:
588 zTest = CmpEQ(zValue, Z);
589 break;
590 case Context::DEPTH_NOTEQUAL:
591 zTest = CmpNEQ(zValue, Z);
592 break;
593 case Context::DEPTH_LESS:
594 if(complementaryDepthBuffer)
595 {
596 zTest = CmpLT(zValue, Z);
597 }
598 else
599 {
600 zTest = CmpNLE(zValue, Z);
601 }
602 break;
603 case Context::DEPTH_GREATEREQUAL:
604 if(complementaryDepthBuffer)
605 {
606 zTest = CmpNLT(zValue, Z);
607 }
608 else
609 {
610 zTest = CmpLE(zValue, Z);
611 }
612 break;
613 case Context::DEPTH_LESSEQUAL:
614 if(complementaryDepthBuffer)
615 {
616 zTest = CmpLE(zValue, Z);
617 }
618 else
619 {
620 zTest = CmpNLT(zValue, Z);
621 }
622 break;
623 case Context::DEPTH_GREATER:
624 if(complementaryDepthBuffer)
625 {
626 zTest = CmpNLE(zValue, Z);
627 }
628 else
629 {
630 zTest = CmpLT(zValue, Z);
631 }
632 break;
633 default:
634 ASSERT(false);
635 }
636
637 switch(state.depthCompareMode)
638 {
639 case Context::DEPTH_ALWAYS:
640 zMask = cMask;
641 break;
642 case Context::DEPTH_NEVER:
643 zMask = 0x0;
644 break;
645 default:
646 zMask = SignMask(zTest) & cMask;
647 break;
648 }
649
650 if(state.stencilActive)
651 {
652 zMask &= sMask;
653 }
654
655 return zMask != 0;
656 }
657
658 void PixelRoutine::blendTexture(Registers &r, Color4i &current, Color4i &temp, Color4i &texture, int stage)
659 {
660 Color4i *arg1;
661 Color4i *arg2;
662 Color4i *arg3;
663 Color4i res;
664
665 Color4i constant;
666 Color4i tfactor;
667
668 const TextureStage::State &textureStage = state.textureStage[stage];
669
670 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
671 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
672 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
673 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
674 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
675 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
676 {
677 constant.r = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[0]));
678 constant.g = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[1]));
679 constant.b = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[2]));
680 constant.a = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[3]));
681 }
682
683 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
684 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
685 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
686 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
687 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
688 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
689 {
690 tfactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[0]));
691 tfactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[1]));
692 tfactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[2]));
693 tfactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]));
694 }
695
696 // Premodulate
697 if(stage > 0 && textureStage.usesTexture)
698 {
699 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
700 {
701 current.r = MulHigh(current.r, texture.r) << 4;
702 current.g = MulHigh(current.g, texture.g) << 4;
703 current.b = MulHigh(current.b, texture.b) << 4;
704 }
705
706 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
707 {
708 current.a = MulHigh(current.a, texture.a) << 4;
709 }
710 }
711
712 if(luminance)
713 {
714 texture.r = MulHigh(texture.r, r.L) << 4;
715 texture.g = MulHigh(texture.g, r.L) << 4;
716 texture.b = MulHigh(texture.b, r.L) << 4;
717
718 luminance = false;
719 }
720
721 switch(textureStage.firstArgument)
722 {
723 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
724 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
725 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
726 case TextureStage::SOURCE_DIFFUSE: arg1 = &r.diffuse; break;
727 case TextureStage::SOURCE_SPECULAR: arg1 = &r.specular; break;
728 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
729 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
730 default:
731 ASSERT(false);
732 }
733
734 switch(textureStage.secondArgument)
735 {
736 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
737 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
738 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
739 case TextureStage::SOURCE_DIFFUSE: arg2 = &r.diffuse; break;
740 case TextureStage::SOURCE_SPECULAR: arg2 = &r.specular; break;
741 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
742 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
743 default:
744 ASSERT(false);
745 }
746
747 switch(textureStage.thirdArgument)
748 {
749 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
750 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
751 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
752 case TextureStage::SOURCE_DIFFUSE: arg3 = &r.diffuse; break;
753 case TextureStage::SOURCE_SPECULAR: arg3 = &r.specular; break;
754 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
755 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
756 default:
757 ASSERT(false);
758 }
759
760 Color4i mod1;
761 Color4i mod2;
762 Color4i mod3;
763
764 switch(textureStage.firstModifier)
765 {
766 case TextureStage::MODIFIER_COLOR:
767 break;
768 case TextureStage::MODIFIER_INVCOLOR:
769 {
770 mod1.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->r);
771 mod1.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->g);
772 mod1.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->b);
773 mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
774
775 arg1 = &mod1;
776 }
777 break;
778 case TextureStage::MODIFIER_ALPHA:
779 {
780 mod1.r = arg1->a;
781 mod1.g = arg1->a;
782 mod1.b = arg1->a;
783 mod1.a = arg1->a;
784
785 arg1 = &mod1;
786 }
787 break;
788 case TextureStage::MODIFIER_INVALPHA:
789 {
790 mod1.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
791 mod1.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
792 mod1.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
793 mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
794
795 arg1 = &mod1;
796 }
797 break;
798 default:
799 ASSERT(false);
800 }
801
802 switch(textureStage.secondModifier)
803 {
804 case TextureStage::MODIFIER_COLOR:
805 break;
806 case TextureStage::MODIFIER_INVCOLOR:
807 {
808 mod2.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->r);
809 mod2.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->g);
810 mod2.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->b);
811 mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
812
813 arg2 = &mod2;
814 }
815 break;
816 case TextureStage::MODIFIER_ALPHA:
817 {
818 mod2.r = arg2->a;
819 mod2.g = arg2->a;
820 mod2.b = arg2->a;
821 mod2.a = arg2->a;
822
823 arg2 = &mod2;
824 }
825 break;
826 case TextureStage::MODIFIER_INVALPHA:
827 {
828 mod2.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
829 mod2.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
830 mod2.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
831 mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
832
833 arg2 = &mod2;
834 }
835 break;
836 default:
837 ASSERT(false);
838 }
839
840 switch(textureStage.thirdModifier)
841 {
842 case TextureStage::MODIFIER_COLOR:
843 break;
844 case TextureStage::MODIFIER_INVCOLOR:
845 {
846 mod3.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->r);
847 mod3.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->g);
848 mod3.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->b);
849 mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
850
851 arg3 = &mod3;
852 }
853 break;
854 case TextureStage::MODIFIER_ALPHA:
855 {
856 mod3.r = arg3->a;
857 mod3.g = arg3->a;
858 mod3.b = arg3->a;
859 mod3.a = arg3->a;
860
861 arg3 = &mod3;
862 }
863 break;
864 case TextureStage::MODIFIER_INVALPHA:
865 {
866 mod3.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
867 mod3.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
868 mod3.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
869 mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
870
871 arg3 = &mod3;
872 }
873 break;
874 default:
875 ASSERT(false);
876 }
877
878 switch(textureStage.stageOperation)
879 {
880 case TextureStage::STAGE_DISABLE:
881 break;
882 case TextureStage::STAGE_SELECTARG1: // Arg1
883 {
884 res.r = arg1->r;
885 res.g = arg1->g;
886 res.b = arg1->b;
887 }
888 break;
889 case TextureStage::STAGE_SELECTARG2: // Arg2
890 {
891 res.r = arg2->r;
892 res.g = arg2->g;
893 res.b = arg2->b;
894 }
895 break;
896 case TextureStage::STAGE_SELECTARG3: // Arg3
897 {
898 res.r = arg3->r;
899 res.g = arg3->g;
900 res.b = arg3->b;
901 }
902 break;
903 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
904 {
905 res.r = MulHigh(arg1->r, arg2->r) << 4;
906 res.g = MulHigh(arg1->g, arg2->g) << 4;
907 res.b = MulHigh(arg1->b, arg2->b) << 4;
908 }
909 break;
910 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
911 {
912 res.r = MulHigh(arg1->r, arg2->r) << 5;
913 res.g = MulHigh(arg1->g, arg2->g) << 5;
914 res.b = MulHigh(arg1->b, arg2->b) << 5;
915 }
916 break;
917 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
918 {
919 res.r = MulHigh(arg1->r, arg2->r) << 6;
920 res.g = MulHigh(arg1->g, arg2->g) << 6;
921 res.b = MulHigh(arg1->b, arg2->b) << 6;
922 }
923 break;
924 case TextureStage::STAGE_ADD: // Arg1 + Arg2
925 {
926 res.r = AddSat(arg1->r, arg2->r);
927 res.g = AddSat(arg1->g, arg2->g);
928 res.b = AddSat(arg1->b, arg2->b);
929 }
930 break;
931 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
932 {
933 res.r = AddSat(arg1->r, arg2->r);
934 res.g = AddSat(arg1->g, arg2->g);
935 res.b = AddSat(arg1->b, arg2->b);
936
937 res.r = SubSat(res.r, Short4(0x0800, 0x0800, 0x0800, 0x0800));
938 res.g = SubSat(res.g, Short4(0x0800, 0x0800, 0x0800, 0x0800));
939 res.b = SubSat(res.b, Short4(0x0800, 0x0800, 0x0800, 0x0800));
940 }
941 break;
942 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
943 {
944 res.r = AddSat(arg1->r, arg2->r);
945 res.g = AddSat(arg1->g, arg2->g);
946 res.b = AddSat(arg1->b, arg2->b);
947
948 res.r = SubSat(res.r, Short4(0x0800, 0x0800, 0x0800, 0x0800));
949 res.g = SubSat(res.g, Short4(0x0800, 0x0800, 0x0800, 0x0800));
950 res.b = SubSat(res.b, Short4(0x0800, 0x0800, 0x0800, 0x0800));
951
952 res.r = AddSat(res.r, res.r);
953 res.g = AddSat(res.g, res.g);
954 res.b = AddSat(res.b, res.b);
955 }
956 break;
957 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
958 {
959 res.r = SubSat(arg1->r, arg2->r);
960 res.g = SubSat(arg1->g, arg2->g);
961 res.b = SubSat(arg1->b, arg2->b);
962 }
963 break;
964 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
965 {
966 Short4 tmp;
967
968 tmp = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(arg1->r, arg2->r); res.r = SubSat(res.r, tmp);
969 tmp = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(arg1->g, arg2->g); res.g = SubSat(res.g, tmp);
970 tmp = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(arg1->b, arg2->b); res.b = SubSat(res.b, tmp);
971 }
972 break;
973 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
974 {
975 res.r = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(res.r, arg3->r);
976 res.g = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(res.g, arg3->g);
977 res.b = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(res.b, arg3->b);
978 }
979 break;
980 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
981 {
982 res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, arg3->r) << 4; res.r = AddSat(res.r, arg2->r);
983 res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, arg3->g) << 4; res.g = AddSat(res.g, arg2->g);
984 res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, arg3->b) << 4; res.b = AddSat(res.b, arg2->b);
985 }
986 break;
987 case TextureStage::STAGE_DOT3: // 2 * (Arg1.r - 0.5) * 2 * (Arg2.r - 0.5) + 2 * (Arg1.g - 0.5) * 2 * (Arg2.g - 0.5) + 2 * (Arg1.b - 0.5) * 2 * (Arg2.b - 0.5)
988 {
989 Short4 tmp;
990
991 res.r = SubSat(arg1->r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->r, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.r = MulHigh(res.r, tmp);
992 res.g = SubSat(arg1->g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->g, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.g = MulHigh(res.g, tmp);
993 res.b = SubSat(arg1->b, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->b, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.b = MulHigh(res.b, tmp);
994
995 res.r = res.r << 6;
996 res.g = res.g << 6;
997 res.b = res.b << 6;
998
999 res.r = AddSat(res.r, res.g);
1000 res.r = AddSat(res.r, res.b);
1001
1002 // Clamp to [0, 1]
1003 res.r = Max(res.r, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1004 res.r = Min(res.r, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1005
1006 res.g = res.r;
1007 res.b = res.r;
1008 res.a = res.r;
1009 }
1010 break;
1011 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1012 {
1013 res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, current.a) << 4; res.r = AddSat(res.r, arg2->r);
1014 res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, current.a) << 4; res.g = AddSat(res.g, arg2->g);
1015 res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, current.a) << 4; res.b = AddSat(res.b, arg2->b);
1016 }
1017 break;
1018 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1019 {
1020 res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, r.diffuse.a) << 4; res.r = AddSat(res.r, arg2->r);
1021 res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, r.diffuse.a) << 4; res.g = AddSat(res.g, arg2->g);
1022 res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, r.diffuse.a) << 4; res.b = AddSat(res.b, arg2->b);
1023 }
1024 break;
1025 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1026 {
1027 res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.r = AddSat(res.r, arg2->r);
1028 res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.g = AddSat(res.g, arg2->g);
1029 res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.b = AddSat(res.b, arg2->b);
1030 }
1031 break;
1032 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1033 {
1034 res.r = SubSat(arg1->r, arg2->r); res.r = MulHigh(res.r, texture.a) << 4; res.r = AddSat(res.r, arg2->r);
1035 res.g = SubSat(arg1->g, arg2->g); res.g = MulHigh(res.g, texture.a) << 4; res.g = AddSat(res.g, arg2->g);
1036 res.b = SubSat(arg1->b, arg2->b); res.b = MulHigh(res.b, texture.a) << 4; res.b = AddSat(res.b, arg2->b);
1037 }
1038 break;
1039 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
1040 {
1041 res.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.r = MulHigh(res.r, arg2->r) << 4; res.r = AddSat(res.r, arg1->r);
1042 res.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.g = MulHigh(res.g, arg2->g) << 4; res.g = AddSat(res.g, arg1->g);
1043 res.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.b = MulHigh(res.b, arg2->b) << 4; res.b = AddSat(res.b, arg1->b);
1044 }
1045 break;
1046 case TextureStage::STAGE_PREMODULATE:
1047 {
1048 res.r = arg1->r;
1049 res.g = arg1->g;
1050 res.b = arg1->b;
1051 }
1052 break;
1053 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.a * Arg2
1054 {
1055 res.r = MulHigh(arg1->a, arg2->r) << 4; res.r = AddSat(res.r, arg1->r);
1056 res.g = MulHigh(arg1->a, arg2->g) << 4; res.g = AddSat(res.g, arg1->g);
1057 res.b = MulHigh(arg1->a, arg2->b) << 4; res.b = AddSat(res.b, arg1->b);
1058 }
1059 break;
1060 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.a
1061 {
1062 res.r = MulHigh(arg1->r, arg2->r) << 4; res.r = AddSat(res.r, arg1->a);
1063 res.g = MulHigh(arg1->g, arg2->g) << 4; res.g = AddSat(res.g, arg1->a);
1064 res.b = MulHigh(arg1->b, arg2->b) << 4; res.b = AddSat(res.b, arg1->a);
1065 }
1066 break;
1067 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.a) * Arg2 + Arg1
1068 {
1069 Short4 tmp;
1070
1071 res.r = AddSat(arg1->r, arg2->r); tmp = MulHigh(arg1->a, arg2->r) << 4; res.r = SubSat(res.r, tmp);
1072 res.g = AddSat(arg1->g, arg2->g); tmp = MulHigh(arg1->a, arg2->g) << 4; res.g = SubSat(res.g, tmp);
1073 res.b = AddSat(arg1->b, arg2->b); tmp = MulHigh(arg1->a, arg2->b) << 4; res.b = SubSat(res.b, tmp);
1074 }
1075 break;
1076 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.a
1077 {
1078 Short4 tmp;
1079
1080 res.r = AddSat(arg1->a, arg2->r); tmp = MulHigh(arg1->r, arg2->r) << 4; res.r = SubSat(res.r, tmp);
1081 res.g = AddSat(arg1->a, arg2->g); tmp = MulHigh(arg1->g, arg2->g) << 4; res.g = SubSat(res.g, tmp);
1082 res.b = AddSat(arg1->a, arg2->b); tmp = MulHigh(arg1->b, arg2->b) << 4; res.b = SubSat(res.b, tmp);
1083 }
1084 break;
1085 case TextureStage::STAGE_BUMPENVMAP:
1086 {
1087 r.du = Float4(texture.r) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0);
1088 r.dv = Float4(texture.g) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0);
1089
1090 Float4 du2;
1091 Float4 dv2;
1092
1093 du2 = r.du;
1094 dv2 = r.dv;
1095 r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
1096 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
1097 r.du += dv2;
1098 r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
1099 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
1100 r.dv += du2;
1101
1102 perturbate = true;
1103
1104 res.r = r.current.r;
1105 res.g = r.current.g;
1106 res.b = r.current.b;
1107 res.a = r.current.a;
1108 }
1109 break;
1110 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1111 {
1112 r.du = Float4(texture.r) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0);
1113 r.dv = Float4(texture.g) * Float4(1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0, 1.0f / 0x0FE0);
1114
1115 Float4 du2;
1116 Float4 dv2;
1117
1118 du2 = r.du;
1119 dv2 = r.dv;
1120
1121 r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
1122 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
1123 r.du += dv2;
1124 r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
1125 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
1126 r.dv += du2;
1127
1128 perturbate = true;
1129
1130 r.L = texture.b;
1131 r.L = MulHigh(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
1132 r.L = r.L << 4;
1133 r.L = AddSat(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
1134 r.L = Max(r.L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1135 r.L = Min(r.L, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1136
1137 luminance = true;
1138
1139 res.r = r.current.r;
1140 res.g = r.current.g;
1141 res.b = r.current.b;
1142 res.a = r.current.a;
1143 }
1144 break;
1145 default:
1146 ASSERT(false);
1147 }
1148
1149 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
1150 {
1151 switch(textureStage.firstArgumentAlpha)
1152 {
1153 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
1154 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
1155 case TextureStage::SOURCE_CURRENT: arg1 = &current; break;
1156 case TextureStage::SOURCE_DIFFUSE: arg1 = &r.diffuse; break;
1157 case TextureStage::SOURCE_SPECULAR: arg1 = &r.specular; break;
1158 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
1159 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
1160 default:
1161 ASSERT(false);
1162 }
1163
1164 switch(textureStage.secondArgumentAlpha)
1165 {
1166 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
1167 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
1168 case TextureStage::SOURCE_CURRENT: arg2 = &current; break;
1169 case TextureStage::SOURCE_DIFFUSE: arg2 = &r.diffuse; break;
1170 case TextureStage::SOURCE_SPECULAR: arg2 = &r.specular; break;
1171 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
1172 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
1173 default:
1174 ASSERT(false);
1175 }
1176
1177 switch(textureStage.thirdArgumentAlpha)
1178 {
1179 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
1180 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
1181 case TextureStage::SOURCE_CURRENT: arg3 = &current; break;
1182 case TextureStage::SOURCE_DIFFUSE: arg3 = &r.diffuse; break;
1183 case TextureStage::SOURCE_SPECULAR: arg3 = &r.specular; break;
1184 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
1185 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
1186 default:
1187 ASSERT(false);
1188 }
1189
1190 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
1191 {
1192 case TextureStage::MODIFIER_COLOR:
1193 break;
1194 case TextureStage::MODIFIER_INVCOLOR:
1195 {
1196 mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
1197
1198 arg1 = &mod1;
1199 }
1200 break;
1201 case TextureStage::MODIFIER_ALPHA:
1202 {
1203 // Redudant
1204 }
1205 break;
1206 case TextureStage::MODIFIER_INVALPHA:
1207 {
1208 mod1.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg1->a);
1209
1210 arg1 = &mod1;
1211 }
1212 break;
1213 default:
1214 ASSERT(false);
1215 }
1216
1217 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
1218 {
1219 case TextureStage::MODIFIER_COLOR:
1220 break;
1221 case TextureStage::MODIFIER_INVCOLOR:
1222 {
1223 mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
1224
1225 arg2 = &mod2;
1226 }
1227 break;
1228 case TextureStage::MODIFIER_ALPHA:
1229 {
1230 // Redudant
1231 }
1232 break;
1233 case TextureStage::MODIFIER_INVALPHA:
1234 {
1235 mod2.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg2->a);
1236
1237 arg2 = &mod2;
1238 }
1239 break;
1240 default:
1241 ASSERT(false);
1242 }
1243
1244 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
1245 {
1246 case TextureStage::MODIFIER_COLOR:
1247 break;
1248 case TextureStage::MODIFIER_INVCOLOR:
1249 {
1250 mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
1251
1252 arg3 = &mod3;
1253 }
1254 break;
1255 case TextureStage::MODIFIER_ALPHA:
1256 {
1257 // Redudant
1258 }
1259 break;
1260 case TextureStage::MODIFIER_INVALPHA:
1261 {
1262 mod3.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), arg3->a);
1263
1264 arg3 = &mod3;
1265 }
1266 break;
1267 default:
1268 ASSERT(false);
1269 }
1270
1271 switch(textureStage.stageOperationAlpha)
1272 {
1273 case TextureStage::STAGE_DISABLE:
1274 break;
1275 case TextureStage::STAGE_SELECTARG1: // Arg1
1276 {
1277 res.a = arg1->a;
1278 }
1279 break;
1280 case TextureStage::STAGE_SELECTARG2: // Arg2
1281 {
1282 res.a = arg2->a;
1283 }
1284 break;
1285 case TextureStage::STAGE_SELECTARG3: // Arg3
1286 {
1287 res.a = arg3->a;
1288 }
1289 break;
1290 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
1291 {
1292 res.a = MulHigh(arg1->a, arg2->a) << 4;
1293 }
1294 break;
1295 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
1296 {
1297 res.a = MulHigh(arg1->a, arg2->a) << 5;
1298 }
1299 break;
1300 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
1301 {
1302 res.a = MulHigh(arg1->a, arg2->a) << 6;
1303 }
1304 break;
1305 case TextureStage::STAGE_ADD: // Arg1 + Arg2
1306 {
1307 res.a = AddSat(arg1->a, arg2->a);
1308 }
1309 break;
1310 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
1311 {
1312 res.a = AddSat(arg1->a, arg2->a);
1313 res.a = SubSat(res.a, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1314 }
1315 break;
1316 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
1317 {
1318 res.a = AddSat(arg1->a, arg2->a);
1319 res.a = SubSat(res.a, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1320 res.a = AddSat(res.a, res.a);
1321 }
1322 break;
1323 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
1324 {
1325 res.a = SubSat(arg1->a, arg2->a);
1326 }
1327 break;
1328 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
1329 {
1330 Short4 tmp;
1331
1332 tmp = MulHigh(arg1->a, arg2->a) << 4; res.a = AddSat(arg1->a, arg2->a); res.a = SubSat(res.a, tmp);
1333 }
1334 break;
1335 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
1336 {
1337 res.a = MulHigh(arg1->a, arg2->a) << 4; res.a = AddSat(res.a, arg3->a);
1338 }
1339 break;
1340 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
1341 {
1342 res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, arg3->a) << 4; res.a = AddSat(res.a, arg2->a);
1343 }
1344 break;
1345 case TextureStage::STAGE_DOT3:
1346 break; // Already computed in color channel
1347 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1348 {
1349 res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, current.a) << 4; res.a = AddSat(res.a, arg2->a);
1350 }
1351 break;
1352 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
1353 {
1354 res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, r.diffuse.a) << 4; res.a = AddSat(res.a, arg2->a);
1355 }
1356 break;
1357 case TextureStage::STAGE_BLENDFACTORALPHA:
1358 {
1359 res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.a = AddSat(res.a, arg2->a);
1360 }
1361 break;
1362 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
1363 {
1364 res.a = SubSat(arg1->a, arg2->a); res.a = MulHigh(res.a, texture.a) << 4; res.a = AddSat(res.a, arg2->a);
1365 }
1366 break;
1367 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
1368 {
1369 res.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), texture.a); res.a = MulHigh(res.a, arg2->a) << 4; res.a = AddSat(res.a, arg1->a);
1370 }
1371 break;
1372 case TextureStage::STAGE_PREMODULATE:
1373 {
1374 res.a = arg1->a;
1375 }
1376 break;
1377 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1378 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1379 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1380 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1381 case TextureStage::STAGE_BUMPENVMAP:
1382 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1383 break; // Invalid alpha operations
1384 default:
1385 ASSERT(false);
1386 }
1387 }
1388
1389 // Clamp result to [0, 1]
1390
1391 switch(textureStage.stageOperation)
1392 {
1393 case TextureStage::STAGE_DISABLE:
1394 case TextureStage::STAGE_SELECTARG1:
1395 case TextureStage::STAGE_SELECTARG2:
1396 case TextureStage::STAGE_SELECTARG3:
1397 case TextureStage::STAGE_MODULATE:
1398 case TextureStage::STAGE_MODULATE2X:
1399 case TextureStage::STAGE_MODULATE4X:
1400 case TextureStage::STAGE_ADD:
1401 case TextureStage::STAGE_MULTIPLYADD:
1402 case TextureStage::STAGE_LERP:
1403 case TextureStage::STAGE_BLENDCURRENTALPHA:
1404 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1405 case TextureStage::STAGE_BLENDFACTORALPHA:
1406 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1407 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1408 case TextureStage::STAGE_DOT3: // Already clamped
1409 case TextureStage::STAGE_PREMODULATE:
1410 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1411 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1412 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1413 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1414 case TextureStage::STAGE_BUMPENVMAP:
1415 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1416 if(state.textureStage[stage].cantUnderflow)
1417 {
1418 break; // Can't go below zero
1419 }
1420 case TextureStage::STAGE_ADDSIGNED:
1421 case TextureStage::STAGE_ADDSIGNED2X:
1422 case TextureStage::STAGE_SUBTRACT:
1423 case TextureStage::STAGE_ADDSMOOTH:
1424 res.r = Max(res.r, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1425 res.g = Max(res.g, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1426 res.b = Max(res.b, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1427 break;
1428 default:
1429 ASSERT(false);
1430 }
1431
1432 switch(textureStage.stageOperationAlpha)
1433 {
1434 case TextureStage::STAGE_DISABLE:
1435 case TextureStage::STAGE_SELECTARG1:
1436 case TextureStage::STAGE_SELECTARG2:
1437 case TextureStage::STAGE_SELECTARG3:
1438 case TextureStage::STAGE_MODULATE:
1439 case TextureStage::STAGE_MODULATE2X:
1440 case TextureStage::STAGE_MODULATE4X:
1441 case TextureStage::STAGE_ADD:
1442 case TextureStage::STAGE_MULTIPLYADD:
1443 case TextureStage::STAGE_LERP:
1444 case TextureStage::STAGE_BLENDCURRENTALPHA:
1445 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1446 case TextureStage::STAGE_BLENDFACTORALPHA:
1447 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1448 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1449 case TextureStage::STAGE_DOT3: // Already clamped
1450 case TextureStage::STAGE_PREMODULATE:
1451 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1452 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1453 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1454 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1455 case TextureStage::STAGE_BUMPENVMAP:
1456 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1457 if(state.textureStage[stage].cantUnderflow)
1458 {
1459 break; // Can't go below zero
1460 }
1461 case TextureStage::STAGE_ADDSIGNED:
1462 case TextureStage::STAGE_ADDSIGNED2X:
1463 case TextureStage::STAGE_SUBTRACT:
1464 case TextureStage::STAGE_ADDSMOOTH:
1465 res.a = Max(res.a, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1466 break;
1467 default:
1468 ASSERT(false);
1469 }
1470
1471 switch(textureStage.stageOperation)
1472 {
1473 case TextureStage::STAGE_DISABLE:
1474 case TextureStage::STAGE_SELECTARG1:
1475 case TextureStage::STAGE_SELECTARG2:
1476 case TextureStage::STAGE_SELECTARG3:
1477 case TextureStage::STAGE_MODULATE:
1478 case TextureStage::STAGE_SUBTRACT:
1479 case TextureStage::STAGE_ADDSMOOTH:
1480 case TextureStage::STAGE_LERP:
1481 case TextureStage::STAGE_BLENDCURRENTALPHA:
1482 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1483 case TextureStage::STAGE_BLENDFACTORALPHA:
1484 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1485 case TextureStage::STAGE_DOT3: // Already clamped
1486 case TextureStage::STAGE_PREMODULATE:
1487 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1488 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1489 case TextureStage::STAGE_BUMPENVMAP:
1490 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1491 break; // Can't go above one
1492 case TextureStage::STAGE_MODULATE2X:
1493 case TextureStage::STAGE_MODULATE4X:
1494 case TextureStage::STAGE_ADD:
1495 case TextureStage::STAGE_ADDSIGNED:
1496 case TextureStage::STAGE_ADDSIGNED2X:
1497 case TextureStage::STAGE_MULTIPLYADD:
1498 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1499 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1500 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1501 res.r = Min(res.r, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1502 res.g = Min(res.g, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1503 res.b = Min(res.b, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1504 break;
1505 default:
1506 ASSERT(false);
1507 }
1508
1509 switch(textureStage.stageOperationAlpha)
1510 {
1511 case TextureStage::STAGE_DISABLE:
1512 case TextureStage::STAGE_SELECTARG1:
1513 case TextureStage::STAGE_SELECTARG2:
1514 case TextureStage::STAGE_SELECTARG3:
1515 case TextureStage::STAGE_MODULATE:
1516 case TextureStage::STAGE_SUBTRACT:
1517 case TextureStage::STAGE_ADDSMOOTH:
1518 case TextureStage::STAGE_LERP:
1519 case TextureStage::STAGE_BLENDCURRENTALPHA:
1520 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1521 case TextureStage::STAGE_BLENDFACTORALPHA:
1522 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1523 case TextureStage::STAGE_DOT3: // Already clamped
1524 case TextureStage::STAGE_PREMODULATE:
1525 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1526 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1527 case TextureStage::STAGE_BUMPENVMAP:
1528 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1529 break; // Can't go above one
1530 case TextureStage::STAGE_MODULATE2X:
1531 case TextureStage::STAGE_MODULATE4X:
1532 case TextureStage::STAGE_ADD:
1533 case TextureStage::STAGE_ADDSIGNED:
1534 case TextureStage::STAGE_ADDSIGNED2X:
1535 case TextureStage::STAGE_MULTIPLYADD:
1536 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1537 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1538 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1539 res.a = Min(res.a, Short4(0x1000, 0x1000, 0x1000, 0x1000));
1540 break;
1541 default:
1542 ASSERT(false);
1543 }
1544
1545 switch(textureStage.destinationArgument)
1546 {
1547 case TextureStage::DESTINATION_CURRENT:
1548 current.r = res.r;
1549 current.g = res.g;
1550 current.b = res.b;
1551 current.a = res.a;
1552 break;
1553 case TextureStage::DESTINATION_TEMP:
1554 temp.r = res.r;
1555 temp.g = res.g;
1556 temp.b = res.b;
1557 temp.a = res.a;
1558 break;
1559 default:
1560 ASSERT(false);
1561 }
1562 }
1563
1564 void PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha)
1565 {
1566 Short4 cmp;
1567 Short4 equal;
1568
1569 switch(state.alphaCompareMode)
1570 {
1571 case Context::ALPHA_ALWAYS:
1572 aMask = 0xF;
1573 break;
1574 case Context::ALPHA_NEVER:
1575 aMask = 0x0;
1576 break;
1577 case Context::ALPHA_EQUAL:
1578 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1579 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1580 break;
1581 case Context::ALPHA_NOTEQUAL: // a != b ~ !(a == b)
1582 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
1583 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1584 break;
1585 case Context::ALPHA_LESS: // a < b ~ b > a
1586 cmp = CmpGT(*Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)), alpha);
1587 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1588 break;
1589 case Context::ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
1590 equal = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1591 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1592 cmp |= equal;
1593 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1594 break;
1595 case Context::ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
1596 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
1597 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1598 break;
1599 case Context::ALPHA_GREATER: // a > b
1600 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1601 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1602 break;
1603 default:
1604 ASSERT(false);
1605 }
1606 }
1607
1608 void PixelRoutine::alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha)
1609 {
1610 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c0)));
1611 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c1)));
1612 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c2)));
1613 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c3)));
1614
1615 Int aMask0 = SignMask(coverage0);
1616 Int aMask1 = SignMask(coverage1);
1617 Int aMask2 = SignMask(coverage2);
1618 Int aMask3 = SignMask(coverage3);
1619
1620 cMask[0] &= aMask0;
1621 cMask[1] &= aMask1;
1622 cMask[2] &= aMask2;
1623 cMask[3] &= aMask3;
1624 }
1625
1626 Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Color4i &current)
1627 {
1628 if(!state.alphaTestActive())
1629 {
1630 return true;
1631 }
1632
1633 Int aMask;
1634
1635 if(state.transparencyAntialiasing == Context::TRANSPARENCY_NONE)
1636 {
1637 alphaTest(r, aMask, current.a);
1638
1639 for(unsigned int q = 0; q < state.multiSample; q++)
1640 {
1641 cMask[q] &= aMask;
1642 }
1643 }
1644 else if(state.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE)
1645 {
1646 Float4 alpha = Float4(current.a) * Float4(1.0f / 0x1000);
1647
1648 alphaToCoverage(r, cMask, alpha);
1649 }
1650 else ASSERT(false);
1651
1652 Int pass = cMask[0];
1653
1654 for(unsigned int q = 1; q < state.multiSample; q++)
1655 {
1656 pass = pass | cMask[q];
1657 }
1658
1659 return pass != 0x0;
1660 }
1661
1662 Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Color4f &c0)
1663 {
1664 if(!state.alphaTestActive())
1665 {
1666 return true;
1667 }
1668
1669 Int aMask;
1670
1671 if(state.transparencyAntialiasing == Context::TRANSPARENCY_NONE)
1672 {
1673 Short4 alpha = RoundShort4(c0.a * Float4(0x1000, 0x1000, 0x1000, 0x1000));
1674
1675 alphaTest(r, aMask, alpha);
1676
1677 for(unsigned int q = 0; q < state.multiSample; q++)
1678 {
1679 cMask[q] &= aMask;
1680 }
1681 }
1682 else if(state.transparencyAntialiasing == Context::TRANSPARENCY_ALPHA_TO_COVERAGE)
1683 {
1684 alphaToCoverage(r, cMask, c0.a);
1685 }
1686 else ASSERT(false);
1687
1688 Int pass = cMask[0];
1689
1690 for(unsigned int q = 1; q < state.multiSample; q++)
1691 {
1692 pass = pass | cMask[q];
1693 }
1694
1695 return pass != 0x0;
1696 }
1697
1698 void PixelRoutine::fogBlend(Registers &r, Color4i &current, Float4 &f, Float4 &z, Float4 &rhw)
1699 {
1700 if(!state.fogActive)
1701 {
1702 return;
1703 }
1704
1705 if(state.pixelFogMode != Context::FOG_NONE)
1706 {
1707 pixelFog(r, f, z, rhw);
1708 }
1709
1710 UShort4 fog = convertFixed16(f, true);
1711
1712 current.r = As<Short4>(MulHigh(As<UShort4>(current.r), fog));
1713 current.g = As<Short4>(MulHigh(As<UShort4>(current.g), fog));
1714 current.b = As<Short4>(MulHigh(As<UShort4>(current.b), fog));
1715
1716 UShort4 invFog = UShort4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - fog;
1717
1718 current.r += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[0]))));
1719 current.g += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[1]))));
1720 current.b += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[2]))));
1721 }
1722
1723 void PixelRoutine::fogBlend(Registers &r, Color4f &c0, Float4 &fog, Float4 &z, Float4 &rhw)
1724 {
1725 if(!state.fogActive)
1726 {
1727 return;
1728 }
1729
1730 if(state.pixelFogMode != Context::FOG_NONE)
1731 {
1732 pixelFog(r, fog, z, rhw);
1733
1734 fog = Min(fog, Float4(1.0f, 1.0f, 1.0f, 1.0f));
1735 fog = Max(fog, Float4(0.0f, 0.0f, 0.0f, 0.0f));
1736 }
1737
1738 c0.r -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
1739 c0.g -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
1740 c0.b -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
1741
1742 c0.r *= fog;
1743 c0.g *= fog;
1744 c0.b *= fog;
1745
1746 c0.r += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
1747 c0.g += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
1748 c0.b += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
1749 }
1750
1751 void PixelRoutine::pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw)
1752 {
1753 Float4 &zw = visibility;
1754
1755 if(state.pixelFogMode != Context::FOG_NONE)
1756 {
1757 if(state.wBasedFog)
1758 {
1759 zw = rhw;
1760 }
1761 else
1762 {
1763 if(complementaryDepthBuffer)
1764 {
1765 zw = Float4(1.0f, 1.0f, 1.0f, 1.0f) - z;
1766 }
1767 else
1768 {
1769 zw = z;
1770 }
1771 }
1772 }
1773
1774 switch(state.pixelFogMode)
1775 {
1776 case Context::FOG_NONE:
1777 break;
1778 case Context::FOG_LINEAR:
1779 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale));
1780 zw += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
1781 break;
1782 case Context::FOG_EXP:
1783 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE));
1784 zw = exponential(zw, true);
1785 break;
1786 case Context::FOG_EXP2:
1787 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE2));
1788 zw *= zw;
1789 zw = exponential(zw, true);
1790 zw = Rcp_pp(zw);
1791 break;
1792 default:
1793 ASSERT(false);
1794 }
1795 }
1796
1797 void PixelRoutine::specularPixel(Color4i &current, Color4i &specular)
1798 {
1799 if(!state.specularAdd)
1800 {
1801 return;
1802 }
1803
1804 current.r = AddSat(current.r, specular.r);
1805 current.g = AddSat(current.g, specular.g);
1806 current.b = AddSat(current.b, specular.b);
1807 }
1808
1809 void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
1810 {
1811 if(!state.depthWriteEnable)
1812 {
1813 return;
1814 }
1815
1816 Float4 Z = z;
1817
1818 if(pixelShader && pixelShader->depthOverride())
1819 {
1820 if(complementaryDepthBuffer)
1821 {
1822 Z = Float4(1, 1, 1, 1) - r.oDepth;
1823 }
1824 else
1825 {
1826 Z = r.oDepth;
1827 }
1828 }
1829
1830 Pointer<Byte> buffer;
1831 Int pitch;
1832
1833 if(!state.quadLayoutDepthBuffer)
1834 {
1835 buffer = zBuffer + 4 * x;
1836 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
1837 }
1838 else
1839 {
1840 buffer = zBuffer + 8 * x;
1841 }
1842
1843 if(q > 0)
1844 {
1845 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
1846 }
1847
1848 Float4 zValue;
1849
1850 if(state.depthCompareMode != Context::DEPTH_NEVER || (state.depthCompareMode != Context::DEPTH_ALWAYS && !state.depthWriteEnable))
1851 {
1852 if(!state.quadLayoutDepthBuffer)
1853 {
1854 // FIXME: Properly optimizes?
1855 zValue.xy = *Pointer<Float4>(buffer);
1856 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
1857 }
1858 else
1859 {
1860 zValue = *Pointer<Float4>(buffer, 16);
1861 }
1862 }
1863
1864 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
1865 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
1866 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
1867
1868 if(!state.quadLayoutDepthBuffer)
1869 {
1870 // FIXME: Properly optimizes?
1871 *Pointer<Float2>(buffer) = Float2(Z.xy);
1872 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
1873 }
1874 else
1875 {
1876 *Pointer<Float4>(buffer, 16) = Z;
1877 }
1878 }
1879
1880 void PixelRoutine::writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
1881 {
1882 if(!state.stencilActive)
1883 {
1884 return;
1885 }
1886
1887 if(state.stencilPassOperation == Context::OPERATION_KEEP && state.stencilZFailOperation == Context::OPERATION_KEEP && state.stencilFailOperation == Context::OPERATION_KEEP)
1888 {
1889 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == Context::OPERATION_KEEP && state.stencilZFailOperationCCW == Context::OPERATION_KEEP && state.stencilFailOperationCCW == Context::OPERATION_KEEP))
1890 {
1891 return;
1892 }
1893 }
1894
1895 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
1896 {
1897 return;
1898 }
1899
1900 Pointer<Byte> buffer = sBuffer + 2 * x;
1901
1902 if(q > 0)
1903 {
1904 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
1905 }
1906
1907 Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
1908
1909 Byte8 newValue;
1910 stencilOperation(r, newValue, bufferValue, (Context::StencilOperation)state.stencilPassOperation, (Context::StencilOperation)state.stencilZFailOperation, (Context::StencilOperation)state.stencilFailOperation, false, zMask, sMask);
1911
1912 if(!state.noStencilWriteMask)
1913 {
1914 Byte8 maskedValue = bufferValue;
1915 newValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].writeMaskQ));
1916 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
1917 newValue |= maskedValue;
1918 }
1919
1920 if(state.twoSidedStencil)
1921 {
1922 Byte8 newValueCCW;
1923
1924 stencilOperation(r, newValueCCW, bufferValue, (Context::StencilOperation)state.stencilPassOperationCCW, (Context::StencilOperation)state.stencilZFailOperationCCW, (Context::StencilOperation)state.stencilFailOperationCCW, true, zMask, sMask);
1925
1926 if(!state.noStencilWriteMaskCCW)
1927 {
1928 Byte8 maskedValue = bufferValue;
1929 newValueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].writeMaskQ));
1930 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
1931 newValueCCW |= maskedValue;
1932 }
1933
1934 newValue &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
1935 newValueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
1936 newValue |= newValueCCW;
1937 }
1938
1939 newValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
1940 bufferValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
1941 newValue |= bufferValue;
1942
1943 *Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
1944 }
1945
1946 void PixelRoutine::stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, Context::StencilOperation stencilPassOperation, Context::StencilOperation stencilZFailOperation, Context::StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
1947 {
1948 Byte8 &pass = newValue;
1949 Byte8 fail;
1950 Byte8 zFail;
1951
1952 stencilOperation(r, pass, bufferValue, stencilPassOperation, CCW);
1953
1954 if(stencilZFailOperation != stencilPassOperation)
1955 {
1956 stencilOperation(r, zFail, bufferValue, stencilZFailOperation, CCW);
1957 }
1958
1959 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
1960 {
1961 stencilOperation(r, fail, bufferValue, stencilFailOperation, CCW);
1962 }
1963
1964 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
1965 {
1966 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
1967 {
1968 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
1969 zFail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
1970 pass |= zFail;
1971 }
1972
1973 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
1974 fail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
1975 pass |= fail;
1976 }
1977 }
1978
1979 void PixelRoutine::stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, Context::StencilOperation operation, bool CCW)
1980 {
1981 switch(operation)
1982 {
1983 case Context::OPERATION_KEEP:
1984 output = bufferValue;
1985 break;
1986 case Context::OPERATION_ZERO:
1987 output = Byte8(0x0000000000000000);
1988 break;
1989 case Context::OPERATION_REPLACE:
1990 output = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceQ));
1991 break;
1992 case Context::OPERATION_INCRSAT:
1993 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
1994 break;
1995 case Context::OPERATION_DECRSAT:
1996 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
1997 break;
1998 case Context::OPERATION_INVERT:
1999 output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
2000 break;
2001 case Context::OPERATION_INCR:
2002 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
2003 break;
2004 case Context::OPERATION_DECR:
2005 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
2006 break;
2007 default:
2008 ASSERT(false);
2009 }
2010 }
2011
2012 void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int coordinates, int stage, bool project)
2013 {
2014 Float4 u = r.vx[2 + coordinates];
2015 Float4 v = r.vy[2 + coordinates];
2016 Float4 w = r.vz[2 + coordinates];
2017 Float4 q = r.vw[2 + coordinates];
2018
2019 if(perturbate)
2020 {
2021 u += r.du;
2022 v += r.dv;
2023
2024 perturbate = false;
2025 }
2026
2027 sampleTexture(r, c, stage, u, v, w, q, project);
2028 }
2029
2030 void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias, bool fixed12)
2031 {
2032 Color4f dsx;
2033 Color4f dsy;
2034
2035 sampleTexture(r, c, stage, u, v, w, q, dsx, dsy, project, bias, fixed12, false);
2036 }
2037
2038 void PixelRoutine::sampleTexture(Registers &r, Color4i &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project, bool bias, bool fixed12, bool gradients, bool lodProvided)
2039 {
2040 #if PERF_PROFILE
2041 Long texTime = Ticks();
2042 #endif
2043
2044 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
2045
2046 if(!project)
2047 {
2048 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, fixed12, gradients, lodProvided);
2049 }
2050 else
2051 {
2052 Float4 rq = reciprocal(q);
2053
2054 Float4 u_q = u * rq;
2055 Float4 v_q = v * rq;
2056 Float4 w_q = w * rq;
2057
2058 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, fixed12, gradients, lodProvided);
2059 }
2060
2061 #if PERF_PROFILE
2062 r.cycles[PERF_TEX] += Ticks() - texTime;
2063 #endif
2064 }
2065
2066 void PixelRoutine::sampleTexture(Registers &r, Color4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Color4f &dsx, Color4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
2067 {
2068 #if PERF_PROFILE
2069 Long texTime = Ticks();
2070 #endif
2071
2072 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
2073
2074 if(!project)
2075 {
2076 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
2077 }
2078 else
2079 {
2080 Float4 rq = reciprocal(q);
2081
2082 Float4 u_q = u * rq;
2083 Float4 v_q = v * rq;
2084 Float4 w_q = w * rq;
2085
2086 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, gradients, lodProvided);
2087 }
2088
2089 #if PERF_PROFILE
2090 r.cycles[PERF_TEX] += Ticks() - texTime;
2091 #endif
2092 }
2093
2094 void PixelRoutine::clampColor(Color4f oC[4])
2095 {
2096 for(int index = 0; index < 4; index++)
2097 {
2098 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
2099 {
2100 continue;
2101 }
2102
2103 switch(state.targetFormat[index])
2104 {
2105 case FORMAT_NULL:
2106 break;
2107 case FORMAT_A16B16G16R16:
2108 case FORMAT_A8R8G8B8:
2109 case FORMAT_X8R8G8B8:
2110 case FORMAT_G16R16:
2111 oC[index].r = Max(oC[index].r, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].r = Min(oC[index].r, Float4(1.0f, 1.0f, 1.0f, 1.0f));
2112 oC[index].g = Max(oC[index].g, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].g = Min(oC[index].g, Float4(1.0f, 1.0f, 1.0f, 1.0f));
2113 oC[index].b = Max(oC[index].b, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].b = Min(oC[index].b, Float4(1.0f, 1.0f, 1.0f, 1.0f));
2114 oC[index].a = Max(oC[index].a, Float4(0.0f, 0.0f, 0.0f, 0.0f)); oC[index].a = Min(oC[index].a, Float4(1.0f, 1.0f, 1.0f, 1.0f));
2115 break;
2116 case FORMAT_R32F:
2117 case FORMAT_G32R32F:
2118 case FORMAT_A32B32G32R32F:
2119 break;
2120 default:
2121 ASSERT(false);
2122 }
2123 }
2124 }
2125
2126 void PixelRoutine::rasterOperation(Color4i &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
2127 {
2128 if(!state.colorWriteActive(0))
2129 {
2130 return;
2131 }
2132
2133 Color4f oC;
2134
2135 switch(state.targetFormat[0])
2136 {
2137 case FORMAT_X8R8G8B8:
2138 case FORMAT_A8R8G8B8:
2139 case FORMAT_G16R16:
2140 case FORMAT_A16B16G16R16:
2141 if(!postBlendSRGB && state.writeSRGB)
2142 {
2143 linearToSRGB12_16(r, current);
2144 }
2145 else
2146 {
2147 current.r <<= 4;
2148 current.g <<= 4;
2149 current.b <<= 4;
2150 current.a <<= 4;
2151 }
2152
2153 fogBlend(r, current, fog, r.z[0], r.rhw);
2154
2155 for(unsigned int q = 0; q < state.multiSample; q++)
2156 {
2157 Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
2158 Color4i color = current;
2159
2160 if(state.multiSampleMask & (1 << q))
2161 {
2162 alphaBlend(r, 0, buffer, color, x);
2163 writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2164 }
2165 }
2166 break;
2167 case FORMAT_R32F:
2168 case FORMAT_G32R32F:
2169 case FORMAT_A32B32G32R32F:
2170 convertSigned12(oC, current);
2171 fogBlend(r, oC, fog, r.z[0], r.rhw);
2172
2173 for(unsigned int q = 0; q < state.multiSample; q++)
2174 {
2175 Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
2176 Color4f color = oC;
2177
2178 if(state.multiSampleMask & (1 << q))
2179 {
2180 alphaBlend(r, 0, buffer, color, x);
2181 writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2182 }
2183 }
2184 break;
2185 default:
2186 ASSERT(false);
2187 }
2188 }
2189
2190 void PixelRoutine::rasterOperation(Color4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
2191 {
2192 for(int index = 0; index < 4; index++)
2193 {
2194 if(!state.colorWriteActive(index))
2195 {
2196 continue;
2197 }
2198
2199 if(!postBlendSRGB && state.writeSRGB)
2200 {
2201 oC[index].r = linearToSRGB(oC[index].r);
2202 oC[index].g = linearToSRGB(oC[index].g);
2203 oC[index].b = linearToSRGB(oC[index].b);
2204 }
2205
2206 if(index == 0)
2207 {
2208 fogBlend(r, oC[index], fog, r.z[0], r.rhw);
2209 }
2210
2211 switch(state.targetFormat[index])
2212 {
2213 case FORMAT_X8R8G8B8:
2214 case FORMAT_A8R8G8B8:
2215 case FORMAT_G16R16:
2216 case FORMAT_A16B16G16R16:
2217 for(unsigned int q = 0; q < state.multiSample; q++)
2218 {
2219 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
2220 Color4i color;
2221
2222 color.r = convertFixed16(oC[index].r, false);
2223 color.g = convertFixed16(oC[index].g, false);
2224 color.b = convertFixed16(oC[index].b, false);
2225 color.a = convertFixed16(oC[index].a, false);
2226
2227 if(state.multiSampleMask & (1 << q))
2228 {
2229 alphaBlend(r, index, buffer, color, x);
2230 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2231 }
2232 }
2233 break;
2234 case FORMAT_R32F:
2235 case FORMAT_G32R32F:
2236 case FORMAT_A32B32G32R32F:
2237 for(unsigned int q = 0; q < state.multiSample; q++)
2238 {
2239 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
2240 Color4f color = oC[index];
2241
2242 if(state.multiSampleMask & (1 << q))
2243 {
2244 alphaBlend(r, index, buffer, color, x);
2245 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2246 }
2247 }
2248 break;
2249 default:
2250 ASSERT(false);
2251 }
2252 }
2253 }
2254
2255 void PixelRoutine::blendFactor(Registers &r, const Color4i &blendFactor, const Color4i &current, const Color4i &pixel, Context::BlendFactor blendFactorActive)
2256 {
2257 switch(blendFactorActive)
2258 {
2259 case Context::BLEND_ZERO:
2260 // Optimized
2261 break;
2262 case Context::BLEND_ONE:
2263 // Optimized
2264 break;
2265 case Context::BLEND_SOURCE:
2266 blendFactor.r = current.r;
2267 blendFactor.g = current.g;
2268 blendFactor.b = current.b;
2269 break;
2270 case Context::BLEND_INVSOURCE:
2271 blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.r;
2272 blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.g;
2273 blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.b;
2274 break;
2275 case Context::BLEND_DEST:
2276 blendFactor.r = pixel.r;
2277 blendFactor.g = pixel.g;
2278 blendFactor.b = pixel.b;
2279 break;
2280 case Context::BLEND_INVDEST:
2281 blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.r;
2282 blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.g;
2283 blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.b;
2284 break;
2285 case Context::BLEND_SOURCEALPHA:
2286 blendFactor.r = current.a;
2287 blendFactor.g = current.a;
2288 blendFactor.b = current.a;
2289 break;
2290 case Context::BLEND_INVSOURCEALPHA:
2291 blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a;
2292 blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a;
2293 blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a;
2294 break;
2295 case Context::BLEND_DESTALPHA:
2296 blendFactor.r = pixel.a;
2297 blendFactor.g = pixel.a;
2298 blendFactor.b = pixel.a;
2299 break;
2300 case Context::BLEND_INVDESTALPHA:
2301 blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2302 blendFactor.g = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2303 blendFactor.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2304 break;
2305 case Context::BLEND_SRCALPHASAT:
2306 blendFactor.r = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2307 blendFactor.r = Min(As<UShort4>(blendFactor.r), As<UShort4>(current.a));
2308 blendFactor.g = blendFactor.r;
2309 blendFactor.b = blendFactor.r;
2310 break;
2311 case Context::BLEND_CONSTANT:
2312 blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0]));
2313 blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1]));
2314 blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2]));
2315 break;
2316 case Context::BLEND_INVCONSTANT:
2317 blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
2318 blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
2319 blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
2320 break;
2321 case Context::BLEND_CONSTANTALPHA:
2322 blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2323 blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2324 blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2325 break;
2326 case Context::BLEND_INVCONSTANTALPHA:
2327 blendFactor.r = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2328 blendFactor.g = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2329 blendFactor.b = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2330 break;
2331 default:
2332 ASSERT(false);
2333 }
2334 }
2335
2336 void PixelRoutine::blendFactorAlpha(Registers &r, const Color4i &blendFactor, const Color4i &current, const Color4i &pixel, Context::BlendFactor blendFactorAlphaActive)
2337 {
2338 switch(blendFactorAlphaActive)
2339 {
2340 case Context::BLEND_ZERO:
2341 // Optimized
2342 break;
2343 case Context::BLEND_ONE:
2344 // Optimized
2345 break;
2346 case Context::BLEND_SOURCE:
2347 blendFactor.a = current.a;
2348 break;
2349 case Context::BLEND_INVSOURCE:
2350 blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a;
2351 break;
2352 case Context::BLEND_DEST:
2353 blendFactor.a = pixel.a;
2354 break;
2355 case Context::BLEND_INVDEST:
2356 blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2357 break;
2358 case Context::BLEND_SOURCEALPHA:
2359 blendFactor.a = current.a;
2360 break;
2361 case Context::BLEND_INVSOURCEALPHA:
2362 blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - current.a;
2363 break;
2364 case Context::BLEND_DESTALPHA:
2365 blendFactor.a = pixel.a;
2366 break;
2367 case Context::BLEND_INVDESTALPHA:
2368 blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF) - pixel.a;
2369 break;
2370 case Context::BLEND_SRCALPHASAT:
2371 blendFactor.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2372 break;
2373 case Context::BLEND_CONSTANT:
2374 case Context::BLEND_CONSTANTALPHA:
2375 blendFactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2376 break;
2377 case Context::BLEND_INVCONSTANT:
2378 case Context::BLEND_INVCONSTANTALPHA:
2379 blendFactor.a = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2380 break;
2381 default:
2382 ASSERT(false);
2383 }
2384 }
2385
 // Blends the source color 'current' with the color already stored in render
 // target 'index' and leaves the result in 'current'. Nothing is written to the
 // buffer here. Does nothing when state.alphaBlendActive is false.
 //   r       - register set; r.data gives access to the DrawData fields used below
 //   index   - render target index (selects targetFormat and colorPitchB)
 //   cBuffer - base pointer the destination pixels are read from
 //   current - source color in 16-bit per channel form; overwritten with the result
 //   x       - pixel x position, scaled by the per-format byte size when addressing
2386 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Color4i &current, Int &x)
2387 {
2388 if(!state.alphaBlendActive)
2389 {
2390 return;
2391 }
2392
2393 Pointer<Byte> buffer;
2394
2395 Color4i pixel;
2396 Short4 c01;
2397 Short4 c23;
2398
 // Each format reads 8 (or 16) bytes from one row and the same amount from the
 // row colorPitchB[index] bytes further, then rearranges them into one Short4
 // per channel.
2399 // Read pixel
2400 switch(state.targetFormat[index])
2401 {
2402 case FORMAT_A8R8G8B8:
2403 buffer = cBuffer + 4 * x;
2404 c01 = *Pointer<Short4>(buffer);
2405 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2406 c23 = *Pointer<Short4>(buffer);
 // Byte interleaves that gather the bytes of each channel together
2407 pixel.b = c01;
2408 pixel.g = c01;
2409 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(c23));
2410 pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(c23));
2411 pixel.r = pixel.b;
2412 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.g));
2413 pixel.r = UnpackHigh(As<Byte8>(pixel.r), As<Byte8>(pixel.g));
2414 pixel.g = pixel.b;
2415 pixel.a = pixel.r;
 // Interleaving a register with itself duplicates every byte
 // NOTE(review): presumably this widens each 8-bit value to 16 bits (0xAB -> 0xABAB) - confirm
2416 pixel.r = UnpackLow(As<Byte8>(pixel.r), As<Byte8>(pixel.r));
2417 pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(pixel.g));
2418 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.b));
2419 pixel.a = UnpackHigh(As<Byte8>(pixel.a), As<Byte8>(pixel.a));
2420 break;
2421 case FORMAT_X8R8G8B8:
 // Same unpacking as FORMAT_A8R8G8B8, except that alpha is set to 0xFFFF
 // instead of being taken from the buffer
2422 buffer = cBuffer + 4 * x;
2423 c01 = *Pointer<Short4>(buffer);
2424 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2425 c23 = *Pointer<Short4>(buffer);
2426 pixel.b = c01;
2427 pixel.g = c01;
2428 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(c23));
2429 pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(c23));
2430 pixel.r = pixel.b;
2431 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.g));
2432 pixel.r = UnpackHigh(As<Byte8>(pixel.r), As<Byte8>(pixel.g));
2433 pixel.g = pixel.b;
2434 pixel.r = UnpackLow(As<Byte8>(pixel.r), As<Byte8>(pixel.r));
2435 pixel.g = UnpackHigh(As<Byte8>(pixel.g), As<Byte8>(pixel.g));
2436 pixel.b = UnpackLow(As<Byte8>(pixel.b), As<Byte8>(pixel.b));
2437 pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2438 break;
2439 case FORMAT_A8G8R8B8Q:
2440 UNIMPLEMENTED();
2441 // pixel.b = UnpackLow(As<Byte8>(pixel.b), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2442 // pixel.r = UnpackHigh(As<Byte8>(pixel.r), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2443 // pixel.g = UnpackLow(As<Byte8>(pixel.g), *Pointer<Byte8>(cBuffer + 8 * x + 8));
2444 // pixel.a = UnpackHigh(As<Byte8>(pixel.a), *Pointer<Byte8>(cBuffer + 8 * x + 8));
2445 break;
2446 case FORMAT_X8G8R8B8Q:
2447 UNIMPLEMENTED();
2448 // pixel.b = UnpackLow(As<Byte8>(pixel.b), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2449 // pixel.r = UnpackHigh(As<Byte8>(pixel.r), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2450 // pixel.g = UnpackLow(As<Byte8>(pixel.g), *Pointer<Byte8>(cBuffer + 8 * x + 8));
2451 // pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2452 break;
2453 case FORMAT_A16B16G16R16:
 // Two 8-byte reads per row, two rows, then a 4x4 transpose of the four registers
2454 buffer = cBuffer;
2455 pixel.r = *Pointer<Short4>(buffer + 8 * x);
2456 pixel.g = *Pointer<Short4>(buffer + 8 * x + 8);
2457 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2458 pixel.b = *Pointer<Short4>(buffer + 8 * x);
2459 pixel.a = *Pointer<Short4>(buffer + 8 * x + 8);
2460 transpose4x4(pixel.r, pixel.g, pixel.b, pixel.a);
2461 break;
2462 case FORMAT_G16R16:
 // Only r and g are read from the buffer; b and a are set to 0xFFFF
2463 buffer = cBuffer;
2464 pixel.r = *Pointer<Short4>(buffer + 4 * x);
2465 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2466 pixel.g = *Pointer<Short4>(buffer + 4 * x);
2467 pixel.b = pixel.r;
2468 pixel.r = As<Short4>(UnpackLow(pixel.r, pixel.g));
2469 pixel.b = As<Short4>(UnpackHigh(pixel.b, pixel.g));
2470 pixel.g = pixel.b;
2471 pixel.r = As<Short4>(UnpackLow(pixel.r, pixel.b));
2472 pixel.g = As<Short4>(UnpackHigh(pixel.g, pixel.b));
2473 pixel.b = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2474 pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
2475 break;
2476 default:
2477 ASSERT(false);
2478 }
2479
 // With post-blend sRGB conversion the destination is converted to linear
 // before it is blended
2480 if(postBlendSRGB && state.writeSRGB)
2481 {
2482 sRGBtoLinear16_16(r, pixel);
2483 }
2484
2485 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
2486 Color4i sourceFactor;
2487 Color4i destFactor;
2488
 // Both color factors are computed before 'current' or 'pixel' are modified
2489 blendFactor(r, sourceFactor, current, pixel, (Context::BlendFactor)state.sourceBlendFactor);
2490 blendFactor(r, destFactor, current, pixel, (Context::BlendFactor)state.destBlendFactor);
2491
 // blendFactor() produces no factor for BLEND_ONE and BLEND_ZERO, so the multiply is skipped for them
 // NOTE(review): a ZERO factor is presumably accounted for through state.blendOperation (BLENDOP_SOURCE/DEST/NULL) - confirm
 // NOTE(review): MulHigh presumably keeps the upper 16 bits of the unsigned product - confirm
2492 if(state.sourceBlendFactor != Context::BLEND_ONE && state.sourceBlendFactor != Context::BLEND_ZERO)
2493 {
2494 current.r = MulHigh(As<UShort4>(current.r), As<UShort4>(sourceFactor.r));
2495 current.g = MulHigh(As<UShort4>(current.g), As<UShort4>(sourceFactor.g));
2496 current.b = MulHigh(As<UShort4>(current.b), As<UShort4>(sourceFactor.b));
2497 }
2498
2499 if(state.destBlendFactor != Context::BLEND_ONE && state.destBlendFactor != Context::BLEND_ZERO)
2500 {
2501 pixel.r = MulHigh(As<UShort4>(pixel.r), As<UShort4>(destFactor.r));
2502 pixel.g = MulHigh(As<UShort4>(pixel.g), As<UShort4>(destFactor.g));
2503 pixel.b = MulHigh(As<UShort4>(pixel.b), As<UShort4>(destFactor.b));
2504 }
2505
 // Combine the weighted source and destination r/g/b; all arithmetic treats the lanes as unsigned
2506 switch(state.blendOperation)
2507 {
2508 case Context::BLENDOP_ADD:
2509 current.r = AddSat(As<UShort4>(current.r), As<UShort4>(pixel.r));
2510 current.g = AddSat(As<UShort4>(current.g), As<UShort4>(pixel.g));
2511 current.b = AddSat(As<UShort4>(current.b), As<UShort4>(pixel.b));
2512 break;
2513 case Context::BLENDOP_SUB:
2514 current.r = SubSat(As<UShort4>(current.r), As<UShort4>(pixel.r));
2515 current.g = SubSat(As<UShort4>(current.g), As<UShort4>(pixel.g));
2516 current.b = SubSat(As<UShort4>(current.b), As<UShort4>(pixel.b));
2517 break;
2518 case Context::BLENDOP_INVSUB:
2519 current.r = SubSat(As<UShort4>(pixel.r), As<UShort4>(current.r));
2520 current.g = SubSat(As<UShort4>(pixel.g), As<UShort4>(current.g));
2521 current.b = SubSat(As<UShort4>(pixel.b), As<UShort4>(current.b));
2522 break;
2523 case Context::BLENDOP_MIN:
2524 current.r = Min(As<UShort4>(current.r), As<UShort4>(pixel.r));
2525 current.g = Min(As<UShort4>(current.g), As<UShort4>(pixel.g));
2526 current.b = Min(As<UShort4>(current.b), As<UShort4>(pixel.b));
2527 break;
2528 case Context::BLENDOP_MAX:
2529 current.r = Max(As<UShort4>(current.r), As<UShort4>(pixel.r));
2530 current.g = Max(As<UShort4>(current.g), As<UShort4>(pixel.g));
2531 current.b = Max(As<UShort4>(current.b), As<UShort4>(pixel.b));
2532 break;
2533 case Context::BLENDOP_SOURCE:
2534 // No operation
2535 break;
2536 case Context::BLENDOP_DEST:
2537 current.r = pixel.r;
2538 current.g = pixel.g;
2539 current.b = pixel.b;
2540 break;
2541 case Context::BLENDOP_NULL:
2542 current.r = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2543 current.g = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2544 current.b = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2545 break;
2546 default:
2547 ASSERT(false);
2548 }
2549
 // The alpha channel is blended separately with its own factors and operation.
 // current.a and pixel.a have not been modified by the color blending above.
2550 blendFactorAlpha(r, sourceFactor, current, pixel, (Context::BlendFactor)state.sourceBlendFactorAlpha);
2551 blendFactorAlpha(r, destFactor, current, pixel, (Context::BlendFactor)state.destBlendFactorAlpha);
2552
2553 if(state.sourceBlendFactorAlpha != Context::BLEND_ONE && state.sourceBlendFactorAlpha != Context::BLEND_ZERO)
2554 {
2555 current.a = MulHigh(As<UShort4>(current.a), As<UShort4>(sourceFactor.a));
2556 }
2557
2558 if(state.destBlendFactorAlpha != Context::BLEND_ONE && state.destBlendFactorAlpha != Context::BLEND_ZERO)
2559 {
2560 pixel.a = MulHigh(As<UShort4>(pixel.a), As<UShort4>(destFactor.a));
2561 }
2562
2563 switch(state.blendOperationAlpha)
2564 {
2565 case Context::BLENDOP_ADD:
2566 current.a = AddSat(As<UShort4>(current.a), As<UShort4>(pixel.a));
2567 break;
2568 case Context::BLENDOP_SUB:
2569 current.a = SubSat(As<UShort4>(current.a), As<UShort4>(pixel.a));
2570 break;
2571 case Context::BLENDOP_INVSUB:
2572 current.a = SubSat(As<UShort4>(pixel.a), As<UShort4>(current.a));
2573 break;
2574 case Context::BLENDOP_MIN:
2575 current.a = Min(As<UShort4>(current.a), As<UShort4>(pixel.a));
2576 break;
2577 case Context::BLENDOP_MAX:
2578 current.a = Max(As<UShort4>(current.a), As<UShort4>(pixel.a));
2579 break;
2580 case Context::BLENDOP_SOURCE:
2581 // No operation
2582 break;
2583 case Context::BLENDOP_DEST:
2584 current.a = pixel.a;
2585 break;
2586 case Context::BLENDOP_NULL:
2587 current.a = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2588 break;
2589 default:
2590 ASSERT(false);
2591 }
2592 }
2593
2594 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Color4i &current, Int &sMask, Int &zMask, Int &cMask)
2595 {
2596 if(!state.colorWriteActive(index))
2597 {
2598 return;
2599 }
2600
2601 if(postBlendSRGB && state.writeSRGB)
2602 {
2603 linearToSRGB16_16(r, current);
2604 }
2605
2606 if(exactColorRounding)
2607 {
2608 switch(state.targetFormat[index])
2609 {
2610 case FORMAT_X8G8R8B8Q:
2611 case FORMAT_A8G8R8B8Q:
2612 case FORMAT_X8R8G8B8:
2613 case FORMAT_A8R8G8B8:
2614 {
2615 current.r = current.r - As<Short4>(As<UShort4>(current.r) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2616 current.g = current.g - As<Short4>(As<UShort4>(current.g) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2617 current.b = current.b - As<Short4>(As<UShort4>(current.b) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2618 current.a = current.a - As<Short4>(As<UShort4>(current.a) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2619 }
2620 break;
2621 }
2622 }
2623
2624 int rgbaWriteMask = state.colorWriteActive(index);
2625 int bgraWriteMask = rgbaWriteMask & 0x0000000A | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
2626 int brgaWriteMask = rgbaWriteMask & 0x00000008 | (rgbaWriteMask & 0x00000001) << 1 | (rgbaWriteMask & 0x00000002) << 1 | (rgbaWriteMask & 0x00000004) >> 2;
2627
2628 switch(state.targetFormat[index])
2629 {
2630 case FORMAT_X8G8R8B8Q:
2631 UNIMPLEMENTED();
2632 // current.r = As<Short4>(As<UShort4>(current.r) >> 8);
2633 // current.g = As<Short4>(As<UShort4>(current.g) >> 8);
2634 // current.b = As<Short4>(As<UShort4>(current.b) >> 8);
2635
2636 // current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r)));
2637 // current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.g)));
2638 break;
2639 case FORMAT_A8G8R8B8Q:
2640 UNIMPLEMENTED();
2641 // current.r = As<Short4>(As<UShort4>(current.r) >> 8);
2642 // current.g = As<Short4>(As<UShort4>(current.g) >> 8);
2643 // current.b = As<Short4>(As<UShort4>(current.b) >> 8);
2644 // current.a = As<Short4>(As<UShort4>(current.a) >> 8);
2645
2646 // current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r)));
2647 // current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.a)));
2648 break;
2649 case FORMAT_X8R8G8B8:
2650 case FORMAT_A8R8G8B8:
2651 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
2652 {
2653 current.r = As<Short4>(As<UShort4>(current.r) >> 8);
2654 current.g = As<Short4>(As<UShort4>(current.g) >> 8);
2655 current.b = As<Short4>(As<UShort4>(current.b) >> 8);
2656
2657 current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r)));
2658 current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.g)));
2659
2660 current.r = current.b;
2661 current.b = UnpackLow(As<Byte8>(current.b), As<Byte8>(current.g));
2662 current.r = UnpackHigh(As<Byte8>(current.r), As<Byte8>(current.g));
2663 current.g = current.b;
2664 current.b = As<Short4>(UnpackLow(current.b, current.r));
2665 current.g = As<Short4>(UnpackHigh(current.g, current.r));
2666 }
2667 else
2668 {
2669 current.r = As<Short4>(As<UShort4>(current.r) >> 8);
2670 current.g = As<Short4>(As<UShort4>(current.g) >> 8);
2671 current.b = As<Short4>(As<UShort4>(current.b) >> 8);
2672 current.a = As<Short4>(As<UShort4>(current.a) >> 8);
2673
2674 current.b = As<Short4>(Pack(As<UShort4>(current.b), As<UShort4>(current.r)));
2675 current.g = As<Short4>(Pack(As<UShort4>(current.g), As<UShort4>(current.a)));
2676
2677 current.r = current.b;
2678 current.b = UnpackLow(As<Byte8>(current.b), As<Byte8>(current.g));
2679 current.r = UnpackHigh(As<Byte8>(current.r), As<Byte8>(current.g));
2680 current.g = current.b;
2681 current.b = As<Short4>(UnpackLow(current.b, current.r));
2682 current.g = As<Short4>(UnpackHigh(current.g, current.r));
2683 }
2684 break;
2685 case FORMAT_G16R16:
2686 current.b = current.r;
2687 current.r = As<Short4>(UnpackLow(current.r, current.g));
2688 current.b = As<Short4>(UnpackHigh(current.b, current.g));
2689 current.g = current.b;
2690 break;
2691 case FORMAT_A16B16G16R16:
2692 transpose4x4(current.r, current.g, current.b, current.a);
2693 break;
2694 case FORMAT_R32F:
2695 case FORMAT_G32R32F:
2696 case FORMAT_A32B32G32R32F:
2697 {
2698 Color4f oC;
2699
2700 oC.r = convertUnsigned16(UShort4(current.r));
2701 oC.g = convertUnsigned16(UShort4(current.g));
2702 oC.b = convertUnsigned16(UShort4(current.b));
2703 oC.a = convertUnsigned16(UShort4(current.a));
2704
2705 writeColor(r, index, cBuffer, x, oC, sMask, zMask, cMask);
2706 }
2707 return;
2708 default:
2709 ASSERT(false);
2710 }
2711
2712 Short4 c01 = current.b;
2713 Short4 c23 = current.g;
2714
2715 Int xMask; // Combination of all masks
2716
2717 if(state.depthTestActive)
2718 {
2719 xMask = zMask;
2720 }
2721 else
2722 {
2723 xMask = cMask;
2724 }
2725
2726 if(state.stencilActive)
2727 {
2728 xMask &= sMask;
2729 }
2730
2731 Pointer<Byte> buffer;
2732 Short4 value;
2733
2734 switch(state.targetFormat[index])
2735 {
2736 case FORMAT_A8G8R8B8Q:
2737 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
2738 UNIMPLEMENTED();
2739 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
2740
2741 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
2742 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
2743 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2744 // {
2745 // Short4 masked = value;
2746 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2747 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2748 // c01 |= masked;
2749 // }
2750
2751 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2752 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2753 // c01 |= value;
2754 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
2755
2756 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
2757
2758 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
2759 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
2760 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2761 // {
2762 // Short4 masked = value;
2763 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2764 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2765 // c23 |= masked;
2766 // }
2767
2768 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2769 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2770 // c23 |= value;
2771 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
2772 break;
2773 case FORMAT_A8R8G8B8:
2774 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
2775 buffer = cBuffer + x * 4;
2776 value = *Pointer<Short4>(buffer);
2777
2778 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
2779 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
2780 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2781 {
2782 Short4 masked = value;
2783 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2784 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2785 c01 |= masked;
2786 }
2787
2788 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2789 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2790 c01 |= value;
2791 *Pointer<Short4>(buffer) = c01;
2792
2793 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2794 value = *Pointer<Short4>(buffer);
2795
2796 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
2797 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
2798 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2799 {
2800 Short4 masked = value;
2801 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2802 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2803 c23 |= masked;
2804 }
2805
2806 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2807 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2808 c23 |= value;
2809 *Pointer<Short4>(buffer) = c23;
2810 break;
2811 case FORMAT_G16R16:
2812 buffer = cBuffer + 4 * x;
2813
2814 value = *Pointer<Short4>(buffer);
2815
2816 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2817 {
2818 Short4 masked = value;
2819 current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
2820 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
2821 current.r |= masked;
2822 }
2823
2824 current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2825 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2826 current.r |= value;
2827 *Pointer<Short4>(buffer) = current.r;
2828
2829 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2830
2831 value = *Pointer<Short4>(buffer);
2832
2833 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2834 {
2835 Short4 masked = value;
2836 current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
2837 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
2838 current.g |= masked;
2839 }
2840
2841 current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2842 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2843 current.g |= value;
2844 *Pointer<Short4>(buffer) = current.g;
2845 break;
2846 case FORMAT_A16B16G16R16:
2847 buffer = cBuffer + 8 * x;
2848
2849 {
2850 value = *Pointer<Short4>(buffer);
2851
2852 if(rgbaWriteMask != 0x0000000F)
2853 {
2854 Short4 masked = value;
2855 current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
2856 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
2857 current.r |= masked;
2858 }
2859
2860 current.r &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
2861 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
2862 current.r |= value;
2863 *Pointer<Short4>(buffer) = current.r;
2864 }
2865
2866 {
2867 value = *Pointer<Short4>(buffer + 8);
2868
2869 if(rgbaWriteMask != 0x0000000F)
2870 {
2871 Short4 masked = value;
2872 current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
2873 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
2874 current.g |= masked;
2875 }
2876
2877 current.g &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
2878 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
2879 current.g |= value;
2880 *Pointer<Short4>(buffer + 8) = current.g;
2881 }
2882
2883 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2884
2885 {
2886 value = *Pointer<Short4>(buffer);
2887
2888 if(rgbaWriteMask != 0x0000000F)
2889 {
2890 Short4 masked = value;
2891 current.b &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
2892 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
2893 current.b |= masked;
2894 }
2895
2896 current.b &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
2897 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
2898 current.b |= value;
2899 *Pointer<Short4>(buffer) = current.b;
2900 }
2901
2902 {
2903 value = *Pointer<Short4>(buffer + 8);
2904
2905 if(rgbaWriteMask != 0x0000000F)
2906 {
2907 Short4 masked = value;
2908 current.a &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
2909 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
2910 current.a |= masked;
2911 }
2912
2913 current.a &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
2914 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
2915 current.a |= value;
2916 *Pointer<Short4>(buffer + 8) = current.a;
2917 }
2918 break;
2919 default:
2920 ASSERT(false);
2921 }
2922 }
2923
2924 void PixelRoutine::blendFactor(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorActive)
2925 {
2926 switch(blendFactorActive)
2927 {
2928 case Context::BLEND_ZERO:
2929 // Optimized
2930 break;
2931 case Context::BLEND_ONE:
2932 // Optimized
2933 break;
2934 case Context::BLEND_SOURCE:
2935 blendFactor.r = oC.r;
2936 blendFactor.g = oC.g;
2937 blendFactor.b = oC.b;
2938 break;
2939 case Context::BLEND_INVSOURCE:
2940 blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.r;
2941 blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.g;
2942 blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.b;
2943 break;
2944 case Context::BLEND_DEST:
2945 blendFactor.r = pixel.r;
2946 blendFactor.g = pixel.g;
2947 blendFactor.b = pixel.b;
2948 break;
2949 case Context::BLEND_INVDEST:
2950 blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.r;
2951 blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.g;
2952 blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.b;
2953 break;
2954 case Context::BLEND_SOURCEALPHA:
2955 blendFactor.r = oC.a;
2956 blendFactor.g = oC.a;
2957 blendFactor.b = oC.a;
2958 break;
2959 case Context::BLEND_INVSOURCEALPHA:
2960 blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a;
2961 blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a;
2962 blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a;
2963 break;
2964 case Context::BLEND_DESTALPHA:
2965 blendFactor.r = pixel.a;
2966 blendFactor.g = pixel.a;
2967 blendFactor.b = pixel.a;
2968 break;
2969 case Context::BLEND_INVDESTALPHA:
2970 blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
2971 blendFactor.g = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
2972 blendFactor.b = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
2973 break;
2974 case Context::BLEND_SRCALPHASAT:
2975 blendFactor.r = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
2976 blendFactor.r = Min(blendFactor.r, oC.a);
2977 blendFactor.g = blendFactor.r;
2978 blendFactor.b = blendFactor.r;
2979 break;
2980 case Context::BLEND_CONSTANT:
2981 blendFactor.r = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0]));
2982 blendFactor.g = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1]));
2983 blendFactor.b = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2]));
2984 break;
2985 case Context::BLEND_INVCONSTANT:
2986 blendFactor.r = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
2987 blendFactor.g = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
2988 blendFactor.b = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
2989 break;
2990 default:
2991 ASSERT(false);
2992 }
2993 }
2994
2995 void PixelRoutine::blendFactorAlpha(Registers &r, const Color4f &blendFactor, const Color4f &oC, const Color4f &pixel, Context::BlendFactor blendFactorAlphaActive)
2996 {
2997 switch(blendFactorAlphaActive)
2998 {
2999 case Context::BLEND_ZERO:
3000 // Optimized
3001 break;
3002 case Context::BLEND_ONE:
3003 // Optimized
3004 break;
3005 case Context::BLEND_SOURCE:
3006 blendFactor.a = oC.a;
3007 break;
3008 case Context::BLEND_INVSOURCE:
3009 blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a;
3010 break;
3011 case Context::BLEND_DEST:
3012 blendFactor.a = pixel.a;
3013 break;
3014 case Context::BLEND_INVDEST:
3015 blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
3016 break;
3017 case Context::BLEND_SOURCEALPHA:
3018 blendFactor.a = oC.a;
3019 break;
3020 case Context::BLEND_INVSOURCEALPHA:
3021 blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - oC.a;
3022 break;
3023 case Context::BLEND_DESTALPHA:
3024 blendFactor.a = pixel.a;
3025 break;
3026 case Context::BLEND_INVDESTALPHA:
3027 blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f) - pixel.a;
3028 break;
3029 case Context::BLEND_SRCALPHASAT:
3030 blendFactor.a = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3031 break;
3032 case Context::BLEND_CONSTANT:
3033 blendFactor.a = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3]));
3034 break;
3035 case Context::BLEND_INVCONSTANT:
3036 blendFactor.a = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
3037 break;
3038 default:
3039 ASSERT(false);
3040 }
3041 }
3042
3043 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Color4f &oC, Int &x)
3044 {
3045 if(!state.alphaBlendActive)
3046 {
3047 return;
3048 }
3049
3050 Pointer<Byte> buffer;
3051 Color4f pixel;
3052
3053 Color4i color;
3054 Short4 c01;
3055 Short4 c23;
3056
3057 // Read pixel
3058 switch(state.targetFormat[index])
3059 {
3060 case FORMAT_A8R8G8B8:
3061 buffer = cBuffer + 4 * x;
3062 c01 = *Pointer<Short4>(buffer);
3063 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3064 c23 = *Pointer<Short4>(buffer);
3065 color.b = c01;
3066 color.g = c01;
3067 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(c23));
3068 color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(c23));
3069 color.r = color.b;
3070 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.g));
3071 color.r = UnpackHigh(As<Byte8>(color.r), As<Byte8>(color.g));
3072 color.g = color.b;
3073 color.a = color.r;
3074 color.r = UnpackLow(As<Byte8>(color.r), As<Byte8>(color.r));
3075 color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(color.g));
3076 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.b));
3077 color.a = UnpackHigh(As<Byte8>(color.a), As<Byte8>(color.a));
3078
3079 pixel.r = convertUnsigned16(As<UShort4>(color.r));
3080 pixel.g = convertUnsigned16(As<UShort4>(color.g));
3081 pixel.b = convertUnsigned16(As<UShort4>(color.b));
3082 pixel.a = convertUnsigned16(As<UShort4>(color.a));
3083 break;
3084 case FORMAT_X8R8G8B8:
3085 buffer = cBuffer + 4 * x;
3086 c01 = *Pointer<Short4>(buffer);
3087 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3088 c23 = *Pointer<Short4>(buffer);
3089 color.b = c01;
3090 color.g = c01;
3091 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(c23));
3092 color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(c23));
3093 color.r = color.b;
3094 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.g));
3095 color.r = UnpackHigh(As<Byte8>(color.r), As<Byte8>(color.g));
3096 color.g = color.b;
3097 color.r = UnpackLow(As<Byte8>(color.r), As<Byte8>(color.r));
3098 color.g = UnpackHigh(As<Byte8>(color.g), As<Byte8>(color.g));
3099 color.b = UnpackLow(As<Byte8>(color.b), As<Byte8>(color.b));
3100
3101 pixel.r = convertUnsigned16(As<UShort4>(color.r));
3102 pixel.g = convertUnsigned16(As<UShort4>(color.g));
3103 pixel.b = convertUnsigned16(As<UShort4>(color.b));
3104 pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3105 break;
3106 case FORMAT_A8G8R8B8Q:
3107UNIMPLEMENTED();
3108 // UnpackLow(pixel.b, qword_ptr [cBuffer+8*x+0]);
3109 // UnpackHigh(pixel.r, qword_ptr [cBuffer+8*x+0]);
3110 // UnpackLow(pixel.g, qword_ptr [cBuffer+8*x+8]);
3111 // UnpackHigh(pixel.a, qword_ptr [cBuffer+8*x+8]);
3112 break;
3113 case FORMAT_X8G8R8B8Q:
3114UNIMPLEMENTED();
3115 // UnpackLow(pixel.b, qword_ptr [cBuffer+8*x+0]);
3116 // UnpackHigh(pixel.r, qword_ptr [cBuffer+8*x+0]);
3117 // UnpackLow(pixel.g, qword_ptr [cBuffer+8*x+8]);
3118 // pixel.a = Short4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF);
3119 break;
3120 case FORMAT_A16B16G16R16:
3121 buffer = cBuffer;
3122 color.r = *Pointer<Short4>(buffer + 8 * x);
3123 color.g = *Pointer<Short4>(buffer + 8 * x + 8);
3124 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3125 color.b = *Pointer<Short4>(buffer + 8 * x);
3126 color.a = *Pointer<Short4>(buffer + 8 * x + 8);
3127
3128 transpose4x4(color.r, color.g, color.b, color.a);
3129
3130 pixel.r = convertUnsigned16(As<UShort4>(color.r));
3131 pixel.g = convertUnsigned16(As<UShort4>(color.g));
3132 pixel.b = convertUnsigned16(As<UShort4>(color.b));
3133 pixel.a = convertUnsigned16(As<UShort4>(color.a));
3134 break;
3135 case FORMAT_G16R16:
3136 buffer = cBuffer;
3137 color.r = *Pointer<Short4>(buffer + 4 * x);
3138 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3139 color.g = *Pointer<Short4>(buffer + 4 * x);
3140 color.b = color.r;
3141 color.r = As<Short4>(UnpackLow(color.r, color.g));
3142 color.b = As<Short4>(UnpackHigh(color.b, color.g));
3143 color.g = color.b;
3144 color.r = As<Short4>(UnpackLow(color.r, color.b));
3145 color.g = As<Short4>(UnpackHigh(color.g, color.b));
3146
3147 pixel.r = convertUnsigned16(As<UShort4>(color.r));
3148 pixel.g = convertUnsigned16(As<UShort4>(color.g));
3149 pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3150 pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3151 break;
3152 case FORMAT_R32F:
3153 buffer = cBuffer;
3154 // FIXME: movlps
3155 pixel.r.x = *Pointer<Float>(buffer + 4 * x + 0);
3156 pixel.r.y = *Pointer<Float>(buffer + 4 * x + 4);
3157 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3158 // FIXME: movhps
3159 pixel.r.z = *Pointer<Float>(buffer + 4 * x + 0);
3160 pixel.r.w = *Pointer<Float>(buffer + 4 * x + 4);
3161 pixel.g = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3162 pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3163 pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3164 break;
3165 case FORMAT_G32R32F:
3166 buffer = cBuffer;
3167 pixel.r = *Pointer<Float4>(buffer + 8 * x, 16);
3168 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3169 pixel.g = *Pointer<Float4>(buffer + 8 * x, 16);
3170 pixel.b = pixel.r;
3171 pixel.r = ShuffleLowHigh(pixel.r, pixel.g, 0x88);
3172 pixel.b = ShuffleLowHigh(pixel.b, pixel.g, 0xDD);
3173 pixel.g = pixel.b;
3174 pixel.b = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3175 pixel.a = Float4(1.0f, 1.0f, 1.0f, 1.0f);
3176 break;
3177 case FORMAT_A32B32G32R32F:
3178 buffer = cBuffer;
3179 pixel.r = *Pointer<Float4>(buffer + 16 * x, 16);
3180 pixel.g = *Pointer<Float4>(buffer + 16 * x + 16, 16);
3181 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3182 pixel.b = *Pointer<Float4>(buffer + 16 * x, 16);
3183 pixel.a = *Pointer<Float4>(buffer + 16 * x + 16, 16);
3184 transpose4x4(pixel.r, pixel.g, pixel.b, pixel.a);
3185 break;
3186 default:
3187 ASSERT(false);
3188 }
3189
3190 if(postBlendSRGB && state.writeSRGB)
3191 {
3192 sRGBtoLinear(pixel.r);
3193 sRGBtoLinear(pixel.g);
3194 sRGBtoLinear(pixel.b);
3195 }
3196
3197 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
3198 Color4f sourceFactor;
3199 Color4f destFactor;
3200
3201 blendFactor(r, sourceFactor, oC, pixel, (Context::BlendFactor)state.sourceBlendFactor);
3202 blendFactor(r, destFactor, oC, pixel, (Context::BlendFactor)state.destBlendFactor);
3203
3204 if(state.sourceBlendFactor != Context::BLEND_ONE && state.sourceBlendFactor != Context::BLEND_ZERO)
3205 {
3206 oC.r *= sourceFactor.r;
3207 oC.g *= sourceFactor.g;
3208 oC.b *= sourceFactor.b;
3209 }
3210
3211 if(state.destBlendFactor != Context::BLEND_ONE && state.destBlendFactor != Context::BLEND_ZERO)
3212 {
3213 pixel.r *= destFactor.r;
3214 pixel.g *= destFactor.g;
3215 pixel.b *= destFactor.b;
3216 }
3217
3218 switch(state.blendOperation)
3219 {
3220 case Context::BLENDOP_ADD:
3221 oC.r += pixel.r;
3222 oC.g += pixel.g;
3223 oC.b += pixel.b;
3224 break;
3225 case Context::BLENDOP_SUB:
3226 oC.r -= pixel.r;
3227 oC.g -= pixel.g;
3228 oC.b -= pixel.b;
3229 break;
3230 case Context::BLENDOP_INVSUB:
3231 oC.r = pixel.r - oC.r;
3232 oC.g = pixel.g - oC.g;
3233 oC.b = pixel.b - oC.b;
3234 break;
3235 case Context::BLENDOP_MIN:
3236 oC.r = Min(oC.r, pixel.r);
3237 oC.g = Min(oC.g, pixel.g);
3238 oC.b = Min(oC.b, pixel.b);
3239 break;
3240 case Context::BLENDOP_MAX:
3241 oC.r = Max(oC.r, pixel.r);
3242 oC.g = Max(oC.g, pixel.g);
3243 oC.b = Max(oC.b, pixel.b);
3244 break;
3245 case Context::BLENDOP_SOURCE:
3246 // No operation
3247 break;
3248 case Context::BLENDOP_DEST:
3249 oC.r = pixel.r;
3250 oC.g = pixel.g;
3251 oC.b = pixel.b;
3252 break;
3253 case Context::BLENDOP_NULL:
3254 oC.r = Float4(0.0f, 0.0f, 0.0f, 0.0f);
3255 oC.g = Float4(0.0f, 0.0f, 0.0f, 0.0f);
3256 oC.b = Float4(0.0f, 0.0f, 0.0f, 0.0f);
3257 break;
3258 default:
3259 ASSERT(false);
3260 }
3261
3262 blendFactorAlpha(r, sourceFactor, oC, pixel, (Context::BlendFactor)state.sourceBlendFactorAlpha);
3263 blendFactorAlpha(r, destFactor, oC, pixel, (Context::BlendFactor)state.destBlendFactorAlpha);
3264
3265 if(state.sourceBlendFactorAlpha != Context::BLEND_ONE && state.sourceBlendFactorAlpha != Context::BLEND_ZERO)
3266 {
3267 oC.a *= sourceFactor.a;
3268 }
3269
3270 if(state.destBlendFactorAlpha != Context::BLEND_ONE && state.destBlendFactorAlpha != Context::BLEND_ZERO)
3271 {
3272 pixel.a *= destFactor.a;
3273 }
3274
3275 switch(state.blendOperationAlpha)
3276 {
3277 case Context::BLENDOP_ADD:
3278 oC.a += pixel.a;
3279 break;
3280 case Context::BLENDOP_SUB:
3281 oC.a -= pixel.a;
3282 break;
3283 case Context::BLENDOP_INVSUB:
3284 pixel.a -= oC.a;
3285 oC.a = pixel.a;
3286 break;
3287 case Context::BLENDOP_MIN:
3288 oC.a = Min(oC.a, pixel.a);
3289 break;
3290 case Context::BLENDOP_MAX:
3291 oC.a = Max(oC.a, pixel.a);
3292 break;
3293 case Context::BLENDOP_SOURCE:
3294 // No operation
3295 break;
3296 case Context::BLENDOP_DEST:
3297 oC.a = pixel.a;
3298 break;
3299 case Context::BLENDOP_NULL:
3300 oC.a = Float4(0.0f, 0.0f, 0.0f, 0.0f);
3301 break;
3302 default:
3303 ASSERT(false);
3304 }
3305 }
3306
3307 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Color4f &oC, Int &sMask, Int &zMask, Int &cMask)
3308 {
3309 if(!state.colorWriteActive(index))
3310 {
3311 return;
3312 }
3313
3314 Color4i color;
3315
3316 switch(state.targetFormat[index])
3317 {
3318 case FORMAT_X8R8G8B8:
3319 case FORMAT_A8R8G8B8:
3320 case FORMAT_G16R16:
3321 case FORMAT_A16B16G16R16:
3322 convertFixed16(color, oC, true);
3323 writeColor(r, index, cBuffer, x, color, sMask, zMask, cMask);
3324 return;
3325 case FORMAT_R32F:
3326 break;
3327 case FORMAT_G32R32F:
3328 oC.b = oC.r;
3329 oC.r = UnpackLow(oC.r, oC.g);
3330 oC.b = UnpackHigh(oC.b, oC.g);
3331 oC.g = oC.b;
3332 break;
3333 case FORMAT_A32B32G32R32F:
3334 transpose4x4(oC.r, oC.g, oC.b, oC.a);
3335 break;
3336 default:
3337 ASSERT(false);
3338 }
3339
3340 int rgbaWriteMask = state.colorWriteActive(index);
3341
3342 Int xMask; // Combination of all masks
3343
3344 if(state.depthTestActive)
3345 {
3346 xMask = zMask;
3347 }
3348 else
3349 {
3350 xMask = cMask;
3351 }
3352
3353 if(state.stencilActive)
3354 {
3355 xMask &= sMask;
3356 }
3357
3358 Pointer<Byte> buffer;
3359 Float4 value;
3360
3361 switch(state.targetFormat[index])
3362 {
3363 case FORMAT_R32F:
3364 if(rgbaWriteMask & 0x00000001)
3365 {
3366 buffer = cBuffer + 4 * x;
3367
3368 // FIXME: movlps
3369 value.x = *Pointer<Float>(buffer + 0);
3370 value.y = *Pointer<Float>(buffer + 4);
3371
3372 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3373
3374 // FIXME: movhps
3375 value.z = *Pointer<Float>(buffer + 0);
3376 value.w = *Pointer<Float>(buffer + 4);
3377
3378 oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
3379 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
3380 oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value));
3381
3382 // FIXME: movhps
3383 *Pointer<Float>(buffer + 0) = oC.r.z;
3384 *Pointer<Float>(buffer + 4) = oC.r.w;
3385
3386 buffer -= *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3387
3388 // FIXME: movlps
3389 *Pointer<Float>(buffer + 0) = oC.r.x;
3390 *Pointer<Float>(buffer + 4) = oC.r.y;
3391 }
3392 break;
3393 case FORMAT_G32R32F:
3394 buffer = cBuffer + 8 * x;
3395
3396 value = *Pointer<Float4>(buffer);
3397
3398 if((rgbaWriteMask & 0x00000003) != 0x00000003)
3399 {
3400 Float4 masked = value;
3401 oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
3402 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
3403 oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(masked));
3404 }
3405
3406 oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
3407 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
3408 oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value));
3409 *Pointer<Float4>(buffer) = oC.r;
3410
3411 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3412
3413 value = *Pointer<Float4>(buffer);
3414
3415 if((rgbaWriteMask & 0x00000003) != 0x00000003)
3416 {
3417 Float4 masked;
3418
3419 masked = value;
3420 oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
3421 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
3422 oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(masked));
3423 }
3424
3425 oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
3426 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
3427 oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(value));
3428 *Pointer<Float4>(buffer) = oC.g;
3429 break;
3430 case FORMAT_A32B32G32R32F:
3431 buffer = cBuffer + 16 * x;
3432
3433 {
3434 value = *Pointer<Float4>(buffer, 16);
3435
3436 if(rgbaWriteMask != 0x0000000F)
3437 {
3438 Float4 masked = value;
3439 oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
3440 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
3441 oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(masked));
3442 }
3443
3444 oC.r = As<Float4>(As<Int4>(oC.r) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
3445 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
3446 oC.r = As<Float4>(As<Int4>(oC.r) | As<Int4>(value));
3447 *Pointer<Float4>(buffer, 16) = oC.r;
3448 }
3449
3450 {
3451 value = *Pointer<Float4>(buffer + 16, 16);
3452
3453 if(rgbaWriteMask != 0x0000000F)
3454 {
3455 Float4 masked = value;
3456 oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
3457 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
3458 oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(masked));
3459 }
3460
3461 oC.g = As<Float4>(As<Int4>(oC.g) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
3462 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
3463 oC.g = As<Float4>(As<Int4>(oC.g) | As<Int4>(value));
3464 *Pointer<Float4>(buffer + 16, 16) = oC.g;
3465 }
3466
3467 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3468
3469 {
3470 value = *Pointer<Float4>(buffer, 16);
3471
3472 if(rgbaWriteMask != 0x0000000F)
3473 {
3474 Float4 masked = value;
3475 oC.b = As<Float4>(As<Int4>(oC.b) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
3476 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
3477 oC.b = As<Float4>(As<Int4>(oC.b) | As<Int4>(masked));
3478 }
3479
3480 oC.b = As<Float4>(As<Int4>(oC.b) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
3481 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
3482 oC.b = As<Float4>(As<Int4>(oC.b) | As<Int4>(value));
3483 *Pointer<Float4>(buffer, 16) = oC.b;
3484 }
3485
3486 {
3487 value = *Pointer<Float4>(buffer + 16, 16);
3488
3489 if(rgbaWriteMask != 0x0000000F)
3490 {
3491 Float4 masked = value;
3492 oC.a = As<Float4>(As<Int4>(oC.a) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
3493 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
3494 oC.a = As<Float4>(As<Int4>(oC.a) | As<Int4>(masked));
3495 }
3496
3497 oC.a = As<Float4>(As<Int4>(oC.a) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
3498 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
3499 oC.a = As<Float4>(As<Int4>(oC.a) | As<Int4>(value));
3500 *Pointer<Float4>(buffer + 16, 16) = oC.a;
3501 }
3502 break;
3503 default:
3504 ASSERT(false);
3505 }
3506 }
3507
3508 void PixelRoutine::ps_1_x(Registers &r, Int cMask[4])
3509 {
3510 int pad = 0; // Count number of texm3x3pad instructions
3511 Color4i dPairing; // Destination for first pairing instruction
3512
3513 for(int i = 0; i < pixelShader->getLength(); i++)
3514 {
3515 const ShaderInstruction *instruction = pixelShader->getInstruction(i);
3516 Op::Opcode opcode = instruction->getOpcode();
3517
3518 // #ifndef NDEBUG // FIXME: Centralize debug output control
3519 // pixelShader->printInstruction(i, "debug.txt");
3520 // #endif
3521
3522 if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB)
3523 {
3524 continue;
3525 }
3526
3527 const Dst &dst = instruction->getDestinationParameter();
3528 const Src &src0 = instruction->getSourceParameter(0);
3529 const Src &src1 = instruction->getSourceParameter(1);
3530 const Src &src2 = instruction->getSourceParameter(2);
3531 const Src &src3 = instruction->getSourceParameter(3);
3532
3533 bool pairing = i + 1 < pixelShader->getLength() && pixelShader->getInstruction(i + 1)->isCoissue(); // First instruction of pair
3534 bool coissue = instruction->isCoissue(); // Second instruction of pair
3535
3536 Color4i d;
3537 Color4i s0;
3538 Color4i s1;
3539 Color4i s2;
3540 Color4i s3;
3541
3542 if(src0.type != Src::PARAMETER_VOID) s0 = regi(r, src0);
3543 if(src1.type != Src::PARAMETER_VOID) s1 = regi(r, src1);
3544 if(src2.type != Src::PARAMETER_VOID) s2 = regi(r, src2);
3545 if(src3.type != Src::PARAMETER_VOID) s3 = regi(r, src3);
3546
3547 switch(opcode)
3548 {
3549 case Op::OPCODE_PS_1_0: break;
3550 case Op::OPCODE_PS_1_1: break;
3551 case Op::OPCODE_PS_1_2: break;
3552 case Op::OPCODE_PS_1_3: break;
3553 case Op::OPCODE_PS_1_4: break;
3554
3555 case Op::OPCODE_DEF: break;
3556
3557 case Op::OPCODE_NOP: break;
3558 case Op::OPCODE_MOV: MOV(d, s0); break;
3559 case Op::OPCODE_ADD: ADD(d, s0, s1); break;
3560 case Op::OPCODE_SUB: SUB(d, s0, s1); break;
3561 case Op::OPCODE_MAD: MAD(d, s0, s1, s2); break;
3562 case Op::OPCODE_MUL: MUL(d, s0, s1); break;
3563 case Op::OPCODE_DP3: DP3(d, s0, s1); break;
3564 case Op::OPCODE_DP4: DP4(d, s0, s1); break;
3565 case Op::OPCODE_LRP: LRP(d, s0, s1, s2); break;
3566 case Op::OPCODE_TEXCOORD:
3567 if(pixelShader->getVersion() < 0x0104)
3568 {
3569 TEXCOORD(d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index);
3570 }
3571 else
3572 {
3573 if((src0.swizzle & 0x30) == 0x20) // .xyz
3574 {
3575 TEXCRD(d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vz[2 + src0.index]), src0.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW);
3576 }
3577 else // .xyw
3578 {
3579 TEXCRD(d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vw[2 + src0.index]), src0.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW);
3580 }
3581 }
3582 break;
3583 case Op::OPCODE_TEXKILL:
3584 if(pixelShader->getVersion() < 0x0104)
3585 {
3586 TEXKILL(cMask, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]));
3587 }
3588 else if(pixelShader->getVersion() == 0x0104)
3589 {
3590 if(dst.type == Dst::PARAMETER_TEXTURE)
3591 {
3592 TEXKILL(cMask, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]));
3593 }
3594 else
3595 {
3596 TEXKILL(cMask, r.ri[dst.index]);
3597 }
3598 }
3599 else ASSERT(false);
3600 break;
3601 case Op::OPCODE_TEX:
3602 if(pixelShader->getVersion() < 0x0104)
3603 {
3604 TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, false);
3605 }
3606 else if(pixelShader->getVersion() == 0x0104)
3607 {
3608 if(src0.type == Src::PARAMETER_TEXTURE)
3609 {
3610 if((src0.swizzle & 0x30) == 0x20) // .xyz
3611 {
3612 TEX(r, d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vz[2 + src0.index]), dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW);
3613 }
3614 else // .xyw
3615 {
3616 TEX(r, d, Float4(r.vx[2 + src0.index]), Float4(r.vy[2 + src0.index]), Float4(r.vw[2 + src0.index]), dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW);
3617 }
3618 }
3619 else
3620 {
3621 TEXLD(r, d, s0, dst.index, src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DZ || src0.modifier == ShaderInstruction::SourceParameter::MODIFIER_DW);
3622 }
3623 }
3624 else ASSERT(false);
3625 break;
3626 case Op::OPCODE_TEXBEM: TEXBEM(r, d, s0, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index); break;
3627 case Op::OPCODE_TEXBEML: TEXBEML(r, d, s0, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index); break;
3628 case Op::OPCODE_TEXREG2AR: TEXREG2AR(r, d, s0, dst.index); break;
3629 case Op::OPCODE_TEXREG2GB: TEXREG2GB(r, d, s0, dst.index); break;
3630 case Op::OPCODE_TEXM3X2PAD: TEXM3X2PAD(r, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, 0, src0.modifier == Src::MODIFIER_SIGN); break;
3631 case Op::OPCODE_TEXM3X2TEX: TEXM3X2TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, src0.modifier == Src::MODIFIER_SIGN); break;
3632 case Op::OPCODE_TEXM3X3PAD: TEXM3X3PAD(r, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, pad++ % 2, src0.modifier == Src::MODIFIER_SIGN); break;
3633 case Op::OPCODE_TEXM3X3TEX: TEXM3X3TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, src0.modifier == Src::MODIFIER_SIGN); break;
3634 case Op::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0, s1); break;
3635 case Op::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0); break;
3636 case Op::OPCODE_CND: CND(d, s0, s1, s2); break;
3637 case Op::OPCODE_TEXREG2RGB: TEXREG2RGB(r, d, s0, dst.index); break;
3638 case Op::OPCODE_TEXDP3TEX: TEXDP3TEX(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), dst.index, s0); break;
3639 case Op::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, src0.modifier == Src::MODIFIER_SIGN); break;
3640 case Op::OPCODE_TEXDP3: TEXDP3(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0); break;
3641 case Op::OPCODE_TEXM3X3: TEXM3X3(r, d, Float4(r.vx[2 + dst.index]), Float4(r.vy[2 + dst.index]), Float4(r.vz[2 + dst.index]), s0, src0.modifier == Src::MODIFIER_SIGN); break;
3642 case Op::OPCODE_TEXDEPTH: TEXDEPTH(r); break;
3643 case Op::OPCODE_CMP: CMP(d, s0, s1, s2); break;
3644 case Op::OPCODE_BEM: BEM(r, d, s0, s1, dst.index); break;
3645 case Op::OPCODE_PHASE: break;
3646 case Op::OPCODE_END: break;
3647 default:
3648 ASSERT(false);
3649 }
3650
3651 if(dst.type != Dst::PARAMETER_VOID && opcode != Op::OPCODE_TEXKILL)
3652 {
3653 if(dst.shift > 0)
3654 {
3655 if(dst.mask & 0x1) {d.r = AddSat(d.r, d.r); if(dst.shift > 1) d.r = AddSat(d.r, d.r); if(dst.shift > 2) d.r = AddSat(d.r, d.r);}
3656 if(dst.mask & 0x2) {d.g = AddSat(d.g, d.g); if(dst.shift > 1) d.g = AddSat(d.g, d.g); if(dst.shift > 2) d.g = AddSat(d.g, d.g);}
3657 if(dst.mask & 0x4) {d.b = AddSat(d.b, d.b); if(dst.shift > 1) d.b = AddSat(d.b, d.b); if(dst.shift > 2) d.b = AddSat(d.b, d.b);}
3658 if(dst.mask & 0x8) {d.a = AddSat(d.a, d.a); if(dst.shift > 1) d.a = AddSat(d.a, d.a); if(dst.shift > 2) d.a = AddSat(d.a, d.a);}
3659 }
3660 else if(dst.shift < 0)
3661 {
3662 if(dst.mask & 0x1) d.r = d.r >> -dst.shift;
3663 if(dst.mask & 0x2) d.g = d.g >> -dst.shift;
3664 if(dst.mask & 0x4) d.b = d.b >> -dst.shift;
3665 if(dst.mask & 0x8) d.a = d.a >> -dst.shift;
3666 }
3667
3668 if(dst.saturate)
3669 {
3670 if(dst.mask & 0x1) {d.r = Min(d.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.r = Max(d.r, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3671 if(dst.mask & 0x2) {d.g = Min(d.g, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.g = Max(d.g, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3672 if(dst.mask & 0x4) {d.b = Min(d.b, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.b = Max(d.b, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3673 if(dst.mask & 0x8) {d.a = Min(d.a, Short4(0x1000, 0x1000, 0x1000, 0x1000)); d.a = Max(d.a, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3674 }
3675
3676 if(pairing)
3677 {
3678 if(dst.mask & 0x1) dPairing.r = d.r;
3679 if(dst.mask & 0x2) dPairing.g = d.g;
3680 if(dst.mask & 0x4) dPairing.b = d.b;
3681 if(dst.mask & 0x8) dPairing.a = d.a;
3682 }
3683
3684 if(coissue)
3685 {
3686 const Dst &dst = pixelShader->getInstruction(i - 1)->getDestinationParameter();
3687
3688 writeDestination(r, dPairing, dst);
3689 }
3690
3691 if(!pairing)
3692 {
3693 writeDestination(r, d, dst);
3694 }
3695 }
3696 }
3697 }
3698
3699 void PixelRoutine::ps_2_x(Registers &r, Int cMask[4])
3700 {
3701 r.enableIndex = 0;
3702 r.stackIndex = 0;
3703
3704 for(int i = 0; i < pixelShader->getLength(); i++)
3705 {
3706 const ShaderInstruction *instruction = pixelShader->getInstruction(i);
3707 Op::Opcode opcode = instruction->getOpcode();
3708
3709 // #ifndef NDEBUG // FIXME: Centralize debug output control
3710 // pixelShader->printInstruction(i, "debug.txt");
3711 // #endif
3712
3713 if(opcode == Op::OPCODE_DCL || opcode == Op::OPCODE_DEF || opcode == Op::OPCODE_DEFI || opcode == Op::OPCODE_DEFB)
3714 {
3715 continue;
3716 }
3717
3718 const Dst &dst = instruction->getDestinationParameter();
3719 const Src &src0 = instruction->getSourceParameter(0);
3720 const Src &src1 = instruction->getSourceParameter(1);
3721 const Src &src2 = instruction->getSourceParameter(2);
3722 const Src &src3 = instruction->getSourceParameter(3);
3723
3724 bool predicate = instruction->isPredicate();
3725 Control control = instruction->getControl();
3726 bool pp = dst.partialPrecision;
3727 bool project = instruction->isProject();
3728 bool bias = instruction->isBias();
3729
3730 Color4f d;
3731 Color4f s0;
3732 Color4f s1;
3733 Color4f s2;
3734 Color4f s3;
3735
3736 if(opcode == Op::OPCODE_TEXKILL)
3737 {
3738 if(dst.type == Dst::PARAMETER_TEXTURE)
3739 {
3740 d.x = r.vx[2 + dst.index];
3741 d.y = r.vy[2 + dst.index];
3742 d.z = r.vz[2 + dst.index];
3743 d.w = r.vw[2 + dst.index];
3744 }
3745 else
3746 {
3747 d = r.rf[dst.index];
3748 }
3749 }
3750
3751 if(src0.type != Src::PARAMETER_VOID) s0 = reg(r, src0);
3752 if(src1.type != Src::PARAMETER_VOID) s1 = reg(r, src1);
3753 if(src2.type != Src::PARAMETER_VOID) s2 = reg(r, src2);
3754 if(src3.type != Src::PARAMETER_VOID) s3 = reg(r, src3);
3755
3756 switch(opcode)
3757 {
3758 case Op::OPCODE_PS_2_0: break;
3759 case Op::OPCODE_PS_2_x: break;
3760 case Op::OPCODE_PS_3_0: break;
3761 case Op::OPCODE_DEF: break;
3762 case Op::OPCODE_DCL: break;
3763 case Op::OPCODE_NOP: break;
3764 case Op::OPCODE_MOV: mov(d, s0); break;
3765 case Op::OPCODE_ADD: add(d, s0, s1); break;
3766 case Op::OPCODE_SUB: sub(d, s0, s1); break;
3767 case Op::OPCODE_MUL: mul(d, s0, s1); break;
3768 case Op::OPCODE_MAD: mad(d, s0, s1, s2); break;
3769 case Op::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break;
3770 case Op::OPCODE_DP3: dp3(d, s0, s1); break;
3771 case Op::OPCODE_DP4: dp4(d, s0, s1); break;
3772 case Op::OPCODE_CMP: cmp(d, s0, s1, s2); break;
3773 case Op::OPCODE_FRC: frc(d, s0); break;
3774 case Op::OPCODE_EXP: exp(d, s0, pp); break;
3775 case Op::OPCODE_LOG: log(d, s0, pp); break;
3776 case Op::OPCODE_RCP: rcp(d, s0, pp); break;
3777 case Op::OPCODE_RSQ: rsq(d, s0, pp); break;
3778 case Op::OPCODE_MIN: min(d, s0, s1); break;
3779 case Op::OPCODE_MAX: max(d, s0, s1); break;
3780 case Op::OPCODE_LRP: lrp(d, s0, s1, s2); break;
3781 case Op::OPCODE_POW: pow(d, s0, s1, pp); break;
3782 case Op::OPCODE_CRS: crs(d, s0, s1); break;
3783 case Op::OPCODE_NRM: nrm(d, s0, pp); break;
3784 case Op::OPCODE_ABS: abs(d, s0); break;
3785 case Op::OPCODE_SINCOS: sincos(d, s0, pp); break;
3786 case Op::OPCODE_M4X4: M4X4(r, d, s0, src1); break;
3787 case Op::OPCODE_M4X3: M4X3(r, d, s0, src1); break;
3788 case Op::OPCODE_M3X4: M3X4(r, d, s0, src1); break;
3789 case Op::OPCODE_M3X3: M3X3(r, d, s0, src1); break;
3790 case Op::OPCODE_M3X2: M3X2(r, d, s0, src1); break;
3791 case Op::OPCODE_TEX: TEXLD(r, d, s0, src1, project, bias); break;
3792 case Op::OPCODE_TEXLDD: TEXLDD(r, d, s0, src1, s2, s3, project, bias); break;
3793 case Op::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1, project, bias); break;
3794 case Op::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break;
3795 case Op::OPCODE_DSX: DSX(d, s0); break;
3796 case Op::OPCODE_DSY: DSY(d, s0); break;
3797 case Op::OPCODE_BREAK: BREAK(r); break;
3798 case Op::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break;
3799 case Op::OPCODE_BREAKP: BREAKP(r, src0); break;
3800 case Op::OPCODE_CALL: CALL(r, dst.index); break;
3801 case Op::OPCODE_CALLNZ: CALLNZ(r, dst.index, src0); break;
3802 case Op::OPCODE_ELSE: ELSE(r); break;
3803 case Op::OPCODE_ENDIF: ENDIF(r); break;
3804 case Op::OPCODE_ENDLOOP: ENDLOOP(r); break;
3805 case Op::OPCODE_ENDREP: ENDREP(r); break;
3806 case Op::OPCODE_IF: IF(r, src0); break;
3807 case Op::OPCODE_IFC: IFC(r, s0, s1, control); break;
3808 case Op::OPCODE_LABEL: LABEL(dst.index); break;
3809 case Op::OPCODE_LOOP: LOOP(r, src1); break;
3810 case Op::OPCODE_REP: REP(r, src0); break;
3811 case Op::OPCODE_RET: RET(r); break;
3812 case Op::OPCODE_SETP: setp(d, s0, s1, control); break;
3813 case Op::OPCODE_END: break;
3814 default:
3815 ASSERT(false);
3816 }
3817
3818 if(dst.type != Dst::PARAMETER_VOID && dst.type != Dst::PARAMETER_LABEL && opcode != Op::OPCODE_TEXKILL)
3819 {
3820 if(dst.saturate)
3821 {
3822 if(dst.x) d.r = Max(d.r, Float4(0.0f, 0.0f, 0.0f, 0.0f));
3823 if(dst.y) d.g = Max(d.g, Float4(0.0f, 0.0f, 0.0f, 0.0f));
3824 if(dst.z) d.b = Max(d.b, Float4(0.0f, 0.0f, 0.0f, 0.0f));
3825 if(dst.w) d.a = Max(d.a, Float4(0.0f, 0.0f, 0.0f, 0.0f));
3826
3827 if(dst.x) d.r = Min(d.r, Float4(1.0f, 1.0f, 1.0f, 1.0f));
3828 if(dst.y) d.g = Min(d.g, Float4(1.0f, 1.0f, 1.0f, 1.0f));
3829 if(dst.z) d.b = Min(d.b, Float4(1.0f, 1.0f, 1.0f, 1.0f));
3830 if(dst.w) d.a = Min(d.a, Float4(1.0f, 1.0f, 1.0f, 1.0f));
3831 }
3832
3833 if(pixelShader->containsDynamicBranching())
3834 {
3835 Color4f pDst; // FIXME: Rename
3836
3837 switch(dst.type)
3838 {
3839 case Dst::PARAMETER_TEMP:
3840 if(dst.x) pDst.x = r.rf[dst.index].x;
3841 if(dst.y) pDst.y = r.rf[dst.index].y;
3842 if(dst.z) pDst.z = r.rf[dst.index].z;
3843 if(dst.w) pDst.w = r.rf[dst.index].w;
3844 break;
3845 case Dst::PARAMETER_COLOROUT:
3846 if(dst.x) pDst.x = r.oC[dst.index].x;
3847 if(dst.y) pDst.y = r.oC[dst.index].y;
3848 if(dst.z) pDst.z = r.oC[dst.index].z;
3849 if(dst.w) pDst.w = r.oC[dst.index].w;
3850 break;
3851 case Dst::PARAMETER_PREDICATE:
3852 if(dst.x) pDst.x = r.p0.x;
3853 if(dst.y) pDst.y = r.p0.y;
3854 if(dst.z) pDst.z = r.p0.z;
3855 if(dst.w) pDst.w = r.p0.w;
3856 break;
3857 case Dst::PARAMETER_DEPTHOUT:
3858 pDst.x = r.oDepth;
3859 break;
3860 default:
3861 ASSERT(false);
3862 }
3863
3864 Int4 enable = r.enableStack[r.enableIndex] & r.enableBreak;
3865
3866 Int4 xEnable = enable;
3867 Int4 yEnable = enable;
3868 Int4 zEnable = enable;
3869 Int4 wEnable = enable;
3870
3871 if(predicate)
3872 {
3873 unsigned char pSwizzle = instruction->getPredicateSwizzle();
3874
3875 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
3876 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
3877 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
3878 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
3879
3880 if(!instruction->isPredicateNot())
3881 {
3882 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
3883 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
3884 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
3885 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
3886 }
3887 else
3888 {
3889 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
3890 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
3891 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
3892 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
3893 }
3894 }
3895
3896 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
3897 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
3898 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
3899 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
3900
3901 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
3902 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
3903 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
3904 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
3905 }
3906
3907 switch(dst.type)
3908 {
3909 case Dst::PARAMETER_TEMP:
3910 if(dst.x) r.rf[dst.index].x = d.x;
3911 if(dst.y) r.rf[dst.index].y = d.y;
3912 if(dst.z) r.rf[dst.index].z = d.z;
3913 if(dst.w) r.rf[dst.index].w = d.w;
3914 break;
3915 case Dst::PARAMETER_COLOROUT:
3916 if(dst.x) r.oC[dst.index].x = d.x;
3917 if(dst.y) r.oC[dst.index].y = d.y;
3918 if(dst.z) r.oC[dst.index].z = d.z;
3919 if(dst.w) r.oC[dst.index].w = d.w;
3920 break;
3921 case Dst::PARAMETER_PREDICATE:
3922 if(dst.x) r.p0.x = d.x;
3923 if(dst.y) r.p0.y = d.y;
3924 if(dst.z) r.p0.z = d.z;
3925 if(dst.w) r.p0.w = d.w;
3926 break;
3927 case Dst::PARAMETER_DEPTHOUT:
3928 r.oDepth = d.x;
3929 break;
3930 default:
3931 ASSERT(false);
3932 }
3933 }
3934 }
3935
3936 if(returns)
3937 {
3938 Nucleus::setInsertBlock(returnBlock);
3939 }
3940 }
3941
3942 Short4 PixelRoutine::convertFixed12(Float4 &cf)
3943 {
3944 return RoundShort4(cf * Float4(0x1000, 0x1000, 0x1000, 0x1000));
3945 }
3946
3947 void PixelRoutine::convertFixed12(Color4i &ci, Color4f &cf)
3948 {
3949 ci.r = convertFixed12(cf.r);
3950 ci.g = convertFixed12(cf.g);
3951 ci.b = convertFixed12(cf.b);
3952 ci.a = convertFixed12(cf.a);
3953 }
3954
3955 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
3956 {
3957 return UShort4(cf * Float4(0xFFFF, 0xFFFF, 0xFFFF, 0xFFFF), saturate);
3958 }
3959
3960 void PixelRoutine::convertFixed16(Color4i &ci, Color4f &cf, bool saturate)
3961 {
3962 ci.r = convertFixed16(cf.r, saturate);
3963 ci.g = convertFixed16(cf.g, saturate);
3964 ci.b = convertFixed16(cf.b, saturate);
3965 ci.a = convertFixed16(cf.a, saturate);
3966 }
3967
3968 Float4 PixelRoutine::convertSigned12(Short4 &ci)
3969 {
3970 return Float4(ci) * Float4(1.0f / 0x0FFE);
3971 }
3972
3973 void PixelRoutine::convertSigned12(Color4f &cf, Color4i &ci)
3974 {
3975 cf.r = convertSigned12(ci.r);
3976 cf.g = convertSigned12(ci.g);
3977 cf.b = convertSigned12(ci.b);
3978 cf.a = convertSigned12(ci.a);
3979 }
3980
3981 Float4 PixelRoutine::convertUnsigned16(UShort4 ci)
3982 {
3983 return Float4(ci) * Float4(1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF, 1.0f / 0xFFFF);
3984 }
3985
3986 void PixelRoutine::sRGBtoLinear16_16(Registers &r, Color4i &c)
3987 {
3988 c.r = As<UShort4>(c.r) >> 4;
3989 c.g = As<UShort4>(c.g) >> 4;
3990 c.b = As<UShort4>(c.b) >> 4;
3991
3992 sRGBtoLinear12_16(r, c);
3993 }
3994
3995 void PixelRoutine::sRGBtoLinear12_16(Registers &r, Color4i &c)
3996 {
3997 Pointer<Byte> LUT = r.constants + OFFSET(Constants,sRGBtoLin12_16);
3998
3999 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 0))), 0);
4000 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 1))), 1);
4001 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 2))), 2);
4002 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 3))), 3);
4003
4004 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 0))), 0);
4005 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 1))), 1);
4006 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 2))), 2);
4007 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 3))), 3);
4008
4009 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 0))), 0);
4010 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 1))), 1);
4011 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 2))), 2);
4012 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 3))), 3);
4013 }
4014
4015 void PixelRoutine::linearToSRGB16_16(Registers &r, Color4i &c)
4016 {
4017 c.r = As<UShort4>(c.r) >> 4;
4018 c.g = As<UShort4>(c.g) >> 4;
4019 c.b = As<UShort4>(c.b) >> 4;
4020
4021 linearToSRGB12_16(r, c);
4022 }
4023
4024 void PixelRoutine::linearToSRGB12_16(Registers &r, Color4i &c)
4025 {
4026 Pointer<Byte> LUT = r.constants + OFFSET(Constants,linToSRGB12_16);
4027
4028 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 0))), 0);
4029 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 1))), 1);
4030 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 2))), 2);
4031 c.r = Insert(c.r, *Pointer<Short>(LUT + 2 * Int(Extract(c.r, 3))), 3);
4032
4033 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 0))), 0);
4034 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 1))), 1);
4035 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 2))), 2);
4036 c.g = Insert(c.g, *Pointer<Short>(LUT + 2 * Int(Extract(c.g, 3))), 3);
4037
4038 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 0))), 0);
4039 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 1))), 1);
4040 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 2))), 2);
4041 c.b = Insert(c.b, *Pointer<Short>(LUT + 2 * Int(Extract(c.b, 3))), 3);
4042 }
4043
4044 Float4 PixelRoutine::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
4045 {
4046 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
4047 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
4048
4049 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
4050 }
4051
4052 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
4053 {
4054 Float4 linear = x * x;
4055 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
4056
4057 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
4058 }
4059
4060 void PixelRoutine::MOV(Color4i &dst, Color4i &src0)
4061 {
4062 dst.r = src0.x;
4063 dst.g = src0.y;
4064 dst.b = src0.z;
4065 dst.a = src0.w;
4066 }
4067
4068 void PixelRoutine::ADD(Color4i &dst, Color4i &src0, Color4i &src1)
4069 {
4070 dst.r = AddSat(src0.x, src1.x);
4071 dst.g = AddSat(src0.y, src1.y);
4072 dst.b = AddSat(src0.z, src1.z);
4073 dst.a = AddSat(src0.w, src1.w);
4074 }
4075
4076 void PixelRoutine::SUB(Color4i &dst, Color4i &src0, Color4i &src1)
4077 {
4078 dst.r = SubSat(src0.x, src1.x);
4079 dst.g = SubSat(src0.y, src1.y);
4080 dst.b = SubSat(src0.z, src1.z);
4081 dst.a = SubSat(src0.w, src1.w);
4082 }
4083
4084 void PixelRoutine::MAD(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2)
4085 {
4086 // FIXME: Long fixed-point multiply fixup
4087 {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
4088 {dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
4089 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
4090 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
4091 }
4092
4093 void PixelRoutine::MUL(Color4i &dst, Color4i &src0, Color4i &src1)
4094 {
4095 // FIXME: Long fixed-point multiply fixup
4096 {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x);}
4097 {dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);}
4098 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z);}
4099 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w);}
4100 }
4101
4102 void PixelRoutine::DP3(Color4i &dst, Color4i &src0, Color4i &src1)
4103 {
4104 Short4 t0;
4105 Short4 t1;
4106
4107 // FIXME: Long fixed-point multiply fixup
4108 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
4109 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4110 t0 = AddSat(t0, t1);
4111 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4112 t0 = AddSat(t0, t1);
4113
4114 dst.r = t0;
4115 dst.g = t0;
4116 dst.b = t0;
4117 dst.a = t0;
4118 }
4119
4120 void PixelRoutine::DP4(Color4i &dst, Color4i &src0, Color4i &src1)
4121 {
4122 Short4 t0;
4123 Short4 t1;
4124
4125 // FIXME: Long fixed-point multiply fixup
4126 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
4127 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4128 t0 = AddSat(t0, t1);
4129 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4130 t0 = AddSat(t0, t1);
4131 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4132 t0 = AddSat(t0, t1);
4133
4134 dst.r = t0;
4135 dst.g = t0;
4136 dst.b = t0;
4137 dst.a = t0;
4138 }
4139
4140 void PixelRoutine::LRP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2)
4141 {
4142 // FIXME: Long fixed-point multiply fixup
4143 {dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
4144 {dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
4145 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
4146 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
4147 }
4148
4149 void PixelRoutine::TEXCOORD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
4150 {
4151 Float4 uw;
4152 Float4 vw;
4153 Float4 sw;
4154
4155 if(state.interpolant[2 + coordinate].component & 0x01)
4156 {
4157 uw = Max(u, Float4(0.0f, 0.0f, 0.0f, 0.0f));
4158 uw = Min(uw, Float4(1.0f, 1.0f, 1.0f, 1.0f));
4159 dst.r = convertFixed12(uw);
4160 }
4161 else
4162 {
4163 dst.r = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4164 }
4165
4166 if(state.interpolant[2 + coordinate].component & 0x02)
4167 {
4168 vw = Max(v, Float4(0.0f, 0.0f, 0.0f, 0.0f));
4169 vw = Min(vw, Float4(1.0f, 1.0f, 1.0f, 1.0f));
4170 dst.g = convertFixed12(vw);
4171 }
4172 else
4173 {
4174 dst.g = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4175 }
4176
4177 if(state.interpolant[2 + coordinate].component & 0x04)
4178 {
4179 sw = Max(s, Float4(0.0f, 0.0f, 0.0f, 0.0f));
4180 sw = Min(sw, Float4(1.0f, 1.0f, 1.0f, 1.0f));
4181 dst.b = convertFixed12(sw);
4182 }
4183 else
4184 {
4185 dst.b = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4186 }
4187
4188 dst.a = Short4(0x1000, 0x1000, 0x1000, 0x1000);
4189 }
4190
4191 void PixelRoutine::TEXCRD(Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
4192 {
4193 Float4 uw = u;
4194 Float4 vw = v;
4195 Float4 sw = s;
4196
4197 if(project)
4198 {
4199 uw *= Rcp_pp(s);
4200 vw *= Rcp_pp(s);
4201 }
4202
4203 if(state.interpolant[2 + coordinate].component & 0x01)
4204 {
4205 uw *= Float4(0x1000, 0x1000, 0x1000, 0x1000);
4206 uw = Max(uw, Float4(-0x8000, -0x8000, -0x8000, -0x8000));
4207 uw = Min(uw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF));
4208 dst.r = RoundShort4(uw);
4209 }
4210 else
4211 {
4212 dst.r = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4213 }
4214
4215 if(state.interpolant[2 + coordinate].component & 0x02)
4216 {
4217 vw *= Float4(0x1000, 0x1000, 0x1000, 0x1000);
4218 vw = Max(vw, Float4(-0x8000, -0x8000, -0x8000, -0x8000));
4219 vw = Min(vw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF));
4220 dst.g = RoundShort4(vw);
4221 }
4222 else
4223 {
4224 dst.g = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4225 }
4226
4227 if(state.interpolant[2 + coordinate].component & 0x04)
4228 {
4229 sw *= Float4(0x1000, 0x1000, 0x1000, 0x1000);
4230 sw = Max(sw, Float4(-0x8000, -0x8000, -0x8000, -0x8000));
4231 sw = Min(sw, Float4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF));
4232 dst.b = RoundShort4(sw);
4233 }
4234 else
4235 {
4236 dst.b = Short4(0x0000, 0x0000, 0x0000, 0x0000);
4237 }
4238 }
4239
4240 void PixelRoutine::TEXDP3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src)
4241 {
4242 TEXM3X3PAD(r, u, v, s, src, 0, false);
4243
4244 Short4 t0 = RoundShort4(r.u_ * Float4(0x1000, 0x1000, 0x1000, 0x1000));
4245
4246 dst.r = t0;
4247 dst.g = t0;
4248 dst.b = t0;
4249 dst.a = t0;
4250 }
4251
4252 void PixelRoutine::TEXDP3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0)
4253 {
4254 TEXM3X3PAD(r, u, v, s, src0, 0, false);
4255
4256 r.v_ = Float4(0.0f, 0.0f, 0.0f, 0.0f);
4257 r.w_ = Float4(0.0f, 0.0f, 0.0f, 0.0f);
4258
4259 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4260 }
4261
4262 void PixelRoutine::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
4263 {
4264 Int kill = SignMask(CmpNLT(u, Float4(0, 0, 0, 0))) &
4265 SignMask(CmpNLT(v, Float4(0, 0, 0, 0))) &
4266 SignMask(CmpNLT(s, Float4(0, 0, 0, 0)));
4267
4268 for(unsigned int q = 0; q < state.multiSample; q++)
4269 {
4270 cMask[q] &= kill;
4271 }
4272 }
4273
4274 void PixelRoutine::TEXKILL(Int cMask[4], Color4i &src)
4275 {
4276 Short4 test = src.r | src.g | src.b;
4277 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
4278
4279 for(unsigned int q = 0; q < state.multiSample; q++)
4280 {
4281 cMask[q] &= kill;
4282 }
4283 }
4284
4285 void PixelRoutine::TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
4286 {
4287 sampleTexture(r, dst, sampler, u, v, s, s, project);
4288 }
4289
4290 void PixelRoutine::TEXLD(Registers &r, Color4i &dst, Color4i &src, int sampler, bool project)
4291 {
4292 Float4 u = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4293 Float4 v = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4294 Float4 s = Float4(src.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4295
4296 sampleTexture(r, dst, sampler, u, v, s, s, project);
4297 }
4298
4299 void PixelRoutine::TEXBEM(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage)
4300 {
4301 Float4 du = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4302 Float4 dv = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4303
4304 Float4 du2 = du;
4305 Float4 dv2 = dv;
4306
4307 du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
4308 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
4309 du += dv2;
4310 dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
4311 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
4312 dv += du2;
4313
4314 Float4 u_ = u + du;
4315 Float4 v_ = v + dv;
4316
4317 sampleTexture(r, dst, stage, u_, v_, s, s);
4318 }
4319
4320 void PixelRoutine::TEXBEML(Registers &r, Color4i &dst, Color4i &src, Float4 &u, Float4 &v, Float4 &s, int stage)
4321 {
4322 Float4 du = Float4(src.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4323 Float4 dv = Float4(src.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4324
4325 Float4 du2 = du;
4326 Float4 dv2 = dv;
4327
4328 du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
4329 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
4330 du += dv2;
4331 dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
4332 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
4333 dv += du2;
4334
4335 Float4 u_ = u + du;
4336 Float4 v_ = v + dv;
4337
4338 sampleTexture(r, dst, stage, u_, v_, s, s);
4339
4340 Short4 L;
4341
4342 L = src.b;
4343 L = MulHigh(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
4344 L = L << 4;
4345 L = AddSat(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
4346 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
4347 L = Min(L, Short4(0x1000, 0x1000, 0x1000, 0x1000));
4348
4349 dst.r = MulHigh(dst.r, L); dst.r = dst.r << 4;
4350 dst.g = MulHigh(dst.g, L); dst.g = dst.g << 4;
4351 dst.b = MulHigh(dst.b, L); dst.b = dst.b << 4;
4352 }
4353
4354 void PixelRoutine::TEXREG2AR(Registers &r, Color4i &dst, Color4i &src0, int stage)
4355 {
4356 Float4 u = Float4(src0.a) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4357 Float4 v = Float4(src0.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4358 Float4 s = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4359
4360 sampleTexture(r, dst, stage, u, v, s, s);
4361 }
4362
4363 void PixelRoutine::TEXREG2GB(Registers &r, Color4i &dst, Color4i &src0, int stage)
4364 {
4365 Float4 u = Float4(src0.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4366 Float4 v = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4367 Float4 s = v;
4368
4369 sampleTexture(r, dst, stage, u, v, s, s);
4370 }
4371
4372 void PixelRoutine::TEXREG2RGB(Registers &r, Color4i &dst, Color4i &src0, int stage)
4373 {
4374 Float4 u = Float4(src0.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4375 Float4 v = Float4(src0.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4376 Float4 s = Float4(src0.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4377
4378 sampleTexture(r, dst, stage, u, v, s, s);
4379 }
4380
4381 void PixelRoutine::TEXM3X2DEPTH(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src, bool signedScaling)
4382 {
4383 TEXM3X2PAD(r, u, v, s, src, 1, signedScaling);
4384
4385 // z / w
4386 r.u_ *= Rcp_pp(r.v_); // FIXME: Set result to 1.0 when division by zero
4387
4388 r.oDepth = r.u_;
4389 }
4390
4391 void PixelRoutine::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling)
4392 {
4393 TEXM3X3PAD(r, u, v, s, src0, component, signedScaling);
4394 }
4395
4396 void PixelRoutine::TEXM3X2TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool signedScaling)
4397 {
4398 TEXM3X2PAD(r, u, v, s, src0, 1, signedScaling);
4399
4400 r.w_ = Float4(0.0f, 0.0f, 0.0f, 0.0f);
4401
4402 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4403 }
4404
4405 void PixelRoutine::TEXM3X3(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, bool signedScaling)
4406 {
4407 TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
4408
4409 dst.r = RoundShort4(r.u_ * Float4(0x1000, 0x1000, 0x1000, 0x1000));
4410 dst.g = RoundShort4(r.v_ * Float4(0x1000, 0x1000, 0x1000, 0x1000));
4411 dst.b = RoundShort4(r.w_ * Float4(0x1000, 0x1000, 0x1000, 0x1000));
4412 dst.a = Short4(0x1000, 0x1000, 0x1000, 0x1000);
4413 }
4414
4415 void PixelRoutine::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Color4i &src0, int component, bool signedScaling)
4416 {
4417 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
4418 {
4419 r.U = Float4(src0.r);
4420 r.V = Float4(src0.g);
4421 r.W = Float4(src0.b);
4422
4423 previousScaling = signedScaling;
4424 }
4425
4426 Float4 x = r.U * u + r.V * v + r.W * s;
4427
4428 x *= Float4(1.0f / 0x1000, 1.0f / 0x1000, 1.0f / 0x1000, 1.0f / 0x1000);
4429
4430 switch(component)
4431 {
4432 case 0: r.u_ = x; break;
4433 case 1: r.v_ = x; break;
4434 case 2: r.w_ = x; break;
4435 default: ASSERT(false);
4436 }
4437 }
4438
4439 void PixelRoutine::TEXM3X3SPEC(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, Color4i &src1)
4440 {
4441 TEXM3X3PAD(r, u, v, s, src0, 2, false);
4442
4443 Float4 E[3]; // Eye vector
4444
4445 E[0] = Float4(src1.r) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4446 E[1] = Float4(src1.g) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4447 E[2] = Float4(src1.b) * Float4(1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE, 1.0f / 0x0FFE);
4448
4449 // Reflection
4450 Float4 u__;
4451 Float4 v__;
4452 Float4 w__;
4453
4454 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
4455 u__ = r.u_ * E[0];
4456 v__ = r.v_ * E[1];
4457 w__ = r.w_ * E[2];
4458 u__ += v__ + w__;
4459 u__ += u__;
4460 v__ = u__;
4461 w__ = u__;
4462 u__ *= r.u_;
4463 v__ *= r.v_;
4464 w__ *= r.w_;
4465 r.u_ *= r.u_;
4466 r.v_ *= r.v_;
4467 r.w_ *= r.w_;
4468 r.u_ += r.v_ + r.w_;
4469 u__ -= E[0] * r.u_;
4470 v__ -= E[1] * r.u_;
4471 w__ -= E[2] * r.u_;
4472
4473 sampleTexture(r, dst, stage, u__, v__, w__, w__);
4474 }
4475
4476 void PixelRoutine::TEXM3X3TEX(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0, bool signedScaling)
4477 {
4478 TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
4479
4480 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4481 }
4482
4483 void PixelRoutine::TEXM3X3VSPEC(Registers &r, Color4i &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Color4i &src0)
4484 {
4485 TEXM3X3PAD(r, u, v, s, src0, 2, false);
4486
4487 Float4 E[3]; // Eye vector
4488
4489 E[0] = r.vw[2 + stage - 2];
4490 E[1] = r.vw[2 + stage - 1];
4491 E[2] = r.vw[2 + stage - 0];
4492
4493 // Reflection
4494 Float4 u__;
4495 Float4 v__;
4496 Float4 w__;
4497
4498 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
4499 u__ = r.u_ * E[0];
4500 v__ = r.v_ * E[1];
4501 w__ = r.w_ * E[2];
4502 u__ += v__ + w__;
4503 u__ += u__;
4504 v__ = u__;
4505 w__ = u__;
4506 u__ *= r.u_;
4507 v__ *= r.v_;
4508 w__ *= r.w_;
4509 r.u_ *= r.u_;
4510 r.v_ *= r.v_;
4511 r.w_ *= r.w_;
4512 r.u_ += r.v_ + r.w_;
4513 u__ -= E[0] * r.u_;
4514 v__ -= E[1] * r.u_;
4515 w__ -= E[2] * r.u_;
4516
4517 sampleTexture(r, dst, stage, u__, v__, w__, w__);
4518 }
4519
4520 void PixelRoutine::TEXDEPTH(Registers &r)
4521 {
4522 r.u_ = Float4(r.ri[5].r);
4523 r.v_ = Float4(r.ri[5].g);
4524
4525 // z / w
4526 r.u_ *= Rcp_pp(r.v_); // FIXME: Set result to 1.0 when division by zero
4527
4528 r.oDepth = r.u_;
4529 }
4530
4531 void PixelRoutine::CND(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2)
4532 {
4533 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.r = t0;};
4534 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.g = t0;};
4535 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.b = t0;};
4536 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.a = t0;};
4537 }
4538
4539 void PixelRoutine::CMP(Color4i &dst, Color4i &src0, Color4i &src1, Color4i &src2)
4540 {
4541 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.r = t0;};
4542 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.g = t0;};
4543 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.b = t0;};
4544 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.a = t0;};
4545 }
4546
4547 void PixelRoutine::BEM(Registers &r, Color4i &dst, Color4i &src0, Color4i &src1, int stage)
4548 {
4549 Short4 t0;
4550 Short4 t1;
4551
4552 // dst.r = src0.r + BUMPENVMAT00(stage) * src1.r + BUMPENVMAT10(stage) * src1.g
4553 t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
4554 t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
4555 t0 = AddSat(t0, t1);
4556 t0 = AddSat(t0, src0.x);
4557 dst.r = t0;
4558
4559 // dst.g = src0.g + BUMPENVMAT01(stage) * src1.r + BUMPENVMAT11(stage) * src1.g
4560 t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
4561 t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
4562 t0 = AddSat(t0, t1);
4563 t0 = AddSat(t0, src0.y);
4564 dst.g = t0;
4565 }
4566
4567 void PixelRoutine::M3X2(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
4568 {
4569 Color4f row0 = reg(r, src1, 0);
4570 Color4f row1 = reg(r, src1, 1);
4571
4572 dst.x = dot3(src0, row0);
4573 dst.y = dot3(src0, row1);
4574 }
4575
4576 void PixelRoutine::M3X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
4577 {
4578 Color4f row0 = reg(r, src1, 0);
4579 Color4f row1 = reg(r, src1, 1);
4580 Color4f row2 = reg(r, src1, 2);
4581
4582 dst.x = dot3(src0, row0);
4583 dst.y = dot3(src0, row1);
4584 dst.z = dot3(src0, row2);
4585 }
4586
4587 void PixelRoutine::M3X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
4588 {
4589 Color4f row0 = reg(r, src1, 0);
4590 Color4f row1 = reg(r, src1, 1);
4591 Color4f row2 = reg(r, src1, 2);
4592 Color4f row3 = reg(r, src1, 3);
4593
4594 dst.x = dot3(src0, row0);
4595 dst.y = dot3(src0, row1);
4596 dst.z = dot3(src0, row2);
4597 dst.w = dot3(src0, row3);
4598 }
4599
4600 void PixelRoutine::M4X3(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
4601 {
4602 Color4f row0 = reg(r, src1, 0);
4603 Color4f row1 = reg(r, src1, 1);
4604 Color4f row2 = reg(r, src1, 2);
4605
4606 dst.x = dot4(src0, row0);
4607 dst.y = dot4(src0, row1);
4608 dst.z = dot4(src0, row2);
4609 }
4610
4611 void PixelRoutine::M4X4(Registers &r, Color4f &dst, Color4f &src0, const Src &src1)
4612 {
4613 Color4f row0 = reg(r, src1, 0);
4614 Color4f row1 = reg(r, src1, 1);
4615 Color4f row2 = reg(r, src1, 2);
4616 Color4f row3 = reg(r, src1, 3);
4617
4618 dst.x = dot4(src0, row0);
4619 dst.y = dot4(src0, row1);
4620 dst.z = dot4(src0, row2);
4621 dst.w = dot4(src0, row3);
4622 }
4623
4624 void PixelRoutine::TEXLD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias)
4625 {
4626 Color4f tmp;
4627
4628 sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src0, src0, project, bias);
4629
4630 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
4631 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
4632 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
4633 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
4634 }
4635
4636 void PixelRoutine::TEXLDD(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, Color4f &src2, Color4f &src3, bool project, bool bias)
4637 {
4638 Color4f tmp;
4639
4640 sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src2, src3, project, bias, true);
4641
4642 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
4643 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
4644 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
4645 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
4646 }
4647
4648 void PixelRoutine::TEXLDL(Registers &r, Color4f &dst, Color4f &src0, const Src &src1, bool project, bool bias)
4649 {
4650 Color4f tmp;
4651
4652 sampleTexture(r, tmp, src1.index, src0.u, src0.v, src0.s, src0.t, src0, src0, project, bias, false, true);
4653
4654 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
4655 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
4656 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
4657 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
4658 }
4659
4660 void PixelRoutine::TEXKILL(Int cMask[4], Color4f &src, unsigned char mask)
4661 {
4662 Int kill = -1;
4663
4664 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0, 0, 0, 0)));
4665 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0, 0, 0, 0)));
4666 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0, 0, 0, 0)));
4667 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0, 0, 0, 0)));
4668
4669 for(unsigned int q = 0; q < state.multiSample; q++)
4670 {
4671 cMask[q] &= kill;
4672 }
4673 }
4674
4675 void PixelRoutine::DSX(Color4f &dst, Color4f &src)
4676 {
4677 dst.x = src.x.yyyy - src.x.xxxx;
4678 dst.y = src.y.yyyy - src.y.xxxx;
4679 dst.z = src.z.yyyy - src.z.xxxx;
4680 dst.w = src.w.yyyy - src.w.xxxx;
4681 }
4682
4683 void PixelRoutine::DSY(Color4f &dst, Color4f &src)
4684 {
4685 dst.x = src.x.zzzz - src.x.xxxx;
4686 dst.y = src.y.zzzz - src.y.xxxx;
4687 dst.z = src.z.zzzz - src.z.xxxx;
4688 dst.w = src.w.zzzz - src.w.xxxx;
4689 }
4690
4691 void PixelRoutine::BREAK(Registers &r)
4692 {
4693 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
4694 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
4695
4696 if(breakDepth == 0)
4697 {
4698 Nucleus::createBr(endBlock);
4699 }
4700 else
4701 {
4702 r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
4703 Bool allBreak = SignMask(r.enableBreak) == 0x0;
4704
4705 branch(allBreak, endBlock, deadBlock);
4706 }
4707
4708 Nucleus::setInsertBlock(deadBlock);
4709 }
4710
4711 void PixelRoutine::BREAKC(Registers &r, Color4f &src0, Color4f &src1, Control control)
4712 {
4713 Int4 condition;
4714
4715 switch(control)
4716 {
4717 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
4718 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
4719 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
4720 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
4721 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
4722 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
4723 default:
4724 ASSERT(false);
4725 }
4726
4727 condition &= r.enableStack[r.enableIndex];
4728
4729 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
4730 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
4731
4732 r.enableBreak = r.enableBreak & ~condition;
4733 Bool allBreak = SignMask(r.enableBreak) == 0x0;
4734
4735 branch(allBreak, endBlock, continueBlock);
4736 Nucleus::setInsertBlock(continueBlock);
4737 }
4738
4739 void PixelRoutine::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC
4740 {
4741 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
4742
4743 if(predicateRegister.modifier == Src::MODIFIER_NOT)
4744 {
4745 condition = ~condition;
4746 }
4747
4748 condition &= r.enableStack[r.enableIndex];
4749
4750 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
4751 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
4752
4753 r.enableBreak = r.enableBreak & ~condition;
4754 Bool allBreak = SignMask(r.enableBreak) == 0x0;
4755
4756 branch(allBreak, endBlock, continueBlock);
4757 Nucleus::setInsertBlock(continueBlock);
4758 }
4759
4760 void PixelRoutine::CALL(Registers &r, int labelIndex)
4761 {
4762 if(!labelBlock[labelIndex])
4763 {
4764 labelBlock[labelIndex] = Nucleus::createBasicBlock();
4765 }
4766
4767 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
4768 callRetBlock.push_back(retBlock);
4769
4770 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME
4771
4772 Nucleus::createBr(labelBlock[labelIndex]);
4773 Nucleus::setInsertBlock(retBlock);
4774 }
4775
4776 void PixelRoutine::CALLNZ(Registers &r, int labelIndex, const Src &src)
4777 {
4778 if(src.type == Src::PARAMETER_CONSTBOOL)
4779 {
4780 CALLNZb(r, labelIndex, src);
4781 }
4782 else if(src.type == Src::PARAMETER_PREDICATE)
4783 {
4784 CALLNZp(r, labelIndex, src);
4785 }
4786 else ASSERT(false);
4787 }
4788
4789 void PixelRoutine::CALLNZb(Registers &r, int labelIndex, const Src &boolRegister)
4790 {
4791 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME
4792
4793 if(boolRegister.modifier == Src::MODIFIER_NOT)
4794 {
4795 condition = !condition;
4796 }
4797
4798 if(!labelBlock[labelIndex])
4799 {
4800 labelBlock[labelIndex] = Nucleus::createBasicBlock();
4801 }
4802
4803 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
4804 callRetBlock.push_back(retBlock);
4805
4806 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME
4807
4808 branch(condition, labelBlock[labelIndex], retBlock);
4809 Nucleus::setInsertBlock(retBlock);
4810 }
4811
4812 void PixelRoutine::CALLNZp(Registers &r, int labelIndex, const Src &predicateRegister)
4813 {
4814 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
4815
4816 if(predicateRegister.modifier == Src::MODIFIER_NOT)
4817 {
4818 condition = ~condition;
4819 }
4820
4821 condition &= r.enableStack[r.enableIndex];
4822
4823 if(!labelBlock[labelIndex])
4824 {
4825 labelBlock[labelIndex] = Nucleus::createBasicBlock();
4826 }
4827
4828 llvm::BasicBlock *retBlock = Nucleus::createBasicBlock();
4829 callRetBlock.push_back(retBlock);
4830
4831 r.callStack[r.stackIndex++] = UInt((int)callRetBlock.size() - 1); // FIXME
4832
4833 r.enableIndex++;
4834 r.enableStack[r.enableIndex] = condition;
4835
4836 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
4837
4838 branch(notAllFalse, labelBlock[labelIndex], retBlock);
4839 Nucleus::setInsertBlock(retBlock);
4840
4841 r.enableIndex--;
4842 }
4843
4844 void PixelRoutine::ELSE(Registers &r)
4845 {
4846 ifDepth--;
4847
4848 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
4849 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
4850
4851 if(isConditionalIf[ifDepth])
4852 {
4853 Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
4854 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
4855
4856 branch(notAllFalse, falseBlock, endBlock);
4857
4858 r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
4859 }
4860 else
4861 {
4862 Nucleus::createBr(endBlock);
4863 Nucleus::setInsertBlock(falseBlock);
4864 }
4865
4866 ifFalseBlock[ifDepth] = endBlock;
4867
4868 ifDepth++;
4869 }
4870
4871 void PixelRoutine::ENDIF(Registers &r)
4872 {
4873 ifDepth--;
4874
4875 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
4876
4877 Nucleus::createBr(endBlock);
4878 Nucleus::setInsertBlock(endBlock);
4879
4880 if(isConditionalIf[ifDepth])
4881 {
4882 breakDepth--;
4883 r.enableIndex--;
4884 }
4885 }
4886
4887 void PixelRoutine::ENDREP(Registers &r)
4888 {
4889 loopRepDepth--;
4890
4891 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
4892 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
4893
4894 Nucleus::createBr(testBlock);
4895 Nucleus::setInsertBlock(endBlock);
4896
4897 r.loopDepth--;
4898 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
4899 }
4900
4901 void PixelRoutine::ENDLOOP(Registers &r)
4902 {
4903 loopRepDepth--;
4904
4905 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: +=
4906
4907 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
4908 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
4909
4910 Nucleus::createBr(testBlock);
4911 Nucleus::setInsertBlock(endBlock);
4912
4913 r.loopDepth--;
4914 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
4915 }
4916
4917 void PixelRoutine::IF(Registers &r, const Src &src)
4918 {
4919 if(src.type == Src::PARAMETER_CONSTBOOL)
4920 {
4921 IFb(r, src);
4922 }
4923 else if(src.type == Src::PARAMETER_PREDICATE)
4924 {
4925 IFp(r, src);
4926 }
4927 else ASSERT(false);
4928 }
4929
4930 void PixelRoutine::IFb(Registers &r, const Src &boolRegister)
4931 {
4932 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME
4933
4934 if(boolRegister.modifier == Src::MODIFIER_NOT)
4935 {
4936 condition = !condition;
4937 }
4938
4939 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
4940 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
4941
4942 branch(condition, trueBlock, falseBlock);
4943
4944 isConditionalIf[ifDepth] = false;
4945 ifFalseBlock[ifDepth] = falseBlock;
4946
4947 ifDepth++;
4948 }
4949
4950 void PixelRoutine::IFp(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with IFC
4951 {
4952 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
4953
4954 if(predicateRegister.modifier == Src::MODIFIER_NOT)
4955 {
4956 condition = ~condition;
4957 }
4958
4959 condition &= r.enableStack[r.enableIndex];
4960
4961 r.enableIndex++;
4962 r.enableStack[r.enableIndex] = condition;
4963
4964 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
4965 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
4966
4967 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
4968
4969 branch(notAllFalse, trueBlock, falseBlock);
4970
4971 isConditionalIf[ifDepth] = true;
4972 ifFalseBlock[ifDepth] = falseBlock;
4973
4974 ifDepth++;
4975 breakDepth++;
4976 }
4977
4978 void PixelRoutine::IFC(Registers &r, Color4f &src0, Color4f &src1, Control control)
4979 {
4980 Int4 condition;
4981
4982 switch(control)
4983 {
4984 case Op::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
4985 case Op::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
4986 case Op::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
4987 case Op::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
4988 case Op::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
4989 case Op::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
4990 default:
4991 ASSERT(false);
4992 }
4993
4994 condition &= r.enableStack[r.enableIndex];
4995
4996 r.enableIndex++;
4997 r.enableStack[r.enableIndex] = condition;
4998
4999 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
5000 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
5001
5002 Bool notAllFalse = SignMask(condition & r.enableBreak) != 0;
5003
5004 branch(notAllFalse, trueBlock, falseBlock);
5005
5006 isConditionalIf[ifDepth] = true;
5007 ifFalseBlock[ifDepth] = falseBlock;
5008
5009 ifDepth++;
5010 breakDepth++;
5011 }
5012
5013 void PixelRoutine::LABEL(int labelIndex)
5014 {
5015 Nucleus::setInsertBlock(labelBlock[labelIndex]);
5016 }
5017
5018 void PixelRoutine::LOOP(Registers &r, const Src &integerRegister)
5019 {
5020 r.loopDepth++;
5021
5022 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
5023 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][1]));
5024 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][2]));
5025
5026 // If(r.increment[r.loopDepth] == 0)
5027 // {
5028 // r.increment[r.loopDepth] = 1;
5029 // }
5030
5031 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
5032 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
5033 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5034
5035 loopRepTestBlock[loopRepDepth] = testBlock;
5036 loopRepEndBlock[loopRepDepth] = endBlock;
5037
5038 // FIXME: jump(testBlock)
5039 Nucleus::createBr(testBlock);
5040 Nucleus::setInsertBlock(testBlock);
5041
5042 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
5043 Nucleus::setInsertBlock(loopBlock);
5044
5045 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
5046
5047 loopRepDepth++;
5048 breakDepth = 0;
5049 }
5050
5051 void PixelRoutine::REP(Registers &r, const Src &integerRegister)
5052 {
5053 r.loopDepth++;
5054
5055 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
5056 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
5057
5058 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
5059 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
5060 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5061
5062 loopRepTestBlock[loopRepDepth] = testBlock;
5063 loopRepEndBlock[loopRepDepth] = endBlock;
5064
5065 // FIXME: jump(testBlock)
5066 Nucleus::createBr(testBlock);
5067 Nucleus::setInsertBlock(testBlock);
5068
5069 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
5070 Nucleus::setInsertBlock(loopBlock);
5071
5072 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
5073
5074 loopRepDepth++;
5075 breakDepth = 0;
5076 }
5077
5078 void PixelRoutine::RET(Registers &r)
5079 {
5080 if(!returns)
5081 {
5082 returnBlock = Nucleus::createBasicBlock();
5083 Nucleus::createBr(returnBlock);
5084
5085 returns = true;
5086 }
5087 else
5088 {
5089 // FIXME: Encapsulate
5090 UInt index = r.callStack[--r.stackIndex];
5091
5092 llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
5093 llvm::Value *value = Nucleus::createLoad(index.address);
5094 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock.size());
5095
5096 for(unsigned int i = 0; i < callRetBlock.size(); i++)
5097 {
5098 Nucleus::addSwitchCase(switchInst, i, callRetBlock[i]);
5099 }
5100
5101 Nucleus::setInsertBlock(unreachableBlock);
5102 Nucleus::createUnreachable();
5103 }
5104 }
5105
5106 void PixelRoutine::writeDestination(Registers &r, Color4i &d, const Dst &dst)
5107 {
5108 switch(dst.type)
5109 {
5110 case Dst::PARAMETER_TEMP:
5111 if(dst.mask & 0x1) r.ri[dst.index].x = d.x;
5112 if(dst.mask & 0x2) r.ri[dst.index].y = d.y;
5113 if(dst.mask & 0x4) r.ri[dst.index].z = d.z;
5114 if(dst.mask & 0x8) r.ri[dst.index].w = d.w;
5115 break;
5116 case Dst::PARAMETER_INPUT:
5117 if(dst.mask & 0x1) r.vi[dst.index].x = d.x;
5118 if(dst.mask & 0x2) r.vi[dst.index].y = d.y;
5119 if(dst.mask & 0x4) r.vi[dst.index].z = d.z;
5120 if(dst.mask & 0x8) r.vi[dst.index].w = d.w;
5121 break;
5122 case Dst::PARAMETER_CONST: ASSERT(false); break;
5123 case Dst::PARAMETER_TEXTURE:
5124 if(dst.mask & 0x1) r.ti[dst.index].x = d.x;
5125 if(dst.mask & 0x2) r.ti[dst.index].y = d.y;
5126 if(dst.mask & 0x4) r.ti[dst.index].z = d.z;
5127 if(dst.mask & 0x8) r.ti[dst.index].w = d.w;
5128 break;
5129 case Dst::PARAMETER_COLOROUT:
5130 if(dst.mask & 0x1) r.vi[dst.index].x = d.x;
5131 if(dst.mask & 0x2) r.vi[dst.index].y = d.y;
5132 if(dst.mask & 0x4) r.vi[dst.index].z = d.z;
5133 if(dst.mask & 0x8) r.vi[dst.index].w = d.w;
5134 break;
5135 default:
5136 ASSERT(false);
5137 }
5138 }
5139
5140 Color4i PixelRoutine::regi(Registers &r, const Src &src)
5141 {
5142 Color4i *reg;
5143 int i = src.index;
5144
5145 Color4i c;
5146
5147 if(src.type == ShaderParameter::PARAMETER_CONST)
5148 {
5149 c.r = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][0]));
5150 c.g = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][1]));
5151 c.b = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][2]));
5152 c.a = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][3]));
5153 }
5154
5155 switch(src.type)
5156 {
5157 case Src::PARAMETER_TEMP: reg = &r.ri[i]; break;
5158 case Src::PARAMETER_INPUT: reg = &r.vi[i]; break;
5159 case Src::PARAMETER_CONST: reg = &c; break;
5160 case Src::PARAMETER_TEXTURE: reg = &r.ti[i]; break;
5161 case Src::PARAMETER_VOID: return r.ri[0]; // Dummy
5162 case Src::PARAMETER_FLOATLITERAL: return r.ri[0]; // Dummy
5163 default:
5164 ASSERT(false);
5165 }
5166
5167 Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
5168 Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
5169 Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
5170 Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
5171
5172 Color4i mod;
5173
5174 switch(src.modifier)
5175 {
5176 case Src::MODIFIER_NONE:
5177 mod.r = x;
5178 mod.g = y;
5179 mod.b = z;
5180 mod.a = w;
5181 break;
5182 case Src::MODIFIER_BIAS:
5183 mod.r = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5184 mod.g = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5185 mod.b = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5186 mod.a = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5187 break;
5188 case Src::MODIFIER_BIAS_NEGATE:
5189 mod.r = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
5190 mod.g = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
5191 mod.b = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
5192 mod.a = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
5193 break;
5194 case Src::MODIFIER_COMPLEMENT:
5195 mod.r = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), x);
5196 mod.g = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), y);
5197 mod.b = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), z);
5198 mod.a = SubSat(Short4(0x1000, 0x1000, 0x1000, 0x1000), w);
5199 break;
5200 case Src::MODIFIER_NEGATE:
5201 mod.r = -x;
5202 mod.g = -y;
5203 mod.b = -z;
5204 mod.a = -w;
5205 break;
5206 case Src::MODIFIER_X2:
5207 mod.r = AddSat(x, x);
5208 mod.g = AddSat(y, y);
5209 mod.b = AddSat(z, z);
5210 mod.a = AddSat(w, w);
5211 break;
5212 case Src::MODIFIER_X2_NEGATE:
5213 mod.r = -AddSat(x, x);
5214 mod.g = -AddSat(y, y);
5215 mod.b = -AddSat(z, z);
5216 mod.a = -AddSat(w, w);
5217 break;
5218 case Src::MODIFIER_SIGN:
5219 mod.r = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5220 mod.g = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5221 mod.b = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5222 mod.a = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5223 mod.r = AddSat(mod.r, mod.r);
5224 mod.g = AddSat(mod.g, mod.g);
5225 mod.b = AddSat(mod.b, mod.b);
5226 mod.a = AddSat(mod.a, mod.a);
5227 break;
5228 case Src::MODIFIER_SIGN_NEGATE:
5229 mod.r = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
5230 mod.g = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
5231 mod.b = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
5232 mod.a = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
5233 mod.r = AddSat(mod.r, mod.r);
5234 mod.g = AddSat(mod.g, mod.g);
5235 mod.b = AddSat(mod.b, mod.b);
5236 mod.a = AddSat(mod.a, mod.a);
5237 break;
5238 case Src::MODIFIER_DZ:
5239 mod.r = x;
5240 mod.g = y;
5241 mod.b = z;
5242 mod.a = w;
5243 // Projection performed by texture sampler
5244 break;
5245 case Src::MODIFIER_DW:
5246 mod.r = x;
5247 mod.g = y;
5248 mod.b = z;
5249 mod.a = w;
5250 // Projection performed by texture sampler
5251 break;
5252 default:
5253 ASSERT(false);
5254 }
5255
5256 if(src.type == ShaderParameter::PARAMETER_CONST && (src.modifier == Src::MODIFIER_X2 || src.modifier == Src::MODIFIER_X2_NEGATE))
5257 {
5258 mod.r = Min(mod.r, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.r = Max(mod.r, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5259 mod.g = Min(mod.g, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.g = Max(mod.g, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5260 mod.b = Min(mod.b, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.b = Max(mod.b, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5261 mod.a = Min(mod.a, Short4(0x1000, 0x1000, 0x1000, 0x1000)); mod.a = Max(mod.a, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5262 }
5263
5264 return mod;
5265 }
5266
5267 Color4f PixelRoutine::reg(Registers &r, const Src &src, int offset)
5268 {
5269 Color4f reg;
5270 int i = src.index + offset;
5271
5272 switch(src.type)
5273 {
5274 case Src::PARAMETER_TEMP: reg = r.rf[i]; break;
5275 case Src::PARAMETER_INPUT:
5276 {
5277 if(!src.relative)
5278 {
5279 reg.x = r.vx[i];
5280 reg.y = r.vy[i];
5281 reg.z = r.vz[i];
5282 reg.w = r.vw[i];
5283 }
5284 else if(src.relativeType == Src::PARAMETER_LOOP)
5285 {
5286 Int aL = r.aL[r.loopDepth];
5287
5288 reg.x = r.vx[i + aL];
5289 reg.y = r.vy[i + aL];
5290 reg.z = r.vz[i + aL];
5291 reg.w = r.vw[i + aL];
5292 }
5293 else ASSERT(false);
5294 }
5295 break;
5296 case Src::PARAMETER_CONST:
5297 {
5298 reg.r = reg.g = reg.b = reg.a = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]));
5299
5300 reg.r = reg.r.xxxx;
5301 reg.g = reg.g.yyyy;
5302 reg.b = reg.b.zzzz;
5303 reg.a = reg.a.wwww;
5304
5305 if(localShaderConstants) // Constant may be known at compile time
5306 {
5307 for(int j = 0; j < pixelShader->getLength(); j++)
5308 {
5309 const ShaderInstruction &instruction = *pixelShader->getInstruction(j);
5310
5311 if(instruction.getOpcode() == ShaderOperation::OPCODE_DEF)
5312 {
5313 if(instruction.getDestinationParameter().index == i)
5314 {
5315 reg.r = Float4(instruction.getSourceParameter(0).value);
5316 reg.g = Float4(instruction.getSourceParameter(1).value);
5317 reg.b = Float4(instruction.getSourceParameter(2).value);
5318 reg.a = Float4(instruction.getSourceParameter(3).value);
5319
5320 break;
5321 }
5322 }
5323 }
5324 }
5325 }
5326 break;
5327 case Src::PARAMETER_TEXTURE:
5328 {
5329 reg.x = r.vx[2 + i];
5330 reg.y = r.vy[2 + i];
5331 reg.z = r.vz[2 + i];
5332 reg.w = r.vw[2 + i];
5333 }
5334 break;
5335 case Src::PARAMETER_MISCTYPE:
5336 if(src.index == 0) reg = r.vPos;
5337 if(src.index == 1) reg = r.vFace;
5338 break;
5339 case Src::PARAMETER_SAMPLER: return r.rf[0]; // Dummy
5340 case Src::PARAMETER_PREDICATE: return r.rf[0]; // Dummy
5341 case Src::PARAMETER_VOID: return r.rf[0]; // Dummy
5342 case Src::PARAMETER_FLOATLITERAL: return r.rf[0]; // Dummy
5343 case Src::PARAMETER_CONSTINT: return r.rf[0]; // Dummy
5344 case Src::PARAMETER_CONSTBOOL: return r.rf[0]; // Dummy
5345 case Src::PARAMETER_LOOP: return r.rf[0]; // Dummy
5346 default:
5347 ASSERT(false);
5348 }
5349
5350 Float4 &x = reg[(src.swizzle >> 0) & 0x3];
5351 Float4 &y = reg[(src.swizzle >> 2) & 0x3];
5352 Float4 &z = reg[(src.swizzle >> 4) & 0x3];
5353 Float4 &w = reg[(src.swizzle >> 6) & 0x3];
5354
5355 Color4f mod;
5356
5357 switch(src.modifier)
5358 {
5359 case Src::MODIFIER_NONE:
5360 mod.x = x;
5361 mod.y = y;
5362 mod.z = z;
5363 mod.w = w;
5364 break;
5365 case Src::MODIFIER_NEGATE:
5366 mod.x = -x;
5367 mod.y = -y;
5368 mod.z = -z;
5369 mod.w = -w;
5370 break;
5371 case Src::MODIFIER_ABS:
5372 mod.x = Abs(x);
5373 mod.y = Abs(y);
5374 mod.z = Abs(z);
5375 mod.w = Abs(w);
5376 break;
5377 case Src::MODIFIER_ABS_NEGATE:
5378 mod.x = -Abs(x);
5379 mod.y = -Abs(y);
5380 mod.z = -Abs(z);
5381 mod.w = -Abs(w);
5382 break;
5383 default:
5384 ASSERT(false);
5385 }
5386
5387 return mod;
5388 }
5389
5390 bool PixelRoutine::colorUsed()
5391 {
5392 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsTexkill;
5393 }
5394
5395 unsigned short PixelRoutine::pixelShaderVersion() const
5396 {
5397 return pixelShader ? pixelShader->getVersion() : 0x0000;
5398 }
5399}