blob: b9f3ab99b3bf631d0b54a3b9579d30759c75b44a [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelRoutine.hpp"
13
14#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040015#include "QuadRasterizer.hpp"
16#include "Surface.hpp"
17#include "Primitive.hpp"
18#include "CPUID.hpp"
19#include "SamplerCore.hpp"
20#include "Constants.hpp"
21#include "Debug.hpp"
22
John Bauman89401822014-05-06 15:04:28 -040023extern bool localShaderConstants;
24
25namespace sw
26{
27 extern bool complementaryDepthBuffer;
28 extern bool postBlendSRGB;
29 extern bool exactColorRounding;
John Bauman19bac1e2014-05-06 15:23:49 -040030 extern bool booleanFaceRegister;
31 extern bool halfIntegerCoordinates; // Pixel centers are not at integer coordinates
32 extern bool fullPixelPositionRegister;
John Bauman89401822014-05-06 15:04:28 -040033
John Bauman19bac1e2014-05-06 15:23:49 -040034 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : Rasterizer(state), shader(shader)
John Bauman89401822014-05-06 15:04:28 -040035 {
36 perturbate = false;
37 luminance = false;
38 previousScaling = false;
39
John Bauman89401822014-05-06 15:04:28 -040040 ifDepth = 0;
41 loopRepDepth = 0;
42 breakDepth = 0;
John Bauman19bac1e2014-05-06 15:23:49 -040043 currentLabel = -1;
44 whileTest = false;
John Bauman89401822014-05-06 15:04:28 -040045
46 for(int i = 0; i < 2048; i++)
47 {
48 labelBlock[i] = 0;
49 }
50 }
51
52 PixelRoutine::~PixelRoutine()
53 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040054 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040055 {
56 delete sampler[i];
57 }
58 }
59
60 void PixelRoutine::quad(Registers &r, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
61 {
62 #if PERF_PROFILE
63 Long pipeTime = Ticks();
64 #endif
65
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040066 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040067 {
68 sampler[i] = new SamplerCore(r.constants, state.sampler[i]);
69 }
70
71 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman19bac1e2014-05-06 15:23:49 -040072 const bool integerPipeline = shaderVersion() <= 0x0104;
John Bauman89401822014-05-06 15:04:28 -040073
74 Int zMask[4]; // Depth mask
75 Int sMask[4]; // Stencil mask
76
77 for(unsigned int q = 0; q < state.multiSample; q++)
78 {
79 zMask[q] = cMask[q];
80 sMask[q] = cMask[q];
81 }
82
83 for(unsigned int q = 0; q < state.multiSample; q++)
84 {
85 stencilTest(r, sBuffer, q, x, sMask[q], cMask[q]);
86 }
87
88 Float4 f;
89
John Bauman89401822014-05-06 15:04:28 -040090 Float4 (&z)[4] = r.z;
John Bauman19bac1e2014-05-06 15:23:49 -040091 Float4 &w = r.w;
John Bauman89401822014-05-06 15:04:28 -040092 Float4 &rhw = r.rhw;
93 Float4 rhwCentroid;
94
95 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040096
John Bauman19bac1e2014-05-06 15:23:49 -040097 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040098 {
99 for(unsigned int q = 0; q < state.multiSample; q++)
100 {
101 Float4 x = xxxx;
102
103 if(state.multiSample > 1)
104 {
105 x -= *Pointer<Float4>(r.constants + OFFSET(Constants,X) + q * sizeof(float4));
106 }
107
108 z[q] = interpolate(x, r.Dz[q], z[q], r.primitive + OFFSET(Primitive,z), false, false);
109 }
110 }
111
112 Bool depthPass = false;
113
114 if(earlyDepthTest)
115 {
116 for(unsigned int q = 0; q < state.multiSample; q++)
117 {
118 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
119 }
120 }
121
122 If(depthPass || Bool(!earlyDepthTest))
123 {
124 #if PERF_PROFILE
125 Long interpTime = Ticks();
126 #endif
127
Nicolas Capens66be2452015-01-27 14:58:57 -0500128 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400129
John Bauman89401822014-05-06 15:04:28 -0400130 // Centroid locations
131 Float4 XXXX = Float4(0.0f);
132 Float4 YYYY = Float4(0.0f);
133
134 if(state.centroid)
135 {
136 Float4 WWWW(1.0e-9f);
137
138 for(unsigned int q = 0; q < state.multiSample; q++)
139 {
140 XXXX += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
141 YYYY += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
142 WWWW += *Pointer<Float4>(r.constants + OFFSET(Constants,weight) + 16 * cMask[q]);
143 }
144
145 WWWW = Rcp_pp(WWWW);
146 XXXX *= WWWW;
147 YYYY *= WWWW;
148
149 XXXX += xxxx;
150 YYYY += yyyy;
151 }
152
John Bauman19bac1e2014-05-06 15:23:49 -0400153 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400154 {
John Bauman19bac1e2014-05-06 15:23:49 -0400155 w = interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false);
156 rhw = reciprocal(w);
John Bauman89401822014-05-06 15:04:28 -0400157
158 if(state.centroid)
159 {
160 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,w), false, false));
161 }
162 }
163
164 for(int interpolant = 0; interpolant < 10; interpolant++)
165 {
166 for(int component = 0; component < 4; component++)
167 {
John Bauman89401822014-05-06 15:04:28 -0400168 if(state.interpolant[interpolant].component & (1 << component))
169 {
170 if(!state.interpolant[interpolant].centroid)
171 {
John Bauman19bac1e2014-05-06 15:23:49 -0400172 r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400173 }
174 else
175 {
John Bauman19bac1e2014-05-06 15:23:49 -0400176 r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400177 }
178 }
179 }
180
181 Float4 rcp;
182
183 switch(state.interpolant[interpolant].project)
184 {
185 case 0:
186 break;
187 case 1:
John Bauman19bac1e2014-05-06 15:23:49 -0400188 rcp = reciprocal(r.vf[interpolant].y);
189 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400190 break;
191 case 2:
John Bauman19bac1e2014-05-06 15:23:49 -0400192 rcp = reciprocal(r.vf[interpolant].z);
193 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
194 r.vf[interpolant].y = r.vf[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400195 break;
196 case 3:
John Bauman19bac1e2014-05-06 15:23:49 -0400197 rcp = reciprocal(r.vf[interpolant].w);
198 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
199 r.vf[interpolant].y = r.vf[interpolant].y * rcp;
200 r.vf[interpolant].z = r.vf[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400201 break;
202 }
203 }
204
205 if(state.fog.component)
206 {
207 f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
208 }
209
210 if(integerPipeline)
211 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400212 if(state.color[0].component & 0x1) r.diffuse.x = convertFixed12(r.vf[0].x); else r.diffuse.x = Short4(0x1000);
213 if(state.color[0].component & 0x2) r.diffuse.y = convertFixed12(r.vf[0].y); else r.diffuse.y = Short4(0x1000);
214 if(state.color[0].component & 0x4) r.diffuse.z = convertFixed12(r.vf[0].z); else r.diffuse.z = Short4(0x1000);
215 if(state.color[0].component & 0x8) r.diffuse.w = convertFixed12(r.vf[0].w); else r.diffuse.w = Short4(0x1000);
John Bauman89401822014-05-06 15:04:28 -0400216
Nicolas Capenscbefe532014-10-16 00:16:01 -0400217 if(state.color[1].component & 0x1) r.specular.x = convertFixed12(r.vf[1].x); else r.specular.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
218 if(state.color[1].component & 0x2) r.specular.y = convertFixed12(r.vf[1].y); else r.specular.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
219 if(state.color[1].component & 0x4) r.specular.z = convertFixed12(r.vf[1].z); else r.specular.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
220 if(state.color[1].component & 0x8) r.specular.w = convertFixed12(r.vf[1].w); else r.specular.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -0400221 }
John Bauman19bac1e2014-05-06 15:23:49 -0400222 else if(shaderVersion() >= 0x0300)
John Bauman89401822014-05-06 15:04:28 -0400223 {
John Bauman19bac1e2014-05-06 15:23:49 -0400224 if(shader->vPosDeclared)
John Bauman89401822014-05-06 15:04:28 -0400225 {
John Bauman19bac1e2014-05-06 15:23:49 -0400226 if(!halfIntegerCoordinates)
227 {
228 r.vPos.x = Float4(Float(x)) + Float4(0, 1, 0, 1);
229 r.vPos.y = Float4(Float(y)) + Float4(0, 0, 1, 1);
230 }
231 else
232 {
233 r.vPos.x = Float4(Float(x)) + Float4(0.5f, 1.5f, 0.5f, 1.5f);
234 r.vPos.y = Float4(Float(y)) + Float4(0.5f, 0.5f, 1.5f, 1.5f);
235 }
236
237 if(fullPixelPositionRegister)
238 {
239 r.vPos.z = z[0]; // FIXME: Centroid?
240 r.vPos.w = w; // FIXME: Centroid?
241 }
John Bauman89401822014-05-06 15:04:28 -0400242 }
243
John Bauman19bac1e2014-05-06 15:23:49 -0400244 if(shader->vFaceDeclared)
John Bauman89401822014-05-06 15:04:28 -0400245 {
246 Float4 area = *Pointer<Float>(r.primitive + OFFSET(Primitive,area));
John Bauman66b8ab22014-05-06 15:57:45 -0400247 Float4 face = booleanFaceRegister ? Float4(As<Float4>(CmpNLT(area, Float4(0.0f)))) : area;
John Bauman19bac1e2014-05-06 15:23:49 -0400248
249 r.vFace.x = face;
250 r.vFace.y = face;
251 r.vFace.z = face;
252 r.vFace.w = face;
John Bauman89401822014-05-06 15:04:28 -0400253 }
254 }
255
256 #if PERF_PROFILE
257 r.cycles[PERF_INTERP] += Ticks() - interpTime;
258 #endif
259
260 Bool alphaPass = true;
261
262 if(colorUsed())
263 {
264 #if PERF_PROFILE
265 Long shaderTime = Ticks();
266 #endif
267
John Bauman19bac1e2014-05-06 15:23:49 -0400268 if(shader)
John Bauman89401822014-05-06 15:04:28 -0400269 {
John Bauman19bac1e2014-05-06 15:23:49 -0400270 // shader->print("PixelShader-%0.8X.txt", state.shaderID);
John Bauman89401822014-05-06 15:04:28 -0400271
John Bauman19bac1e2014-05-06 15:23:49 -0400272 if(shader->getVersion() <= 0x0104)
John Bauman89401822014-05-06 15:04:28 -0400273 {
274 ps_1_x(r, cMask);
275 }
276 else
277 {
278 ps_2_x(r, cMask);
279 }
280 }
281 else
282 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400283 r.current = r.diffuse;
Alexis Hetu96517182015-04-15 10:30:23 -0400284 Vector4s temp(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -0400285
286 for(int stage = 0; stage < 8; stage++)
287 {
288 if(state.textureStage[stage].stageOperation == TextureStage::STAGE_DISABLE)
289 {
290 break;
291 }
292
Alexis Hetu96517182015-04-15 10:30:23 -0400293 Vector4s texture;
John Bauman89401822014-05-06 15:04:28 -0400294
295 if(state.textureStage[stage].usesTexture)
296 {
297 sampleTexture(r, texture, stage, stage);
298 }
299
Nicolas Capenscbefe532014-10-16 00:16:01 -0400300 blendTexture(r, temp, texture, stage);
John Bauman89401822014-05-06 15:04:28 -0400301 }
302
Nicolas Capenscbefe532014-10-16 00:16:01 -0400303 specularPixel(r.current, r.specular);
John Bauman89401822014-05-06 15:04:28 -0400304 }
305
306 #if PERF_PROFILE
307 r.cycles[PERF_SHADER] += Ticks() - shaderTime;
308 #endif
309
310 if(integerPipeline)
311 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400312 r.current.x = Min(r.current.x, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.x = Max(r.current.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
313 r.current.y = Min(r.current.y, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.y = Max(r.current.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
314 r.current.z = Min(r.current.z, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.z = Max(r.current.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
315 r.current.w = Min(r.current.w, Short4(0x0FFF, 0x0FFF, 0x0FFF, 0x0FFF)); r.current.w = Max(r.current.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
John Bauman89401822014-05-06 15:04:28 -0400316
Nicolas Capenscbefe532014-10-16 00:16:01 -0400317 alphaPass = alphaTest(r, cMask, r.current);
John Bauman89401822014-05-06 15:04:28 -0400318 }
319 else
320 {
321 clampColor(r.oC);
322
323 alphaPass = alphaTest(r, cMask, r.oC[0]);
324 }
325
John Bauman19bac1e2014-05-06 15:23:49 -0400326 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400327 {
328 for(unsigned int q = 0; q < state.multiSample; q++)
329 {
330 zMask[q] &= cMask[q];
331 sMask[q] &= cMask[q];
332 }
333 }
334 }
335
336 If(alphaPass)
337 {
338 if(!earlyDepthTest)
339 {
340 for(unsigned int q = 0; q < state.multiSample; q++)
341 {
342 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
343 }
344 }
345
346 #if PERF_PROFILE
347 Long ropTime = Ticks();
348 #endif
349
350 If(depthPass || Bool(earlyDepthTest))
351 {
352 for(unsigned int q = 0; q < state.multiSample; q++)
353 {
354 if(state.multiSampleMask & (1 << q))
355 {
356 writeDepth(r, zBuffer, q, x, z[q], zMask[q]);
357
358 if(state.occlusionEnabled)
359 {
360 r.occlusion += *Pointer<UInt>(r.constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
361 }
362 }
363 }
364
365 if(colorUsed())
366 {
367 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400368 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400369 #endif
370
371 if(integerPipeline)
372 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400373 rasterOperation(r.current, r, f, cBuffer[0], x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400374 }
375 else
376 {
377 rasterOperation(r.oC, r, f, cBuffer, x, sMask, zMask, cMask);
378 }
379 }
380 }
381
382 #if PERF_PROFILE
383 r.cycles[PERF_ROP] += Ticks() - ropTime;
384 #endif
385 }
386 }
387
388 for(unsigned int q = 0; q < state.multiSample; q++)
389 {
390 if(state.multiSampleMask & (1 << q))
391 {
392 writeStencil(r, sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
393 }
394 }
395
396 #if PERF_PROFILE
397 r.cycles[PERF_PIPE] += Ticks() - pipeTime;
398 #endif
399 }
400
401 Float4 PixelRoutine::interpolate(Float4 &x, Float4 &D, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
402 {
403 Float4 interpolant = D;
404
405 if(!flat)
406 {
407 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16);
408
409 if(perspective)
410 {
411 interpolant *= rhw;
412 }
413 }
414
415 return interpolant;
416 }
417
418 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
419 {
420 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
421
422 if(!flat)
423 {
424 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
425 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
426
427 if(perspective)
428 {
429 interpolant *= rhw;
430 }
431 }
432
433 return interpolant;
434 }
435
436 void PixelRoutine::stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
437 {
438 if(!state.stencilActive)
439 {
440 return;
441 }
442
443 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
444
445 Pointer<Byte> buffer = sBuffer + 2 * x;
446
447 if(q > 0)
448 {
449 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
450 }
451
452 Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
453 Byte8 valueCCW = value;
454
455 if(!state.noStencilMask)
456 {
457 value &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].testMaskQ));
458 }
459
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400460 stencilTest(r, value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400461
462 if(state.twoSidedStencil)
463 {
464 if(!state.noStencilMaskCCW)
465 {
466 valueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].testMaskQ));
467 }
468
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400469 stencilTest(r, valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400470
471 value &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
472 valueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
473 value |= valueCCW;
474 }
475
476 sMask = SignMask(value) & cMask;
477 }
478
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400479 void PixelRoutine::stencilTest(Registers &r, Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400480 {
481 Byte8 equal;
482
483 switch(stencilCompareMode)
484 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400485 case STENCIL_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400486 value = Byte8(0xFFFFFFFFFFFFFFFF);
487 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400488 case STENCIL_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400489 value = Byte8(0x0000000000000000);
490 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400491 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400492 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
493 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
494 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400495 case STENCIL_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400496 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
497 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400498 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -0400499 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
500 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
501 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400502 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400503 equal = value;
504 equal = CmpEQ(equal, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
505 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
506 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
507 value |= equal;
508 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400509 case STENCIL_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -0400510 equal = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
511 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
512 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
513 value = equal;
514 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400515 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400516 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
517 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
518 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
519 break;
520 default:
521 ASSERT(false);
522 }
523 }
524
525 Bool PixelRoutine::depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
526 {
527 if(!state.depthTestActive)
528 {
529 return true;
530 }
531
532 Float4 Z = z;
533
John Bauman19bac1e2014-05-06 15:23:49 -0400534 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400535 {
536 if(complementaryDepthBuffer)
537 {
John Bauman19bac1e2014-05-06 15:23:49 -0400538 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -0400539 }
540 else
541 {
542 Z = r.oDepth;
543 }
544 }
545
546 Pointer<Byte> buffer;
547 Int pitch;
548
549 if(!state.quadLayoutDepthBuffer)
550 {
551 buffer = zBuffer + 4 * x;
552 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
553 }
554 else
555 {
556 buffer = zBuffer + 8 * x;
557 }
558
559 if(q > 0)
560 {
561 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
562 }
563
564 Float4 zValue;
565
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400566 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400567 {
568 if(!state.quadLayoutDepthBuffer)
569 {
570 // FIXME: Properly optimizes?
571 zValue.xy = *Pointer<Float4>(buffer);
572 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
573 }
574 else
575 {
576 zValue = *Pointer<Float4>(buffer, 16);
577 }
578 }
579
580 Int4 zTest;
581
582 switch(state.depthCompareMode)
583 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400584 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400585 // Optimized
586 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400587 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400588 // Optimized
589 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400590 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400591 zTest = CmpEQ(zValue, Z);
592 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400593 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400594 zTest = CmpNEQ(zValue, Z);
595 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400596 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400597 if(complementaryDepthBuffer)
598 {
599 zTest = CmpLT(zValue, Z);
600 }
601 else
602 {
603 zTest = CmpNLE(zValue, Z);
604 }
605 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400606 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400607 if(complementaryDepthBuffer)
608 {
609 zTest = CmpNLT(zValue, Z);
610 }
611 else
612 {
613 zTest = CmpLE(zValue, Z);
614 }
615 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400616 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400617 if(complementaryDepthBuffer)
618 {
619 zTest = CmpLE(zValue, Z);
620 }
621 else
622 {
623 zTest = CmpNLT(zValue, Z);
624 }
625 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400626 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400627 if(complementaryDepthBuffer)
628 {
629 zTest = CmpNLE(zValue, Z);
630 }
631 else
632 {
633 zTest = CmpLT(zValue, Z);
634 }
635 break;
636 default:
637 ASSERT(false);
638 }
639
640 switch(state.depthCompareMode)
641 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400642 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400643 zMask = cMask;
644 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400645 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400646 zMask = 0x0;
647 break;
648 default:
649 zMask = SignMask(zTest) & cMask;
650 break;
651 }
652
653 if(state.stencilActive)
654 {
655 zMask &= sMask;
656 }
657
658 return zMask != 0;
659 }
660
Alexis Hetu96517182015-04-15 10:30:23 -0400661 void PixelRoutine::blendTexture(Registers &r, Vector4s &temp, Vector4s &texture, int stage)
John Bauman89401822014-05-06 15:04:28 -0400662 {
Alexis Hetu96517182015-04-15 10:30:23 -0400663 Vector4s *arg1;
664 Vector4s *arg2;
665 Vector4s *arg3;
666 Vector4s res;
John Bauman89401822014-05-06 15:04:28 -0400667
Alexis Hetu96517182015-04-15 10:30:23 -0400668 Vector4s constant;
669 Vector4s tfactor;
John Bauman89401822014-05-06 15:04:28 -0400670
671 const TextureStage::State &textureStage = state.textureStage[stage];
672
673 if(textureStage.firstArgument == TextureStage::SOURCE_CONSTANT ||
674 textureStage.firstArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
675 textureStage.secondArgument == TextureStage::SOURCE_CONSTANT ||
676 textureStage.secondArgumentAlpha == TextureStage::SOURCE_CONSTANT ||
677 textureStage.thirdArgument == TextureStage::SOURCE_CONSTANT ||
678 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_CONSTANT)
679 {
John Bauman19bac1e2014-05-06 15:23:49 -0400680 constant.x = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[0]));
681 constant.y = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[1]));
682 constant.z = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[2]));
683 constant.w = *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].constantColor4[3]));
John Bauman89401822014-05-06 15:04:28 -0400684 }
685
686 if(textureStage.firstArgument == TextureStage::SOURCE_TFACTOR ||
687 textureStage.firstArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
688 textureStage.secondArgument == TextureStage::SOURCE_TFACTOR ||
689 textureStage.secondArgumentAlpha == TextureStage::SOURCE_TFACTOR ||
690 textureStage.thirdArgument == TextureStage::SOURCE_TFACTOR ||
691 textureStage.thirdArgumentAlpha == TextureStage::SOURCE_TFACTOR)
692 {
John Bauman19bac1e2014-05-06 15:23:49 -0400693 tfactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[0]));
694 tfactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[1]));
695 tfactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[2]));
696 tfactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]));
John Bauman89401822014-05-06 15:04:28 -0400697 }
698
699 // Premodulate
700 if(stage > 0 && textureStage.usesTexture)
701 {
702 if(state.textureStage[stage - 1].stageOperation == TextureStage::STAGE_PREMODULATE)
703 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400704 r.current.x = MulHigh(r.current.x, texture.x) << 4;
705 r.current.y = MulHigh(r.current.y, texture.y) << 4;
706 r.current.z = MulHigh(r.current.z, texture.z) << 4;
John Bauman89401822014-05-06 15:04:28 -0400707 }
708
709 if(state.textureStage[stage - 1].stageOperationAlpha == TextureStage::STAGE_PREMODULATE)
710 {
Nicolas Capenscbefe532014-10-16 00:16:01 -0400711 r.current.w = MulHigh(r.current.w, texture.w) << 4;
John Bauman89401822014-05-06 15:04:28 -0400712 }
713 }
714
715 if(luminance)
716 {
John Bauman19bac1e2014-05-06 15:23:49 -0400717 texture.x = MulHigh(texture.x, r.L) << 4;
718 texture.y = MulHigh(texture.y, r.L) << 4;
719 texture.z = MulHigh(texture.z, r.L) << 4;
John Bauman89401822014-05-06 15:04:28 -0400720
721 luminance = false;
722 }
723
724 switch(textureStage.firstArgument)
725 {
726 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
727 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -0400728 case TextureStage::SOURCE_CURRENT: arg1 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -0400729 case TextureStage::SOURCE_DIFFUSE: arg1 = &r.diffuse; break;
730 case TextureStage::SOURCE_SPECULAR: arg1 = &r.specular; break;
731 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
732 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
733 default:
734 ASSERT(false);
735 }
736
737 switch(textureStage.secondArgument)
738 {
739 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
740 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -0400741 case TextureStage::SOURCE_CURRENT: arg2 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -0400742 case TextureStage::SOURCE_DIFFUSE: arg2 = &r.diffuse; break;
743 case TextureStage::SOURCE_SPECULAR: arg2 = &r.specular; break;
744 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
745 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
746 default:
747 ASSERT(false);
748 }
749
750 switch(textureStage.thirdArgument)
751 {
752 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
753 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -0400754 case TextureStage::SOURCE_CURRENT: arg3 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -0400755 case TextureStage::SOURCE_DIFFUSE: arg3 = &r.diffuse; break;
756 case TextureStage::SOURCE_SPECULAR: arg3 = &r.specular; break;
757 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
758 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
759 default:
760 ASSERT(false);
761 }
762
Alexis Hetu96517182015-04-15 10:30:23 -0400763 Vector4s mod1;
764 Vector4s mod2;
765 Vector4s mod3;
John Bauman89401822014-05-06 15:04:28 -0400766
767 switch(textureStage.firstModifier)
768 {
769 case TextureStage::MODIFIER_COLOR:
770 break;
771 case TextureStage::MODIFIER_INVCOLOR:
772 {
John Bauman19bac1e2014-05-06 15:23:49 -0400773 mod1.x = SubSat(Short4(0x1000), arg1->x);
774 mod1.y = SubSat(Short4(0x1000), arg1->y);
775 mod1.z = SubSat(Short4(0x1000), arg1->z);
776 mod1.w = SubSat(Short4(0x1000), arg1->w);
John Bauman89401822014-05-06 15:04:28 -0400777
778 arg1 = &mod1;
779 }
780 break;
781 case TextureStage::MODIFIER_ALPHA:
782 {
John Bauman19bac1e2014-05-06 15:23:49 -0400783 mod1.x = arg1->w;
784 mod1.y = arg1->w;
785 mod1.z = arg1->w;
786 mod1.w = arg1->w;
John Bauman89401822014-05-06 15:04:28 -0400787
788 arg1 = &mod1;
789 }
790 break;
791 case TextureStage::MODIFIER_INVALPHA:
792 {
John Bauman19bac1e2014-05-06 15:23:49 -0400793 mod1.x = SubSat(Short4(0x1000), arg1->w);
794 mod1.y = SubSat(Short4(0x1000), arg1->w);
795 mod1.z = SubSat(Short4(0x1000), arg1->w);
796 mod1.w = SubSat(Short4(0x1000), arg1->w);
John Bauman89401822014-05-06 15:04:28 -0400797
798 arg1 = &mod1;
799 }
800 break;
801 default:
802 ASSERT(false);
803 }
804
805 switch(textureStage.secondModifier)
806 {
807 case TextureStage::MODIFIER_COLOR:
808 break;
809 case TextureStage::MODIFIER_INVCOLOR:
810 {
John Bauman19bac1e2014-05-06 15:23:49 -0400811 mod2.x = SubSat(Short4(0x1000), arg2->x);
812 mod2.y = SubSat(Short4(0x1000), arg2->y);
813 mod2.z = SubSat(Short4(0x1000), arg2->z);
814 mod2.w = SubSat(Short4(0x1000), arg2->w);
John Bauman89401822014-05-06 15:04:28 -0400815
816 arg2 = &mod2;
817 }
818 break;
819 case TextureStage::MODIFIER_ALPHA:
820 {
John Bauman19bac1e2014-05-06 15:23:49 -0400821 mod2.x = arg2->w;
822 mod2.y = arg2->w;
823 mod2.z = arg2->w;
824 mod2.w = arg2->w;
John Bauman89401822014-05-06 15:04:28 -0400825
826 arg2 = &mod2;
827 }
828 break;
829 case TextureStage::MODIFIER_INVALPHA:
830 {
John Bauman19bac1e2014-05-06 15:23:49 -0400831 mod2.x = SubSat(Short4(0x1000), arg2->w);
832 mod2.y = SubSat(Short4(0x1000), arg2->w);
833 mod2.z = SubSat(Short4(0x1000), arg2->w);
834 mod2.w = SubSat(Short4(0x1000), arg2->w);
John Bauman89401822014-05-06 15:04:28 -0400835
836 arg2 = &mod2;
837 }
838 break;
839 default:
840 ASSERT(false);
841 }
842
843 switch(textureStage.thirdModifier)
844 {
845 case TextureStage::MODIFIER_COLOR:
846 break;
847 case TextureStage::MODIFIER_INVCOLOR:
848 {
John Bauman19bac1e2014-05-06 15:23:49 -0400849 mod3.x = SubSat(Short4(0x1000), arg3->x);
850 mod3.y = SubSat(Short4(0x1000), arg3->y);
851 mod3.z = SubSat(Short4(0x1000), arg3->z);
852 mod3.w = SubSat(Short4(0x1000), arg3->w);
John Bauman89401822014-05-06 15:04:28 -0400853
854 arg3 = &mod3;
855 }
856 break;
857 case TextureStage::MODIFIER_ALPHA:
858 {
John Bauman19bac1e2014-05-06 15:23:49 -0400859 mod3.x = arg3->w;
860 mod3.y = arg3->w;
861 mod3.z = arg3->w;
862 mod3.w = arg3->w;
John Bauman89401822014-05-06 15:04:28 -0400863
864 arg3 = &mod3;
865 }
866 break;
867 case TextureStage::MODIFIER_INVALPHA:
868 {
John Bauman19bac1e2014-05-06 15:23:49 -0400869 mod3.x = SubSat(Short4(0x1000), arg3->w);
870 mod3.y = SubSat(Short4(0x1000), arg3->w);
871 mod3.z = SubSat(Short4(0x1000), arg3->w);
872 mod3.w = SubSat(Short4(0x1000), arg3->w);
John Bauman89401822014-05-06 15:04:28 -0400873
874 arg3 = &mod3;
875 }
876 break;
877 default:
878 ASSERT(false);
879 }
880
881 switch(textureStage.stageOperation)
882 {
883 case TextureStage::STAGE_DISABLE:
884 break;
885 case TextureStage::STAGE_SELECTARG1: // Arg1
886 {
John Bauman19bac1e2014-05-06 15:23:49 -0400887 res.x = arg1->x;
888 res.y = arg1->y;
889 res.z = arg1->z;
John Bauman89401822014-05-06 15:04:28 -0400890 }
891 break;
892 case TextureStage::STAGE_SELECTARG2: // Arg2
893 {
John Bauman19bac1e2014-05-06 15:23:49 -0400894 res.x = arg2->x;
895 res.y = arg2->y;
896 res.z = arg2->z;
John Bauman89401822014-05-06 15:04:28 -0400897 }
898 break;
899 case TextureStage::STAGE_SELECTARG3: // Arg3
900 {
John Bauman19bac1e2014-05-06 15:23:49 -0400901 res.x = arg3->x;
902 res.y = arg3->y;
903 res.z = arg3->z;
John Bauman89401822014-05-06 15:04:28 -0400904 }
905 break;
906 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
907 {
John Bauman19bac1e2014-05-06 15:23:49 -0400908 res.x = MulHigh(arg1->x, arg2->x) << 4;
909 res.y = MulHigh(arg1->y, arg2->y) << 4;
910 res.z = MulHigh(arg1->z, arg2->z) << 4;
John Bauman89401822014-05-06 15:04:28 -0400911 }
912 break;
913 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
914 {
John Bauman19bac1e2014-05-06 15:23:49 -0400915 res.x = MulHigh(arg1->x, arg2->x) << 5;
916 res.y = MulHigh(arg1->y, arg2->y) << 5;
917 res.z = MulHigh(arg1->z, arg2->z) << 5;
John Bauman89401822014-05-06 15:04:28 -0400918 }
919 break;
920 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
921 {
John Bauman19bac1e2014-05-06 15:23:49 -0400922 res.x = MulHigh(arg1->x, arg2->x) << 6;
923 res.y = MulHigh(arg1->y, arg2->y) << 6;
924 res.z = MulHigh(arg1->z, arg2->z) << 6;
John Bauman89401822014-05-06 15:04:28 -0400925 }
926 break;
927 case TextureStage::STAGE_ADD: // Arg1 + Arg2
928 {
John Bauman19bac1e2014-05-06 15:23:49 -0400929 res.x = AddSat(arg1->x, arg2->x);
930 res.y = AddSat(arg1->y, arg2->y);
931 res.z = AddSat(arg1->z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -0400932 }
933 break;
934 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
935 {
John Bauman19bac1e2014-05-06 15:23:49 -0400936 res.x = AddSat(arg1->x, arg2->x);
937 res.y = AddSat(arg1->y, arg2->y);
938 res.z = AddSat(arg1->z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -0400939
John Bauman19bac1e2014-05-06 15:23:49 -0400940 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
941 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
942 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
John Bauman89401822014-05-06 15:04:28 -0400943 }
944 break;
945 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
946 {
John Bauman19bac1e2014-05-06 15:23:49 -0400947 res.x = AddSat(arg1->x, arg2->x);
948 res.y = AddSat(arg1->y, arg2->y);
949 res.z = AddSat(arg1->z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -0400950
John Bauman19bac1e2014-05-06 15:23:49 -0400951 res.x = SubSat(res.x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
952 res.y = SubSat(res.y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
953 res.z = SubSat(res.z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
John Bauman89401822014-05-06 15:04:28 -0400954
John Bauman19bac1e2014-05-06 15:23:49 -0400955 res.x = AddSat(res.x, res.x);
956 res.y = AddSat(res.y, res.y);
957 res.z = AddSat(res.z, res.z);
John Bauman89401822014-05-06 15:04:28 -0400958 }
959 break;
960 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
961 {
John Bauman19bac1e2014-05-06 15:23:49 -0400962 res.x = SubSat(arg1->x, arg2->x);
963 res.y = SubSat(arg1->y, arg2->y);
964 res.z = SubSat(arg1->z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -0400965 }
966 break;
967 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
968 {
969 Short4 tmp;
970
John Bauman19bac1e2014-05-06 15:23:49 -0400971 tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(arg1->x, arg2->x); res.x = SubSat(res.x, tmp);
972 tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(arg1->y, arg2->y); res.y = SubSat(res.y, tmp);
973 tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(arg1->z, arg2->z); res.z = SubSat(res.z, tmp);
John Bauman89401822014-05-06 15:04:28 -0400974 }
975 break;
976 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
977 {
John Bauman19bac1e2014-05-06 15:23:49 -0400978 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg3->x);
979 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg3->y);
980 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg3->z);
John Bauman89401822014-05-06 15:04:28 -0400981 }
982 break;
983 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
984 {
John Bauman19bac1e2014-05-06 15:23:49 -0400985 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, arg3->x) << 4; res.x = AddSat(res.x, arg2->x);
986 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, arg3->y) << 4; res.y = AddSat(res.y, arg2->y);
987 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, arg3->z) << 4; res.z = AddSat(res.z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -0400988 }
989 break;
John Bauman19bac1e2014-05-06 15:23:49 -0400990 case TextureStage::STAGE_DOT3: // 2 * (Arg1.x - 0.5) * 2 * (Arg2.x - 0.5) + 2 * (Arg1.y - 0.5) * 2 * (Arg2.y - 0.5) + 2 * (Arg1.z - 0.5) * 2 * (Arg2.z - 0.5)
John Bauman89401822014-05-06 15:04:28 -0400991 {
992 Short4 tmp;
993
John Bauman19bac1e2014-05-06 15:23:49 -0400994 res.x = SubSat(arg1->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->x, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.x = MulHigh(res.x, tmp);
995 res.y = SubSat(arg1->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->y, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.y = MulHigh(res.y, tmp);
996 res.z = SubSat(arg1->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); tmp = SubSat(arg2->z, Short4(0x0800, 0x0800, 0x0800, 0x0800)); res.z = MulHigh(res.z, tmp);
John Bauman89401822014-05-06 15:04:28 -0400997
John Bauman19bac1e2014-05-06 15:23:49 -0400998 res.x = res.x << 6;
999 res.y = res.y << 6;
1000 res.z = res.z << 6;
John Bauman89401822014-05-06 15:04:28 -04001001
John Bauman19bac1e2014-05-06 15:23:49 -04001002 res.x = AddSat(res.x, res.y);
1003 res.x = AddSat(res.x, res.z);
John Bauman89401822014-05-06 15:04:28 -04001004
1005 // Clamp to [0, 1]
John Bauman19bac1e2014-05-06 15:23:49 -04001006 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1007 res.x = Min(res.x, Short4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04001008
John Bauman19bac1e2014-05-06 15:23:49 -04001009 res.y = res.x;
1010 res.z = res.x;
1011 res.w = res.x;
John Bauman89401822014-05-06 15:04:28 -04001012 }
1013 break;
1014 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1015 {
Nicolas Capenscbefe532014-10-16 00:16:01 -04001016 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.current.w) << 4; res.x = AddSat(res.x, arg2->x);
1017 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.current.w) << 4; res.y = AddSat(res.y, arg2->y);
1018 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.current.w) << 4; res.z = AddSat(res.z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -04001019 }
1020 break;
1021 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1022 {
John Bauman19bac1e2014-05-06 15:23:49 -04001023 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, r.diffuse.w) << 4; res.x = AddSat(res.x, arg2->x);
1024 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, r.diffuse.w) << 4; res.y = AddSat(res.y, arg2->y);
1025 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, r.diffuse.w) << 4; res.z = AddSat(res.z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -04001026 }
1027 break;
1028 case TextureStage::STAGE_BLENDFACTORALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1029 {
John Bauman19bac1e2014-05-06 15:23:49 -04001030 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.x = AddSat(res.x, arg2->x);
1031 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.y = AddSat(res.y, arg2->y);
1032 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.z = AddSat(res.z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -04001033 }
1034 break;
1035 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1036 {
John Bauman19bac1e2014-05-06 15:23:49 -04001037 res.x = SubSat(arg1->x, arg2->x); res.x = MulHigh(res.x, texture.w) << 4; res.x = AddSat(res.x, arg2->x);
1038 res.y = SubSat(arg1->y, arg2->y); res.y = MulHigh(res.y, texture.w) << 4; res.y = AddSat(res.y, arg2->y);
1039 res.z = SubSat(arg1->z, arg2->z); res.z = MulHigh(res.z, texture.w) << 4; res.z = AddSat(res.z, arg2->z);
John Bauman89401822014-05-06 15:04:28 -04001040 }
1041 break;
1042 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
1043 {
John Bauman19bac1e2014-05-06 15:23:49 -04001044 res.x = SubSat(Short4(0x1000), texture.w); res.x = MulHigh(res.x, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
1045 res.y = SubSat(Short4(0x1000), texture.w); res.y = MulHigh(res.y, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
1046 res.z = SubSat(Short4(0x1000), texture.w); res.z = MulHigh(res.z, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
John Bauman89401822014-05-06 15:04:28 -04001047 }
1048 break;
1049 case TextureStage::STAGE_PREMODULATE:
1050 {
John Bauman19bac1e2014-05-06 15:23:49 -04001051 res.x = arg1->x;
1052 res.y = arg1->y;
1053 res.z = arg1->z;
John Bauman89401822014-05-06 15:04:28 -04001054 }
1055 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001056 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR: // Arg1 + Arg1.w * Arg2
John Bauman89401822014-05-06 15:04:28 -04001057 {
John Bauman19bac1e2014-05-06 15:23:49 -04001058 res.x = MulHigh(arg1->w, arg2->x) << 4; res.x = AddSat(res.x, arg1->x);
1059 res.y = MulHigh(arg1->w, arg2->y) << 4; res.y = AddSat(res.y, arg1->y);
1060 res.z = MulHigh(arg1->w, arg2->z) << 4; res.z = AddSat(res.z, arg1->z);
John Bauman89401822014-05-06 15:04:28 -04001061 }
1062 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001063 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA: // Arg1 * Arg2 + Arg1.w
John Bauman89401822014-05-06 15:04:28 -04001064 {
John Bauman19bac1e2014-05-06 15:23:49 -04001065 res.x = MulHigh(arg1->x, arg2->x) << 4; res.x = AddSat(res.x, arg1->w);
1066 res.y = MulHigh(arg1->y, arg2->y) << 4; res.y = AddSat(res.y, arg1->w);
1067 res.z = MulHigh(arg1->z, arg2->z) << 4; res.z = AddSat(res.z, arg1->w);
John Bauman89401822014-05-06 15:04:28 -04001068 }
1069 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001070 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR: // (1 - Arg1.w) * Arg2 + Arg1
John Bauman89401822014-05-06 15:04:28 -04001071 {
1072 Short4 tmp;
1073
John Bauman19bac1e2014-05-06 15:23:49 -04001074 res.x = AddSat(arg1->x, arg2->x); tmp = MulHigh(arg1->w, arg2->x) << 4; res.x = SubSat(res.x, tmp);
1075 res.y = AddSat(arg1->y, arg2->y); tmp = MulHigh(arg1->w, arg2->y) << 4; res.y = SubSat(res.y, tmp);
1076 res.z = AddSat(arg1->z, arg2->z); tmp = MulHigh(arg1->w, arg2->z) << 4; res.z = SubSat(res.z, tmp);
John Bauman89401822014-05-06 15:04:28 -04001077 }
1078 break;
John Bauman19bac1e2014-05-06 15:23:49 -04001079 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA: // (1 - Arg1) * Arg2 + Arg1.w
John Bauman89401822014-05-06 15:04:28 -04001080 {
1081 Short4 tmp;
1082
John Bauman19bac1e2014-05-06 15:23:49 -04001083 res.x = AddSat(arg1->w, arg2->x); tmp = MulHigh(arg1->x, arg2->x) << 4; res.x = SubSat(res.x, tmp);
1084 res.y = AddSat(arg1->w, arg2->y); tmp = MulHigh(arg1->y, arg2->y) << 4; res.y = SubSat(res.y, tmp);
1085 res.z = AddSat(arg1->w, arg2->z); tmp = MulHigh(arg1->z, arg2->z) << 4; res.z = SubSat(res.z, tmp);
John Bauman89401822014-05-06 15:04:28 -04001086 }
1087 break;
1088 case TextureStage::STAGE_BUMPENVMAP:
1089 {
John Bauman19bac1e2014-05-06 15:23:49 -04001090 r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
1091 r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
John Bauman89401822014-05-06 15:04:28 -04001092
1093 Float4 du2;
1094 Float4 dv2;
1095
1096 du2 = r.du;
1097 dv2 = r.dv;
1098 r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
1099 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
1100 r.du += dv2;
1101 r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
1102 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
1103 r.dv += du2;
1104
1105 perturbate = true;
1106
John Bauman19bac1e2014-05-06 15:23:49 -04001107 res.x = r.current.x;
1108 res.y = r.current.y;
1109 res.z = r.current.z;
1110 res.w = r.current.w;
John Bauman89401822014-05-06 15:04:28 -04001111 }
1112 break;
1113 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1114 {
John Bauman19bac1e2014-05-06 15:23:49 -04001115 r.du = Float4(texture.x) * Float4(1.0f / 0x0FE0);
1116 r.dv = Float4(texture.y) * Float4(1.0f / 0x0FE0);
John Bauman89401822014-05-06 15:04:28 -04001117
1118 Float4 du2;
1119 Float4 dv2;
1120
1121 du2 = r.du;
1122 dv2 = r.dv;
1123
1124 r.du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
1125 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
1126 r.du += dv2;
1127 r.dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
1128 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
1129 r.dv += du2;
1130
1131 perturbate = true;
1132
John Bauman19bac1e2014-05-06 15:23:49 -04001133 r.L = texture.z;
John Bauman89401822014-05-06 15:04:28 -04001134 r.L = MulHigh(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
1135 r.L = r.L << 4;
1136 r.L = AddSat(r.L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
1137 r.L = Max(r.L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
John Bauman19bac1e2014-05-06 15:23:49 -04001138 r.L = Min(r.L, Short4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04001139
1140 luminance = true;
1141
John Bauman19bac1e2014-05-06 15:23:49 -04001142 res.x = r.current.x;
1143 res.y = r.current.y;
1144 res.z = r.current.z;
1145 res.w = r.current.w;
John Bauman89401822014-05-06 15:04:28 -04001146 }
1147 break;
1148 default:
1149 ASSERT(false);
1150 }
1151
1152 if(textureStage.stageOperation != TextureStage::STAGE_DOT3)
1153 {
1154 switch(textureStage.firstArgumentAlpha)
1155 {
1156 case TextureStage::SOURCE_TEXTURE: arg1 = &texture; break;
1157 case TextureStage::SOURCE_CONSTANT: arg1 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -04001158 case TextureStage::SOURCE_CURRENT: arg1 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -04001159 case TextureStage::SOURCE_DIFFUSE: arg1 = &r.diffuse; break;
1160 case TextureStage::SOURCE_SPECULAR: arg1 = &r.specular; break;
1161 case TextureStage::SOURCE_TEMP: arg1 = &temp; break;
1162 case TextureStage::SOURCE_TFACTOR: arg1 = &tfactor; break;
1163 default:
1164 ASSERT(false);
1165 }
1166
1167 switch(textureStage.secondArgumentAlpha)
1168 {
1169 case TextureStage::SOURCE_TEXTURE: arg2 = &texture; break;
1170 case TextureStage::SOURCE_CONSTANT: arg2 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -04001171 case TextureStage::SOURCE_CURRENT: arg2 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -04001172 case TextureStage::SOURCE_DIFFUSE: arg2 = &r.diffuse; break;
1173 case TextureStage::SOURCE_SPECULAR: arg2 = &r.specular; break;
1174 case TextureStage::SOURCE_TEMP: arg2 = &temp; break;
1175 case TextureStage::SOURCE_TFACTOR: arg2 = &tfactor; break;
1176 default:
1177 ASSERT(false);
1178 }
1179
1180 switch(textureStage.thirdArgumentAlpha)
1181 {
1182 case TextureStage::SOURCE_TEXTURE: arg3 = &texture; break;
1183 case TextureStage::SOURCE_CONSTANT: arg3 = &constant; break;
Nicolas Capenscbefe532014-10-16 00:16:01 -04001184 case TextureStage::SOURCE_CURRENT: arg3 = &r.current; break;
John Bauman89401822014-05-06 15:04:28 -04001185 case TextureStage::SOURCE_DIFFUSE: arg3 = &r.diffuse; break;
1186 case TextureStage::SOURCE_SPECULAR: arg3 = &r.specular; break;
1187 case TextureStage::SOURCE_TEMP: arg3 = &temp; break;
1188 case TextureStage::SOURCE_TFACTOR: arg3 = &tfactor; break;
1189 default:
1190 ASSERT(false);
1191 }
1192
1193 switch(textureStage.firstModifierAlpha) // FIXME: Check if actually used
1194 {
1195 case TextureStage::MODIFIER_COLOR:
1196 break;
1197 case TextureStage::MODIFIER_INVCOLOR:
1198 {
John Bauman19bac1e2014-05-06 15:23:49 -04001199 mod1.w = SubSat(Short4(0x1000), arg1->w);
John Bauman89401822014-05-06 15:04:28 -04001200
1201 arg1 = &mod1;
1202 }
1203 break;
1204 case TextureStage::MODIFIER_ALPHA:
1205 {
1206 // Redundant
1207 }
1208 break;
1209 case TextureStage::MODIFIER_INVALPHA:
1210 {
John Bauman19bac1e2014-05-06 15:23:49 -04001211 mod1.w = SubSat(Short4(0x1000), arg1->w);
John Bauman89401822014-05-06 15:04:28 -04001212
1213 arg1 = &mod1;
1214 }
1215 break;
1216 default:
1217 ASSERT(false);
1218 }
1219
1220 switch(textureStage.secondModifierAlpha) // FIXME: Check if actually used
1221 {
1222 case TextureStage::MODIFIER_COLOR:
1223 break;
1224 case TextureStage::MODIFIER_INVCOLOR:
1225 {
John Bauman19bac1e2014-05-06 15:23:49 -04001226 mod2.w = SubSat(Short4(0x1000), arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001227
1228 arg2 = &mod2;
1229 }
1230 break;
1231 case TextureStage::MODIFIER_ALPHA:
1232 {
1233 // Redundant
1234 }
1235 break;
1236 case TextureStage::MODIFIER_INVALPHA:
1237 {
John Bauman19bac1e2014-05-06 15:23:49 -04001238 mod2.w = SubSat(Short4(0x1000), arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001239
1240 arg2 = &mod2;
1241 }
1242 break;
1243 default:
1244 ASSERT(false);
1245 }
1246
1247 switch(textureStage.thirdModifierAlpha) // FIXME: Check if actually used
1248 {
1249 case TextureStage::MODIFIER_COLOR:
1250 break;
1251 case TextureStage::MODIFIER_INVCOLOR:
1252 {
John Bauman19bac1e2014-05-06 15:23:49 -04001253 mod3.w = SubSat(Short4(0x1000), arg3->w);
John Bauman89401822014-05-06 15:04:28 -04001254
1255 arg3 = &mod3;
1256 }
1257 break;
1258 case TextureStage::MODIFIER_ALPHA:
1259 {
1260 // Redundant
1261 }
1262 break;
1263 case TextureStage::MODIFIER_INVALPHA:
1264 {
John Bauman19bac1e2014-05-06 15:23:49 -04001265 mod3.w = SubSat(Short4(0x1000), arg3->w);
John Bauman89401822014-05-06 15:04:28 -04001266
1267 arg3 = &mod3;
1268 }
1269 break;
1270 default:
1271 ASSERT(false);
1272 }
1273
1274 switch(textureStage.stageOperationAlpha)
1275 {
1276 case TextureStage::STAGE_DISABLE:
1277 break;
1278 case TextureStage::STAGE_SELECTARG1: // Arg1
1279 {
John Bauman19bac1e2014-05-06 15:23:49 -04001280 res.w = arg1->w;
John Bauman89401822014-05-06 15:04:28 -04001281 }
1282 break;
1283 case TextureStage::STAGE_SELECTARG2: // Arg2
1284 {
John Bauman19bac1e2014-05-06 15:23:49 -04001285 res.w = arg2->w;
John Bauman89401822014-05-06 15:04:28 -04001286 }
1287 break;
1288 case TextureStage::STAGE_SELECTARG3: // Arg3
1289 {
John Bauman19bac1e2014-05-06 15:23:49 -04001290 res.w = arg3->w;
John Bauman89401822014-05-06 15:04:28 -04001291 }
1292 break;
1293 case TextureStage::STAGE_MODULATE: // Arg1 * Arg2
1294 {
John Bauman19bac1e2014-05-06 15:23:49 -04001295 res.w = MulHigh(arg1->w, arg2->w) << 4;
John Bauman89401822014-05-06 15:04:28 -04001296 }
1297 break;
1298 case TextureStage::STAGE_MODULATE2X: // Arg1 * Arg2 * 2
1299 {
John Bauman19bac1e2014-05-06 15:23:49 -04001300 res.w = MulHigh(arg1->w, arg2->w) << 5;
John Bauman89401822014-05-06 15:04:28 -04001301 }
1302 break;
1303 case TextureStage::STAGE_MODULATE4X: // Arg1 * Arg2 * 4
1304 {
John Bauman19bac1e2014-05-06 15:23:49 -04001305 res.w = MulHigh(arg1->w, arg2->w) << 6;
John Bauman89401822014-05-06 15:04:28 -04001306 }
1307 break;
1308 case TextureStage::STAGE_ADD: // Arg1 + Arg2
1309 {
John Bauman19bac1e2014-05-06 15:23:49 -04001310 res.w = AddSat(arg1->w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001311 }
1312 break;
1313 case TextureStage::STAGE_ADDSIGNED: // Arg1 + Arg2 - 0.5
1314 {
John Bauman19bac1e2014-05-06 15:23:49 -04001315 res.w = AddSat(arg1->w, arg2->w);
1316 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
John Bauman89401822014-05-06 15:04:28 -04001317 }
1318 break;
1319 case TextureStage::STAGE_ADDSIGNED2X: // (Arg1 + Arg2 - 0.5) << 1
1320 {
John Bauman19bac1e2014-05-06 15:23:49 -04001321 res.w = AddSat(arg1->w, arg2->w);
1322 res.w = SubSat(res.w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
1323 res.w = AddSat(res.w, res.w);
John Bauman89401822014-05-06 15:04:28 -04001324 }
1325 break;
1326 case TextureStage::STAGE_SUBTRACT: // Arg1 - Arg2
1327 {
John Bauman19bac1e2014-05-06 15:23:49 -04001328 res.w = SubSat(arg1->w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001329 }
1330 break;
1331 case TextureStage::STAGE_ADDSMOOTH: // Arg1 + Arg2 - Arg1 * Arg2
1332 {
1333 Short4 tmp;
1334
John Bauman19bac1e2014-05-06 15:23:49 -04001335 tmp = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(arg1->w, arg2->w); res.w = SubSat(res.w, tmp);
John Bauman89401822014-05-06 15:04:28 -04001336 }
1337 break;
1338 case TextureStage::STAGE_MULTIPLYADD: // Arg3 + Arg1 * Arg2
1339 {
John Bauman19bac1e2014-05-06 15:23:49 -04001340 res.w = MulHigh(arg1->w, arg2->w) << 4; res.w = AddSat(res.w, arg3->w);
John Bauman89401822014-05-06 15:04:28 -04001341 }
1342 break;
1343 case TextureStage::STAGE_LERP: // Arg3 * (Arg1 - Arg2) + Arg2
1344 {
John Bauman19bac1e2014-05-06 15:23:49 -04001345 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, arg3->w) << 4; res.w = AddSat(res.w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001346 }
1347 break;
1348 case TextureStage::STAGE_DOT3:
1349 break; // Already computed in color channel
1350 case TextureStage::STAGE_BLENDCURRENTALPHA: // Alpha * (Arg1 - Arg2) + Arg2
1351 {
Nicolas Capenscbefe532014-10-16 00:16:01 -04001352 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.current.w) << 4; res.w = AddSat(res.w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001353 }
1354 break;
1355 case TextureStage::STAGE_BLENDDIFFUSEALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
1356 {
John Bauman19bac1e2014-05-06 15:23:49 -04001357 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, r.diffuse.w) << 4; res.w = AddSat(res.w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001358 }
1359 break;
1360 case TextureStage::STAGE_BLENDFACTORALPHA:
1361 {
John Bauman19bac1e2014-05-06 15:23:49 -04001362 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.textureFactor4[3]))) << 4; res.w = AddSat(res.w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001363 }
1364 break;
1365 case TextureStage::STAGE_BLENDTEXTUREALPHA: // Arg1 * (Alpha) + Arg2 * (1 - Alpha)
1366 {
John Bauman19bac1e2014-05-06 15:23:49 -04001367 res.w = SubSat(arg1->w, arg2->w); res.w = MulHigh(res.w, texture.w) << 4; res.w = AddSat(res.w, arg2->w);
John Bauman89401822014-05-06 15:04:28 -04001368 }
1369 break;
1370 case TextureStage::STAGE_BLENDTEXTUREALPHAPM: // Arg1 + Arg2 * (1 - Alpha)
1371 {
John Bauman19bac1e2014-05-06 15:23:49 -04001372 res.w = SubSat(Short4(0x1000), texture.w); res.w = MulHigh(res.w, arg2->w) << 4; res.w = AddSat(res.w, arg1->w);
John Bauman89401822014-05-06 15:04:28 -04001373 }
1374 break;
1375 case TextureStage::STAGE_PREMODULATE:
1376 {
John Bauman19bac1e2014-05-06 15:23:49 -04001377 res.w = arg1->w;
John Bauman89401822014-05-06 15:04:28 -04001378 }
1379 break;
1380 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1381 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1382 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1383 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1384 case TextureStage::STAGE_BUMPENVMAP:
1385 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1386 break; // Invalid alpha operations
1387 default:
1388 ASSERT(false);
1389 }
1390 }
1391
1392 // Clamp result to [0, 1]
1393
1394 switch(textureStage.stageOperation)
1395 {
1396 case TextureStage::STAGE_DISABLE:
1397 case TextureStage::STAGE_SELECTARG1:
1398 case TextureStage::STAGE_SELECTARG2:
1399 case TextureStage::STAGE_SELECTARG3:
1400 case TextureStage::STAGE_MODULATE:
1401 case TextureStage::STAGE_MODULATE2X:
1402 case TextureStage::STAGE_MODULATE4X:
1403 case TextureStage::STAGE_ADD:
1404 case TextureStage::STAGE_MULTIPLYADD:
1405 case TextureStage::STAGE_LERP:
1406 case TextureStage::STAGE_BLENDCURRENTALPHA:
1407 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1408 case TextureStage::STAGE_BLENDFACTORALPHA:
1409 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1410 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1411 case TextureStage::STAGE_DOT3: // Already clamped
1412 case TextureStage::STAGE_PREMODULATE:
1413 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1414 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1415 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1416 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1417 case TextureStage::STAGE_BUMPENVMAP:
1418 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1419 if(state.textureStage[stage].cantUnderflow)
1420 {
1421 break; // Can't go below zero
1422 }
1423 case TextureStage::STAGE_ADDSIGNED:
1424 case TextureStage::STAGE_ADDSIGNED2X:
1425 case TextureStage::STAGE_SUBTRACT:
1426 case TextureStage::STAGE_ADDSMOOTH:
John Bauman19bac1e2014-05-06 15:23:49 -04001427 res.x = Max(res.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1428 res.y = Max(res.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));
1429 res.z = Max(res.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));
John Bauman89401822014-05-06 15:04:28 -04001430 break;
1431 default:
1432 ASSERT(false);
1433 }
1434
1435 switch(textureStage.stageOperationAlpha)
1436 {
1437 case TextureStage::STAGE_DISABLE:
1438 case TextureStage::STAGE_SELECTARG1:
1439 case TextureStage::STAGE_SELECTARG2:
1440 case TextureStage::STAGE_SELECTARG3:
1441 case TextureStage::STAGE_MODULATE:
1442 case TextureStage::STAGE_MODULATE2X:
1443 case TextureStage::STAGE_MODULATE4X:
1444 case TextureStage::STAGE_ADD:
1445 case TextureStage::STAGE_MULTIPLYADD:
1446 case TextureStage::STAGE_LERP:
1447 case TextureStage::STAGE_BLENDCURRENTALPHA:
1448 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1449 case TextureStage::STAGE_BLENDFACTORALPHA:
1450 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1451 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1452 case TextureStage::STAGE_DOT3: // Already clamped
1453 case TextureStage::STAGE_PREMODULATE:
1454 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1455 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
1456 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1457 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1458 case TextureStage::STAGE_BUMPENVMAP:
1459 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1460 if(state.textureStage[stage].cantUnderflow)
1461 {
1462 break; // Can't go below zero
1463 }
1464 case TextureStage::STAGE_ADDSIGNED:
1465 case TextureStage::STAGE_ADDSIGNED2X:
1466 case TextureStage::STAGE_SUBTRACT:
1467 case TextureStage::STAGE_ADDSMOOTH:
John Bauman19bac1e2014-05-06 15:23:49 -04001468 res.w = Max(res.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));
John Bauman89401822014-05-06 15:04:28 -04001469 break;
1470 default:
1471 ASSERT(false);
1472 }
1473
1474 switch(textureStage.stageOperation)
1475 {
1476 case TextureStage::STAGE_DISABLE:
1477 case TextureStage::STAGE_SELECTARG1:
1478 case TextureStage::STAGE_SELECTARG2:
1479 case TextureStage::STAGE_SELECTARG3:
1480 case TextureStage::STAGE_MODULATE:
1481 case TextureStage::STAGE_SUBTRACT:
1482 case TextureStage::STAGE_ADDSMOOTH:
1483 case TextureStage::STAGE_LERP:
1484 case TextureStage::STAGE_BLENDCURRENTALPHA:
1485 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1486 case TextureStage::STAGE_BLENDFACTORALPHA:
1487 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1488 case TextureStage::STAGE_DOT3: // Already clamped
1489 case TextureStage::STAGE_PREMODULATE:
1490 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1491 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1492 case TextureStage::STAGE_BUMPENVMAP:
1493 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1494 break; // Can't go above one
1495 case TextureStage::STAGE_MODULATE2X:
1496 case TextureStage::STAGE_MODULATE4X:
1497 case TextureStage::STAGE_ADD:
1498 case TextureStage::STAGE_ADDSIGNED:
1499 case TextureStage::STAGE_ADDSIGNED2X:
1500 case TextureStage::STAGE_MULTIPLYADD:
1501 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1502 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1503 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001504 res.x = Min(res.x, Short4(0x1000));
1505 res.y = Min(res.y, Short4(0x1000));
1506 res.z = Min(res.z, Short4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04001507 break;
1508 default:
1509 ASSERT(false);
1510 }
1511
1512 switch(textureStage.stageOperationAlpha)
1513 {
1514 case TextureStage::STAGE_DISABLE:
1515 case TextureStage::STAGE_SELECTARG1:
1516 case TextureStage::STAGE_SELECTARG2:
1517 case TextureStage::STAGE_SELECTARG3:
1518 case TextureStage::STAGE_MODULATE:
1519 case TextureStage::STAGE_SUBTRACT:
1520 case TextureStage::STAGE_ADDSMOOTH:
1521 case TextureStage::STAGE_LERP:
1522 case TextureStage::STAGE_BLENDCURRENTALPHA:
1523 case TextureStage::STAGE_BLENDDIFFUSEALPHA:
1524 case TextureStage::STAGE_BLENDFACTORALPHA:
1525 case TextureStage::STAGE_BLENDTEXTUREALPHA:
1526 case TextureStage::STAGE_DOT3: // Already clamped
1527 case TextureStage::STAGE_PREMODULATE:
1528 case TextureStage::STAGE_MODULATEINVALPHA_ADDCOLOR:
1529 case TextureStage::STAGE_MODULATEINVCOLOR_ADDALPHA:
1530 case TextureStage::STAGE_BUMPENVMAP:
1531 case TextureStage::STAGE_BUMPENVMAPLUMINANCE:
1532 break; // Can't go above one
1533 case TextureStage::STAGE_MODULATE2X:
1534 case TextureStage::STAGE_MODULATE4X:
1535 case TextureStage::STAGE_ADD:
1536 case TextureStage::STAGE_ADDSIGNED:
1537 case TextureStage::STAGE_ADDSIGNED2X:
1538 case TextureStage::STAGE_MULTIPLYADD:
1539 case TextureStage::STAGE_BLENDTEXTUREALPHAPM:
1540 case TextureStage::STAGE_MODULATEALPHA_ADDCOLOR:
1541 case TextureStage::STAGE_MODULATECOLOR_ADDALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001542 res.w = Min(res.w, Short4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04001543 break;
1544 default:
1545 ASSERT(false);
1546 }
1547
1548 switch(textureStage.destinationArgument)
1549 {
1550 case TextureStage::DESTINATION_CURRENT:
Nicolas Capenscbefe532014-10-16 00:16:01 -04001551 r.current.x = res.x;
1552 r.current.y = res.y;
1553 r.current.z = res.z;
1554 r.current.w = res.w;
John Bauman89401822014-05-06 15:04:28 -04001555 break;
1556 case TextureStage::DESTINATION_TEMP:
John Bauman19bac1e2014-05-06 15:23:49 -04001557 temp.x = res.x;
1558 temp.y = res.y;
1559 temp.z = res.z;
1560 temp.w = res.w;
John Bauman89401822014-05-06 15:04:28 -04001561 break;
1562 default:
1563 ASSERT(false);
1564 }
1565 }
1566
1567 void PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha)
1568 {
1569 Short4 cmp;
1570 Short4 equal;
1571
1572 switch(state.alphaCompareMode)
1573 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001574 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -04001575 aMask = 0xF;
1576 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001577 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -04001578 aMask = 0x0;
1579 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001580 case ALPHA_EQUAL:
John Bauman89401822014-05-06 15:04:28 -04001581 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1582 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1583 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001584 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -04001585 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
1586 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1587 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001588 case ALPHA_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -04001589 cmp = CmpGT(*Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)), alpha);
1590 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1591 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001592 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
John Bauman89401822014-05-06 15:04:28 -04001593 equal = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1594 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1595 cmp |= equal;
1596 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1597 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001598 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
John Bauman89401822014-05-06 15:04:28 -04001599 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
1600 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1601 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001602 case ALPHA_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -04001603 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
1604 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
1605 break;
1606 default:
1607 ASSERT(false);
1608 }
1609 }
1610
1611 void PixelRoutine::alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha)
1612 {
1613 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c0)));
1614 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c1)));
1615 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c2)));
1616 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c3)));
1617
1618 Int aMask0 = SignMask(coverage0);
1619 Int aMask1 = SignMask(coverage1);
1620 Int aMask2 = SignMask(coverage2);
1621 Int aMask3 = SignMask(coverage3);
1622
1623 cMask[0] &= aMask0;
1624 cMask[1] &= aMask1;
1625 cMask[2] &= aMask2;
1626 cMask[3] &= aMask3;
1627 }
1628
Alexis Hetu96517182015-04-15 10:30:23 -04001629 Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4s &current)
John Bauman89401822014-05-06 15:04:28 -04001630 {
1631 if(!state.alphaTestActive())
1632 {
1633 return true;
1634 }
1635
1636 Int aMask;
1637
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001638 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
John Bauman89401822014-05-06 15:04:28 -04001639 {
John Bauman19bac1e2014-05-06 15:23:49 -04001640 alphaTest(r, aMask, current.w);
John Bauman89401822014-05-06 15:04:28 -04001641
1642 for(unsigned int q = 0; q < state.multiSample; q++)
1643 {
1644 cMask[q] &= aMask;
1645 }
1646 }
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001647 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
John Bauman89401822014-05-06 15:04:28 -04001648 {
John Bauman19bac1e2014-05-06 15:23:49 -04001649 Float4 alpha = Float4(current.w) * Float4(1.0f / 0x1000);
John Bauman89401822014-05-06 15:04:28 -04001650
1651 alphaToCoverage(r, cMask, alpha);
1652 }
1653 else ASSERT(false);
1654
1655 Int pass = cMask[0];
1656
1657 for(unsigned int q = 1; q < state.multiSample; q++)
1658 {
1659 pass = pass | cMask[q];
1660 }
1661
1662 return pass != 0x0;
1663 }
1664
John Bauman19bac1e2014-05-06 15:23:49 -04001665 Bool PixelRoutine::alphaTest(Registers &r, Int cMask[4], Vector4f &c0)
John Bauman89401822014-05-06 15:04:28 -04001666 {
1667 if(!state.alphaTestActive())
1668 {
1669 return true;
1670 }
1671
1672 Int aMask;
1673
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001674 if(state.transparencyAntialiasing == TRANSPARENCY_NONE)
John Bauman89401822014-05-06 15:04:28 -04001675 {
John Bauman19bac1e2014-05-06 15:23:49 -04001676 Short4 alpha = RoundShort4(c0.w * Float4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04001677
1678 alphaTest(r, aMask, alpha);
1679
1680 for(unsigned int q = 0; q < state.multiSample; q++)
1681 {
1682 cMask[q] &= aMask;
1683 }
1684 }
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001685 else if(state.transparencyAntialiasing == TRANSPARENCY_ALPHA_TO_COVERAGE)
John Bauman89401822014-05-06 15:04:28 -04001686 {
John Bauman19bac1e2014-05-06 15:23:49 -04001687 alphaToCoverage(r, cMask, c0.w);
John Bauman89401822014-05-06 15:04:28 -04001688 }
1689 else ASSERT(false);
1690
1691 Int pass = cMask[0];
1692
1693 for(unsigned int q = 1; q < state.multiSample; q++)
1694 {
1695 pass = pass | cMask[q];
1696 }
1697
1698 return pass != 0x0;
1699 }
1700
Alexis Hetu96517182015-04-15 10:30:23 -04001701 void PixelRoutine::fogBlend(Registers &r, Vector4s &current, Float4 &f, Float4 &z, Float4 &rhw)
John Bauman89401822014-05-06 15:04:28 -04001702 {
1703 if(!state.fogActive)
1704 {
1705 return;
1706 }
1707
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001708 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -04001709 {
1710 pixelFog(r, f, z, rhw);
1711 }
1712
1713 UShort4 fog = convertFixed16(f, true);
1714
John Bauman19bac1e2014-05-06 15:23:49 -04001715 current.x = As<Short4>(MulHigh(As<UShort4>(current.x), fog));
1716 current.y = As<Short4>(MulHigh(As<UShort4>(current.y), fog));
1717 current.z = As<Short4>(MulHigh(As<UShort4>(current.z), fog));
John Bauman89401822014-05-06 15:04:28 -04001718
John Bauman19bac1e2014-05-06 15:23:49 -04001719 UShort4 invFog = UShort4(0xFFFFu) - fog;
John Bauman89401822014-05-06 15:04:28 -04001720
John Bauman19bac1e2014-05-06 15:23:49 -04001721 current.x += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[0]))));
1722 current.y += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[1]))));
1723 current.z += As<Short4>(MulHigh(invFog, *Pointer<UShort4>(r.data + OFFSET(DrawData,fog.color4[2]))));
John Bauman89401822014-05-06 15:04:28 -04001724 }
1725
John Bauman19bac1e2014-05-06 15:23:49 -04001726 void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw)
John Bauman89401822014-05-06 15:04:28 -04001727 {
1728 if(!state.fogActive)
1729 {
1730 return;
1731 }
1732
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001733 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -04001734 {
1735 pixelFog(r, fog, z, rhw);
1736
John Bauman19bac1e2014-05-06 15:23:49 -04001737 fog = Min(fog, Float4(1.0f));
1738 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -04001739 }
1740
John Bauman19bac1e2014-05-06 15:23:49 -04001741 c0.x -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
1742 c0.y -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
1743 c0.z -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -04001744
John Bauman19bac1e2014-05-06 15:23:49 -04001745 c0.x *= fog;
1746 c0.y *= fog;
1747 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -04001748
John Bauman19bac1e2014-05-06 15:23:49 -04001749 c0.x += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
1750 c0.y += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
1751 c0.z += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -04001752 }
1753
1754 void PixelRoutine::pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw)
1755 {
1756 Float4 &zw = visibility;
1757
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001758 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -04001759 {
1760 if(state.wBasedFog)
1761 {
1762 zw = rhw;
1763 }
1764 else
1765 {
1766 if(complementaryDepthBuffer)
1767 {
John Bauman19bac1e2014-05-06 15:23:49 -04001768 zw = Float4(1.0f) - z;
John Bauman89401822014-05-06 15:04:28 -04001769 }
1770 else
1771 {
1772 zw = z;
1773 }
1774 }
1775 }
1776
1777 switch(state.pixelFogMode)
1778 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001779 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -04001780 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001781 case FOG_LINEAR:
John Bauman89401822014-05-06 15:04:28 -04001782 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale));
1783 zw += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
1784 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001785 case FOG_EXP:
John Bauman89401822014-05-06 15:04:28 -04001786 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -04001787 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -04001788 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001789 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -04001790 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE2));
1791 zw *= zw;
John Bauman19bac1e2014-05-06 15:23:49 -04001792 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -04001793 zw = Rcp_pp(zw);
1794 break;
1795 default:
1796 ASSERT(false);
1797 }
1798 }
1799
Alexis Hetu96517182015-04-15 10:30:23 -04001800 void PixelRoutine::specularPixel(Vector4s &current, Vector4s &specular)
John Bauman89401822014-05-06 15:04:28 -04001801 {
1802 if(!state.specularAdd)
1803 {
1804 return;
1805 }
1806
John Bauman19bac1e2014-05-06 15:23:49 -04001807 current.x = AddSat(current.x, specular.x);
1808 current.y = AddSat(current.y, specular.y);
1809 current.z = AddSat(current.z, specular.z);
John Bauman89401822014-05-06 15:04:28 -04001810 }
1811
1812 void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
1813 {
1814 if(!state.depthWriteEnable)
1815 {
1816 return;
1817 }
1818
1819 Float4 Z = z;
1820
John Bauman19bac1e2014-05-06 15:23:49 -04001821 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -04001822 {
1823 if(complementaryDepthBuffer)
1824 {
John Bauman19bac1e2014-05-06 15:23:49 -04001825 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -04001826 }
1827 else
1828 {
1829 Z = r.oDepth;
1830 }
1831 }
1832
1833 Pointer<Byte> buffer;
1834 Int pitch;
1835
1836 if(!state.quadLayoutDepthBuffer)
1837 {
1838 buffer = zBuffer + 4 * x;
1839 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
1840 }
1841 else
1842 {
1843 buffer = zBuffer + 8 * x;
1844 }
1845
1846 if(q > 0)
1847 {
1848 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
1849 }
1850
1851 Float4 zValue;
1852
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001853 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -04001854 {
1855 if(!state.quadLayoutDepthBuffer)
1856 {
1857 // FIXME: Properly optimizes?
1858 zValue.xy = *Pointer<Float4>(buffer);
1859 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
1860 }
1861 else
1862 {
1863 zValue = *Pointer<Float4>(buffer, 16);
1864 }
1865 }
1866
1867 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
1868 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
1869 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
1870
1871 if(!state.quadLayoutDepthBuffer)
1872 {
1873 // FIXME: Properly optimizes?
1874 *Pointer<Float2>(buffer) = Float2(Z.xy);
1875 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
1876 }
1877 else
1878 {
1879 *Pointer<Float4>(buffer, 16) = Z;
1880 }
1881 }
1882
1883 void PixelRoutine::writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
1884 {
1885 if(!state.stencilActive)
1886 {
1887 return;
1888 }
1889
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001890 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -04001891 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001892 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -04001893 {
1894 return;
1895 }
1896 }
1897
1898 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
1899 {
1900 return;
1901 }
1902
1903 Pointer<Byte> buffer = sBuffer + 2 * x;
1904
1905 if(q > 0)
1906 {
1907 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
1908 }
1909
1910 Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
1911
1912 Byte8 newValue;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001913 stencilOperation(r, newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -04001914
1915 if(!state.noStencilWriteMask)
1916 {
1917 Byte8 maskedValue = bufferValue;
1918 newValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].writeMaskQ));
1919 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
1920 newValue |= maskedValue;
1921 }
1922
1923 if(state.twoSidedStencil)
1924 {
1925 Byte8 newValueCCW;
1926
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001927 stencilOperation(r, newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -04001928
1929 if(!state.noStencilWriteMaskCCW)
1930 {
1931 Byte8 maskedValue = bufferValue;
1932 newValueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].writeMaskQ));
1933 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
1934 newValueCCW |= maskedValue;
1935 }
1936
1937 newValue &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
1938 newValueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
1939 newValue |= newValueCCW;
1940 }
1941
1942 newValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
1943 bufferValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
1944 newValue |= bufferValue;
1945
1946 *Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
1947 }
1948
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001949 void PixelRoutine::stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -04001950 {
1951 Byte8 &pass = newValue;
1952 Byte8 fail;
1953 Byte8 zFail;
1954
1955 stencilOperation(r, pass, bufferValue, stencilPassOperation, CCW);
1956
1957 if(stencilZFailOperation != stencilPassOperation)
1958 {
1959 stencilOperation(r, zFail, bufferValue, stencilZFailOperation, CCW);
1960 }
1961
1962 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
1963 {
1964 stencilOperation(r, fail, bufferValue, stencilFailOperation, CCW);
1965 }
1966
1967 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
1968 {
1969 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
1970 {
1971 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
1972 zFail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
1973 pass |= zFail;
1974 }
1975
1976 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
1977 fail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
1978 pass |= fail;
1979 }
1980 }
1981
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001982 void PixelRoutine::stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -04001983 {
1984 switch(operation)
1985 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001986 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -04001987 output = bufferValue;
1988 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001989 case OPERATION_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001990 output = Byte8(0x0000000000000000);
1991 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001992 case OPERATION_REPLACE:
John Bauman89401822014-05-06 15:04:28 -04001993 output = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceQ));
1994 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001995 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -04001996 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
1997 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001998 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -04001999 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
2000 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002001 case OPERATION_INVERT:
John Bauman89401822014-05-06 15:04:28 -04002002 output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
2003 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002004 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -04002005 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
2006 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002007 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -04002008 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
2009 break;
2010 default:
2011 ASSERT(false);
2012 }
2013 }
2014
Alexis Hetu96517182015-04-15 10:30:23 -04002015 void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int coordinates, int stage, bool project)
John Bauman89401822014-05-06 15:04:28 -04002016 {
John Bauman19bac1e2014-05-06 15:23:49 -04002017 Float4 u = r.vf[2 + coordinates].x;
2018 Float4 v = r.vf[2 + coordinates].y;
2019 Float4 w = r.vf[2 + coordinates].z;
2020 Float4 q = r.vf[2 + coordinates].w;
John Bauman89401822014-05-06 15:04:28 -04002021
2022 if(perturbate)
2023 {
2024 u += r.du;
2025 v += r.dv;
2026
2027 perturbate = false;
2028 }
2029
2030 sampleTexture(r, c, stage, u, v, w, q, project);
2031 }
2032
Alexis Hetu96517182015-04-15 10:30:23 -04002033 void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, bool project, bool bias, bool fixed12)
John Bauman89401822014-05-06 15:04:28 -04002034 {
John Bauman19bac1e2014-05-06 15:23:49 -04002035 Vector4f dsx;
2036 Vector4f dsy;
John Bauman89401822014-05-06 15:04:28 -04002037
2038 sampleTexture(r, c, stage, u, v, w, q, dsx, dsy, project, bias, fixed12, false);
2039 }
2040
Alexis Hetu96517182015-04-15 10:30:23 -04002041 void PixelRoutine::sampleTexture(Registers &r, Vector4s &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool fixed12, bool gradients, bool lodProvided)
John Bauman89401822014-05-06 15:04:28 -04002042 {
2043 #if PERF_PROFILE
2044 Long texTime = Ticks();
2045 #endif
2046
2047 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
2048
2049 if(!project)
2050 {
2051 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, fixed12, gradients, lodProvided);
2052 }
2053 else
2054 {
2055 Float4 rq = reciprocal(q);
2056
2057 Float4 u_q = u * rq;
2058 Float4 v_q = v * rq;
2059 Float4 w_q = w * rq;
2060
2061 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, fixed12, gradients, lodProvided);
2062 }
2063
2064 #if PERF_PROFILE
2065 r.cycles[PERF_TEX] += Ticks() - texTime;
2066 #endif
2067 }
2068
John Bauman19bac1e2014-05-06 15:23:49 -04002069 void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, const Src &sampler, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
2070 {
2071 if(sampler.type == Shader::PARAMETER_SAMPLER && sampler.rel.type == Shader::PARAMETER_VOID)
2072 {
2073 sampleTexture(r, c, sampler.index, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
2074 }
2075 else
2076 {
Alexis Hetu96517182015-04-15 10:30:23 -04002077 Int index = As<Int>(Float(fetchRegisterF(r, sampler).x.x));
John Bauman19bac1e2014-05-06 15:23:49 -04002078
Alexis Hetu0b65c5e2015-03-31 11:48:57 -04002079 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman19bac1e2014-05-06 15:23:49 -04002080 {
2081 if(shader->usesSampler(i))
2082 {
2083 If(index == i)
2084 {
2085 sampleTexture(r, c, i, u, v, w, q, dsx, dsy, project, bias, gradients, lodProvided);
2086 // FIXME: When the sampler states are the same, we could use one sampler and just index the texture
2087 }
2088 }
2089 }
2090 }
2091 }
2092
2093 void PixelRoutine::sampleTexture(Registers &r, Vector4f &c, int stage, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, bool project, bool bias, bool gradients, bool lodProvided)
John Bauman89401822014-05-06 15:04:28 -04002094 {
2095 #if PERF_PROFILE
2096 Long texTime = Ticks();
2097 #endif
2098
2099 Pointer<Byte> texture = r.data + OFFSET(DrawData,mipmap) + stage * sizeof(Texture);
2100
2101 if(!project)
2102 {
2103 sampler[stage]->sampleTexture(texture, c, u, v, w, q, dsx, dsy, bias, gradients, lodProvided);
2104 }
2105 else
2106 {
2107 Float4 rq = reciprocal(q);
2108
2109 Float4 u_q = u * rq;
2110 Float4 v_q = v * rq;
2111 Float4 w_q = w * rq;
2112
2113 sampler[stage]->sampleTexture(texture, c, u_q, v_q, w_q, q, dsx, dsy, bias, gradients, lodProvided);
2114 }
2115
2116 #if PERF_PROFILE
2117 r.cycles[PERF_TEX] += Ticks() - texTime;
2118 #endif
2119 }
2120
John Bauman19bac1e2014-05-06 15:23:49 -04002121 void PixelRoutine::clampColor(Vector4f oC[4])
John Bauman89401822014-05-06 15:04:28 -04002122 {
2123 for(int index = 0; index < 4; index++)
2124 {
2125 if(!state.colorWriteActive(index) && !(index == 0 && state.alphaTestActive()))
2126 {
2127 continue;
2128 }
2129
2130 switch(state.targetFormat[index])
2131 {
2132 case FORMAT_NULL:
2133 break;
2134 case FORMAT_A16B16G16R16:
2135 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002136 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002137 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002138 case FORMAT_X8B8G8R8:
John Bauman66b8ab22014-05-06 15:57:45 -04002139 case FORMAT_A8:
John Bauman89401822014-05-06 15:04:28 -04002140 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04002141 oC[index].x = Max(oC[index].x, Float4(0.0f)); oC[index].x = Min(oC[index].x, Float4(1.0f));
2142 oC[index].y = Max(oC[index].y, Float4(0.0f)); oC[index].y = Min(oC[index].y, Float4(1.0f));
2143 oC[index].z = Max(oC[index].z, Float4(0.0f)); oC[index].z = Min(oC[index].z, Float4(1.0f));
2144 oC[index].w = Max(oC[index].w, Float4(0.0f)); oC[index].w = Min(oC[index].w, Float4(1.0f));
John Bauman89401822014-05-06 15:04:28 -04002145 break;
2146 case FORMAT_R32F:
2147 case FORMAT_G32R32F:
2148 case FORMAT_A32B32G32R32F:
2149 break;
2150 default:
2151 ASSERT(false);
2152 }
2153 }
2154 }
2155
Alexis Hetu96517182015-04-15 10:30:23 -04002156 void PixelRoutine::rasterOperation(Vector4s &current, Registers &r, Float4 &fog, Pointer<Byte> &cBuffer, Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
John Bauman89401822014-05-06 15:04:28 -04002157 {
2158 if(!state.colorWriteActive(0))
2159 {
2160 return;
2161 }
2162
John Bauman19bac1e2014-05-06 15:23:49 -04002163 Vector4f oC;
John Bauman89401822014-05-06 15:04:28 -04002164
2165 switch(state.targetFormat[0])
2166 {
2167 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002168 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002169 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002170 case FORMAT_A8B8G8R8:
John Bauman66b8ab22014-05-06 15:57:45 -04002171 case FORMAT_A8:
John Bauman89401822014-05-06 15:04:28 -04002172 case FORMAT_G16R16:
2173 case FORMAT_A16B16G16R16:
2174 if(!postBlendSRGB && state.writeSRGB)
2175 {
2176 linearToSRGB12_16(r, current);
2177 }
2178 else
2179 {
John Bauman19bac1e2014-05-06 15:23:49 -04002180 current.x <<= 4;
2181 current.y <<= 4;
2182 current.z <<= 4;
2183 current.w <<= 4;
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
2186 fogBlend(r, current, fog, r.z[0], r.rhw);
2187
2188 for(unsigned int q = 0; q < state.multiSample; q++)
2189 {
2190 Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
Alexis Hetu96517182015-04-15 10:30:23 -04002191 Vector4s color = current;
John Bauman89401822014-05-06 15:04:28 -04002192
2193 if(state.multiSampleMask & (1 << q))
2194 {
2195 alphaBlend(r, 0, buffer, color, x);
2196 writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2197 }
2198 }
2199 break;
2200 case FORMAT_R32F:
2201 case FORMAT_G32R32F:
2202 case FORMAT_A32B32G32R32F:
2203 convertSigned12(oC, current);
2204 fogBlend(r, oC, fog, r.z[0], r.rhw);
2205
2206 for(unsigned int q = 0; q < state.multiSample; q++)
2207 {
2208 Pointer<Byte> buffer = cBuffer + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[0]));
John Bauman19bac1e2014-05-06 15:23:49 -04002209 Vector4f color = oC;
John Bauman89401822014-05-06 15:04:28 -04002210
2211 if(state.multiSampleMask & (1 << q))
2212 {
2213 alphaBlend(r, 0, buffer, color, x);
2214 writeColor(r, 0, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2215 }
2216 }
2217 break;
2218 default:
2219 ASSERT(false);
2220 }
2221 }
2222
John Bauman19bac1e2014-05-06 15:23:49 -04002223 void PixelRoutine::rasterOperation(Vector4f oC[4], Registers &r, Float4 &fog, Pointer<Byte> cBuffer[4], Int &x, Int sMask[4], Int zMask[4], Int cMask[4])
John Bauman89401822014-05-06 15:04:28 -04002224 {
2225 for(int index = 0; index < 4; index++)
2226 {
2227 if(!state.colorWriteActive(index))
2228 {
2229 continue;
2230 }
2231
2232 if(!postBlendSRGB && state.writeSRGB)
2233 {
John Bauman19bac1e2014-05-06 15:23:49 -04002234 oC[index].x = linearToSRGB(oC[index].x);
2235 oC[index].y = linearToSRGB(oC[index].y);
2236 oC[index].z = linearToSRGB(oC[index].z);
John Bauman89401822014-05-06 15:04:28 -04002237 }
2238
2239 if(index == 0)
2240 {
2241 fogBlend(r, oC[index], fog, r.z[0], r.rhw);
2242 }
2243
2244 switch(state.targetFormat[index])
2245 {
2246 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002247 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002248 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002249 case FORMAT_A8B8G8R8:
John Bauman66b8ab22014-05-06 15:57:45 -04002250 case FORMAT_A8:
John Bauman89401822014-05-06 15:04:28 -04002251 case FORMAT_G16R16:
2252 case FORMAT_A16B16G16R16:
2253 for(unsigned int q = 0; q < state.multiSample; q++)
2254 {
2255 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
Alexis Hetu96517182015-04-15 10:30:23 -04002256 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04002257
John Bauman19bac1e2014-05-06 15:23:49 -04002258 color.x = convertFixed16(oC[index].x, false);
2259 color.y = convertFixed16(oC[index].y, false);
2260 color.z = convertFixed16(oC[index].z, false);
2261 color.w = convertFixed16(oC[index].w, false);
John Bauman89401822014-05-06 15:04:28 -04002262
2263 if(state.multiSampleMask & (1 << q))
2264 {
2265 alphaBlend(r, index, buffer, color, x);
2266 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2267 }
2268 }
2269 break;
2270 case FORMAT_R32F:
2271 case FORMAT_G32R32F:
2272 case FORMAT_A32B32G32R32F:
2273 for(unsigned int q = 0; q < state.multiSample; q++)
2274 {
2275 Pointer<Byte> buffer = cBuffer[index] + q * *Pointer<Int>(r.data + OFFSET(DrawData,colorSliceB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002276 Vector4f color = oC[index];
John Bauman89401822014-05-06 15:04:28 -04002277
2278 if(state.multiSampleMask & (1 << q))
2279 {
2280 alphaBlend(r, index, buffer, color, x);
2281 writeColor(r, index, buffer, x, color, sMask[q], zMask[q], cMask[q]);
2282 }
2283 }
2284 break;
2285 default:
2286 ASSERT(false);
2287 }
2288 }
2289 }
2290
Alexis Hetu96517182015-04-15 10:30:23 -04002291 void PixelRoutine::blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04002292 {
2293 switch(blendFactorActive)
2294 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002295 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04002296 // Optimized
2297 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002298 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04002299 // Optimized
2300 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002301 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04002302 blendFactor.x = current.x;
2303 blendFactor.y = current.y;
2304 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -04002305 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002306 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04002307 blendFactor.x = Short4(0xFFFFu) - current.x;
2308 blendFactor.y = Short4(0xFFFFu) - current.y;
2309 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -04002310 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002311 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002312 blendFactor.x = pixel.x;
2313 blendFactor.y = pixel.y;
2314 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002315 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002316 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002317 blendFactor.x = Short4(0xFFFFu) - pixel.x;
2318 blendFactor.y = Short4(0xFFFFu) - pixel.y;
2319 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002320 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002321 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002322 blendFactor.x = current.w;
2323 blendFactor.y = current.w;
2324 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -04002325 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002326 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002327 blendFactor.x = Short4(0xFFFFu) - current.w;
2328 blendFactor.y = Short4(0xFFFFu) - current.w;
2329 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -04002330 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002331 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002332 blendFactor.x = pixel.w;
2333 blendFactor.y = pixel.w;
2334 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002335 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002336 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002337 blendFactor.x = Short4(0xFFFFu) - pixel.w;
2338 blendFactor.y = Short4(0xFFFFu) - pixel.w;
2339 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002340 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002341 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002342 blendFactor.x = Short4(0xFFFFu) - pixel.w;
2343 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
2344 blendFactor.y = blendFactor.x;
2345 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04002346 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002347 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04002348 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0]));
2349 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1]));
2350 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -04002351 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002352 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04002353 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
2354 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
2355 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -04002356 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002357 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002358 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2359 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
2360 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04002361 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002362 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002363 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2364 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
2365 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04002366 break;
2367 default:
2368 ASSERT(false);
2369 }
2370 }
2371
Alexis Hetu96517182015-04-15 10:30:23 -04002372 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04002373 {
2374 switch(blendFactorAlphaActive)
2375 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002376 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04002377 // Optimized
2378 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002379 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04002380 // Optimized
2381 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002382 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04002383 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -04002384 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002385 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04002386 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -04002387 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002388 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002389 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002390 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002391 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002392 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002393 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002394 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002395 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -04002396 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002397 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002398 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -04002399 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002400 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002401 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002402 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002403 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002404 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002405 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002406 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002407 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04002408 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002409 case BLEND_CONSTANT:
2410 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002411 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04002412 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002413 case BLEND_INVCONSTANT:
2414 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002415 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04002416 break;
2417 default:
2418 ASSERT(false);
2419 }
2420 }
2421
Alexis Hetu96517182015-04-15 10:30:23 -04002422 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
John Bauman89401822014-05-06 15:04:28 -04002423 {
2424 if(!state.alphaBlendActive)
2425 {
2426 return;
2427 }
2428
2429 Pointer<Byte> buffer;
2430
Alexis Hetu96517182015-04-15 10:30:23 -04002431 Vector4s pixel;
John Bauman89401822014-05-06 15:04:28 -04002432 Short4 c01;
2433 Short4 c23;
2434
2435 // Read pixel
2436 switch(state.targetFormat[index])
2437 {
2438 case FORMAT_A8R8G8B8:
2439 buffer = cBuffer + 4 * x;
2440 c01 = *Pointer<Short4>(buffer);
2441 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2442 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04002443 pixel.z = c01;
2444 pixel.y = c01;
2445 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
2446 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
2447 pixel.x = pixel.z;
2448 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
2449 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
2450 pixel.y = pixel.z;
2451 pixel.w = pixel.x;
2452 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
2453 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
2454 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
2455 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04002456 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002457 case FORMAT_A8B8G8R8:
2458 buffer = cBuffer + 4 * x;
2459 c01 = *Pointer<Short4>(buffer);
2460 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2461 c23 = *Pointer<Short4>(buffer);
2462 pixel.z = c01;
2463 pixel.y = c01;
2464 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
2465 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
2466 pixel.x = pixel.z;
2467 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
2468 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
2469 pixel.y = pixel.z;
2470 pixel.w = pixel.x;
2471 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
2472 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
2473 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
2474 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
2475 break;
John Bauman66b8ab22014-05-06 15:57:45 -04002476 case FORMAT_A8:
2477 buffer = cBuffer + 1 * x;
2478 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
2479 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2480 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
2481 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
2482 pixel.x = Short4(0x0000);
2483 pixel.y = Short4(0x0000);
2484 pixel.z = Short4(0x0000);
2485 break;
John Bauman89401822014-05-06 15:04:28 -04002486 case FORMAT_X8R8G8B8:
2487 buffer = cBuffer + 4 * x;
2488 c01 = *Pointer<Short4>(buffer);
2489 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2490 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04002491 pixel.z = c01;
2492 pixel.y = c01;
2493 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
2494 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
2495 pixel.x = pixel.z;
2496 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
2497 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
2498 pixel.y = pixel.z;
2499 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
2500 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
2501 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
2502 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04002503 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002504 case FORMAT_X8B8G8R8:
2505 buffer = cBuffer + 4 * x;
2506 c01 = *Pointer<Short4>(buffer);
2507 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2508 c23 = *Pointer<Short4>(buffer);
2509 pixel.z = c01;
2510 pixel.y = c01;
2511 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
2512 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
2513 pixel.x = pixel.z;
2514 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
2515 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
2516 pixel.y = pixel.z;
2517 pixel.w = pixel.x;
2518 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
2519 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
2520 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
2521 pixel.w = Short4(0xFFFFu);
2522 break;
John Bauman89401822014-05-06 15:04:28 -04002523 case FORMAT_A8G8R8B8Q:
2524 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04002525 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2526 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2527 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
2528 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04002529 break;
2530 case FORMAT_X8G8R8B8Q:
2531 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04002532 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2533 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
2534 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
2535 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04002536 break;
2537 case FORMAT_A16B16G16R16:
2538 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002539 pixel.x = *Pointer<Short4>(buffer + 8 * x);
2540 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
John Bauman89401822014-05-06 15:04:28 -04002541 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002542 pixel.z = *Pointer<Short4>(buffer + 8 * x);
2543 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
2544 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002545 break;
2546 case FORMAT_G16R16:
2547 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002548 pixel.x = *Pointer<Short4>(buffer + 4 * x);
John Bauman89401822014-05-06 15:04:28 -04002549 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002550 pixel.y = *Pointer<Short4>(buffer + 4 * x);
2551 pixel.z = pixel.x;
2552 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
2553 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
2554 pixel.y = pixel.z;
2555 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
2556 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
2557 pixel.z = Short4(0xFFFFu);
2558 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04002559 break;
2560 default:
2561 ASSERT(false);
2562 }
2563
2564 if(postBlendSRGB && state.writeSRGB)
2565 {
2566 sRGBtoLinear16_16(r, pixel);
2567 }
2568
2569 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04002570 Vector4s sourceFactor;
2571 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04002572
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002573 blendFactor(r, sourceFactor, current, pixel, state.sourceBlendFactor);
2574 blendFactor(r, destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002575
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002576 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002577 {
John Bauman19bac1e2014-05-06 15:23:49 -04002578 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
2579 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
2580 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04002581 }
2582
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002583 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002584 {
John Bauman19bac1e2014-05-06 15:23:49 -04002585 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
2586 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
2587 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04002588 }
2589
2590 switch(state.blendOperation)
2591 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002592 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002593 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
2594 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
2595 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04002596 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002597 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002598 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
2599 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
2600 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04002601 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002602 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002603 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
2604 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
2605 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04002606 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002607 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002608 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
2609 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
2610 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04002611 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002612 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002613 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
2614 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
2615 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04002616 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002617 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002618 // No operation
2619 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002620 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002621 current.x = pixel.x;
2622 current.y = pixel.y;
2623 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002624 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002625 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002626 current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2627 current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
2628 current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04002629 break;
2630 default:
2631 ASSERT(false);
2632 }
2633
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002634 blendFactorAlpha(r, sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
2635 blendFactorAlpha(r, destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002636
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002637 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002638 {
John Bauman19bac1e2014-05-06 15:23:49 -04002639 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04002640 }
2641
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002642 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002643 {
John Bauman19bac1e2014-05-06 15:23:49 -04002644 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04002645 }
2646
2647 switch(state.blendOperationAlpha)
2648 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002649 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002650 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04002651 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002652 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002653 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04002654 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002655 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002656 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04002657 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002658 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002659 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04002660 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002661 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002662 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04002663 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002664 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002665 // No operation
2666 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002667 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002668 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002669 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002670 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002671 current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04002672 break;
2673 default:
2674 ASSERT(false);
2675 }
2676 }
2677
Alexis Hetu96517182015-04-15 10:30:23 -04002678 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002679 {
2680 if(!state.colorWriteActive(index))
2681 {
2682 return;
2683 }
2684
2685 if(postBlendSRGB && state.writeSRGB)
2686 {
2687 linearToSRGB16_16(r, current);
2688 }
2689
2690 if(exactColorRounding)
2691 {
2692 switch(state.targetFormat[index])
2693 {
2694 case FORMAT_X8G8R8B8Q:
2695 case FORMAT_A8G8R8B8Q:
2696 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002697 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002698 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002699 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04002700 {
John Bauman19bac1e2014-05-06 15:23:49 -04002701 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2702 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2703 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
2704 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
John Bauman89401822014-05-06 15:04:28 -04002705 }
2706 break;
2707 }
2708 }
2709
2710 int rgbaWriteMask = state.colorWriteActive(index);
2711 int bgraWriteMask = rgbaWriteMask & 0x0000000A | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
2712 int brgaWriteMask = rgbaWriteMask & 0x00000008 | (rgbaWriteMask & 0x00000001) << 1 | (rgbaWriteMask & 0x00000002) << 1 | (rgbaWriteMask & 0x00000004) >> 2;
2713
2714 switch(state.targetFormat[index])
2715 {
2716 case FORMAT_X8G8R8B8Q:
2717 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04002718 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2719 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2720 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04002721
John Bauman19bac1e2014-05-06 15:23:49 -04002722 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
2723 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04002724 break;
2725 case FORMAT_A8G8R8B8Q:
2726 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04002727 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2728 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2729 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
2730 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04002731
John Bauman19bac1e2014-05-06 15:23:49 -04002732 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
2733 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04002734 break;
2735 case FORMAT_X8R8G8B8:
2736 case FORMAT_A8R8G8B8:
2737 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
2738 {
John Bauman19bac1e2014-05-06 15:23:49 -04002739 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2740 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2741 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04002742
John Bauman19bac1e2014-05-06 15:23:49 -04002743 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
2744 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04002745
John Bauman19bac1e2014-05-06 15:23:49 -04002746 current.x = current.z;
2747 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
2748 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
2749 current.y = current.z;
2750 current.z = As<Short4>(UnpackLow(current.z, current.x));
2751 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04002752 }
2753 else
2754 {
John Bauman19bac1e2014-05-06 15:23:49 -04002755 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2756 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2757 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
2758 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04002759
John Bauman19bac1e2014-05-06 15:23:49 -04002760 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
2761 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04002762
John Bauman19bac1e2014-05-06 15:23:49 -04002763 current.x = current.z;
2764 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
2765 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
2766 current.y = current.z;
2767 current.z = As<Short4>(UnpackLow(current.z, current.x));
2768 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04002769 }
2770 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002771 case FORMAT_X8B8G8R8:
2772 case FORMAT_A8B8G8R8:
2773 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || rgbaWriteMask == 0x7)
2774 {
2775 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2776 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2777 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
2778
2779 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
2780 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
2781
2782 current.x = current.z;
2783 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
2784 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
2785 current.y = current.z;
2786 current.z = As<Short4>(UnpackLow(current.z, current.x));
2787 current.y = As<Short4>(UnpackHigh(current.y, current.x));
2788 }
2789 else
2790 {
2791 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
2792 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
2793 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
2794 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
2795
2796 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
2797 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
2798
2799 current.x = current.z;
2800 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
2801 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
2802 current.y = current.z;
2803 current.z = As<Short4>(UnpackLow(current.z, current.x));
2804 current.y = As<Short4>(UnpackHigh(current.y, current.x));
2805 }
2806 break;
John Bauman66b8ab22014-05-06 15:57:45 -04002807 case FORMAT_A8:
2808 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
2809 current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w)));
2810 break;
John Bauman89401822014-05-06 15:04:28 -04002811 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04002812 current.z = current.x;
2813 current.x = As<Short4>(UnpackLow(current.x, current.y));
2814 current.z = As<Short4>(UnpackHigh(current.z, current.y));
2815 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04002816 break;
2817 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04002818 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04002819 break;
2820 case FORMAT_R32F:
2821 case FORMAT_G32R32F:
2822 case FORMAT_A32B32G32R32F:
2823 {
John Bauman19bac1e2014-05-06 15:23:49 -04002824 Vector4f oC;
John Bauman89401822014-05-06 15:04:28 -04002825
John Bauman19bac1e2014-05-06 15:23:49 -04002826 oC.x = convertUnsigned16(UShort4(current.x));
2827 oC.y = convertUnsigned16(UShort4(current.y));
2828 oC.z = convertUnsigned16(UShort4(current.z));
2829 oC.w = convertUnsigned16(UShort4(current.w));
John Bauman89401822014-05-06 15:04:28 -04002830
2831 writeColor(r, index, cBuffer, x, oC, sMask, zMask, cMask);
2832 }
2833 return;
2834 default:
2835 ASSERT(false);
2836 }
2837
John Bauman19bac1e2014-05-06 15:23:49 -04002838 Short4 c01 = current.z;
2839 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04002840
2841 Int xMask; // Combination of all masks
2842
2843 if(state.depthTestActive)
2844 {
2845 xMask = zMask;
2846 }
2847 else
2848 {
2849 xMask = cMask;
2850 }
2851
2852 if(state.stencilActive)
2853 {
2854 xMask &= sMask;
2855 }
2856
2857 Pointer<Byte> buffer;
2858 Short4 value;
2859
2860 switch(state.targetFormat[index])
2861 {
2862 case FORMAT_A8G8R8B8Q:
2863 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
2864 UNIMPLEMENTED();
2865 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
2866
2867 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
2868 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
2869 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2870 // {
2871 // Short4 masked = value;
2872 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2873 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2874 // c01 |= masked;
2875 // }
2876
2877 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2878 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2879 // c01 |= value;
2880 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
2881
2882 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
2883
2884 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
2885 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
2886 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2887 // {
2888 // Short4 masked = value;
2889 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2890 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2891 // c23 |= masked;
2892 // }
2893
2894 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2895 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2896 // c23 |= value;
2897 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
2898 break;
2899 case FORMAT_A8R8G8B8:
2900 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
2901 buffer = cBuffer + x * 4;
2902 value = *Pointer<Short4>(buffer);
2903
2904 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
2905 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
2906 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2907 {
2908 Short4 masked = value;
2909 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2910 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2911 c01 |= masked;
2912 }
2913
2914 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2915 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2916 c01 |= value;
2917 *Pointer<Short4>(buffer) = c01;
2918
2919 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2920 value = *Pointer<Short4>(buffer);
2921
2922 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
2923 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
2924 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
2925 {
2926 Short4 masked = value;
2927 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
2928 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
2929 c23 |= masked;
2930 }
2931
2932 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2933 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2934 c23 |= value;
2935 *Pointer<Short4>(buffer) = c23;
2936 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04002937 case FORMAT_A8B8G8R8:
2938 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
2939 buffer = cBuffer + x * 4;
2940 value = *Pointer<Short4>(buffer);
2941
2942 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
2943 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
2944 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
2945 {
2946 Short4 masked = value;
2947 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
2948 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
2949 c01 |= masked;
2950 }
2951
2952 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
2953 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
2954 c01 |= value;
2955 *Pointer<Short4>(buffer) = c01;
2956
2957 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2958 value = *Pointer<Short4>(buffer);
2959
2960 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
2961 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
2962 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
2963 {
2964 Short4 masked = value;
2965 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
2966 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
2967 c23 |= masked;
2968 }
2969
2970 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
2971 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
2972 c23 |= value;
2973 *Pointer<Short4>(buffer) = c23;
2974 break;
John Bauman66b8ab22014-05-06 15:57:45 -04002975 case FORMAT_A8:
2976 if(rgbaWriteMask & 0x00000008)
2977 {
2978 buffer = cBuffer + 1 * x;
2979 Insert(value, *Pointer<Short>(buffer), 0);
2980 Int pitch = *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2981 Insert(value, *Pointer<Short>(buffer + pitch), 1);
2982 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
2983
2984 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
2985 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
2986 current.w |= value;
2987
2988 *Pointer<Short>(buffer) = Extract(current.w, 0);
2989 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
2990 }
2991 break;
John Bauman89401822014-05-06 15:04:28 -04002992 case FORMAT_G16R16:
2993 buffer = cBuffer + 4 * x;
2994
2995 value = *Pointer<Short4>(buffer);
2996
2997 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2998 {
2999 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003000 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman89401822014-05-06 15:04:28 -04003001 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003002 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04003003 }
3004
John Bauman19bac1e2014-05-06 15:23:49 -04003005 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003006 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003007 current.x |= value;
3008 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04003009
3010 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3011
3012 value = *Pointer<Short4>(buffer);
3013
3014 if((rgbaWriteMask & 0x00000003) != 0x00000003)
3015 {
3016 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003017 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman89401822014-05-06 15:04:28 -04003018 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003019 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04003020 }
3021
John Bauman19bac1e2014-05-06 15:23:49 -04003022 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003023 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003024 current.y |= value;
3025 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04003026 break;
3027 case FORMAT_A16B16G16R16:
3028 buffer = cBuffer + 8 * x;
3029
3030 {
3031 value = *Pointer<Short4>(buffer);
3032
3033 if(rgbaWriteMask != 0x0000000F)
3034 {
3035 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003036 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04003037 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003038 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04003039 }
3040
John Bauman19bac1e2014-05-06 15:23:49 -04003041 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003042 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003043 current.x |= value;
3044 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04003045 }
3046
3047 {
3048 value = *Pointer<Short4>(buffer + 8);
3049
3050 if(rgbaWriteMask != 0x0000000F)
3051 {
3052 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003053 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04003054 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003055 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04003056 }
3057
John Bauman19bac1e2014-05-06 15:23:49 -04003058 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003059 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003060 current.y |= value;
3061 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04003062 }
3063
3064 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3065
3066 {
3067 value = *Pointer<Short4>(buffer);
3068
3069 if(rgbaWriteMask != 0x0000000F)
3070 {
3071 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003072 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04003073 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003074 current.z |= masked;
John Bauman89401822014-05-06 15:04:28 -04003075 }
3076
John Bauman19bac1e2014-05-06 15:23:49 -04003077 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003078 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003079 current.z |= value;
3080 *Pointer<Short4>(buffer) = current.z;
John Bauman89401822014-05-06 15:04:28 -04003081 }
3082
3083 {
3084 value = *Pointer<Short4>(buffer + 8);
3085
3086 if(rgbaWriteMask != 0x0000000F)
3087 {
3088 Short4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003089 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04003090 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04003091 current.w |= masked;
John Bauman89401822014-05-06 15:04:28 -04003092 }
3093
John Bauman19bac1e2014-05-06 15:23:49 -04003094 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04003095 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04003096 current.w |= value;
3097 *Pointer<Short4>(buffer + 8) = current.w;
John Bauman89401822014-05-06 15:04:28 -04003098 }
3099 break;
3100 default:
3101 ASSERT(false);
3102 }
3103 }
3104
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003105 void PixelRoutine::blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04003106 {
3107 switch(blendFactorActive)
3108 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003109 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04003110 // Optimized
3111 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003112 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04003113 // Optimized
3114 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003115 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04003116 blendFactor.x = oC.x;
3117 blendFactor.y = oC.y;
3118 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04003119 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003120 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04003121 blendFactor.x = Float4(1.0f) - oC.x;
3122 blendFactor.y = Float4(1.0f) - oC.y;
3123 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04003124 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003125 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003126 blendFactor.x = pixel.x;
3127 blendFactor.y = pixel.y;
3128 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04003129 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003130 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003131 blendFactor.x = Float4(1.0f) - pixel.x;
3132 blendFactor.y = Float4(1.0f) - pixel.y;
3133 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04003134 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003135 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003136 blendFactor.x = oC.w;
3137 blendFactor.y = oC.w;
3138 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04003139 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003140 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003141 blendFactor.x = Float4(1.0f) - oC.w;
3142 blendFactor.y = Float4(1.0f) - oC.w;
3143 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04003144 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003145 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003146 blendFactor.x = pixel.w;
3147 blendFactor.y = pixel.w;
3148 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003149 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003150 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003151 blendFactor.x = Float4(1.0f) - pixel.w;
3152 blendFactor.y = Float4(1.0f) - pixel.w;
3153 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003154 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003155 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04003156 blendFactor.x = Float4(1.0f) - pixel.w;
3157 blendFactor.x = Min(blendFactor.x, oC.w);
3158 blendFactor.y = blendFactor.x;
3159 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04003160 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003161 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04003162 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0]));
3163 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1]));
3164 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04003165 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003166 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04003167 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
3168 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
3169 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04003170 break;
3171 default:
3172 ASSERT(false);
3173 }
3174 }
3175
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003176 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04003177 {
3178 switch(blendFactorAlphaActive)
3179 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003180 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04003181 // Optimized
3182 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003183 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04003184 // Optimized
3185 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003186 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04003187 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04003188 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003189 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04003190 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04003191 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003192 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003193 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003194 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003195 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003196 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003197 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003198 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003199 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04003200 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003201 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003202 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04003203 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003204 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003205 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003206 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003207 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04003208 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003209 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003210 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04003211 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04003212 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003213 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04003214 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04003215 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003216 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04003217 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04003218 break;
3219 default:
3220 ASSERT(false);
3221 }
3222 }
3223
John Bauman19bac1e2014-05-06 15:23:49 -04003224 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04003225 {
3226 if(!state.alphaBlendActive)
3227 {
3228 return;
3229 }
3230
3231 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04003232 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04003233
Alexis Hetu96517182015-04-15 10:30:23 -04003234 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04003235 Short4 c01;
3236 Short4 c23;
3237
3238 // Read pixel
3239 switch(state.targetFormat[index])
3240 {
3241 case FORMAT_A8R8G8B8:
3242 buffer = cBuffer + 4 * x;
3243 c01 = *Pointer<Short4>(buffer);
3244 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3245 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04003246 color.z = c01;
3247 color.y = c01;
3248 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23));
3249 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23));
3250 color.x = color.z;
3251 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y));
3252 color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y));
3253 color.y = color.z;
3254 color.w = color.x;
3255 color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x));
3256 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y));
3257 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z));
3258 color.w = UnpackHigh(As<Byte8>(color.w), As<Byte8>(color.w));
John Bauman89401822014-05-06 15:04:28 -04003259
John Bauman19bac1e2014-05-06 15:23:49 -04003260 pixel.x = convertUnsigned16(As<UShort4>(color.x));
3261 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3262 pixel.z = convertUnsigned16(As<UShort4>(color.z));
3263 pixel.w = convertUnsigned16(As<UShort4>(color.w));
John Bauman89401822014-05-06 15:04:28 -04003264 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04003265 case FORMAT_A8B8G8R8:
3266 buffer = cBuffer + 4 * x;
3267 c01 = *Pointer<Short4>(buffer);
3268 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3269 c23 = *Pointer<Short4>(buffer);
3270 color.z = c01;
3271 color.y = c01;
3272 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23));
3273 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23));
3274 color.x = color.z;
3275 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y));
3276 color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y));
3277 color.y = color.z;
3278 color.w = color.x;
3279 color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x));
3280 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y));
3281 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z));
3282 color.w = UnpackHigh(As<Byte8>(color.w), As<Byte8>(color.w));
3283
3284 pixel.x = convertUnsigned16(As<UShort4>(color.z));
3285 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3286 pixel.z = convertUnsigned16(As<UShort4>(color.x));
3287 pixel.w = convertUnsigned16(As<UShort4>(color.w));
3288 break;
John Bauman89401822014-05-06 15:04:28 -04003289 case FORMAT_X8R8G8B8:
3290 buffer = cBuffer + 4 * x;
3291 c01 = *Pointer<Short4>(buffer);
3292 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3293 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04003294 color.z = c01;
3295 color.y = c01;
3296 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23));
3297 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23));
3298 color.x = color.z;
3299 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y));
3300 color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y));
3301 color.y = color.z;
3302 color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x));
3303 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y));
3304 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z));
John Bauman89401822014-05-06 15:04:28 -04003305
John Bauman19bac1e2014-05-06 15:23:49 -04003306 pixel.x = convertUnsigned16(As<UShort4>(color.x));
3307 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3308 pixel.z = convertUnsigned16(As<UShort4>(color.z));
3309 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04003310 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04003311 case FORMAT_X8B8G8R8:
3312 buffer = cBuffer + 4 * x;
3313 c01 = *Pointer<Short4>(buffer);
3314 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3315 c23 = *Pointer<Short4>(buffer);
3316 color.z = c01;
3317 color.y = c01;
3318 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(c23));
3319 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(c23));
3320 color.x = color.z;
3321 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.y));
3322 color.x = UnpackHigh(As<Byte8>(color.x), As<Byte8>(color.y));
3323 color.y = color.z;
3324 color.x = UnpackLow(As<Byte8>(color.x), As<Byte8>(color.x));
3325 color.y = UnpackHigh(As<Byte8>(color.y), As<Byte8>(color.y));
3326 color.z = UnpackLow(As<Byte8>(color.z), As<Byte8>(color.z));
3327
3328 pixel.x = convertUnsigned16(As<UShort4>(color.z));
3329 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3330 pixel.z = convertUnsigned16(As<UShort4>(color.x));
3331 pixel.w = Float4(1.0f);
3332 break;
John Bauman66b8ab22014-05-06 15:57:45 -04003333 case FORMAT_A8:
3334 buffer = cBuffer + 1 * x;
3335 c01 = Insert(c01, *Pointer<Short>(buffer), 0);
3336 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3337 c01 = Insert(c01, *Pointer<Short>(buffer), 1);
3338 pixel.w = convertUnsigned16(As<UShort4>(UnpackLow(As<Byte8>(c01), As<Byte8>(c01))));
3339 pixel.x = Float4(0.0f);
3340 pixel.y = Float4(0.0f);
3341 pixel.z = Float4(0.0f);
3342 break;
John Bauman89401822014-05-06 15:04:28 -04003343 case FORMAT_A8G8R8B8Q:
John Bauman66b8ab22014-05-06 15:57:45 -04003344 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04003345 // UnpackLow(pixel.z, qword_ptr [cBuffer+8*x+0]);
3346 // UnpackHigh(pixel.x, qword_ptr [cBuffer+8*x+0]);
3347 // UnpackLow(pixel.y, qword_ptr [cBuffer+8*x+8]);
3348 // UnpackHigh(pixel.w, qword_ptr [cBuffer+8*x+8]);
John Bauman89401822014-05-06 15:04:28 -04003349 break;
3350 case FORMAT_X8G8R8B8Q:
John Bauman66b8ab22014-05-06 15:57:45 -04003351 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04003352 // UnpackLow(pixel.z, qword_ptr [cBuffer+8*x+0]);
3353 // UnpackHigh(pixel.x, qword_ptr [cBuffer+8*x+0]);
3354 // UnpackLow(pixel.y, qword_ptr [cBuffer+8*x+8]);
3355 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04003356 break;
3357 case FORMAT_A16B16G16R16:
3358 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04003359 color.x = *Pointer<Short4>(buffer + 8 * x);
3360 color.y = *Pointer<Short4>(buffer + 8 * x + 8);
John Bauman89401822014-05-06 15:04:28 -04003361 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04003362 color.z = *Pointer<Short4>(buffer + 8 * x);
3363 color.w = *Pointer<Short4>(buffer + 8 * x + 8);
John Bauman89401822014-05-06 15:04:28 -04003364
John Bauman19bac1e2014-05-06 15:23:49 -04003365 transpose4x4(color.x, color.y, color.z, color.w);
John Bauman89401822014-05-06 15:04:28 -04003366
John Bauman19bac1e2014-05-06 15:23:49 -04003367 pixel.x = convertUnsigned16(As<UShort4>(color.x));
3368 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3369 pixel.z = convertUnsigned16(As<UShort4>(color.z));
3370 pixel.w = convertUnsigned16(As<UShort4>(color.w));
John Bauman89401822014-05-06 15:04:28 -04003371 break;
3372 case FORMAT_G16R16:
3373 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04003374 color.x = *Pointer<Short4>(buffer + 4 * x);
John Bauman89401822014-05-06 15:04:28 -04003375 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04003376 color.y = *Pointer<Short4>(buffer + 4 * x);
3377 color.z = color.x;
3378 color.x = As<Short4>(UnpackLow(color.x, color.y));
3379 color.z = As<Short4>(UnpackHigh(color.z, color.y));
3380 color.y = color.z;
3381 color.x = As<Short4>(UnpackLow(color.x, color.z));
3382 color.y = As<Short4>(UnpackHigh(color.y, color.z));
John Bauman89401822014-05-06 15:04:28 -04003383
John Bauman19bac1e2014-05-06 15:23:49 -04003384 pixel.x = convertUnsigned16(As<UShort4>(color.x));
3385 pixel.y = convertUnsigned16(As<UShort4>(color.y));
3386 pixel.z = Float4(1.0f);
3387 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04003388 break;
3389 case FORMAT_R32F:
3390 buffer = cBuffer;
3391 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04003392 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
3393 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
John Bauman89401822014-05-06 15:04:28 -04003394 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3395 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04003396 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
3397 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
3398 pixel.y = Float4(1.0f);
3399 pixel.z = Float4(1.0f);
3400 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04003401 break;
3402 case FORMAT_G32R32F:
3403 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04003404 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
John Bauman89401822014-05-06 15:04:28 -04003405 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04003406 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
3407 pixel.z = pixel.x;
3408 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
3409 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
3410 pixel.y = pixel.z;
3411 pixel.z = Float4(1.0f);
3412 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04003413 break;
3414 case FORMAT_A32B32G32R32F:
3415 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04003416 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
3417 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04003418 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04003419 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
3420 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
3421 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04003422 break;
3423 default:
3424 ASSERT(false);
3425 }
3426
3427 if(postBlendSRGB && state.writeSRGB)
3428 {
John Bauman19bac1e2014-05-06 15:23:49 -04003429 sRGBtoLinear(pixel.x);
3430 sRGBtoLinear(pixel.y);
3431 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04003432 }
3433
3434 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04003435 Vector4f sourceFactor;
3436 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04003437
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003438 blendFactor(r, sourceFactor, oC, pixel, state.sourceBlendFactor);
3439 blendFactor(r, destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04003440
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003441 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04003442 {
John Bauman19bac1e2014-05-06 15:23:49 -04003443 oC.x *= sourceFactor.x;
3444 oC.y *= sourceFactor.y;
3445 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04003446 }
3447
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003448 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04003449 {
John Bauman19bac1e2014-05-06 15:23:49 -04003450 pixel.x *= destFactor.x;
3451 pixel.y *= destFactor.y;
3452 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04003453 }
3454
3455 switch(state.blendOperation)
3456 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003457 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04003458 oC.x += pixel.x;
3459 oC.y += pixel.y;
3460 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04003461 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003462 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04003463 oC.x -= pixel.x;
3464 oC.y -= pixel.y;
3465 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04003466 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003467 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04003468 oC.x = pixel.x - oC.x;
3469 oC.y = pixel.y - oC.y;
3470 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04003471 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003472 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04003473 oC.x = Min(oC.x, pixel.x);
3474 oC.y = Min(oC.y, pixel.y);
3475 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04003476 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003477 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04003478 oC.x = Max(oC.x, pixel.x);
3479 oC.y = Max(oC.y, pixel.y);
3480 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04003481 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003482 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04003483 // No operation
3484 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003485 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003486 oC.x = pixel.x;
3487 oC.y = pixel.y;
3488 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04003489 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003490 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04003491 oC.x = Float4(0.0f);
3492 oC.y = Float4(0.0f);
3493 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04003494 break;
3495 default:
3496 ASSERT(false);
3497 }
3498
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003499 blendFactorAlpha(r, sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
3500 blendFactorAlpha(r, destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04003501
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003502 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04003503 {
John Bauman19bac1e2014-05-06 15:23:49 -04003504 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04003505 }
3506
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003507 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04003508 {
John Bauman19bac1e2014-05-06 15:23:49 -04003509 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04003510 }
3511
3512 switch(state.blendOperationAlpha)
3513 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003514 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04003515 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003516 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003517 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04003518 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003519 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003520 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04003521 pixel.w -= oC.w;
3522 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003523 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003524 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04003525 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04003526 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003527 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04003528 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04003529 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003530 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04003531 // No operation
3532 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003533 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04003534 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04003535 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04003536 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04003537 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04003538 break;
3539 default:
3540 ASSERT(false);
3541 }
3542 }
3543
John Bauman19bac1e2014-05-06 15:23:49 -04003544 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04003545 {
3546 if(!state.colorWriteActive(index))
3547 {
3548 return;
3549 }
3550
Alexis Hetu96517182015-04-15 10:30:23 -04003551 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04003552
3553 switch(state.targetFormat[index])
3554 {
3555 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04003556 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04003557 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04003558 case FORMAT_A8B8G8R8:
John Bauman66b8ab22014-05-06 15:57:45 -04003559 case FORMAT_A8:
John Bauman89401822014-05-06 15:04:28 -04003560 case FORMAT_G16R16:
3561 case FORMAT_A16B16G16R16:
3562 convertFixed16(color, oC, true);
3563 writeColor(r, index, cBuffer, x, color, sMask, zMask, cMask);
3564 return;
3565 case FORMAT_R32F:
3566 break;
3567 case FORMAT_G32R32F:
John Bauman19bac1e2014-05-06 15:23:49 -04003568 oC.z = oC.x;
3569 oC.x = UnpackLow(oC.x, oC.y);
3570 oC.z = UnpackHigh(oC.z, oC.y);
3571 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04003572 break;
3573 case FORMAT_A32B32G32R32F:
John Bauman19bac1e2014-05-06 15:23:49 -04003574 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04003575 break;
3576 default:
3577 ASSERT(false);
3578 }
3579
3580 int rgbaWriteMask = state.colorWriteActive(index);
3581
3582 Int xMask; // Combination of all masks
3583
3584 if(state.depthTestActive)
3585 {
3586 xMask = zMask;
3587 }
3588 else
3589 {
3590 xMask = cMask;
3591 }
3592
3593 if(state.stencilActive)
3594 {
3595 xMask &= sMask;
3596 }
3597
3598 Pointer<Byte> buffer;
3599 Float4 value;
3600
3601 switch(state.targetFormat[index])
3602 {
3603 case FORMAT_R32F:
3604 if(rgbaWriteMask & 0x00000001)
3605 {
3606 buffer = cBuffer + 4 * x;
3607
3608 // FIXME: movlps
3609 value.x = *Pointer<Float>(buffer + 0);
3610 value.y = *Pointer<Float>(buffer + 4);
3611
3612 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3613
3614 // FIXME: movhps
3615 value.z = *Pointer<Float>(buffer + 0);
3616 value.w = *Pointer<Float>(buffer + 4);
3617
John Bauman19bac1e2014-05-06 15:23:49 -04003618 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003619 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003620 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04003621
3622 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04003623 *Pointer<Float>(buffer + 0) = oC.x.z;
3624 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04003625
3626 buffer -= *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3627
3628 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04003629 *Pointer<Float>(buffer + 0) = oC.x.x;
3630 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04003631 }
3632 break;
3633 case FORMAT_G32R32F:
3634 buffer = cBuffer + 8 * x;
3635
3636 value = *Pointer<Float4>(buffer);
3637
3638 if((rgbaWriteMask & 0x00000003) != 0x00000003)
3639 {
3640 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003641 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04003642 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003643 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003644 }
3645
John Bauman19bac1e2014-05-06 15:23:49 -04003646 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003647 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003648 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
3649 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04003650
3651 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3652
3653 value = *Pointer<Float4>(buffer);
3654
3655 if((rgbaWriteMask & 0x00000003) != 0x00000003)
3656 {
3657 Float4 masked;
3658
3659 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003660 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04003661 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003662 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003663 }
3664
John Bauman19bac1e2014-05-06 15:23:49 -04003665 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003666 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003667 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
3668 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04003669 break;
3670 case FORMAT_A32B32G32R32F:
3671 buffer = cBuffer + 16 * x;
3672
3673 {
3674 value = *Pointer<Float4>(buffer, 16);
3675
3676 if(rgbaWriteMask != 0x0000000F)
3677 {
3678 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003679 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04003680 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003681 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003682 }
3683
John Bauman19bac1e2014-05-06 15:23:49 -04003684 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003685 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003686 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
3687 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04003688 }
3689
3690 {
3691 value = *Pointer<Float4>(buffer + 16, 16);
3692
3693 if(rgbaWriteMask != 0x0000000F)
3694 {
3695 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003696 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04003697 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003698 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003699 }
3700
John Bauman19bac1e2014-05-06 15:23:49 -04003701 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003702 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003703 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
3704 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04003705 }
3706
3707 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
3708
3709 {
3710 value = *Pointer<Float4>(buffer, 16);
3711
3712 if(rgbaWriteMask != 0x0000000F)
3713 {
3714 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003715 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04003716 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003717 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003718 }
3719
John Bauman19bac1e2014-05-06 15:23:49 -04003720 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003721 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003722 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
3723 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04003724 }
3725
3726 {
3727 value = *Pointer<Float4>(buffer + 16, 16);
3728
3729 if(rgbaWriteMask != 0x0000000F)
3730 {
3731 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04003732 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04003733 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04003734 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04003735 }
3736
John Bauman19bac1e2014-05-06 15:23:49 -04003737 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04003738 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04003739 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
3740 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04003741 }
3742 break;
3743 default:
3744 ASSERT(false);
3745 }
3746 }
3747
3748 void PixelRoutine::ps_1_x(Registers &r, Int cMask[4])
3749 {
3750 int pad = 0; // Count number of texm3x3pad instructions
Alexis Hetu96517182015-04-15 10:30:23 -04003751 Vector4s dPairing; // Destination for first pairing instruction
John Bauman89401822014-05-06 15:04:28 -04003752
Alexis Hetu903e0252014-11-25 14:25:32 -05003753 for(size_t i = 0; i < shader->getLength(); i++)
John Bauman89401822014-05-06 15:04:28 -04003754 {
John Bauman19bac1e2014-05-06 15:23:49 -04003755 const Shader::Instruction *instruction = shader->getInstruction(i);
3756 Shader::Opcode opcode = instruction->opcode;
John Bauman89401822014-05-06 15:04:28 -04003757
3758 // #ifndef NDEBUG // FIXME: Centralize debug output control
John Bauman19bac1e2014-05-06 15:23:49 -04003759 // shader->printInstruction(i, "debug.txt");
John Bauman89401822014-05-06 15:04:28 -04003760 // #endif
3761
John Bauman19bac1e2014-05-06 15:23:49 -04003762 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
John Bauman89401822014-05-06 15:04:28 -04003763 {
3764 continue;
3765 }
3766
John Bauman19bac1e2014-05-06 15:23:49 -04003767 const Dst &dst = instruction->dst;
3768 const Src &src0 = instruction->src[0];
3769 const Src &src1 = instruction->src[1];
3770 const Src &src2 = instruction->src[2];
John Bauman89401822014-05-06 15:04:28 -04003771
John Bauman19bac1e2014-05-06 15:23:49 -04003772 unsigned short version = shader->getVersion();
3773 bool pairing = i + 1 < shader->getLength() && shader->getInstruction(i + 1)->coissue; // First instruction of pair
3774 bool coissue = instruction->coissue; // Second instruction of pair
John Bauman89401822014-05-06 15:04:28 -04003775
Alexis Hetu96517182015-04-15 10:30:23 -04003776 Vector4s d;
3777 Vector4s s0;
3778 Vector4s s1;
3779 Vector4s s2;
John Bauman89401822014-05-06 15:04:28 -04003780
Alexis Hetu96517182015-04-15 10:30:23 -04003781 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterS(r, src0);
3782 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterS(r, src1);
3783 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterS(r, src2);
John Bauman19bac1e2014-05-06 15:23:49 -04003784
3785 Float4 u = version < 0x0104 ? r.vf[2 + dst.index].x : r.vf[2 + src0.index].x;
3786 Float4 v = version < 0x0104 ? r.vf[2 + dst.index].y : r.vf[2 + src0.index].y;
3787 Float4 s = version < 0x0104 ? r.vf[2 + dst.index].z : r.vf[2 + src0.index].z;
3788 Float4 t = version < 0x0104 ? r.vf[2 + dst.index].w : r.vf[2 + src0.index].w;
John Bauman89401822014-05-06 15:04:28 -04003789
3790 switch(opcode)
3791 {
John Bauman19bac1e2014-05-06 15:23:49 -04003792 case Shader::OPCODE_PS_1_0: break;
3793 case Shader::OPCODE_PS_1_1: break;
3794 case Shader::OPCODE_PS_1_2: break;
3795 case Shader::OPCODE_PS_1_3: break;
3796 case Shader::OPCODE_PS_1_4: break;
John Bauman89401822014-05-06 15:04:28 -04003797
John Bauman19bac1e2014-05-06 15:23:49 -04003798 case Shader::OPCODE_DEF: break;
John Bauman89401822014-05-06 15:04:28 -04003799
John Bauman19bac1e2014-05-06 15:23:49 -04003800 case Shader::OPCODE_NOP: break;
3801 case Shader::OPCODE_MOV: MOV(d, s0); break;
3802 case Shader::OPCODE_ADD: ADD(d, s0, s1); break;
3803 case Shader::OPCODE_SUB: SUB(d, s0, s1); break;
3804 case Shader::OPCODE_MAD: MAD(d, s0, s1, s2); break;
3805 case Shader::OPCODE_MUL: MUL(d, s0, s1); break;
3806 case Shader::OPCODE_DP3: DP3(d, s0, s1); break;
3807 case Shader::OPCODE_DP4: DP4(d, s0, s1); break;
3808 case Shader::OPCODE_LRP: LRP(d, s0, s1, s2); break;
3809 case Shader::OPCODE_TEXCOORD:
3810 if(version < 0x0104)
John Bauman89401822014-05-06 15:04:28 -04003811 {
John Bauman19bac1e2014-05-06 15:23:49 -04003812 TEXCOORD(d, u, v, s, dst.index);
John Bauman89401822014-05-06 15:04:28 -04003813 }
3814 else
3815 {
3816 if((src0.swizzle & 0x30) == 0x20) // .xyz
3817 {
John Bauman19bac1e2014-05-06 15:23:49 -04003818 TEXCRD(d, u, v, s, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
John Bauman89401822014-05-06 15:04:28 -04003819 }
3820 else // .xyw
3821 {
John Bauman19bac1e2014-05-06 15:23:49 -04003822 TEXCRD(d, u, v, t, src0.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
John Bauman89401822014-05-06 15:04:28 -04003823 }
3824 }
3825 break;
John Bauman19bac1e2014-05-06 15:23:49 -04003826 case Shader::OPCODE_TEXKILL:
3827 if(version < 0x0104)
John Bauman89401822014-05-06 15:04:28 -04003828 {
John Bauman19bac1e2014-05-06 15:23:49 -04003829 TEXKILL(cMask, u, v, s);
John Bauman89401822014-05-06 15:04:28 -04003830 }
John Bauman19bac1e2014-05-06 15:23:49 -04003831 else if(version == 0x0104)
John Bauman89401822014-05-06 15:04:28 -04003832 {
John Bauman19bac1e2014-05-06 15:23:49 -04003833 if(dst.type == Shader::PARAMETER_TEXTURE)
John Bauman89401822014-05-06 15:04:28 -04003834 {
John Bauman19bac1e2014-05-06 15:23:49 -04003835 TEXKILL(cMask, u, v, s);
John Bauman89401822014-05-06 15:04:28 -04003836 }
3837 else
3838 {
Alexis Hetu96517182015-04-15 10:30:23 -04003839 TEXKILL(cMask, r.rs[dst.index]);
John Bauman89401822014-05-06 15:04:28 -04003840 }
3841 }
3842 else ASSERT(false);
3843 break;
John Bauman19bac1e2014-05-06 15:23:49 -04003844 case Shader::OPCODE_TEX:
3845 if(version < 0x0104)
John Bauman89401822014-05-06 15:04:28 -04003846 {
John Bauman19bac1e2014-05-06 15:23:49 -04003847 TEX(r, d, u, v, s, dst.index, false);
John Bauman89401822014-05-06 15:04:28 -04003848 }
John Bauman19bac1e2014-05-06 15:23:49 -04003849 else if(version == 0x0104)
John Bauman89401822014-05-06 15:04:28 -04003850 {
John Bauman19bac1e2014-05-06 15:23:49 -04003851 if(src0.type == Shader::PARAMETER_TEXTURE)
John Bauman89401822014-05-06 15:04:28 -04003852 {
3853 if((src0.swizzle & 0x30) == 0x20) // .xyz
3854 {
John Bauman19bac1e2014-05-06 15:23:49 -04003855 TEX(r, d, u, v, s, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
John Bauman89401822014-05-06 15:04:28 -04003856 }
3857 else // .xyw
3858 {
John Bauman19bac1e2014-05-06 15:23:49 -04003859 TEX(r, d, u, v, t, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
John Bauman89401822014-05-06 15:04:28 -04003860 }
3861 }
3862 else
3863 {
John Bauman19bac1e2014-05-06 15:23:49 -04003864 TEXLD(r, d, s0, dst.index, src0.modifier == Shader::MODIFIER_DZ || src0.modifier == Shader::MODIFIER_DW);
John Bauman89401822014-05-06 15:04:28 -04003865 }
3866 }
3867 else ASSERT(false);
3868 break;
John Bauman19bac1e2014-05-06 15:23:49 -04003869 case Shader::OPCODE_TEXBEM: TEXBEM(r, d, s0, u, v, s, dst.index); break;
3870 case Shader::OPCODE_TEXBEML: TEXBEML(r, d, s0, u, v, s, dst.index); break;
3871 case Shader::OPCODE_TEXREG2AR: TEXREG2AR(r, d, s0, dst.index); break;
3872 case Shader::OPCODE_TEXREG2GB: TEXREG2GB(r, d, s0, dst.index); break;
3873 case Shader::OPCODE_TEXM3X2PAD: TEXM3X2PAD(r, u, v, s, s0, 0, src0.modifier == Shader::MODIFIER_SIGN); break;
3874 case Shader::OPCODE_TEXM3X2TEX: TEXM3X2TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
3875 case Shader::OPCODE_TEXM3X3PAD: TEXM3X3PAD(r, u, v, s, s0, pad++ % 2, src0.modifier == Shader::MODIFIER_SIGN); break;
3876 case Shader::OPCODE_TEXM3X3TEX: TEXM3X3TEX(r, d, u, v, s, dst.index, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
3877 case Shader::OPCODE_TEXM3X3SPEC: TEXM3X3SPEC(r, d, u, v, s, dst.index, s0, s1); break;
3878 case Shader::OPCODE_TEXM3X3VSPEC: TEXM3X3VSPEC(r, d, u, v, s, dst.index, s0); break;
3879 case Shader::OPCODE_CND: CND(d, s0, s1, s2); break;
3880 case Shader::OPCODE_TEXREG2RGB: TEXREG2RGB(r, d, s0, dst.index); break;
3881 case Shader::OPCODE_TEXDP3TEX: TEXDP3TEX(r, d, u, v, s, dst.index, s0); break;
3882 case Shader::OPCODE_TEXM3X2DEPTH: TEXM3X2DEPTH(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
3883 case Shader::OPCODE_TEXDP3: TEXDP3(r, d, u, v, s, s0); break;
3884 case Shader::OPCODE_TEXM3X3: TEXM3X3(r, d, u, v, s, s0, src0.modifier == Shader::MODIFIER_SIGN); break;
3885 case Shader::OPCODE_TEXDEPTH: TEXDEPTH(r); break;
3886 case Shader::OPCODE_CMP0: CMP(d, s0, s1, s2); break;
3887 case Shader::OPCODE_BEM: BEM(r, d, s0, s1, dst.index); break;
3888 case Shader::OPCODE_PHASE: break;
3889 case Shader::OPCODE_END: break;
John Bauman89401822014-05-06 15:04:28 -04003890 default:
3891 ASSERT(false);
3892 }
3893
John Bauman19bac1e2014-05-06 15:23:49 -04003894 if(dst.type != Shader::PARAMETER_VOID && opcode != Shader::OPCODE_TEXKILL)
John Bauman89401822014-05-06 15:04:28 -04003895 {
3896 if(dst.shift > 0)
3897 {
John Bauman19bac1e2014-05-06 15:23:49 -04003898 if(dst.mask & 0x1) {d.x = AddSat(d.x, d.x); if(dst.shift > 1) d.x = AddSat(d.x, d.x); if(dst.shift > 2) d.x = AddSat(d.x, d.x);}
3899 if(dst.mask & 0x2) {d.y = AddSat(d.y, d.y); if(dst.shift > 1) d.y = AddSat(d.y, d.y); if(dst.shift > 2) d.y = AddSat(d.y, d.y);}
3900 if(dst.mask & 0x4) {d.z = AddSat(d.z, d.z); if(dst.shift > 1) d.z = AddSat(d.z, d.z); if(dst.shift > 2) d.z = AddSat(d.z, d.z);}
3901 if(dst.mask & 0x8) {d.w = AddSat(d.w, d.w); if(dst.shift > 1) d.w = AddSat(d.w, d.w); if(dst.shift > 2) d.w = AddSat(d.w, d.w);}
John Bauman89401822014-05-06 15:04:28 -04003902 }
3903 else if(dst.shift < 0)
3904 {
John Bauman19bac1e2014-05-06 15:23:49 -04003905 if(dst.mask & 0x1) d.x = d.x >> -dst.shift;
3906 if(dst.mask & 0x2) d.y = d.y >> -dst.shift;
3907 if(dst.mask & 0x4) d.z = d.z >> -dst.shift;
3908 if(dst.mask & 0x8) d.w = d.w >> -dst.shift;
John Bauman89401822014-05-06 15:04:28 -04003909 }
3910
3911 if(dst.saturate)
3912 {
John Bauman19bac1e2014-05-06 15:23:49 -04003913 if(dst.mask & 0x1) {d.x = Min(d.x, Short4(0x1000)); d.x = Max(d.x, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3914 if(dst.mask & 0x2) {d.y = Min(d.y, Short4(0x1000)); d.y = Max(d.y, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3915 if(dst.mask & 0x4) {d.z = Min(d.z, Short4(0x1000)); d.z = Max(d.z, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
3916 if(dst.mask & 0x8) {d.w = Min(d.w, Short4(0x1000)); d.w = Max(d.w, Short4(0x0000, 0x0000, 0x0000, 0x0000));}
John Bauman89401822014-05-06 15:04:28 -04003917 }
3918
3919 if(pairing)
3920 {
John Bauman19bac1e2014-05-06 15:23:49 -04003921 if(dst.mask & 0x1) dPairing.x = d.x;
3922 if(dst.mask & 0x2) dPairing.y = d.y;
3923 if(dst.mask & 0x4) dPairing.z = d.z;
3924 if(dst.mask & 0x8) dPairing.w = d.w;
John Bauman89401822014-05-06 15:04:28 -04003925 }
3926
3927 if(coissue)
3928 {
John Bauman19bac1e2014-05-06 15:23:49 -04003929 const Dst &dst = shader->getInstruction(i - 1)->dst;
John Bauman89401822014-05-06 15:04:28 -04003930
3931 writeDestination(r, dPairing, dst);
3932 }
3933
3934 if(!pairing)
3935 {
3936 writeDestination(r, d, dst);
3937 }
3938 }
3939 }
3940 }
3941
3942 void PixelRoutine::ps_2_x(Registers &r, Int cMask[4])
3943 {
3944 r.enableIndex = 0;
3945 r.stackIndex = 0;
John Bauman19bac1e2014-05-06 15:23:49 -04003946
Nicolas Capens4677a5f2014-05-06 16:42:26 -04003947 if(shader->containsLeaveInstruction())
3948 {
3949 r.enableLeave = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
3950 }
3951
John Bauman19bac1e2014-05-06 15:23:49 -04003952 bool out[4][4] = {false};
3953
3954 // Create all call site return blocks up front
Alexis Hetu903e0252014-11-25 14:25:32 -05003955 for(size_t i = 0; i < shader->getLength(); i++)
John Bauman89401822014-05-06 15:04:28 -04003956 {
John Bauman19bac1e2014-05-06 15:23:49 -04003957 const Shader::Instruction *instruction = shader->getInstruction(i);
3958 Shader::Opcode opcode = instruction->opcode;
John Bauman89401822014-05-06 15:04:28 -04003959
John Bauman19bac1e2014-05-06 15:23:49 -04003960 if(opcode == Shader::OPCODE_CALL || opcode == Shader::OPCODE_CALLNZ)
3961 {
3962 const Dst &dst = instruction->dst;
John Bauman89401822014-05-06 15:04:28 -04003963
John Bauman19bac1e2014-05-06 15:23:49 -04003964 ASSERT(callRetBlock[dst.label].size() == dst.callSite);
3965 callRetBlock[dst.label].push_back(Nucleus::createBasicBlock());
3966 }
3967 }
3968
Alexis Hetu903e0252014-11-25 14:25:32 -05003969 for(size_t i = 0; i < shader->getLength(); i++)
John Bauman19bac1e2014-05-06 15:23:49 -04003970 {
3971 const Shader::Instruction *instruction = shader->getInstruction(i);
3972 Shader::Opcode opcode = instruction->opcode;
3973
3974 if(opcode == Shader::OPCODE_DCL || opcode == Shader::OPCODE_DEF || opcode == Shader::OPCODE_DEFI || opcode == Shader::OPCODE_DEFB)
John Bauman89401822014-05-06 15:04:28 -04003975 {
3976 continue;
3977 }
3978
John Bauman19bac1e2014-05-06 15:23:49 -04003979 const Dst &dst = instruction->dst;
3980 const Src &src0 = instruction->src[0];
3981 const Src &src1 = instruction->src[1];
3982 const Src &src2 = instruction->src[2];
3983 const Src &src3 = instruction->src[3];
John Bauman89401822014-05-06 15:04:28 -04003984
John Bauman19bac1e2014-05-06 15:23:49 -04003985 bool predicate = instruction->predicate;
3986 Control control = instruction->control;
John Bauman89401822014-05-06 15:04:28 -04003987 bool pp = dst.partialPrecision;
John Bauman19bac1e2014-05-06 15:23:49 -04003988 bool project = instruction->project;
3989 bool bias = instruction->bias;
John Bauman89401822014-05-06 15:04:28 -04003990
John Bauman19bac1e2014-05-06 15:23:49 -04003991 Vector4f d;
3992 Vector4f s0;
3993 Vector4f s1;
3994 Vector4f s2;
3995 Vector4f s3;
John Bauman89401822014-05-06 15:04:28 -04003996
John Bauman19bac1e2014-05-06 15:23:49 -04003997 if(opcode == Shader::OPCODE_TEXKILL) // Takes destination as input
John Bauman89401822014-05-06 15:04:28 -04003998 {
John Bauman19bac1e2014-05-06 15:23:49 -04003999 if(dst.type == Shader::PARAMETER_TEXTURE)
John Bauman89401822014-05-06 15:04:28 -04004000 {
John Bauman19bac1e2014-05-06 15:23:49 -04004001 d.x = r.vf[2 + dst.index].x;
4002 d.y = r.vf[2 + dst.index].y;
4003 d.z = r.vf[2 + dst.index].z;
4004 d.w = r.vf[2 + dst.index].w;
John Bauman89401822014-05-06 15:04:28 -04004005 }
4006 else
4007 {
4008 d = r.rf[dst.index];
4009 }
4010 }
4011
Alexis Hetu96517182015-04-15 10:30:23 -04004012 if(src0.type != Shader::PARAMETER_VOID) s0 = fetchRegisterF(r, src0);
4013 if(src1.type != Shader::PARAMETER_VOID) s1 = fetchRegisterF(r, src1);
4014 if(src2.type != Shader::PARAMETER_VOID) s2 = fetchRegisterF(r, src2);
4015 if(src3.type != Shader::PARAMETER_VOID) s3 = fetchRegisterF(r, src3);
John Bauman89401822014-05-06 15:04:28 -04004016
4017 switch(opcode)
4018 {
John Bauman19bac1e2014-05-06 15:23:49 -04004019 case Shader::OPCODE_PS_2_0: break;
4020 case Shader::OPCODE_PS_2_x: break;
4021 case Shader::OPCODE_PS_3_0: break;
4022 case Shader::OPCODE_DEF: break;
4023 case Shader::OPCODE_DCL: break;
4024 case Shader::OPCODE_NOP: break;
4025 case Shader::OPCODE_MOV: mov(d, s0); break;
4026 case Shader::OPCODE_F2B: f2b(d, s0); break;
4027 case Shader::OPCODE_B2F: b2f(d, s0); break;
4028 case Shader::OPCODE_ADD: add(d, s0, s1); break;
4029 case Shader::OPCODE_SUB: sub(d, s0, s1); break;
4030 case Shader::OPCODE_MUL: mul(d, s0, s1); break;
4031 case Shader::OPCODE_MAD: mad(d, s0, s1, s2); break;
4032 case Shader::OPCODE_DP1: dp1(d, s0, s1); break;
4033 case Shader::OPCODE_DP2: dp2(d, s0, s1); break;
4034 case Shader::OPCODE_DP2ADD: dp2add(d, s0, s1, s2); break;
4035 case Shader::OPCODE_DP3: dp3(d, s0, s1); break;
4036 case Shader::OPCODE_DP4: dp4(d, s0, s1); break;
4037 case Shader::OPCODE_CMP0: cmp0(d, s0, s1, s2); break;
4038 case Shader::OPCODE_ICMP: icmp(d, s0, s1, control); break;
4039 case Shader::OPCODE_SELECT: select(d, s0, s1, s2); break;
4040 case Shader::OPCODE_EXTRACT: extract(d.x, s0, s1.x); break;
4041 case Shader::OPCODE_INSERT: insert(d, s0, s1.x, s2.x); break;
4042 case Shader::OPCODE_FRC: frc(d, s0); break;
4043 case Shader::OPCODE_TRUNC: trunc(d, s0); break;
4044 case Shader::OPCODE_FLOOR: floor(d, s0); break;
Alexis Hetuaf1970c2015-04-17 14:26:07 -04004045 case Shader::OPCODE_ROUND: round(d, s0); break;
John Bauman19bac1e2014-05-06 15:23:49 -04004046 case Shader::OPCODE_CEIL: ceil(d, s0); break;
4047 case Shader::OPCODE_EXP2X: exp2x(d, s0, pp); break;
4048 case Shader::OPCODE_EXP2: exp2(d, s0, pp); break;
4049 case Shader::OPCODE_LOG2X: log2x(d, s0, pp); break;
4050 case Shader::OPCODE_LOG2: log2(d, s0, pp); break;
4051 case Shader::OPCODE_EXP: exp(d, s0, pp); break;
4052 case Shader::OPCODE_LOG: log(d, s0, pp); break;
4053 case Shader::OPCODE_RCPX: rcpx(d, s0, pp); break;
4054 case Shader::OPCODE_DIV: div(d, s0, s1); break;
4055 case Shader::OPCODE_MOD: mod(d, s0, s1); break;
4056 case Shader::OPCODE_RSQX: rsqx(d, s0, pp); break;
4057 case Shader::OPCODE_SQRT: sqrt(d, s0, pp); break;
4058 case Shader::OPCODE_RSQ: rsq(d, s0, pp); break;
4059 case Shader::OPCODE_LEN2: len2(d.x, s0, pp); break;
4060 case Shader::OPCODE_LEN3: len3(d.x, s0, pp); break;
4061 case Shader::OPCODE_LEN4: len4(d.x, s0, pp); break;
4062 case Shader::OPCODE_DIST1: dist1(d.x, s0, s1, pp); break;
4063 case Shader::OPCODE_DIST2: dist2(d.x, s0, s1, pp); break;
4064 case Shader::OPCODE_DIST3: dist3(d.x, s0, s1, pp); break;
4065 case Shader::OPCODE_DIST4: dist4(d.x, s0, s1, pp); break;
4066 case Shader::OPCODE_MIN: min(d, s0, s1); break;
4067 case Shader::OPCODE_MAX: max(d, s0, s1); break;
4068 case Shader::OPCODE_LRP: lrp(d, s0, s1, s2); break;
4069 case Shader::OPCODE_STEP: step(d, s0, s1); break;
4070 case Shader::OPCODE_SMOOTH: smooth(d, s0, s1, s2); break;
4071 case Shader::OPCODE_POWX: powx(d, s0, s1, pp); break;
4072 case Shader::OPCODE_POW: pow(d, s0, s1, pp); break;
4073 case Shader::OPCODE_SGN: sgn(d, s0); break;
4074 case Shader::OPCODE_CRS: crs(d, s0, s1); break;
4075 case Shader::OPCODE_FORWARD1: forward1(d, s0, s1, s2); break;
4076 case Shader::OPCODE_FORWARD2: forward2(d, s0, s1, s2); break;
4077 case Shader::OPCODE_FORWARD3: forward3(d, s0, s1, s2); break;
4078 case Shader::OPCODE_FORWARD4: forward4(d, s0, s1, s2); break;
4079 case Shader::OPCODE_REFLECT1: reflect1(d, s0, s1); break;
4080 case Shader::OPCODE_REFLECT2: reflect2(d, s0, s1); break;
4081 case Shader::OPCODE_REFLECT3: reflect3(d, s0, s1); break;
4082 case Shader::OPCODE_REFLECT4: reflect4(d, s0, s1); break;
4083 case Shader::OPCODE_REFRACT1: refract1(d, s0, s1, s2.x); break;
4084 case Shader::OPCODE_REFRACT2: refract2(d, s0, s1, s2.x); break;
4085 case Shader::OPCODE_REFRACT3: refract3(d, s0, s1, s2.x); break;
4086 case Shader::OPCODE_REFRACT4: refract4(d, s0, s1, s2.x); break;
4087 case Shader::OPCODE_NRM2: nrm2(d, s0, pp); break;
4088 case Shader::OPCODE_NRM3: nrm3(d, s0, pp); break;
4089 case Shader::OPCODE_NRM4: nrm4(d, s0, pp); break;
4090 case Shader::OPCODE_ABS: abs(d, s0); break;
4091 case Shader::OPCODE_SINCOS: sincos(d, s0, pp); break;
4092 case Shader::OPCODE_COS: cos(d, s0, pp); break;
4093 case Shader::OPCODE_SIN: sin(d, s0, pp); break;
4094 case Shader::OPCODE_TAN: tan(d, s0, pp); break;
4095 case Shader::OPCODE_ACOS: acos(d, s0, pp); break;
4096 case Shader::OPCODE_ASIN: asin(d, s0, pp); break;
4097 case Shader::OPCODE_ATAN: atan(d, s0, pp); break;
4098 case Shader::OPCODE_ATAN2: atan2(d, s0, s1, pp); break;
Alexis Hetuaf1970c2015-04-17 14:26:07 -04004099 case Shader::OPCODE_COSH: cosh(d, s0, pp); break;
4100 case Shader::OPCODE_SINH: sinh(d, s0, pp); break;
4101 case Shader::OPCODE_TANH: tanh(d, s0, pp); break;
4102 case Shader::OPCODE_ACOSH: acosh(d, s0, pp); break;
4103 case Shader::OPCODE_ASINH: asinh(d, s0, pp); break;
4104 case Shader::OPCODE_ATANH: atanh(d, s0, pp); break;
John Bauman19bac1e2014-05-06 15:23:49 -04004105 case Shader::OPCODE_M4X4: M4X4(r, d, s0, src1); break;
4106 case Shader::OPCODE_M4X3: M4X3(r, d, s0, src1); break;
4107 case Shader::OPCODE_M3X4: M3X4(r, d, s0, src1); break;
4108 case Shader::OPCODE_M3X3: M3X3(r, d, s0, src1); break;
4109 case Shader::OPCODE_M3X2: M3X2(r, d, s0, src1); break;
4110 case Shader::OPCODE_TEX: TEXLD(r, d, s0, src1, project, bias); break;
4111 case Shader::OPCODE_TEXLDD: TEXLDD(r, d, s0, src1, s2, s3, project, bias); break;
4112 case Shader::OPCODE_TEXLDL: TEXLDL(r, d, s0, src1, project, bias); break;
4113 case Shader::OPCODE_TEXKILL: TEXKILL(cMask, d, dst.mask); break;
4114 case Shader::OPCODE_DISCARD: DISCARD(r, cMask, instruction); break;
4115 case Shader::OPCODE_DFDX: DFDX(d, s0); break;
4116 case Shader::OPCODE_DFDY: DFDY(d, s0); break;
4117 case Shader::OPCODE_FWIDTH: FWIDTH(d, s0); break;
4118 case Shader::OPCODE_BREAK: BREAK(r); break;
4119 case Shader::OPCODE_BREAKC: BREAKC(r, s0, s1, control); break;
4120 case Shader::OPCODE_BREAKP: BREAKP(r, src0); break;
4121 case Shader::OPCODE_CONTINUE: CONTINUE(r); break;
4122 case Shader::OPCODE_TEST: TEST(); break;
4123 case Shader::OPCODE_CALL: CALL(r, dst.label, dst.callSite); break;
4124 case Shader::OPCODE_CALLNZ: CALLNZ(r, dst.label, dst.callSite, src0); break;
4125 case Shader::OPCODE_ELSE: ELSE(r); break;
4126 case Shader::OPCODE_ENDIF: ENDIF(r); break;
4127 case Shader::OPCODE_ENDLOOP: ENDLOOP(r); break;
4128 case Shader::OPCODE_ENDREP: ENDREP(r); break;
4129 case Shader::OPCODE_ENDWHILE: ENDWHILE(r); break;
4130 case Shader::OPCODE_IF: IF(r, src0); break;
4131 case Shader::OPCODE_IFC: IFC(r, s0, s1, control); break;
4132 case Shader::OPCODE_LABEL: LABEL(dst.index); break;
4133 case Shader::OPCODE_LOOP: LOOP(r, src1); break;
4134 case Shader::OPCODE_REP: REP(r, src0); break;
4135 case Shader::OPCODE_WHILE: WHILE(r, src0); break;
4136 case Shader::OPCODE_RET: RET(r); break;
4137 case Shader::OPCODE_LEAVE: LEAVE(r); break;
4138 case Shader::OPCODE_CMP: cmp(d, s0, s1, control); break;
4139 case Shader::OPCODE_ALL: all(d.x, s0); break;
4140 case Shader::OPCODE_ANY: any(d.x, s0); break;
4141 case Shader::OPCODE_NOT: not(d, s0); break;
4142 case Shader::OPCODE_OR: or(d.x, s0.x, s1.x); break;
4143 case Shader::OPCODE_XOR: xor(d.x, s0.x, s1.x); break;
4144 case Shader::OPCODE_AND: and(d.x, s0.x, s1.x); break;
4145 case Shader::OPCODE_END: break;
John Bauman89401822014-05-06 15:04:28 -04004146 default:
4147 ASSERT(false);
4148 }
4149
John Bauman19bac1e2014-05-06 15:23:49 -04004150 if(dst.type != Shader::PARAMETER_VOID && dst.type != Shader::PARAMETER_LABEL && opcode != Shader::OPCODE_TEXKILL && opcode != Shader::OPCODE_NOP)
John Bauman89401822014-05-06 15:04:28 -04004151 {
John Bauman19bac1e2014-05-06 15:23:49 -04004152 if(dst.integer)
John Bauman89401822014-05-06 15:04:28 -04004153 {
John Bauman19bac1e2014-05-06 15:23:49 -04004154 switch(opcode)
4155 {
4156 case Shader::OPCODE_DIV:
4157 if(dst.x) d.x = Trunc(d.x);
4158 if(dst.y) d.y = Trunc(d.y);
4159 if(dst.z) d.z = Trunc(d.z);
4160 if(dst.w) d.w = Trunc(d.w);
4161 break;
4162 default:
4163 break; // No truncation to integer required when arguments are integer
4164 }
John Bauman89401822014-05-06 15:04:28 -04004165 }
4166
John Bauman19bac1e2014-05-06 15:23:49 -04004167 if(dst.saturate)
John Bauman89401822014-05-06 15:04:28 -04004168 {
John Bauman19bac1e2014-05-06 15:23:49 -04004169 if(dst.x) d.x = Max(d.x, Float4(0.0f));
4170 if(dst.y) d.y = Max(d.y, Float4(0.0f));
4171 if(dst.z) d.z = Max(d.z, Float4(0.0f));
4172 if(dst.w) d.w = Max(d.w, Float4(0.0f));
4173
4174 if(dst.x) d.x = Min(d.x, Float4(1.0f));
4175 if(dst.y) d.y = Min(d.y, Float4(1.0f));
4176 if(dst.z) d.z = Min(d.z, Float4(1.0f));
4177 if(dst.w) d.w = Min(d.w, Float4(1.0f));
4178 }
4179
Nicolas Capensc6e8ab12014-05-06 23:31:07 -04004180 if(instruction->isPredicated())
John Bauman19bac1e2014-05-06 15:23:49 -04004181 {
4182 Vector4f pDst; // FIXME: Rename
John Bauman89401822014-05-06 15:04:28 -04004183
4184 switch(dst.type)
4185 {
John Bauman19bac1e2014-05-06 15:23:49 -04004186 case Shader::PARAMETER_TEMP:
4187 if(dst.rel.type == Shader::PARAMETER_VOID)
4188 {
4189 if(dst.x) pDst.x = r.rf[dst.index].x;
4190 if(dst.y) pDst.y = r.rf[dst.index].y;
4191 if(dst.z) pDst.z = r.rf[dst.index].z;
4192 if(dst.w) pDst.w = r.rf[dst.index].w;
4193 }
4194 else
4195 {
4196 Int a = relativeAddress(r, dst);
4197
4198 if(dst.x) pDst.x = r.rf[dst.index + a].x;
4199 if(dst.y) pDst.y = r.rf[dst.index + a].y;
4200 if(dst.z) pDst.z = r.rf[dst.index + a].z;
4201 if(dst.w) pDst.w = r.rf[dst.index + a].w;
4202 }
John Bauman89401822014-05-06 15:04:28 -04004203 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004204 case Shader::PARAMETER_COLOROUT:
4205 ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
John Bauman89401822014-05-06 15:04:28 -04004206 if(dst.x) pDst.x = r.oC[dst.index].x;
4207 if(dst.y) pDst.y = r.oC[dst.index].y;
4208 if(dst.z) pDst.z = r.oC[dst.index].z;
4209 if(dst.w) pDst.w = r.oC[dst.index].w;
4210 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004211 case Shader::PARAMETER_PREDICATE:
John Bauman89401822014-05-06 15:04:28 -04004212 if(dst.x) pDst.x = r.p0.x;
4213 if(dst.y) pDst.y = r.p0.y;
4214 if(dst.z) pDst.z = r.p0.z;
4215 if(dst.w) pDst.w = r.p0.w;
4216 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004217 case Shader::PARAMETER_DEPTHOUT:
John Bauman89401822014-05-06 15:04:28 -04004218 pDst.x = r.oDepth;
4219 break;
4220 default:
4221 ASSERT(false);
4222 }
4223
John Bauman19bac1e2014-05-06 15:23:49 -04004224 Int4 enable = enableMask(r, instruction);
John Bauman89401822014-05-06 15:04:28 -04004225
4226 Int4 xEnable = enable;
4227 Int4 yEnable = enable;
4228 Int4 zEnable = enable;
4229 Int4 wEnable = enable;
4230
4231 if(predicate)
4232 {
John Bauman19bac1e2014-05-06 15:23:49 -04004233 unsigned char pSwizzle = instruction->predicateSwizzle;
John Bauman89401822014-05-06 15:04:28 -04004234
4235 Float4 xPredicate = r.p0[(pSwizzle >> 0) & 0x03];
4236 Float4 yPredicate = r.p0[(pSwizzle >> 2) & 0x03];
4237 Float4 zPredicate = r.p0[(pSwizzle >> 4) & 0x03];
4238 Float4 wPredicate = r.p0[(pSwizzle >> 6) & 0x03];
4239
John Bauman19bac1e2014-05-06 15:23:49 -04004240 if(!instruction->predicateNot)
John Bauman89401822014-05-06 15:04:28 -04004241 {
4242 if(dst.x) xEnable = xEnable & As<Int4>(xPredicate);
4243 if(dst.y) yEnable = yEnable & As<Int4>(yPredicate);
4244 if(dst.z) zEnable = zEnable & As<Int4>(zPredicate);
4245 if(dst.w) wEnable = wEnable & As<Int4>(wPredicate);
4246 }
4247 else
4248 {
4249 if(dst.x) xEnable = xEnable & ~As<Int4>(xPredicate);
4250 if(dst.y) yEnable = yEnable & ~As<Int4>(yPredicate);
4251 if(dst.z) zEnable = zEnable & ~As<Int4>(zPredicate);
4252 if(dst.w) wEnable = wEnable & ~As<Int4>(wPredicate);
4253 }
4254 }
4255
4256 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) & xEnable);
4257 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) & yEnable);
4258 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) & zEnable);
4259 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) & wEnable);
4260
4261 if(dst.x) d.x = As<Float4>(As<Int4>(d.x) | (As<Int4>(pDst.x) & ~xEnable));
4262 if(dst.y) d.y = As<Float4>(As<Int4>(d.y) | (As<Int4>(pDst.y) & ~yEnable));
4263 if(dst.z) d.z = As<Float4>(As<Int4>(d.z) | (As<Int4>(pDst.z) & ~zEnable));
4264 if(dst.w) d.w = As<Float4>(As<Int4>(d.w) | (As<Int4>(pDst.w) & ~wEnable));
4265 }
4266
4267 switch(dst.type)
4268 {
John Bauman19bac1e2014-05-06 15:23:49 -04004269 case Shader::PARAMETER_TEMP:
4270 if(dst.rel.type == Shader::PARAMETER_VOID)
4271 {
4272 if(dst.x) r.rf[dst.index].x = d.x;
4273 if(dst.y) r.rf[dst.index].y = d.y;
4274 if(dst.z) r.rf[dst.index].z = d.z;
4275 if(dst.w) r.rf[dst.index].w = d.w;
4276 }
4277 else
4278 {
4279 Int a = relativeAddress(r, dst);
4280
4281 if(dst.x) r.rf[dst.index + a].x = d.x;
4282 if(dst.y) r.rf[dst.index + a].y = d.y;
4283 if(dst.z) r.rf[dst.index + a].z = d.z;
4284 if(dst.w) r.rf[dst.index + a].w = d.w;
4285 }
John Bauman89401822014-05-06 15:04:28 -04004286 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004287 case Shader::PARAMETER_COLOROUT:
4288 ASSERT(dst.rel.type == Shader::PARAMETER_VOID);
4289 if(dst.x) {r.oC[dst.index].x = d.x; out[dst.index][0] = true;}
4290 if(dst.y) {r.oC[dst.index].y = d.y; out[dst.index][1] = true;}
4291 if(dst.z) {r.oC[dst.index].z = d.z; out[dst.index][2] = true;}
4292 if(dst.w) {r.oC[dst.index].w = d.w; out[dst.index][3] = true;}
John Bauman89401822014-05-06 15:04:28 -04004293 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004294 case Shader::PARAMETER_PREDICATE:
John Bauman89401822014-05-06 15:04:28 -04004295 if(dst.x) r.p0.x = d.x;
4296 if(dst.y) r.p0.y = d.y;
4297 if(dst.z) r.p0.z = d.z;
4298 if(dst.w) r.p0.w = d.w;
4299 break;
John Bauman19bac1e2014-05-06 15:23:49 -04004300 case Shader::PARAMETER_DEPTHOUT:
John Bauman89401822014-05-06 15:04:28 -04004301 r.oDepth = d.x;
4302 break;
4303 default:
4304 ASSERT(false);
4305 }
4306 }
4307 }
4308
John Bauman19bac1e2014-05-06 15:23:49 -04004309 if(currentLabel != -1)
John Bauman89401822014-05-06 15:04:28 -04004310 {
4311 Nucleus::setInsertBlock(returnBlock);
4312 }
John Bauman19bac1e2014-05-06 15:23:49 -04004313
4314 for(int i = 0; i < 4; i++)
4315 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04004316 if(state.targetFormat[i] != FORMAT_NULL)
John Bauman19bac1e2014-05-06 15:23:49 -04004317 {
4318 if(!out[i][0]) r.oC[i].x = Float4(0.0f);
4319 if(!out[i][1]) r.oC[i].y = Float4(0.0f);
4320 if(!out[i][2]) r.oC[i].z = Float4(0.0f);
4321 if(!out[i][3]) r.oC[i].w = Float4(0.0f);
4322 }
4323 }
John Bauman89401822014-05-06 15:04:28 -04004324 }
4325
John Bauman19bac1e2014-05-06 15:23:49 -04004326 Short4 PixelRoutine::convertFixed12(RValue<Float4> cf)
John Bauman89401822014-05-06 15:04:28 -04004327 {
John Bauman19bac1e2014-05-06 15:23:49 -04004328 return RoundShort4(cf * Float4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04004329 }
4330
Alexis Hetu96517182015-04-15 10:30:23 -04004331 void PixelRoutine::convertFixed12(Vector4s &cs, Vector4f &cf)
John Bauman89401822014-05-06 15:04:28 -04004332 {
Alexis Hetu96517182015-04-15 10:30:23 -04004333 cs.x = convertFixed12(cf.x);
4334 cs.y = convertFixed12(cf.y);
4335 cs.z = convertFixed12(cf.z);
4336 cs.w = convertFixed12(cf.w);
John Bauman89401822014-05-06 15:04:28 -04004337 }
4338
4339 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
4340 {
John Bauman19bac1e2014-05-06 15:23:49 -04004341 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04004342 }
4343
Alexis Hetu96517182015-04-15 10:30:23 -04004344 void PixelRoutine::convertFixed16(Vector4s &cs, Vector4f &cf, bool saturate)
John Bauman89401822014-05-06 15:04:28 -04004345 {
Alexis Hetu96517182015-04-15 10:30:23 -04004346 cs.x = convertFixed16(cf.x, saturate);
4347 cs.y = convertFixed16(cf.y, saturate);
4348 cs.z = convertFixed16(cf.z, saturate);
4349 cs.w = convertFixed16(cf.w, saturate);
John Bauman89401822014-05-06 15:04:28 -04004350 }
4351
Alexis Hetu96517182015-04-15 10:30:23 -04004352 Float4 PixelRoutine::convertSigned12(Short4 &cs)
John Bauman89401822014-05-06 15:04:28 -04004353 {
Alexis Hetu96517182015-04-15 10:30:23 -04004354 return Float4(cs) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004355 }
4356
Alexis Hetu96517182015-04-15 10:30:23 -04004357 void PixelRoutine::convertSigned12(Vector4f &cf, Vector4s &cs)
John Bauman89401822014-05-06 15:04:28 -04004358 {
Alexis Hetu96517182015-04-15 10:30:23 -04004359 cf.x = convertSigned12(cs.x);
4360 cf.y = convertSigned12(cs.y);
4361 cf.z = convertSigned12(cs.z);
4362 cf.w = convertSigned12(cs.w);
John Bauman89401822014-05-06 15:04:28 -04004363 }
4364
Alexis Hetu96517182015-04-15 10:30:23 -04004365 Float4 PixelRoutine::convertUnsigned16(UShort4 cs)
John Bauman89401822014-05-06 15:04:28 -04004366 {
Alexis Hetu96517182015-04-15 10:30:23 -04004367 return Float4(cs) * Float4(1.0f / 0xFFFF);
John Bauman89401822014-05-06 15:04:28 -04004368 }
4369
Alexis Hetu96517182015-04-15 10:30:23 -04004370 void PixelRoutine::sRGBtoLinear16_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04004371 {
John Bauman19bac1e2014-05-06 15:23:49 -04004372 c.x = As<UShort4>(c.x) >> 4;
4373 c.y = As<UShort4>(c.y) >> 4;
4374 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04004375
4376 sRGBtoLinear12_16(r, c);
4377 }
4378
Alexis Hetu96517182015-04-15 10:30:23 -04004379 void PixelRoutine::sRGBtoLinear12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04004380 {
4381 Pointer<Byte> LUT = r.constants + OFFSET(Constants,sRGBtoLin12_16);
4382
John Bauman19bac1e2014-05-06 15:23:49 -04004383 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
4384 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
4385 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
4386 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004387
John Bauman19bac1e2014-05-06 15:23:49 -04004388 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
4389 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
4390 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
4391 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004392
John Bauman19bac1e2014-05-06 15:23:49 -04004393 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
4394 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
4395 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
4396 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004397 }
4398
Alexis Hetu96517182015-04-15 10:30:23 -04004399 void PixelRoutine::linearToSRGB16_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04004400 {
John Bauman19bac1e2014-05-06 15:23:49 -04004401 c.x = As<UShort4>(c.x) >> 4;
4402 c.y = As<UShort4>(c.y) >> 4;
4403 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04004404
4405 linearToSRGB12_16(r, c);
4406 }
4407
Alexis Hetu96517182015-04-15 10:30:23 -04004408 void PixelRoutine::linearToSRGB12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04004409 {
4410 Pointer<Byte> LUT = r.constants + OFFSET(Constants,linToSRGB12_16);
4411
John Bauman19bac1e2014-05-06 15:23:49 -04004412 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
4413 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
4414 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
4415 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004416
John Bauman19bac1e2014-05-06 15:23:49 -04004417 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
4418 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
4419 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
4420 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004421
John Bauman19bac1e2014-05-06 15:23:49 -04004422 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
4423 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
4424 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
4425 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04004426 }
4427
4428 Float4 PixelRoutine::linearToSRGB(const Float4 &x) // Approximates x^(1.0/2.2)
4429 {
4430 Float4 sqrtx = Rcp_pp(RcpSqrt_pp(x));
4431 Float4 sRGB = sqrtx * Float4(1.14f) - x * Float4(0.14f);
4432
4433 return Min(Max(sRGB, Float4(0.0f)), Float4(1.0f));
4434 }
4435
4436 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
4437 {
4438 Float4 linear = x * x;
4439 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
4440
4441 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
4442 }
4443
Alexis Hetu96517182015-04-15 10:30:23 -04004444 void PixelRoutine::MOV(Vector4s &dst, Vector4s &src0)
John Bauman89401822014-05-06 15:04:28 -04004445 {
John Bauman19bac1e2014-05-06 15:23:49 -04004446 dst.x = src0.x;
4447 dst.y = src0.y;
4448 dst.z = src0.z;
4449 dst.w = src0.w;
John Bauman89401822014-05-06 15:04:28 -04004450 }
4451
Alexis Hetu96517182015-04-15 10:30:23 -04004452 void PixelRoutine::ADD(Vector4s &dst, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004453 {
John Bauman19bac1e2014-05-06 15:23:49 -04004454 dst.x = AddSat(src0.x, src1.x);
4455 dst.y = AddSat(src0.y, src1.y);
4456 dst.z = AddSat(src0.z, src1.z);
4457 dst.w = AddSat(src0.w, src1.w);
John Bauman89401822014-05-06 15:04:28 -04004458 }
4459
Alexis Hetu96517182015-04-15 10:30:23 -04004460 void PixelRoutine::SUB(Vector4s &dst, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004461 {
John Bauman19bac1e2014-05-06 15:23:49 -04004462 dst.x = SubSat(src0.x, src1.x);
4463 dst.y = SubSat(src0.y, src1.y);
4464 dst.z = SubSat(src0.z, src1.z);
4465 dst.w = SubSat(src0.w, src1.w);
John Bauman89401822014-05-06 15:04:28 -04004466 }
4467
Alexis Hetu96517182015-04-15 10:30:23 -04004468 void PixelRoutine::MAD(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
John Bauman89401822014-05-06 15:04:28 -04004469 {
4470 // FIXME: Long fixed-point multiply fixup
4471 {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
4472 {dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
4473 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
4474 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
4475 }
4476
Alexis Hetu96517182015-04-15 10:30:23 -04004477 void PixelRoutine::MUL(Vector4s &dst, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004478 {
4479 // FIXME: Long fixed-point multiply fixup
4480 {dst.x = MulHigh(src0.x, src1.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x);}
4481 {dst.y = MulHigh(src0.y, src1.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y);}
4482 {dst.z = MulHigh(src0.z, src1.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z);}
4483 {dst.w = MulHigh(src0.w, src1.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w);}
4484 }
4485
Alexis Hetu96517182015-04-15 10:30:23 -04004486 void PixelRoutine::DP3(Vector4s &dst, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004487 {
4488 Short4 t0;
4489 Short4 t1;
4490
4491 // FIXME: Long fixed-point multiply fixup
4492 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
4493 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4494 t0 = AddSat(t0, t1);
4495 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4496 t0 = AddSat(t0, t1);
4497
John Bauman19bac1e2014-05-06 15:23:49 -04004498 dst.x = t0;
4499 dst.y = t0;
4500 dst.z = t0;
4501 dst.w = t0;
John Bauman89401822014-05-06 15:04:28 -04004502 }
4503
Alexis Hetu96517182015-04-15 10:30:23 -04004504 void PixelRoutine::DP4(Vector4s &dst, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004505 {
4506 Short4 t0;
4507 Short4 t1;
4508
4509 // FIXME: Long fixed-point multiply fixup
4510 t0 = MulHigh(src0.x, src1.x); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0); t0 = AddSat(t0, t0);
4511 t1 = MulHigh(src0.y, src1.y); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4512 t0 = AddSat(t0, t1);
4513 t1 = MulHigh(src0.z, src1.z); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4514 t0 = AddSat(t0, t1);
4515 t1 = MulHigh(src0.w, src1.w); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1); t1 = AddSat(t1, t1);
4516 t0 = AddSat(t0, t1);
4517
John Bauman19bac1e2014-05-06 15:23:49 -04004518 dst.x = t0;
4519 dst.y = t0;
4520 dst.z = t0;
4521 dst.w = t0;
John Bauman89401822014-05-06 15:04:28 -04004522 }
4523
Alexis Hetu96517182015-04-15 10:30:23 -04004524 void PixelRoutine::LRP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
John Bauman89401822014-05-06 15:04:28 -04004525 {
4526 // FIXME: Long fixed-point multiply fixup
4527 {dst.x = SubSat(src1.x, src2.x); dst.x = MulHigh(dst.x, src0.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, dst.x); dst.x = AddSat(dst.x, src2.x);}
4528 {dst.y = SubSat(src1.y, src2.y); dst.y = MulHigh(dst.y, src0.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, dst.y); dst.y = AddSat(dst.y, src2.y);}
4529 {dst.z = SubSat(src1.z, src2.z); dst.z = MulHigh(dst.z, src0.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, dst.z); dst.z = AddSat(dst.z, src2.z);}
4530 {dst.w = SubSat(src1.w, src2.w); dst.w = MulHigh(dst.w, src0.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, dst.w); dst.w = AddSat(dst.w, src2.w);}
4531 }
4532
Alexis Hetu96517182015-04-15 10:30:23 -04004533 void PixelRoutine::TEXCOORD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate)
John Bauman89401822014-05-06 15:04:28 -04004534 {
4535 Float4 uw;
4536 Float4 vw;
4537 Float4 sw;
4538
4539 if(state.interpolant[2 + coordinate].component & 0x01)
4540 {
John Bauman19bac1e2014-05-06 15:23:49 -04004541 uw = Max(u, Float4(0.0f));
4542 uw = Min(uw, Float4(1.0f));
4543 dst.x = convertFixed12(uw);
John Bauman89401822014-05-06 15:04:28 -04004544 }
4545 else
4546 {
John Bauman19bac1e2014-05-06 15:23:49 -04004547 dst.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04004548 }
4549
4550 if(state.interpolant[2 + coordinate].component & 0x02)
4551 {
John Bauman19bac1e2014-05-06 15:23:49 -04004552 vw = Max(v, Float4(0.0f));
4553 vw = Min(vw, Float4(1.0f));
4554 dst.y = convertFixed12(vw);
John Bauman89401822014-05-06 15:04:28 -04004555 }
4556 else
4557 {
John Bauman19bac1e2014-05-06 15:23:49 -04004558 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04004559 }
4560
4561 if(state.interpolant[2 + coordinate].component & 0x04)
4562 {
John Bauman19bac1e2014-05-06 15:23:49 -04004563 sw = Max(s, Float4(0.0f));
4564 sw = Min(sw, Float4(1.0f));
4565 dst.z = convertFixed12(sw);
John Bauman89401822014-05-06 15:04:28 -04004566 }
4567 else
4568 {
John Bauman19bac1e2014-05-06 15:23:49 -04004569 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04004570 }
4571
John Bauman19bac1e2014-05-06 15:23:49 -04004572 dst.w = Short4(0x1000);
John Bauman89401822014-05-06 15:04:28 -04004573 }
4574
Alexis Hetu96517182015-04-15 10:30:23 -04004575 void PixelRoutine::TEXCRD(Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int coordinate, bool project)
John Bauman89401822014-05-06 15:04:28 -04004576 {
4577 Float4 uw = u;
4578 Float4 vw = v;
4579 Float4 sw = s;
4580
4581 if(project)
4582 {
4583 uw *= Rcp_pp(s);
4584 vw *= Rcp_pp(s);
4585 }
4586
4587 if(state.interpolant[2 + coordinate].component & 0x01)
4588 {
John Bauman19bac1e2014-05-06 15:23:49 -04004589 uw *= Float4(0x1000);
4590 uw = Max(uw, Float4(-0x8000));
4591 uw = Min(uw, Float4(0x7FFF));
4592 dst.x = RoundShort4(uw);
John Bauman89401822014-05-06 15:04:28 -04004593 }
4594 else
4595 {
John Bauman19bac1e2014-05-06 15:23:49 -04004596 dst.x = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04004597 }
4598
4599 if(state.interpolant[2 + coordinate].component & 0x02)
4600 {
John Bauman19bac1e2014-05-06 15:23:49 -04004601 vw *= Float4(0x1000);
4602 vw = Max(vw, Float4(-0x8000));
4603 vw = Min(vw, Float4(0x7FFF));
4604 dst.y = RoundShort4(vw);
John Bauman89401822014-05-06 15:04:28 -04004605 }
4606 else
4607 {
John Bauman19bac1e2014-05-06 15:23:49 -04004608 dst.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04004609 }
4610
4611 if(state.interpolant[2 + coordinate].component & 0x04)
4612 {
John Bauman19bac1e2014-05-06 15:23:49 -04004613 sw *= Float4(0x1000);
4614 sw = Max(sw, Float4(-0x8000));
4615 sw = Min(sw, Float4(0x7FFF));
4616 dst.z = RoundShort4(sw);
John Bauman89401822014-05-06 15:04:28 -04004617 }
4618 else
4619 {
John Bauman19bac1e2014-05-06 15:23:49 -04004620 dst.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04004621 }
4622 }
4623
Alexis Hetu96517182015-04-15 10:30:23 -04004624 void PixelRoutine::TEXDP3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src)
John Bauman89401822014-05-06 15:04:28 -04004625 {
4626 TEXM3X3PAD(r, u, v, s, src, 0, false);
4627
John Bauman19bac1e2014-05-06 15:23:49 -04004628 Short4 t0 = RoundShort4(r.u_ * Float4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04004629
John Bauman19bac1e2014-05-06 15:23:49 -04004630 dst.x = t0;
4631 dst.y = t0;
4632 dst.z = t0;
4633 dst.w = t0;
John Bauman89401822014-05-06 15:04:28 -04004634 }
4635
Alexis Hetu96517182015-04-15 10:30:23 -04004636 void PixelRoutine::TEXDP3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
John Bauman89401822014-05-06 15:04:28 -04004637 {
4638 TEXM3X3PAD(r, u, v, s, src0, 0, false);
4639
John Bauman19bac1e2014-05-06 15:23:49 -04004640 r.v_ = Float4(0.0f);
4641 r.w_ = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04004642
4643 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4644 }
4645
4646 void PixelRoutine::TEXKILL(Int cMask[4], Float4 &u, Float4 &v, Float4 &s)
4647 {
John Bauman19bac1e2014-05-06 15:23:49 -04004648 Int kill = SignMask(CmpNLT(u, Float4(0.0f))) &
4649 SignMask(CmpNLT(v, Float4(0.0f))) &
4650 SignMask(CmpNLT(s, Float4(0.0f)));
John Bauman89401822014-05-06 15:04:28 -04004651
4652 for(unsigned int q = 0; q < state.multiSample; q++)
4653 {
4654 cMask[q] &= kill;
4655 }
4656 }
4657
Alexis Hetu96517182015-04-15 10:30:23 -04004658 void PixelRoutine::TEXKILL(Int cMask[4], Vector4s &src)
John Bauman89401822014-05-06 15:04:28 -04004659 {
John Bauman19bac1e2014-05-06 15:23:49 -04004660 Short4 test = src.x | src.y | src.z;
John Bauman89401822014-05-06 15:04:28 -04004661 Int kill = SignMask(Pack(test, test)) ^ 0x0000000F;
4662
4663 for(unsigned int q = 0; q < state.multiSample; q++)
4664 {
4665 cMask[q] &= kill;
4666 }
4667 }
4668
Alexis Hetu96517182015-04-15 10:30:23 -04004669 void PixelRoutine::TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int sampler, bool project)
John Bauman89401822014-05-06 15:04:28 -04004670 {
4671 sampleTexture(r, dst, sampler, u, v, s, s, project);
4672 }
4673
Alexis Hetu96517182015-04-15 10:30:23 -04004674 void PixelRoutine::TEXLD(Registers &r, Vector4s &dst, Vector4s &src, int sampler, bool project)
John Bauman89401822014-05-06 15:04:28 -04004675 {
John Bauman19bac1e2014-05-06 15:23:49 -04004676 Float4 u = Float4(src.x) * Float4(1.0f / 0x0FFE);
4677 Float4 v = Float4(src.y) * Float4(1.0f / 0x0FFE);
4678 Float4 s = Float4(src.z) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004679
4680 sampleTexture(r, dst, sampler, u, v, s, s, project);
4681 }
4682
Alexis Hetu96517182015-04-15 10:30:23 -04004683 void PixelRoutine::TEXBEM(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
John Bauman89401822014-05-06 15:04:28 -04004684 {
John Bauman19bac1e2014-05-06 15:23:49 -04004685 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
4686 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004687
4688 Float4 du2 = du;
4689 Float4 dv2 = dv;
4690
4691 du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
4692 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
4693 du += dv2;
4694 dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
4695 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
4696 dv += du2;
4697
4698 Float4 u_ = u + du;
4699 Float4 v_ = v + dv;
4700
4701 sampleTexture(r, dst, stage, u_, v_, s, s);
4702 }
4703
Alexis Hetu96517182015-04-15 10:30:23 -04004704 void PixelRoutine::TEXBEML(Registers &r, Vector4s &dst, Vector4s &src, Float4 &u, Float4 &v, Float4 &s, int stage)
John Bauman89401822014-05-06 15:04:28 -04004705 {
John Bauman19bac1e2014-05-06 15:23:49 -04004706 Float4 du = Float4(src.x) * Float4(1.0f / 0x0FFE);
4707 Float4 dv = Float4(src.y) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004708
4709 Float4 du2 = du;
4710 Float4 dv2 = dv;
4711
4712 du *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][0]));
4713 dv2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][0]));
4714 du += dv2;
4715 dv *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[1][1]));
4716 du2 *= *Pointer<Float4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4F[0][1]));
4717 dv += du2;
4718
4719 Float4 u_ = u + du;
4720 Float4 v_ = v + dv;
4721
4722 sampleTexture(r, dst, stage, u_, v_, s, s);
4723
4724 Short4 L;
4725
John Bauman19bac1e2014-05-06 15:23:49 -04004726 L = src.z;
John Bauman89401822014-05-06 15:04:28 -04004727 L = MulHigh(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceScale4)));
4728 L = L << 4;
4729 L = AddSat(L, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].luminanceOffset4)));
4730 L = Max(L, Short4(0x0000, 0x0000, 0x0000, 0x0000));
John Bauman19bac1e2014-05-06 15:23:49 -04004731 L = Min(L, Short4(0x1000));
John Bauman89401822014-05-06 15:04:28 -04004732
John Bauman19bac1e2014-05-06 15:23:49 -04004733 dst.x = MulHigh(dst.x, L); dst.x = dst.x << 4;
4734 dst.y = MulHigh(dst.y, L); dst.y = dst.y << 4;
4735 dst.z = MulHigh(dst.z, L); dst.z = dst.z << 4;
John Bauman89401822014-05-06 15:04:28 -04004736 }
4737
Alexis Hetu96517182015-04-15 10:30:23 -04004738 void PixelRoutine::TEXREG2AR(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
John Bauman89401822014-05-06 15:04:28 -04004739 {
John Bauman19bac1e2014-05-06 15:23:49 -04004740 Float4 u = Float4(src0.w) * Float4(1.0f / 0x0FFE);
4741 Float4 v = Float4(src0.x) * Float4(1.0f / 0x0FFE);
4742 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004743
4744 sampleTexture(r, dst, stage, u, v, s, s);
4745 }
4746
Alexis Hetu96517182015-04-15 10:30:23 -04004747 void PixelRoutine::TEXREG2GB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
John Bauman89401822014-05-06 15:04:28 -04004748 {
John Bauman19bac1e2014-05-06 15:23:49 -04004749 Float4 u = Float4(src0.y) * Float4(1.0f / 0x0FFE);
4750 Float4 v = Float4(src0.z) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004751 Float4 s = v;
4752
4753 sampleTexture(r, dst, stage, u, v, s, s);
4754 }
4755
Alexis Hetu96517182015-04-15 10:30:23 -04004756 void PixelRoutine::TEXREG2RGB(Registers &r, Vector4s &dst, Vector4s &src0, int stage)
John Bauman89401822014-05-06 15:04:28 -04004757 {
John Bauman19bac1e2014-05-06 15:23:49 -04004758 Float4 u = Float4(src0.x) * Float4(1.0f / 0x0FFE);
4759 Float4 v = Float4(src0.y) * Float4(1.0f / 0x0FFE);
4760 Float4 s = Float4(src0.z) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004761
4762 sampleTexture(r, dst, stage, u, v, s, s);
4763 }
4764
Alexis Hetu96517182015-04-15 10:30:23 -04004765 void PixelRoutine::TEXM3X2DEPTH(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004766 {
4767 TEXM3X2PAD(r, u, v, s, src, 1, signedScaling);
4768
4769 // z / w
4770 r.u_ *= Rcp_pp(r.v_); // FIXME: Set result to 1.0 when division by zero
4771
4772 r.oDepth = r.u_;
4773 }
4774
Alexis Hetu96517182015-04-15 10:30:23 -04004775 void PixelRoutine::TEXM3X2PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004776 {
4777 TEXM3X3PAD(r, u, v, s, src0, component, signedScaling);
4778 }
4779
Alexis Hetu96517182015-04-15 10:30:23 -04004780 void PixelRoutine::TEXM3X2TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004781 {
4782 TEXM3X2PAD(r, u, v, s, src0, 1, signedScaling);
4783
John Bauman19bac1e2014-05-06 15:23:49 -04004784 r.w_ = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04004785
4786 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4787 }
4788
Alexis Hetu96517182015-04-15 10:30:23 -04004789 void PixelRoutine::TEXM3X3(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004790 {
4791 TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
4792
John Bauman19bac1e2014-05-06 15:23:49 -04004793 dst.x = RoundShort4(r.u_ * Float4(0x1000));
4794 dst.y = RoundShort4(r.v_ * Float4(0x1000));
4795 dst.z = RoundShort4(r.w_ * Float4(0x1000));
4796 dst.w = Short4(0x1000);
John Bauman89401822014-05-06 15:04:28 -04004797 }
4798
Alexis Hetu96517182015-04-15 10:30:23 -04004799 void PixelRoutine::TEXM3X3PAD(Registers &r, Float4 &u, Float4 &v, Float4 &s, Vector4s &src0, int component, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004800 {
4801 if(component == 0 || previousScaling != signedScaling) // FIXME: Other source modifiers?
4802 {
John Bauman19bac1e2014-05-06 15:23:49 -04004803 r.U = Float4(src0.x);
4804 r.V = Float4(src0.y);
4805 r.W = Float4(src0.z);
John Bauman89401822014-05-06 15:04:28 -04004806
4807 previousScaling = signedScaling;
4808 }
4809
4810 Float4 x = r.U * u + r.V * v + r.W * s;
4811
John Bauman19bac1e2014-05-06 15:23:49 -04004812 x *= Float4(1.0f / 0x1000);
John Bauman89401822014-05-06 15:04:28 -04004813
4814 switch(component)
4815 {
4816 case 0: r.u_ = x; break;
4817 case 1: r.v_ = x; break;
4818 case 2: r.w_ = x; break;
4819 default: ASSERT(false);
4820 }
4821 }
4822
Alexis Hetu96517182015-04-15 10:30:23 -04004823 void PixelRoutine::TEXM3X3SPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, Vector4s &src1)
John Bauman89401822014-05-06 15:04:28 -04004824 {
4825 TEXM3X3PAD(r, u, v, s, src0, 2, false);
4826
4827 Float4 E[3]; // Eye vector
4828
John Bauman19bac1e2014-05-06 15:23:49 -04004829 E[0] = Float4(src1.x) * Float4(1.0f / 0x0FFE);
4830 E[1] = Float4(src1.y) * Float4(1.0f / 0x0FFE);
4831 E[2] = Float4(src1.z) * Float4(1.0f / 0x0FFE);
John Bauman89401822014-05-06 15:04:28 -04004832
4833 // Reflection
4834 Float4 u__;
4835 Float4 v__;
4836 Float4 w__;
4837
4838 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
4839 u__ = r.u_ * E[0];
4840 v__ = r.v_ * E[1];
4841 w__ = r.w_ * E[2];
4842 u__ += v__ + w__;
4843 u__ += u__;
4844 v__ = u__;
4845 w__ = u__;
4846 u__ *= r.u_;
4847 v__ *= r.v_;
4848 w__ *= r.w_;
4849 r.u_ *= r.u_;
4850 r.v_ *= r.v_;
4851 r.w_ *= r.w_;
4852 r.u_ += r.v_ + r.w_;
4853 u__ -= E[0] * r.u_;
4854 v__ -= E[1] * r.u_;
4855 w__ -= E[2] * r.u_;
4856
4857 sampleTexture(r, dst, stage, u__, v__, w__, w__);
4858 }
4859
Alexis Hetu96517182015-04-15 10:30:23 -04004860 void PixelRoutine::TEXM3X3TEX(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0, bool signedScaling)
John Bauman89401822014-05-06 15:04:28 -04004861 {
4862 TEXM3X3PAD(r, u, v, s, src0, 2, signedScaling);
4863
4864 sampleTexture(r, dst, stage, r.u_, r.v_, r.w_, r.w_);
4865 }
4866
Alexis Hetu96517182015-04-15 10:30:23 -04004867 void PixelRoutine::TEXM3X3VSPEC(Registers &r, Vector4s &dst, Float4 &u, Float4 &v, Float4 &s, int stage, Vector4s &src0)
John Bauman89401822014-05-06 15:04:28 -04004868 {
4869 TEXM3X3PAD(r, u, v, s, src0, 2, false);
4870
4871 Float4 E[3]; // Eye vector
4872
John Bauman19bac1e2014-05-06 15:23:49 -04004873 E[0] = r.vf[2 + stage - 2].w;
4874 E[1] = r.vf[2 + stage - 1].w;
4875 E[2] = r.vf[2 + stage - 0].w;
John Bauman89401822014-05-06 15:04:28 -04004876
4877 // Reflection
4878 Float4 u__;
4879 Float4 v__;
4880 Float4 w__;
4881
4882 // (u'', v'', w'') = 2 * (N . E) * N - E * (N . N)
4883 u__ = r.u_ * E[0];
4884 v__ = r.v_ * E[1];
4885 w__ = r.w_ * E[2];
4886 u__ += v__ + w__;
4887 u__ += u__;
4888 v__ = u__;
4889 w__ = u__;
4890 u__ *= r.u_;
4891 v__ *= r.v_;
4892 w__ *= r.w_;
4893 r.u_ *= r.u_;
4894 r.v_ *= r.v_;
4895 r.w_ *= r.w_;
4896 r.u_ += r.v_ + r.w_;
4897 u__ -= E[0] * r.u_;
4898 v__ -= E[1] * r.u_;
4899 w__ -= E[2] * r.u_;
4900
4901 sampleTexture(r, dst, stage, u__, v__, w__, w__);
4902 }
4903
4904 void PixelRoutine::TEXDEPTH(Registers &r)
4905 {
Alexis Hetu96517182015-04-15 10:30:23 -04004906 r.u_ = Float4(r.rs[5].x);
4907 r.v_ = Float4(r.rs[5].y);
John Bauman89401822014-05-06 15:04:28 -04004908
4909 // z / w
4910 r.u_ *= Rcp_pp(r.v_); // FIXME: Set result to 1.0 when division by zero
4911
4912 r.oDepth = r.u_;
4913 }
4914
Alexis Hetu96517182015-04-15 10:30:23 -04004915 void PixelRoutine::CND(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
John Bauman89401822014-05-06 15:04:28 -04004916 {
John Bauman19bac1e2014-05-06 15:23:49 -04004917 {Short4 t0; t0 = src0.x; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.x; t1 = t1 & t0; t0 = ~t0 & src2.x; t0 = t0 | t1; dst.x = t0;};
4918 {Short4 t0; t0 = src0.y; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.y; t1 = t1 & t0; t0 = ~t0 & src2.y; t0 = t0 | t1; dst.y = t0;};
4919 {Short4 t0; t0 = src0.z; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.z; t1 = t1 & t0; t0 = ~t0 & src2.z; t0 = t0 | t1; dst.z = t0;};
4920 {Short4 t0; t0 = src0.w; t0 = CmpGT(t0, Short4(0x0800, 0x0800, 0x0800, 0x0800)); Short4 t1; t1 = src1.w; t1 = t1 & t0; t0 = ~t0 & src2.w; t0 = t0 | t1; dst.w = t0;};
John Bauman89401822014-05-06 15:04:28 -04004921 }
4922
Alexis Hetu96517182015-04-15 10:30:23 -04004923 void PixelRoutine::CMP(Vector4s &dst, Vector4s &src0, Vector4s &src1, Vector4s &src2)
John Bauman89401822014-05-06 15:04:28 -04004924 {
John Bauman19bac1e2014-05-06 15:23:49 -04004925 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.x); Short4 t1; t1 = src2.x; t1 &= t0; t0 = ~t0 & src1.x; t0 |= t1; dst.x = t0;};
4926 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.y); Short4 t1; t1 = src2.y; t1 &= t0; t0 = ~t0 & src1.y; t0 |= t1; dst.y = t0;};
4927 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.z); Short4 t1; t1 = src2.z; t1 &= t0; t0 = ~t0 & src1.z; t0 |= t1; dst.z = t0;};
4928 {Short4 t0 = CmpGT(Short4(0x0000, 0x0000, 0x0000, 0x0000), src0.w); Short4 t1; t1 = src2.w; t1 &= t0; t0 = ~t0 & src1.w; t0 |= t1; dst.w = t0;};
John Bauman89401822014-05-06 15:04:28 -04004929 }
4930
Alexis Hetu96517182015-04-15 10:30:23 -04004931 void PixelRoutine::BEM(Registers &r, Vector4s &dst, Vector4s &src0, Vector4s &src1, int stage)
John Bauman89401822014-05-06 15:04:28 -04004932 {
4933 Short4 t0;
4934 Short4 t1;
4935
John Bauman19bac1e2014-05-06 15:23:49 -04004936 // dst.x = src0.x + BUMPENVMAT00(stage) * src1.x + BUMPENVMAT10(stage) * src1.y
John Bauman89401822014-05-06 15:04:28 -04004937 t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][0]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
4938 t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][0]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
4939 t0 = AddSat(t0, t1);
4940 t0 = AddSat(t0, src0.x);
John Bauman19bac1e2014-05-06 15:23:49 -04004941 dst.x = t0;
John Bauman89401822014-05-06 15:04:28 -04004942
John Bauman19bac1e2014-05-06 15:23:49 -04004943 // dst.y = src0.y + BUMPENVMAT01(stage) * src1.x + BUMPENVMAT11(stage) * src1.y
John Bauman89401822014-05-06 15:04:28 -04004944 t0 = MulHigh(src1.x, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[0][1]))); t0 = t0 << 4; // FIXME: Matrix components range? Overflow hazard.
4945 t1 = MulHigh(src1.y, *Pointer<Short4>(r.data + OFFSET(DrawData,textureStage[stage].bumpmapMatrix4W[1][1]))); t1 = t1 << 4; // FIXME: Matrix components range? Overflow hazard.
4946 t0 = AddSat(t0, t1);
4947 t0 = AddSat(t0, src0.y);
John Bauman19bac1e2014-05-06 15:23:49 -04004948 dst.y = t0;
John Bauman89401822014-05-06 15:04:28 -04004949 }
4950
John Bauman19bac1e2014-05-06 15:23:49 -04004951 void PixelRoutine::M3X2(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
John Bauman89401822014-05-06 15:04:28 -04004952 {
Alexis Hetu96517182015-04-15 10:30:23 -04004953 Vector4f row0 = fetchRegisterF(r, src1, 0);
4954 Vector4f row1 = fetchRegisterF(r, src1, 1);
John Bauman89401822014-05-06 15:04:28 -04004955
4956 dst.x = dot3(src0, row0);
4957 dst.y = dot3(src0, row1);
4958 }
4959
John Bauman19bac1e2014-05-06 15:23:49 -04004960 void PixelRoutine::M3X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
John Bauman89401822014-05-06 15:04:28 -04004961 {
Alexis Hetu96517182015-04-15 10:30:23 -04004962 Vector4f row0 = fetchRegisterF(r, src1, 0);
4963 Vector4f row1 = fetchRegisterF(r, src1, 1);
4964 Vector4f row2 = fetchRegisterF(r, src1, 2);
John Bauman89401822014-05-06 15:04:28 -04004965
4966 dst.x = dot3(src0, row0);
4967 dst.y = dot3(src0, row1);
4968 dst.z = dot3(src0, row2);
4969 }
4970
John Bauman19bac1e2014-05-06 15:23:49 -04004971 void PixelRoutine::M3X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
John Bauman89401822014-05-06 15:04:28 -04004972 {
Alexis Hetu96517182015-04-15 10:30:23 -04004973 Vector4f row0 = fetchRegisterF(r, src1, 0);
4974 Vector4f row1 = fetchRegisterF(r, src1, 1);
4975 Vector4f row2 = fetchRegisterF(r, src1, 2);
4976 Vector4f row3 = fetchRegisterF(r, src1, 3);
John Bauman89401822014-05-06 15:04:28 -04004977
4978 dst.x = dot3(src0, row0);
4979 dst.y = dot3(src0, row1);
4980 dst.z = dot3(src0, row2);
4981 dst.w = dot3(src0, row3);
4982 }
4983
John Bauman19bac1e2014-05-06 15:23:49 -04004984 void PixelRoutine::M4X3(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
John Bauman89401822014-05-06 15:04:28 -04004985 {
Alexis Hetu96517182015-04-15 10:30:23 -04004986 Vector4f row0 = fetchRegisterF(r, src1, 0);
4987 Vector4f row1 = fetchRegisterF(r, src1, 1);
4988 Vector4f row2 = fetchRegisterF(r, src1, 2);
John Bauman89401822014-05-06 15:04:28 -04004989
4990 dst.x = dot4(src0, row0);
4991 dst.y = dot4(src0, row1);
4992 dst.z = dot4(src0, row2);
4993 }
4994
John Bauman19bac1e2014-05-06 15:23:49 -04004995 void PixelRoutine::M4X4(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1)
John Bauman89401822014-05-06 15:04:28 -04004996 {
Alexis Hetu96517182015-04-15 10:30:23 -04004997 Vector4f row0 = fetchRegisterF(r, src1, 0);
4998 Vector4f row1 = fetchRegisterF(r, src1, 1);
4999 Vector4f row2 = fetchRegisterF(r, src1, 2);
5000 Vector4f row3 = fetchRegisterF(r, src1, 3);
John Bauman89401822014-05-06 15:04:28 -04005001
5002 dst.x = dot4(src0, row0);
5003 dst.y = dot4(src0, row1);
5004 dst.z = dot4(src0, row2);
5005 dst.w = dot4(src0, row3);
5006 }
5007
John Bauman19bac1e2014-05-06 15:23:49 -04005008 void PixelRoutine::TEXLD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
John Bauman89401822014-05-06 15:04:28 -04005009 {
John Bauman19bac1e2014-05-06 15:23:49 -04005010 Vector4f tmp;
5011 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias);
John Bauman89401822014-05-06 15:04:28 -04005012
5013 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
5014 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
5015 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
5016 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
5017 }
5018
John Bauman19bac1e2014-05-06 15:23:49 -04005019 void PixelRoutine::TEXLDD(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, Vector4f &src2, Vector4f &src3, bool project, bool bias)
John Bauman89401822014-05-06 15:04:28 -04005020 {
John Bauman19bac1e2014-05-06 15:23:49 -04005021 Vector4f tmp;
5022 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src2, src3, project, bias, true);
John Bauman89401822014-05-06 15:04:28 -04005023
5024 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
5025 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
5026 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
5027 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
5028 }
5029
John Bauman19bac1e2014-05-06 15:23:49 -04005030 void PixelRoutine::TEXLDL(Registers &r, Vector4f &dst, Vector4f &src0, const Src &src1, bool project, bool bias)
John Bauman89401822014-05-06 15:04:28 -04005031 {
John Bauman19bac1e2014-05-06 15:23:49 -04005032 Vector4f tmp;
5033 sampleTexture(r, tmp, src1, src0.x, src0.y, src0.z, src0.w, src0, src0, project, bias, false, true);
John Bauman89401822014-05-06 15:04:28 -04005034
5035 dst.x = tmp[(src1.swizzle >> 0) & 0x3];
5036 dst.y = tmp[(src1.swizzle >> 2) & 0x3];
5037 dst.z = tmp[(src1.swizzle >> 4) & 0x3];
5038 dst.w = tmp[(src1.swizzle >> 6) & 0x3];
5039 }
5040
John Bauman19bac1e2014-05-06 15:23:49 -04005041 void PixelRoutine::TEXKILL(Int cMask[4], Vector4f &src, unsigned char mask)
John Bauman89401822014-05-06 15:04:28 -04005042 {
5043 Int kill = -1;
5044
John Bauman19bac1e2014-05-06 15:23:49 -04005045 if(mask & 0x1) kill &= SignMask(CmpNLT(src.x, Float4(0.0f)));
5046 if(mask & 0x2) kill &= SignMask(CmpNLT(src.y, Float4(0.0f)));
5047 if(mask & 0x4) kill &= SignMask(CmpNLT(src.z, Float4(0.0f)));
5048 if(mask & 0x8) kill &= SignMask(CmpNLT(src.w, Float4(0.0f)));
5049
5050 // FIXME: Dynamic branching affects TEXKILL?
5051 // if(shader->containsDynamicBranching())
5052 // {
5053 // kill = ~SignMask(enableMask(r));
5054 // }
John Bauman89401822014-05-06 15:04:28 -04005055
5056 for(unsigned int q = 0; q < state.multiSample; q++)
5057 {
5058 cMask[q] &= kill;
5059 }
John Bauman19bac1e2014-05-06 15:23:49 -04005060
5061 // FIXME: Branch to end of shader if all killed?
John Bauman89401822014-05-06 15:04:28 -04005062 }
5063
John Bauman19bac1e2014-05-06 15:23:49 -04005064 void PixelRoutine::DISCARD(Registers &r, Int cMask[4], const Shader::Instruction *instruction)
John Bauman89401822014-05-06 15:04:28 -04005065 {
John Bauman19bac1e2014-05-06 15:23:49 -04005066 Int kill = 0;
5067
5068 if(shader->containsDynamicBranching())
5069 {
5070 kill = ~SignMask(enableMask(r, instruction));
5071 }
5072
5073 for(unsigned int q = 0; q < state.multiSample; q++)
5074 {
5075 cMask[q] &= kill;
5076 }
5077
5078 // FIXME: Branch to end of shader if all killed?
John Bauman89401822014-05-06 15:04:28 -04005079 }
5080
John Bauman19bac1e2014-05-06 15:23:49 -04005081 void PixelRoutine::DFDX(Vector4f &dst, Vector4f &src)
John Bauman89401822014-05-06 15:04:28 -04005082 {
John Bauman19bac1e2014-05-06 15:23:49 -04005083 dst.x = src.x.yyww - src.x.xxzz;
5084 dst.y = src.y.yyww - src.y.xxzz;
5085 dst.z = src.z.yyww - src.z.xxzz;
5086 dst.w = src.w.yyww - src.w.xxzz;
5087 }
5088
5089 void PixelRoutine::DFDY(Vector4f &dst, Vector4f &src)
5090 {
5091 dst.x = src.x.zwzw - src.x.xyxy;
5092 dst.y = src.y.zwzw - src.y.xyxy;
5093 dst.z = src.z.zwzw - src.z.xyxy;
5094 dst.w = src.w.zwzw - src.w.xyxy;
5095 }
5096
5097 void PixelRoutine::FWIDTH(Vector4f &dst, Vector4f &src)
5098 {
5099 // abs(dFdx(src)) + abs(dFdy(src));
5100 dst.x = Abs(src.x.yyww - src.x.xxzz) + Abs(src.x.zwzw - src.x.xyxy);
5101 dst.y = Abs(src.y.yyww - src.x.xxzz) + Abs(src.y.zwzw - src.y.xyxy);
5102 dst.z = Abs(src.z.yyww - src.x.xxzz) + Abs(src.z.zwzw - src.z.xyxy);
5103 dst.w = Abs(src.w.yyww - src.x.xxzz) + Abs(src.w.zwzw - src.w.xyxy);
John Bauman89401822014-05-06 15:04:28 -04005104 }
5105
5106 void PixelRoutine::BREAK(Registers &r)
5107 {
5108 llvm::BasicBlock *deadBlock = Nucleus::createBasicBlock();
5109 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
5110
5111 if(breakDepth == 0)
5112 {
John Bauman19bac1e2014-05-06 15:23:49 -04005113 r.enableIndex = r.enableIndex - breakDepth;
John Bauman89401822014-05-06 15:04:28 -04005114 Nucleus::createBr(endBlock);
5115 }
5116 else
5117 {
5118 r.enableBreak = r.enableBreak & ~r.enableStack[r.enableIndex];
5119 Bool allBreak = SignMask(r.enableBreak) == 0x0;
5120
John Bauman19bac1e2014-05-06 15:23:49 -04005121 r.enableIndex = r.enableIndex - breakDepth;
John Bauman89401822014-05-06 15:04:28 -04005122 branch(allBreak, endBlock, deadBlock);
5123 }
5124
5125 Nucleus::setInsertBlock(deadBlock);
John Bauman19bac1e2014-05-06 15:23:49 -04005126 r.enableIndex = r.enableIndex + breakDepth;
John Bauman89401822014-05-06 15:04:28 -04005127 }
5128
John Bauman19bac1e2014-05-06 15:23:49 -04005129 void PixelRoutine::BREAKC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
John Bauman89401822014-05-06 15:04:28 -04005130 {
5131 Int4 condition;
5132
5133 switch(control)
5134 {
John Bauman19bac1e2014-05-06 15:23:49 -04005135 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
5136 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
5137 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
5138 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
5139 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
5140 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
John Bauman89401822014-05-06 15:04:28 -04005141 default:
5142 ASSERT(false);
5143 }
5144
John Bauman19bac1e2014-05-06 15:23:49 -04005145 BREAK(r, condition);
John Bauman89401822014-05-06 15:04:28 -04005146 }
5147
5148 void PixelRoutine::BREAKP(Registers &r, const Src &predicateRegister) // FIXME: Factor out parts common with BREAKC
5149 {
5150 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
5151
John Bauman19bac1e2014-05-06 15:23:49 -04005152 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
John Bauman89401822014-05-06 15:04:28 -04005153 {
5154 condition = ~condition;
5155 }
5156
John Bauman19bac1e2014-05-06 15:23:49 -04005157 BREAK(r, condition);
5158 }
5159
5160 void PixelRoutine::BREAK(Registers &r, Int4 &condition)
5161 {
John Bauman89401822014-05-06 15:04:28 -04005162 condition &= r.enableStack[r.enableIndex];
5163
5164 llvm::BasicBlock *continueBlock = Nucleus::createBasicBlock();
5165 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth - 1];
5166
5167 r.enableBreak = r.enableBreak & ~condition;
5168 Bool allBreak = SignMask(r.enableBreak) == 0x0;
5169
John Bauman19bac1e2014-05-06 15:23:49 -04005170 r.enableIndex = r.enableIndex - breakDepth;
John Bauman89401822014-05-06 15:04:28 -04005171 branch(allBreak, endBlock, continueBlock);
John Bauman19bac1e2014-05-06 15:23:49 -04005172
John Bauman89401822014-05-06 15:04:28 -04005173 Nucleus::setInsertBlock(continueBlock);
John Bauman19bac1e2014-05-06 15:23:49 -04005174 r.enableIndex = r.enableIndex + breakDepth;
John Bauman89401822014-05-06 15:04:28 -04005175 }
5176
John Bauman19bac1e2014-05-06 15:23:49 -04005177 void PixelRoutine::CONTINUE(Registers &r)
5178 {
5179 r.enableContinue = r.enableContinue & ~r.enableStack[r.enableIndex];
5180 }
5181
5182 void PixelRoutine::TEST()
5183 {
5184 whileTest = true;
5185 }
5186
5187 void PixelRoutine::CALL(Registers &r, int labelIndex, int callSiteIndex)
John Bauman89401822014-05-06 15:04:28 -04005188 {
5189 if(!labelBlock[labelIndex])
5190 {
5191 labelBlock[labelIndex] = Nucleus::createBasicBlock();
5192 }
5193
John Bauman19bac1e2014-05-06 15:23:49 -04005194 if(callRetBlock[labelIndex].size() > 1)
5195 {
5196 r.callStack[r.stackIndex++] = UInt(callSiteIndex);
5197 }
John Bauman89401822014-05-06 15:04:28 -04005198
John Bauman19bac1e2014-05-06 15:23:49 -04005199 Int4 restoreLeave = r.enableLeave;
John Bauman89401822014-05-06 15:04:28 -04005200
5201 Nucleus::createBr(labelBlock[labelIndex]);
John Bauman19bac1e2014-05-06 15:23:49 -04005202 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
5203
5204 r.enableLeave = restoreLeave;
John Bauman89401822014-05-06 15:04:28 -04005205 }
5206
John Bauman19bac1e2014-05-06 15:23:49 -04005207 void PixelRoutine::CALLNZ(Registers &r, int labelIndex, int callSiteIndex, const Src &src)
John Bauman89401822014-05-06 15:04:28 -04005208 {
John Bauman19bac1e2014-05-06 15:23:49 -04005209 if(src.type == Shader::PARAMETER_CONSTBOOL)
John Bauman89401822014-05-06 15:04:28 -04005210 {
John Bauman19bac1e2014-05-06 15:23:49 -04005211 CALLNZb(r, labelIndex, callSiteIndex, src);
John Bauman89401822014-05-06 15:04:28 -04005212 }
John Bauman19bac1e2014-05-06 15:23:49 -04005213 else if(src.type == Shader::PARAMETER_PREDICATE)
John Bauman89401822014-05-06 15:04:28 -04005214 {
John Bauman19bac1e2014-05-06 15:23:49 -04005215 CALLNZp(r, labelIndex, callSiteIndex, src);
John Bauman89401822014-05-06 15:04:28 -04005216 }
5217 else ASSERT(false);
5218 }
5219
John Bauman19bac1e2014-05-06 15:23:49 -04005220 void PixelRoutine::CALLNZb(Registers &r, int labelIndex, int callSiteIndex, const Src &boolRegister)
John Bauman89401822014-05-06 15:04:28 -04005221 {
5222 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME
5223
John Bauman19bac1e2014-05-06 15:23:49 -04005224 if(boolRegister.modifier == Shader::MODIFIER_NOT)
John Bauman89401822014-05-06 15:04:28 -04005225 {
5226 condition = !condition;
5227 }
5228
5229 if(!labelBlock[labelIndex])
5230 {
5231 labelBlock[labelIndex] = Nucleus::createBasicBlock();
5232 }
5233
John Bauman19bac1e2014-05-06 15:23:49 -04005234 if(callRetBlock[labelIndex].size() > 1)
5235 {
5236 r.callStack[r.stackIndex++] = UInt(callSiteIndex);
5237 }
John Bauman89401822014-05-06 15:04:28 -04005238
John Bauman19bac1e2014-05-06 15:23:49 -04005239 Int4 restoreLeave = r.enableLeave;
John Bauman89401822014-05-06 15:04:28 -04005240
John Bauman19bac1e2014-05-06 15:23:49 -04005241 branch(condition, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
5242 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
5243
5244 r.enableLeave = restoreLeave;
John Bauman89401822014-05-06 15:04:28 -04005245 }
5246
John Bauman19bac1e2014-05-06 15:23:49 -04005247 void PixelRoutine::CALLNZp(Registers &r, int labelIndex, int callSiteIndex, const Src &predicateRegister)
John Bauman89401822014-05-06 15:04:28 -04005248 {
5249 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
5250
John Bauman19bac1e2014-05-06 15:23:49 -04005251 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
John Bauman89401822014-05-06 15:04:28 -04005252 {
5253 condition = ~condition;
5254 }
5255
5256 condition &= r.enableStack[r.enableIndex];
5257
5258 if(!labelBlock[labelIndex])
5259 {
5260 labelBlock[labelIndex] = Nucleus::createBasicBlock();
5261 }
5262
John Bauman19bac1e2014-05-06 15:23:49 -04005263 if(callRetBlock[labelIndex].size() > 1)
5264 {
5265 r.callStack[r.stackIndex++] = UInt(callSiteIndex);
5266 }
John Bauman89401822014-05-06 15:04:28 -04005267
5268 r.enableIndex++;
5269 r.enableStack[r.enableIndex] = condition;
John Bauman19bac1e2014-05-06 15:23:49 -04005270 Int4 restoreLeave = r.enableLeave;
John Bauman89401822014-05-06 15:04:28 -04005271
John Bauman19bac1e2014-05-06 15:23:49 -04005272 Bool notAllFalse = SignMask(condition) != 0;
5273 branch(notAllFalse, labelBlock[labelIndex], callRetBlock[labelIndex][callSiteIndex]);
5274 Nucleus::setInsertBlock(callRetBlock[labelIndex][callSiteIndex]);
John Bauman89401822014-05-06 15:04:28 -04005275
5276 r.enableIndex--;
John Bauman19bac1e2014-05-06 15:23:49 -04005277 r.enableLeave = restoreLeave;
John Bauman89401822014-05-06 15:04:28 -04005278 }
5279
5280 void PixelRoutine::ELSE(Registers &r)
5281 {
5282 ifDepth--;
5283
5284 llvm::BasicBlock *falseBlock = ifFalseBlock[ifDepth];
5285 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5286
5287 if(isConditionalIf[ifDepth])
5288 {
5289 Int4 condition = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
John Bauman19bac1e2014-05-06 15:23:49 -04005290 Bool notAllFalse = SignMask(condition) != 0;
John Bauman89401822014-05-06 15:04:28 -04005291
5292 branch(notAllFalse, falseBlock, endBlock);
5293
5294 r.enableStack[r.enableIndex] = ~r.enableStack[r.enableIndex] & r.enableStack[r.enableIndex - 1];
5295 }
5296 else
5297 {
5298 Nucleus::createBr(endBlock);
5299 Nucleus::setInsertBlock(falseBlock);
5300 }
5301
5302 ifFalseBlock[ifDepth] = endBlock;
5303
5304 ifDepth++;
5305 }
5306
5307 void PixelRoutine::ENDIF(Registers &r)
5308 {
5309 ifDepth--;
5310
5311 llvm::BasicBlock *endBlock = ifFalseBlock[ifDepth];
5312
5313 Nucleus::createBr(endBlock);
5314 Nucleus::setInsertBlock(endBlock);
5315
5316 if(isConditionalIf[ifDepth])
5317 {
5318 breakDepth--;
5319 r.enableIndex--;
5320 }
5321 }
5322
John Bauman89401822014-05-06 15:04:28 -04005323 void PixelRoutine::ENDLOOP(Registers &r)
5324 {
5325 loopRepDepth--;
5326
5327 r.aL[r.loopDepth] = r.aL[r.loopDepth] + r.increment[r.loopDepth]; // FIXME: +=
5328
5329 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
5330 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
5331
5332 Nucleus::createBr(testBlock);
5333 Nucleus::setInsertBlock(endBlock);
5334
5335 r.loopDepth--;
5336 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
5337 }
5338
John Bauman19bac1e2014-05-06 15:23:49 -04005339 void PixelRoutine::ENDREP(Registers &r)
5340 {
5341 loopRepDepth--;
5342
5343 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
5344 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
5345
5346 Nucleus::createBr(testBlock);
5347 Nucleus::setInsertBlock(endBlock);
5348
5349 r.loopDepth--;
5350 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
5351 }
5352
5353 void PixelRoutine::ENDWHILE(Registers &r)
5354 {
5355 loopRepDepth--;
5356
5357 llvm::BasicBlock *testBlock = loopRepTestBlock[loopRepDepth];
5358 llvm::BasicBlock *endBlock = loopRepEndBlock[loopRepDepth];
5359
5360 Nucleus::createBr(testBlock);
5361 Nucleus::setInsertBlock(endBlock);
5362
5363 r.enableIndex--;
5364 r.enableBreak = Int4(0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
5365 whileTest = false;
5366 }
5367
John Bauman89401822014-05-06 15:04:28 -04005368 void PixelRoutine::IF(Registers &r, const Src &src)
5369 {
John Bauman19bac1e2014-05-06 15:23:49 -04005370 if(src.type == Shader::PARAMETER_CONSTBOOL)
John Bauman89401822014-05-06 15:04:28 -04005371 {
5372 IFb(r, src);
5373 }
John Bauman19bac1e2014-05-06 15:23:49 -04005374 else if(src.type == Shader::PARAMETER_PREDICATE)
John Bauman89401822014-05-06 15:04:28 -04005375 {
5376 IFp(r, src);
5377 }
John Bauman19bac1e2014-05-06 15:23:49 -04005378 else
5379 {
Alexis Hetu96517182015-04-15 10:30:23 -04005380 Int4 condition = As<Int4>(fetchRegisterF(r, src).x);
John Bauman19bac1e2014-05-06 15:23:49 -04005381 IF(r, condition);
5382 }
John Bauman89401822014-05-06 15:04:28 -04005383 }
5384
5385 void PixelRoutine::IFb(Registers &r, const Src &boolRegister)
5386 {
John Bauman19bac1e2014-05-06 15:23:49 -04005387 ASSERT(ifDepth < 24 + 4);
5388
John Bauman89401822014-05-06 15:04:28 -04005389 Bool condition = (*Pointer<Byte>(r.data + OFFSET(DrawData,ps.b[boolRegister.index])) != Byte(0)); // FIXME
5390
John Bauman19bac1e2014-05-06 15:23:49 -04005391 if(boolRegister.modifier == Shader::MODIFIER_NOT)
John Bauman89401822014-05-06 15:04:28 -04005392 {
John Bauman19bac1e2014-05-06 15:23:49 -04005393 condition = !condition;
John Bauman89401822014-05-06 15:04:28 -04005394 }
5395
5396 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
5397 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
5398
5399 branch(condition, trueBlock, falseBlock);
5400
5401 isConditionalIf[ifDepth] = false;
5402 ifFalseBlock[ifDepth] = falseBlock;
5403
5404 ifDepth++;
5405 }
5406
John Bauman19bac1e2014-05-06 15:23:49 -04005407 void PixelRoutine::IFp(Registers &r, const Src &predicateRegister)
John Bauman89401822014-05-06 15:04:28 -04005408 {
5409 Int4 condition = As<Int4>(r.p0[predicateRegister.swizzle & 0x3]);
5410
John Bauman19bac1e2014-05-06 15:23:49 -04005411 if(predicateRegister.modifier == Shader::MODIFIER_NOT)
John Bauman89401822014-05-06 15:04:28 -04005412 {
5413 condition = ~condition;
5414 }
5415
John Bauman19bac1e2014-05-06 15:23:49 -04005416 IF(r, condition);
John Bauman89401822014-05-06 15:04:28 -04005417 }
5418
John Bauman19bac1e2014-05-06 15:23:49 -04005419 void PixelRoutine::IFC(Registers &r, Vector4f &src0, Vector4f &src1, Control control)
John Bauman89401822014-05-06 15:04:28 -04005420 {
5421 Int4 condition;
5422
5423 switch(control)
5424 {
John Bauman19bac1e2014-05-06 15:23:49 -04005425 case Shader::CONTROL_GT: condition = CmpNLE(src0.x, src1.x); break;
5426 case Shader::CONTROL_EQ: condition = CmpEQ(src0.x, src1.x); break;
5427 case Shader::CONTROL_GE: condition = CmpNLT(src0.x, src1.x); break;
5428 case Shader::CONTROL_LT: condition = CmpLT(src0.x, src1.x); break;
5429 case Shader::CONTROL_NE: condition = CmpNEQ(src0.x, src1.x); break;
5430 case Shader::CONTROL_LE: condition = CmpLE(src0.x, src1.x); break;
John Bauman89401822014-05-06 15:04:28 -04005431 default:
5432 ASSERT(false);
5433 }
5434
John Bauman19bac1e2014-05-06 15:23:49 -04005435 IF(r, condition);
5436 }
5437
5438 void PixelRoutine::IF(Registers &r, Int4 &condition)
5439 {
John Bauman89401822014-05-06 15:04:28 -04005440 condition &= r.enableStack[r.enableIndex];
5441
5442 r.enableIndex++;
5443 r.enableStack[r.enableIndex] = condition;
5444
5445 llvm::BasicBlock *trueBlock = Nucleus::createBasicBlock();
5446 llvm::BasicBlock *falseBlock = Nucleus::createBasicBlock();
5447
John Bauman19bac1e2014-05-06 15:23:49 -04005448 Bool notAllFalse = SignMask(condition) != 0;
John Bauman89401822014-05-06 15:04:28 -04005449
5450 branch(notAllFalse, trueBlock, falseBlock);
5451
5452 isConditionalIf[ifDepth] = true;
5453 ifFalseBlock[ifDepth] = falseBlock;
5454
5455 ifDepth++;
5456 breakDepth++;
5457 }
5458
5459 void PixelRoutine::LABEL(int labelIndex)
5460 {
John Bauman19bac1e2014-05-06 15:23:49 -04005461 if(!labelBlock[labelIndex])
5462 {
5463 labelBlock[labelIndex] = Nucleus::createBasicBlock();
5464 }
5465
John Bauman89401822014-05-06 15:04:28 -04005466 Nucleus::setInsertBlock(labelBlock[labelIndex]);
John Bauman19bac1e2014-05-06 15:23:49 -04005467 currentLabel = labelIndex;
John Bauman89401822014-05-06 15:04:28 -04005468 }
5469
5470 void PixelRoutine::LOOP(Registers &r, const Src &integerRegister)
5471 {
5472 r.loopDepth++;
5473
5474 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
5475 r.aL[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][1]));
5476 r.increment[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][2]));
5477
5478 // If(r.increment[r.loopDepth] == 0)
5479 // {
5480 // r.increment[r.loopDepth] = 1;
5481 // }
5482
5483 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
5484 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
5485 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5486
5487 loopRepTestBlock[loopRepDepth] = testBlock;
5488 loopRepEndBlock[loopRepDepth] = endBlock;
5489
5490 // FIXME: jump(testBlock)
5491 Nucleus::createBr(testBlock);
5492 Nucleus::setInsertBlock(testBlock);
5493
5494 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
5495 Nucleus::setInsertBlock(loopBlock);
5496
5497 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
5498
5499 loopRepDepth++;
5500 breakDepth = 0;
5501 }
5502
5503 void PixelRoutine::REP(Registers &r, const Src &integerRegister)
5504 {
5505 r.loopDepth++;
5506
5507 r.iteration[r.loopDepth] = *Pointer<Int>(r.data + OFFSET(DrawData,ps.i[integerRegister.index][0]));
5508 r.aL[r.loopDepth] = r.aL[r.loopDepth - 1];
5509
5510 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
5511 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
5512 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5513
5514 loopRepTestBlock[loopRepDepth] = testBlock;
5515 loopRepEndBlock[loopRepDepth] = endBlock;
5516
5517 // FIXME: jump(testBlock)
5518 Nucleus::createBr(testBlock);
5519 Nucleus::setInsertBlock(testBlock);
5520
5521 branch(r.iteration[r.loopDepth] > 0, loopBlock, endBlock);
5522 Nucleus::setInsertBlock(loopBlock);
5523
5524 r.iteration[r.loopDepth] = r.iteration[r.loopDepth] - 1; // FIXME: --
5525
5526 loopRepDepth++;
5527 breakDepth = 0;
5528 }
5529
John Bauman19bac1e2014-05-06 15:23:49 -04005530 void PixelRoutine::WHILE(Registers &r, const Src &temporaryRegister)
5531 {
5532 r.enableIndex++;
5533
5534 llvm::BasicBlock *loopBlock = Nucleus::createBasicBlock();
5535 llvm::BasicBlock *testBlock = Nucleus::createBasicBlock();
5536 llvm::BasicBlock *endBlock = Nucleus::createBasicBlock();
5537
5538 loopRepTestBlock[loopRepDepth] = testBlock;
5539 loopRepEndBlock[loopRepDepth] = endBlock;
5540
5541 Int4 restoreBreak = r.enableBreak;
5542 Int4 restoreContinue = r.enableContinue;
5543
5544 // FIXME: jump(testBlock)
5545 Nucleus::createBr(testBlock);
5546 Nucleus::setInsertBlock(testBlock);
5547 r.enableContinue = restoreContinue;
5548
Alexis Hetu96517182015-04-15 10:30:23 -04005549 const Vector4f &src = fetchRegisterF(r, temporaryRegister);
John Bauman19bac1e2014-05-06 15:23:49 -04005550 Int4 condition = As<Int4>(src.x);
5551 condition &= r.enableStack[r.enableIndex - 1];
5552 r.enableStack[r.enableIndex] = condition;
5553
5554 Bool notAllFalse = SignMask(condition) != 0;
5555 branch(notAllFalse, loopBlock, endBlock);
5556
5557 Nucleus::setInsertBlock(endBlock);
5558 r.enableBreak = restoreBreak;
5559
5560 Nucleus::setInsertBlock(loopBlock);
5561
5562 loopRepDepth++;
5563 breakDepth = 0;
5564 }
5565
John Bauman89401822014-05-06 15:04:28 -04005566 void PixelRoutine::RET(Registers &r)
5567 {
John Bauman19bac1e2014-05-06 15:23:49 -04005568 if(currentLabel == -1)
John Bauman89401822014-05-06 15:04:28 -04005569 {
5570 returnBlock = Nucleus::createBasicBlock();
5571 Nucleus::createBr(returnBlock);
John Bauman89401822014-05-06 15:04:28 -04005572 }
5573 else
5574 {
John Bauman89401822014-05-06 15:04:28 -04005575 llvm::BasicBlock *unreachableBlock = Nucleus::createBasicBlock();
John Bauman89401822014-05-06 15:04:28 -04005576
John Bauman19bac1e2014-05-06 15:23:49 -04005577 if(callRetBlock[currentLabel].size() > 1) // Pop the return destination from the call stack
John Bauman89401822014-05-06 15:04:28 -04005578 {
John Bauman19bac1e2014-05-06 15:23:49 -04005579 // FIXME: Encapsulate
5580 UInt index = r.callStack[--r.stackIndex];
5581
John Bauman66b8ab22014-05-06 15:57:45 -04005582 llvm::Value *value = index.loadValue();
John Bauman19bac1e2014-05-06 15:23:49 -04005583 llvm::Value *switchInst = Nucleus::createSwitch(value, unreachableBlock, (int)callRetBlock[currentLabel].size());
5584
5585 for(unsigned int i = 0; i < callRetBlock[currentLabel].size(); i++)
5586 {
5587 Nucleus::addSwitchCase(switchInst, i, callRetBlock[currentLabel][i]);
5588 }
5589 }
5590 else if(callRetBlock[currentLabel].size() == 1) // Jump directly to the unique return destination
5591 {
5592 Nucleus::createBr(callRetBlock[currentLabel][0]);
5593 }
5594 else // Function isn't called
5595 {
5596 Nucleus::createBr(unreachableBlock);
John Bauman89401822014-05-06 15:04:28 -04005597 }
5598
5599 Nucleus::setInsertBlock(unreachableBlock);
5600 Nucleus::createUnreachable();
5601 }
5602 }
5603
John Bauman19bac1e2014-05-06 15:23:49 -04005604 void PixelRoutine::LEAVE(Registers &r)
5605 {
5606 r.enableLeave = r.enableLeave & ~r.enableStack[r.enableIndex];
5607
5608 // FIXME: Return from function if all instances left
5609 // FIXME: Use enableLeave in other control-flow constructs
5610 }
5611
Alexis Hetu96517182015-04-15 10:30:23 -04005612 void PixelRoutine::writeDestination(Registers &r, Vector4s &d, const Dst &dst)
John Bauman89401822014-05-06 15:04:28 -04005613 {
5614 switch(dst.type)
5615 {
John Bauman19bac1e2014-05-06 15:23:49 -04005616 case Shader::PARAMETER_TEMP:
Alexis Hetu96517182015-04-15 10:30:23 -04005617 if(dst.mask & 0x1) r.rs[dst.index].x = d.x;
5618 if(dst.mask & 0x2) r.rs[dst.index].y = d.y;
5619 if(dst.mask & 0x4) r.rs[dst.index].z = d.z;
5620 if(dst.mask & 0x8) r.rs[dst.index].w = d.w;
John Bauman89401822014-05-06 15:04:28 -04005621 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005622 case Shader::PARAMETER_INPUT:
Alexis Hetu96517182015-04-15 10:30:23 -04005623 if(dst.mask & 0x1) r.vs[dst.index].x = d.x;
5624 if(dst.mask & 0x2) r.vs[dst.index].y = d.y;
5625 if(dst.mask & 0x4) r.vs[dst.index].z = d.z;
5626 if(dst.mask & 0x8) r.vs[dst.index].w = d.w;
John Bauman89401822014-05-06 15:04:28 -04005627 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005628 case Shader::PARAMETER_CONST: ASSERT(false); break;
5629 case Shader::PARAMETER_TEXTURE:
Alexis Hetu96517182015-04-15 10:30:23 -04005630 if(dst.mask & 0x1) r.ts[dst.index].x = d.x;
5631 if(dst.mask & 0x2) r.ts[dst.index].y = d.y;
5632 if(dst.mask & 0x4) r.ts[dst.index].z = d.z;
5633 if(dst.mask & 0x8) r.ts[dst.index].w = d.w;
John Bauman89401822014-05-06 15:04:28 -04005634 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005635 case Shader::PARAMETER_COLOROUT:
Alexis Hetu96517182015-04-15 10:30:23 -04005636 if(dst.mask & 0x1) r.vs[dst.index].x = d.x;
5637 if(dst.mask & 0x2) r.vs[dst.index].y = d.y;
5638 if(dst.mask & 0x4) r.vs[dst.index].z = d.z;
5639 if(dst.mask & 0x8) r.vs[dst.index].w = d.w;
John Bauman89401822014-05-06 15:04:28 -04005640 break;
5641 default:
5642 ASSERT(false);
5643 }
5644 }
5645
Alexis Hetu96517182015-04-15 10:30:23 -04005646 Vector4s PixelRoutine::fetchRegisterS(Registers &r, const Src &src)
John Bauman89401822014-05-06 15:04:28 -04005647 {
Alexis Hetu96517182015-04-15 10:30:23 -04005648 Vector4s *reg;
John Bauman89401822014-05-06 15:04:28 -04005649 int i = src.index;
5650
Alexis Hetu96517182015-04-15 10:30:23 -04005651 Vector4s c;
John Bauman89401822014-05-06 15:04:28 -04005652
John Bauman19bac1e2014-05-06 15:23:49 -04005653 if(src.type == Shader::PARAMETER_CONST)
John Bauman89401822014-05-06 15:04:28 -04005654 {
John Bauman19bac1e2014-05-06 15:23:49 -04005655 c.x = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][0]));
5656 c.y = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][1]));
5657 c.z = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][2]));
5658 c.w = *Pointer<Short4>(r.data + OFFSET(DrawData,ps.cW[i][3]));
John Bauman89401822014-05-06 15:04:28 -04005659 }
5660
5661 switch(src.type)
5662 {
Alexis Hetu96517182015-04-15 10:30:23 -04005663 case Shader::PARAMETER_TEMP: reg = &r.rs[i]; break;
5664 case Shader::PARAMETER_INPUT: reg = &r.vs[i]; break;
John Bauman19bac1e2014-05-06 15:23:49 -04005665 case Shader::PARAMETER_CONST: reg = &c; break;
Alexis Hetu96517182015-04-15 10:30:23 -04005666 case Shader::PARAMETER_TEXTURE: reg = &r.ts[i]; break;
5667 case Shader::PARAMETER_VOID: return r.rs[0]; // Dummy
5668 case Shader::PARAMETER_FLOAT4LITERAL: return r.rs[0]; // Dummy
John Bauman89401822014-05-06 15:04:28 -04005669 default:
5670 ASSERT(false);
5671 }
5672
John Bauman66b8ab22014-05-06 15:57:45 -04005673 const Short4 &x = (*reg)[(src.swizzle >> 0) & 0x3];
5674 const Short4 &y = (*reg)[(src.swizzle >> 2) & 0x3];
5675 const Short4 &z = (*reg)[(src.swizzle >> 4) & 0x3];
5676 const Short4 &w = (*reg)[(src.swizzle >> 6) & 0x3];
John Bauman89401822014-05-06 15:04:28 -04005677
Alexis Hetu96517182015-04-15 10:30:23 -04005678 Vector4s mod;
John Bauman89401822014-05-06 15:04:28 -04005679
5680 switch(src.modifier)
5681 {
John Bauman19bac1e2014-05-06 15:23:49 -04005682 case Shader::MODIFIER_NONE:
5683 mod.x = x;
5684 mod.y = y;
5685 mod.z = z;
5686 mod.w = w;
John Bauman89401822014-05-06 15:04:28 -04005687 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005688 case Shader::MODIFIER_BIAS:
5689 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5690 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5691 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5692 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
John Bauman89401822014-05-06 15:04:28 -04005693 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005694 case Shader::MODIFIER_BIAS_NEGATE:
5695 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
5696 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
5697 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
5698 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
John Bauman89401822014-05-06 15:04:28 -04005699 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005700 case Shader::MODIFIER_COMPLEMENT:
5701 mod.x = SubSat(Short4(0x1000), x);
5702 mod.y = SubSat(Short4(0x1000), y);
5703 mod.z = SubSat(Short4(0x1000), z);
5704 mod.w = SubSat(Short4(0x1000), w);
John Bauman89401822014-05-06 15:04:28 -04005705 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005706 case Shader::MODIFIER_NEGATE:
5707 mod.x = -x;
5708 mod.y = -y;
5709 mod.z = -z;
5710 mod.w = -w;
John Bauman89401822014-05-06 15:04:28 -04005711 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005712 case Shader::MODIFIER_X2:
5713 mod.x = AddSat(x, x);
5714 mod.y = AddSat(y, y);
5715 mod.z = AddSat(z, z);
5716 mod.w = AddSat(w, w);
John Bauman89401822014-05-06 15:04:28 -04005717 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005718 case Shader::MODIFIER_X2_NEGATE:
5719 mod.x = -AddSat(x, x);
5720 mod.y = -AddSat(y, y);
5721 mod.z = -AddSat(z, z);
5722 mod.w = -AddSat(w, w);
John Bauman89401822014-05-06 15:04:28 -04005723 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005724 case Shader::MODIFIER_SIGN:
5725 mod.x = SubSat(x, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5726 mod.y = SubSat(y, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5727 mod.z = SubSat(z, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5728 mod.w = SubSat(w, Short4(0x0800, 0x0800, 0x0800, 0x0800));
5729 mod.x = AddSat(mod.x, mod.x);
5730 mod.y = AddSat(mod.y, mod.y);
5731 mod.z = AddSat(mod.z, mod.z);
5732 mod.w = AddSat(mod.w, mod.w);
John Bauman89401822014-05-06 15:04:28 -04005733 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005734 case Shader::MODIFIER_SIGN_NEGATE:
5735 mod.x = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), x);
5736 mod.y = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), y);
5737 mod.z = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), z);
5738 mod.w = SubSat(Short4(0x0800, 0x0800, 0x0800, 0x0800), w);
5739 mod.x = AddSat(mod.x, mod.x);
5740 mod.y = AddSat(mod.y, mod.y);
5741 mod.z = AddSat(mod.z, mod.z);
5742 mod.w = AddSat(mod.w, mod.w);
John Bauman89401822014-05-06 15:04:28 -04005743 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005744 case Shader::MODIFIER_DZ:
5745 mod.x = x;
5746 mod.y = y;
5747 mod.z = z;
5748 mod.w = w;
John Bauman89401822014-05-06 15:04:28 -04005749 // Projection performed by texture sampler
5750 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005751 case Shader::MODIFIER_DW:
5752 mod.x = x;
5753 mod.y = y;
5754 mod.z = z;
5755 mod.w = w;
John Bauman89401822014-05-06 15:04:28 -04005756 // Projection performed by texture sampler
5757 break;
5758 default:
5759 ASSERT(false);
5760 }
5761
John Bauman19bac1e2014-05-06 15:23:49 -04005762 if(src.type == Shader::PARAMETER_CONST && (src.modifier == Shader::MODIFIER_X2 || src.modifier == Shader::MODIFIER_X2_NEGATE))
John Bauman89401822014-05-06 15:04:28 -04005763 {
John Bauman19bac1e2014-05-06 15:23:49 -04005764 mod.x = Min(mod.x, Short4(0x1000)); mod.x = Max(mod.x, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5765 mod.y = Min(mod.y, Short4(0x1000)); mod.y = Max(mod.y, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5766 mod.z = Min(mod.z, Short4(0x1000)); mod.z = Max(mod.z, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
5767 mod.w = Min(mod.w, Short4(0x1000)); mod.w = Max(mod.w, Short4(-0x1000, -0x1000, -0x1000, -0x1000));
John Bauman89401822014-05-06 15:04:28 -04005768 }
5769
5770 return mod;
5771 }
5772
Alexis Hetu96517182015-04-15 10:30:23 -04005773 Vector4f PixelRoutine::fetchRegisterF(Registers &r, const Src &src, int offset)
John Bauman89401822014-05-06 15:04:28 -04005774 {
John Bauman19bac1e2014-05-06 15:23:49 -04005775 Vector4f reg;
John Bauman89401822014-05-06 15:04:28 -04005776 int i = src.index + offset;
5777
5778 switch(src.type)
5779 {
John Bauman19bac1e2014-05-06 15:23:49 -04005780 case Shader::PARAMETER_TEMP:
5781 if(src.rel.type == Shader::PARAMETER_VOID)
John Bauman89401822014-05-06 15:04:28 -04005782 {
John Bauman19bac1e2014-05-06 15:23:49 -04005783 reg = r.rf[i];
5784 }
5785 else
5786 {
5787 Int a = relativeAddress(r, src);
5788
5789 reg = r.rf[i + a];
5790 }
5791 break;
5792 case Shader::PARAMETER_INPUT:
5793 {
5794 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
John Bauman89401822014-05-06 15:04:28 -04005795 {
John Bauman19bac1e2014-05-06 15:23:49 -04005796 reg = r.vf[i];
John Bauman89401822014-05-06 15:04:28 -04005797 }
John Bauman19bac1e2014-05-06 15:23:49 -04005798 else if(src.rel.type == Shader::PARAMETER_LOOP)
John Bauman89401822014-05-06 15:04:28 -04005799 {
5800 Int aL = r.aL[r.loopDepth];
5801
John Bauman19bac1e2014-05-06 15:23:49 -04005802 reg = r.vf[i + aL];
John Bauman89401822014-05-06 15:04:28 -04005803 }
John Bauman19bac1e2014-05-06 15:23:49 -04005804 else
John Bauman89401822014-05-06 15:04:28 -04005805 {
John Bauman19bac1e2014-05-06 15:23:49 -04005806 Int a = relativeAddress(r, src);
5807
5808 reg = r.vf[i + a];
John Bauman89401822014-05-06 15:04:28 -04005809 }
5810 }
5811 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005812 case Shader::PARAMETER_CONST:
5813 reg = readConstant(r, src, offset);
John Bauman89401822014-05-06 15:04:28 -04005814 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005815 case Shader::PARAMETER_TEXTURE:
5816 reg = r.vf[2 + i];
5817 break;
5818 case Shader::PARAMETER_MISCTYPE:
John Bauman89401822014-05-06 15:04:28 -04005819 if(src.index == 0) reg = r.vPos;
5820 if(src.index == 1) reg = r.vFace;
5821 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005822 case Shader::PARAMETER_SAMPLER:
5823 if(src.rel.type == Shader::PARAMETER_VOID)
5824 {
5825 reg.x = As<Float4>(Int4(i));
5826 }
5827 else if(src.rel.type == Shader::PARAMETER_TEMP)
5828 {
5829 reg.x = As<Float4>(Int4(i) + RoundInt(r.rf[src.rel.index].x));
5830 }
5831 return reg;
5832 case Shader::PARAMETER_PREDICATE: return reg; // Dummy
5833 case Shader::PARAMETER_VOID: return reg; // Dummy
5834 case Shader::PARAMETER_FLOAT4LITERAL:
5835 reg.x = Float4(src.value[0]);
5836 reg.y = Float4(src.value[1]);
5837 reg.z = Float4(src.value[2]);
5838 reg.w = Float4(src.value[3]);
5839 break;
5840 case Shader::PARAMETER_CONSTINT: return reg; // Dummy
5841 case Shader::PARAMETER_CONSTBOOL: return reg; // Dummy
5842 case Shader::PARAMETER_LOOP: return reg; // Dummy
5843 case Shader::PARAMETER_COLOROUT:
5844 reg = r.oC[i];
5845 break;
5846 case Shader::PARAMETER_DEPTHOUT:
5847 reg.x = r.oDepth;
5848 break;
John Bauman89401822014-05-06 15:04:28 -04005849 default:
5850 ASSERT(false);
5851 }
5852
John Bauman66b8ab22014-05-06 15:57:45 -04005853 const Float4 &x = reg[(src.swizzle >> 0) & 0x3];
5854 const Float4 &y = reg[(src.swizzle >> 2) & 0x3];
5855 const Float4 &z = reg[(src.swizzle >> 4) & 0x3];
5856 const Float4 &w = reg[(src.swizzle >> 6) & 0x3];
John Bauman89401822014-05-06 15:04:28 -04005857
John Bauman19bac1e2014-05-06 15:23:49 -04005858 Vector4f mod;
John Bauman89401822014-05-06 15:04:28 -04005859
5860 switch(src.modifier)
5861 {
John Bauman19bac1e2014-05-06 15:23:49 -04005862 case Shader::MODIFIER_NONE:
John Bauman89401822014-05-06 15:04:28 -04005863 mod.x = x;
5864 mod.y = y;
5865 mod.z = z;
5866 mod.w = w;
5867 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005868 case Shader::MODIFIER_NEGATE:
John Bauman89401822014-05-06 15:04:28 -04005869 mod.x = -x;
5870 mod.y = -y;
5871 mod.z = -z;
5872 mod.w = -w;
5873 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005874 case Shader::MODIFIER_ABS:
John Bauman89401822014-05-06 15:04:28 -04005875 mod.x = Abs(x);
5876 mod.y = Abs(y);
5877 mod.z = Abs(z);
5878 mod.w = Abs(w);
5879 break;
John Bauman19bac1e2014-05-06 15:23:49 -04005880 case Shader::MODIFIER_ABS_NEGATE:
John Bauman89401822014-05-06 15:04:28 -04005881 mod.x = -Abs(x);
5882 mod.y = -Abs(y);
5883 mod.z = -Abs(z);
5884 mod.w = -Abs(w);
5885 break;
John Bauman66b8ab22014-05-06 15:57:45 -04005886 case Shader::MODIFIER_NOT:
5887 mod.x = As<Float4>(As<Int4>(x) ^ Int4(0xFFFFFFFF));
5888 mod.y = As<Float4>(As<Int4>(y) ^ Int4(0xFFFFFFFF));
5889 mod.z = As<Float4>(As<Int4>(z) ^ Int4(0xFFFFFFFF));
5890 mod.w = As<Float4>(As<Int4>(w) ^ Int4(0xFFFFFFFF));
5891 break;
John Bauman89401822014-05-06 15:04:28 -04005892 default:
5893 ASSERT(false);
5894 }
5895
5896 return mod;
5897 }
5898
John Bauman19bac1e2014-05-06 15:23:49 -04005899 Vector4f PixelRoutine::readConstant(Registers &r, const Src &src, int offset)
John Bauman89401822014-05-06 15:04:28 -04005900 {
John Bauman19bac1e2014-05-06 15:23:49 -04005901 Vector4f c;
5902
5903 int i = src.index + offset;
5904
5905 if(src.rel.type == Shader::PARAMETER_VOID) // Not relative
5906 {
5907 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]));
5908
5909 c.x = c.x.xxxx;
5910 c.y = c.y.yyyy;
5911 c.z = c.z.zzzz;
5912 c.w = c.w.wwww;
5913
5914 if(localShaderConstants) // Constant may be known at compile time
5915 {
Alexis Hetu903e0252014-11-25 14:25:32 -05005916 for(size_t j = 0; j < shader->getLength(); j++)
John Bauman19bac1e2014-05-06 15:23:49 -04005917 {
5918 const Shader::Instruction &instruction = *shader->getInstruction(j);
5919
5920 if(instruction.opcode == Shader::OPCODE_DEF)
5921 {
5922 if(instruction.dst.index == i)
5923 {
5924 c.x = Float4(instruction.src[0].value[0]);
5925 c.y = Float4(instruction.src[0].value[1]);
5926 c.z = Float4(instruction.src[0].value[2]);
5927 c.w = Float4(instruction.src[0].value[3]);
5928
5929 break;
5930 }
5931 }
5932 }
5933 }
5934 }
5935 else if(src.rel.type == Shader::PARAMETER_LOOP)
5936 {
5937 Int loopCounter = r.aL[r.loopDepth];
5938
5939 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + loopCounter * 16);
5940
5941 c.x = c.x.xxxx;
5942 c.y = c.y.yyyy;
5943 c.z = c.z.zzzz;
5944 c.w = c.w.wwww;
5945 }
5946 else
5947 {
5948 Int a = relativeAddress(r, src);
5949
5950 c.x = c.y = c.z = c.w = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[i]) + a * 16);
5951
5952 c.x = c.x.xxxx;
5953 c.y = c.y.yyyy;
5954 c.z = c.z.zzzz;
5955 c.w = c.w.wwww;
5956 }
5957
5958 return c;
John Bauman89401822014-05-06 15:04:28 -04005959 }
5960
John Bauman19bac1e2014-05-06 15:23:49 -04005961 Int PixelRoutine::relativeAddress(Registers &r, const Shader::Parameter &var)
John Bauman89401822014-05-06 15:04:28 -04005962 {
John Bauman19bac1e2014-05-06 15:23:49 -04005963 ASSERT(var.rel.deterministic);
5964
5965 if(var.rel.type == Shader::PARAMETER_TEMP)
5966 {
5967 return RoundInt(Extract(r.rf[var.rel.index].x, 0)) * var.rel.scale;
5968 }
5969 else if(var.rel.type == Shader::PARAMETER_INPUT)
5970 {
5971 return RoundInt(Extract(r.vf[var.rel.index].x, 0)) * var.rel.scale;
5972 }
5973 else if(var.rel.type == Shader::PARAMETER_OUTPUT)
5974 {
5975 return RoundInt(Extract(r.oC[var.rel.index].x, 0)) * var.rel.scale;
5976 }
5977 else if(var.rel.type == Shader::PARAMETER_CONST)
5978 {
Nicolas Capensb5e7a2a2014-05-06 16:38:19 -04005979 RValue<Float4> c = *Pointer<Float4>(r.data + OFFSET(DrawData,ps.c[var.rel.index]));
John Bauman19bac1e2014-05-06 15:23:49 -04005980
5981 return RoundInt(Extract(c, 0)) * var.rel.scale;
5982 }
5983 else ASSERT(false);
5984
5985 return 0;
5986 }
5987
5988 Int4 PixelRoutine::enableMask(Registers &r, const Shader::Instruction *instruction)
5989 {
5990 Int4 enable = instruction->analysisBranch ? Int4(r.enableStack[r.enableIndex]) : Int4(0xFFFFFFFF);
John Baumand4ae8632014-05-06 16:18:33 -04005991
5992 if(!whileTest)
John Bauman19bac1e2014-05-06 15:23:49 -04005993 {
John Baumand4ae8632014-05-06 16:18:33 -04005994 if(shader->containsBreakInstruction() && instruction->analysisBreak)
5995 {
5996 enable &= r.enableBreak;
5997 }
John Bauman19bac1e2014-05-06 15:23:49 -04005998
John Baumand4ae8632014-05-06 16:18:33 -04005999 if(shader->containsContinueInstruction() && instruction->analysisContinue)
6000 {
6001 enable &= r.enableContinue;
6002 }
John Bauman19bac1e2014-05-06 15:23:49 -04006003
John Baumand4ae8632014-05-06 16:18:33 -04006004 if(shader->containsLeaveInstruction() && instruction->analysisLeave)
6005 {
6006 enable &= r.enableLeave;
6007 }
John Bauman19bac1e2014-05-06 15:23:49 -04006008 }
6009
6010 return enable;
6011 }
6012
6013 bool PixelRoutine::colorUsed()
6014 {
6015 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
6016 }
6017
6018 unsigned short PixelRoutine::shaderVersion() const
6019 {
6020 return shader ? shader->getVersion() : 0x0000;
6021 }
6022
6023 bool PixelRoutine::interpolateZ() const
6024 {
6025 return state.depthTestActive || state.pixelFogActive() || (shader && shader->vPosDeclared && fullPixelPositionRegister);
6026 }
6027
6028 bool PixelRoutine::interpolateW() const
6029 {
6030 return state.perspective || (shader && shader->vPosDeclared && fullPixelPositionRegister);
John Bauman89401822014-05-06 15:04:28 -04006031 }
6032}