blob: c313eb4e91253afe50850be46fd70ef8150d1711 [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelRoutine.hpp"
13
14#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040015#include "QuadRasterizer.hpp"
16#include "Surface.hpp"
17#include "Primitive.hpp"
18#include "CPUID.hpp"
19#include "SamplerCore.hpp"
20#include "Constants.hpp"
21#include "Debug.hpp"
22
John Bauman89401822014-05-06 15:04:28 -040023namespace sw
24{
25 extern bool complementaryDepthBuffer;
26 extern bool postBlendSRGB;
27 extern bool exactColorRounding;
Alexis Hetuf2a8c372015-07-13 11:08:41 -040028 extern bool forceClearRegisters;
John Bauman89401822014-05-06 15:04:28 -040029
Alexis Hetuf2a8c372015-07-13 11:08:41 -040030 PixelRoutine::Registers::Registers(const PixelShader *shader) :
31 QuadRasterizer::Registers(),
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040032 v(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040033 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040034 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040035 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040036 for(int i = 0; i < 10; i++)
37 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040038 v[i].x = Float4(0.0f);
39 v[i].y = Float4(0.0f);
40 v[i].z = Float4(0.0f);
41 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040042 }
John Bauman89401822014-05-06 15:04:28 -040043 }
44 }
45
Alexis Hetuf2a8c372015-07-13 11:08:41 -040046 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader)
47 {
48 }
49
John Bauman89401822014-05-06 15:04:28 -040050 PixelRoutine::~PixelRoutine()
51 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040052 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040053 {
54 delete sampler[i];
55 }
56 }
57
Alexis Hetu1edcd8b2015-11-05 11:12:41 -050058 void PixelRoutine::quad(QuadRasterizer::Registers &rBase, Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040059 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040060 Registers& r = *static_cast<Registers*>(&rBase);
61
John Bauman89401822014-05-06 15:04:28 -040062 #if PERF_PROFILE
63 Long pipeTime = Ticks();
64 #endif
65
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040066 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040067 {
68 sampler[i] = new SamplerCore(r.constants, state.sampler[i]);
69 }
70
71 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040072
73 Int zMask[4]; // Depth mask
74 Int sMask[4]; // Stencil mask
75
76 for(unsigned int q = 0; q < state.multiSample; q++)
77 {
78 zMask[q] = cMask[q];
79 sMask[q] = cMask[q];
80 }
81
82 for(unsigned int q = 0; q < state.multiSample; q++)
83 {
84 stencilTest(r, sBuffer, q, x, sMask[q], cMask[q]);
85 }
86
87 Float4 f;
88
John Bauman89401822014-05-06 15:04:28 -040089 Float4 (&z)[4] = r.z;
John Bauman19bac1e2014-05-06 15:23:49 -040090 Float4 &w = r.w;
John Bauman89401822014-05-06 15:04:28 -040091 Float4 &rhw = r.rhw;
92 Float4 rhwCentroid;
93
94 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040095
John Bauman19bac1e2014-05-06 15:23:49 -040096 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040097 {
98 for(unsigned int q = 0; q < state.multiSample; q++)
99 {
100 Float4 x = xxxx;
101
102 if(state.multiSample > 1)
103 {
104 x -= *Pointer<Float4>(r.constants + OFFSET(Constants,X) + q * sizeof(float4));
105 }
106
107 z[q] = interpolate(x, r.Dz[q], z[q], r.primitive + OFFSET(Primitive,z), false, false);
108 }
109 }
110
111 Bool depthPass = false;
112
113 if(earlyDepthTest)
114 {
115 for(unsigned int q = 0; q < state.multiSample; q++)
116 {
117 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
118 }
119 }
120
121 If(depthPass || Bool(!earlyDepthTest))
122 {
123 #if PERF_PROFILE
124 Long interpTime = Ticks();
125 #endif
126
Nicolas Capens66be2452015-01-27 14:58:57 -0500127 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400128
John Bauman89401822014-05-06 15:04:28 -0400129 // Centroid locations
130 Float4 XXXX = Float4(0.0f);
131 Float4 YYYY = Float4(0.0f);
132
133 if(state.centroid)
134 {
135 Float4 WWWW(1.0e-9f);
136
137 for(unsigned int q = 0; q < state.multiSample; q++)
138 {
139 XXXX += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
140 YYYY += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
141 WWWW += *Pointer<Float4>(r.constants + OFFSET(Constants,weight) + 16 * cMask[q]);
142 }
143
144 WWWW = Rcp_pp(WWWW);
145 XXXX *= WWWW;
146 YYYY *= WWWW;
147
148 XXXX += xxxx;
149 YYYY += yyyy;
150 }
151
John Bauman19bac1e2014-05-06 15:23:49 -0400152 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400153 {
John Bauman19bac1e2014-05-06 15:23:49 -0400154 w = interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false);
155 rhw = reciprocal(w);
John Bauman89401822014-05-06 15:04:28 -0400156
157 if(state.centroid)
158 {
159 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,w), false, false));
160 }
161 }
162
163 for(int interpolant = 0; interpolant < 10; interpolant++)
164 {
165 for(int component = 0; component < 4; component++)
166 {
John Bauman89401822014-05-06 15:04:28 -0400167 if(state.interpolant[interpolant].component & (1 << component))
168 {
169 if(!state.interpolant[interpolant].centroid)
170 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400171 r.v[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400172 }
173 else
174 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400175 r.v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400176 }
177 }
178 }
179
180 Float4 rcp;
181
182 switch(state.interpolant[interpolant].project)
183 {
184 case 0:
185 break;
186 case 1:
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400187 rcp = reciprocal(r.v[interpolant].y);
188 r.v[interpolant].x = r.v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400189 break;
190 case 2:
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400191 rcp = reciprocal(r.v[interpolant].z);
192 r.v[interpolant].x = r.v[interpolant].x * rcp;
193 r.v[interpolant].y = r.v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400194 break;
195 case 3:
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400196 rcp = reciprocal(r.v[interpolant].w);
197 r.v[interpolant].x = r.v[interpolant].x * rcp;
198 r.v[interpolant].y = r.v[interpolant].y * rcp;
199 r.v[interpolant].z = r.v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400200 break;
201 }
202 }
203
204 if(state.fog.component)
205 {
206 f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
207 }
208
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400209 setBuiltins(r, x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400210
211 #if PERF_PROFILE
212 r.cycles[PERF_INTERP] += Ticks() - interpTime;
213 #endif
214
215 Bool alphaPass = true;
216
217 if(colorUsed())
218 {
219 #if PERF_PROFILE
220 Long shaderTime = Ticks();
221 #endif
222
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400223 applyShader(r, cMask);
John Bauman89401822014-05-06 15:04:28 -0400224
225 #if PERF_PROFILE
226 r.cycles[PERF_SHADER] += Ticks() - shaderTime;
227 #endif
228
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400229 alphaPass = alphaTest(r, cMask);
John Bauman89401822014-05-06 15:04:28 -0400230
John Bauman19bac1e2014-05-06 15:23:49 -0400231 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400232 {
233 for(unsigned int q = 0; q < state.multiSample; q++)
234 {
235 zMask[q] &= cMask[q];
236 sMask[q] &= cMask[q];
237 }
238 }
239 }
240
241 If(alphaPass)
242 {
243 if(!earlyDepthTest)
244 {
245 for(unsigned int q = 0; q < state.multiSample; q++)
246 {
247 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
248 }
249 }
250
251 #if PERF_PROFILE
252 Long ropTime = Ticks();
253 #endif
254
255 If(depthPass || Bool(earlyDepthTest))
256 {
257 for(unsigned int q = 0; q < state.multiSample; q++)
258 {
259 if(state.multiSampleMask & (1 << q))
260 {
261 writeDepth(r, zBuffer, q, x, z[q], zMask[q]);
262
263 if(state.occlusionEnabled)
264 {
265 r.occlusion += *Pointer<UInt>(r.constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
266 }
267 }
268 }
269
270 if(colorUsed())
271 {
272 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400273 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400274 #endif
275
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400276 rasterOperation(r, f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400277 }
278 }
279
280 #if PERF_PROFILE
281 r.cycles[PERF_ROP] += Ticks() - ropTime;
282 #endif
283 }
284 }
285
286 for(unsigned int q = 0; q < state.multiSample; q++)
287 {
288 if(state.multiSampleMask & (1 << q))
289 {
290 writeStencil(r, sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
291 }
292 }
293
294 #if PERF_PROFILE
295 r.cycles[PERF_PIPE] += Ticks() - pipeTime;
296 #endif
297 }
298
John Bauman89401822014-05-06 15:04:28 -0400299 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
300 {
301 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
302
303 if(!flat)
304 {
305 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
306 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
307
308 if(perspective)
309 {
310 interpolant *= rhw;
311 }
312 }
313
314 return interpolant;
315 }
316
317 void PixelRoutine::stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
318 {
319 if(!state.stencilActive)
320 {
321 return;
322 }
323
324 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
325
326 Pointer<Byte> buffer = sBuffer + 2 * x;
327
328 if(q > 0)
329 {
330 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
331 }
332
333 Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
334 Byte8 valueCCW = value;
335
336 if(!state.noStencilMask)
337 {
338 value &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].testMaskQ));
339 }
340
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400341 stencilTest(r, value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400342
343 if(state.twoSidedStencil)
344 {
345 if(!state.noStencilMaskCCW)
346 {
347 valueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].testMaskQ));
348 }
349
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400350 stencilTest(r, valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400351
352 value &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
353 valueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
354 value |= valueCCW;
355 }
356
357 sMask = SignMask(value) & cMask;
358 }
359
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400360 void PixelRoutine::stencilTest(Registers &r, Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400361 {
362 Byte8 equal;
363
364 switch(stencilCompareMode)
365 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400366 case STENCIL_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400367 value = Byte8(0xFFFFFFFFFFFFFFFF);
368 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400369 case STENCIL_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400370 value = Byte8(0x0000000000000000);
371 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400372 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400373 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
374 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
375 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400376 case STENCIL_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400377 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
378 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400379 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -0400380 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
381 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
382 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400383 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400384 equal = value;
385 equal = CmpEQ(equal, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
386 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
387 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
388 value |= equal;
389 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400390 case STENCIL_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -0400391 equal = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
392 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
393 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
394 value = equal;
395 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400396 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400397 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
398 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
399 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
400 break;
401 default:
402 ASSERT(false);
403 }
404 }
405
406 Bool PixelRoutine::depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
407 {
408 if(!state.depthTestActive)
409 {
410 return true;
411 }
412
413 Float4 Z = z;
414
John Bauman19bac1e2014-05-06 15:23:49 -0400415 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400416 {
417 if(complementaryDepthBuffer)
418 {
John Bauman19bac1e2014-05-06 15:23:49 -0400419 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -0400420 }
421 else
422 {
423 Z = r.oDepth;
424 }
425 }
426
427 Pointer<Byte> buffer;
428 Int pitch;
429
430 if(!state.quadLayoutDepthBuffer)
431 {
432 buffer = zBuffer + 4 * x;
433 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
434 }
435 else
436 {
437 buffer = zBuffer + 8 * x;
438 }
439
440 if(q > 0)
441 {
442 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
443 }
444
445 Float4 zValue;
446
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400447 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400448 {
449 if(!state.quadLayoutDepthBuffer)
450 {
451 // FIXME: Properly optimizes?
452 zValue.xy = *Pointer<Float4>(buffer);
453 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
454 }
455 else
456 {
457 zValue = *Pointer<Float4>(buffer, 16);
458 }
459 }
460
461 Int4 zTest;
462
463 switch(state.depthCompareMode)
464 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400465 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400466 // Optimized
467 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400468 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400469 // Optimized
470 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400471 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400472 zTest = CmpEQ(zValue, Z);
473 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400474 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400475 zTest = CmpNEQ(zValue, Z);
476 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400477 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400478 if(complementaryDepthBuffer)
479 {
480 zTest = CmpLT(zValue, Z);
481 }
482 else
483 {
484 zTest = CmpNLE(zValue, Z);
485 }
486 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400487 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400488 if(complementaryDepthBuffer)
489 {
490 zTest = CmpNLT(zValue, Z);
491 }
492 else
493 {
494 zTest = CmpLE(zValue, Z);
495 }
496 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400497 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400498 if(complementaryDepthBuffer)
499 {
500 zTest = CmpLE(zValue, Z);
501 }
502 else
503 {
504 zTest = CmpNLT(zValue, Z);
505 }
506 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400507 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400508 if(complementaryDepthBuffer)
509 {
510 zTest = CmpNLE(zValue, Z);
511 }
512 else
513 {
514 zTest = CmpLT(zValue, Z);
515 }
516 break;
517 default:
518 ASSERT(false);
519 }
520
521 switch(state.depthCompareMode)
522 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400523 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400524 zMask = cMask;
525 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400526 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400527 zMask = 0x0;
528 break;
529 default:
530 zMask = SignMask(zTest) & cMask;
531 break;
532 }
533
534 if(state.stencilActive)
535 {
536 zMask &= sMask;
537 }
538
539 return zMask != 0;
540 }
541
John Bauman89401822014-05-06 15:04:28 -0400542 void PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha)
543 {
544 Short4 cmp;
545 Short4 equal;
546
547 switch(state.alphaCompareMode)
548 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400549 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400550 aMask = 0xF;
551 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400552 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400553 aMask = 0x0;
554 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400555 case ALPHA_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400556 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
557 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
558 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400559 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -0400560 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
561 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
562 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400563 case ALPHA_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400564 cmp = CmpGT(*Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)), alpha);
565 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
566 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400567 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
John Bauman89401822014-05-06 15:04:28 -0400568 equal = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
569 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
570 cmp |= equal;
571 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
572 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400573 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
John Bauman89401822014-05-06 15:04:28 -0400574 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
575 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
576 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400577 case ALPHA_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -0400578 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
579 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
580 break;
581 default:
582 ASSERT(false);
583 }
584 }
585
586 void PixelRoutine::alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha)
587 {
588 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c0)));
589 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c1)));
590 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c2)));
591 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c3)));
592
593 Int aMask0 = SignMask(coverage0);
594 Int aMask1 = SignMask(coverage1);
595 Int aMask2 = SignMask(coverage2);
596 Int aMask3 = SignMask(coverage3);
597
598 cMask[0] &= aMask0;
599 cMask[1] &= aMask1;
600 cMask[2] &= aMask2;
601 cMask[3] &= aMask3;
602 }
603
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400604 void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400605 {
606 if(!state.fogActive)
607 {
608 return;
609 }
610
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400611 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400612 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400613 pixelFog(r, fog);
John Bauman89401822014-05-06 15:04:28 -0400614
John Bauman19bac1e2014-05-06 15:23:49 -0400615 fog = Min(fog, Float4(1.0f));
616 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400617 }
618
John Bauman19bac1e2014-05-06 15:23:49 -0400619 c0.x -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
620 c0.y -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
621 c0.z -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400622
John Bauman19bac1e2014-05-06 15:23:49 -0400623 c0.x *= fog;
624 c0.y *= fog;
625 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400626
John Bauman19bac1e2014-05-06 15:23:49 -0400627 c0.x += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
628 c0.y += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
629 c0.z += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400630 }
631
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400632 void PixelRoutine::pixelFog(Registers &r, Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400633 {
634 Float4 &zw = visibility;
635
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400636 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400637 {
638 if(state.wBasedFog)
639 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400640 zw = r.rhw;
John Bauman89401822014-05-06 15:04:28 -0400641 }
642 else
643 {
644 if(complementaryDepthBuffer)
645 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400646 zw = Float4(1.0f) - r.z[0];
John Bauman89401822014-05-06 15:04:28 -0400647 }
648 else
649 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -0400650 zw = r.z[0];
John Bauman89401822014-05-06 15:04:28 -0400651 }
652 }
653 }
654
655 switch(state.pixelFogMode)
656 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400657 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400658 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400659 case FOG_LINEAR:
John Bauman89401822014-05-06 15:04:28 -0400660 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale));
661 zw += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
662 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400663 case FOG_EXP:
John Bauman89401822014-05-06 15:04:28 -0400664 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400665 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400666 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400667 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400668 zw *= zw;
Nicolas Capensa36f3f92015-08-04 15:34:26 -0400669 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400670 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400671 break;
672 default:
673 ASSERT(false);
674 }
675 }
676
John Bauman89401822014-05-06 15:04:28 -0400677 void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
678 {
679 if(!state.depthWriteEnable)
680 {
681 return;
682 }
683
684 Float4 Z = z;
685
John Bauman19bac1e2014-05-06 15:23:49 -0400686 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400687 {
688 if(complementaryDepthBuffer)
689 {
John Bauman19bac1e2014-05-06 15:23:49 -0400690 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -0400691 }
692 else
693 {
694 Z = r.oDepth;
695 }
696 }
697
698 Pointer<Byte> buffer;
699 Int pitch;
700
701 if(!state.quadLayoutDepthBuffer)
702 {
703 buffer = zBuffer + 4 * x;
704 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
705 }
706 else
707 {
708 buffer = zBuffer + 8 * x;
709 }
710
711 if(q > 0)
712 {
713 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
714 }
715
716 Float4 zValue;
717
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400718 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400719 {
720 if(!state.quadLayoutDepthBuffer)
721 {
722 // FIXME: Properly optimizes?
723 zValue.xy = *Pointer<Float4>(buffer);
724 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
725 }
726 else
727 {
728 zValue = *Pointer<Float4>(buffer, 16);
729 }
730 }
731
732 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
733 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
734 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
735
736 if(!state.quadLayoutDepthBuffer)
737 {
738 // FIXME: Properly optimizes?
739 *Pointer<Float2>(buffer) = Float2(Z.xy);
740 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
741 }
742 else
743 {
744 *Pointer<Float4>(buffer, 16) = Z;
745 }
746 }
747
748 void PixelRoutine::writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
749 {
750 if(!state.stencilActive)
751 {
752 return;
753 }
754
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400755 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400756 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400757 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400758 {
759 return;
760 }
761 }
762
763 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
764 {
765 return;
766 }
767
768 Pointer<Byte> buffer = sBuffer + 2 * x;
769
770 if(q > 0)
771 {
772 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
773 }
774
775 Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
776
777 Byte8 newValue;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400778 stencilOperation(r, newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400779
780 if(!state.noStencilWriteMask)
781 {
782 Byte8 maskedValue = bufferValue;
783 newValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].writeMaskQ));
784 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
785 newValue |= maskedValue;
786 }
787
788 if(state.twoSidedStencil)
789 {
790 Byte8 newValueCCW;
791
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400792 stencilOperation(r, newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400793
794 if(!state.noStencilWriteMaskCCW)
795 {
796 Byte8 maskedValue = bufferValue;
797 newValueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].writeMaskQ));
798 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
799 newValueCCW |= maskedValue;
800 }
801
802 newValue &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
803 newValueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
804 newValue |= newValueCCW;
805 }
806
807 newValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
808 bufferValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
809 newValue |= bufferValue;
810
811 *Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
812 }
813
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400814 void PixelRoutine::stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400815 {
816 Byte8 &pass = newValue;
817 Byte8 fail;
818 Byte8 zFail;
819
820 stencilOperation(r, pass, bufferValue, stencilPassOperation, CCW);
821
822 if(stencilZFailOperation != stencilPassOperation)
823 {
824 stencilOperation(r, zFail, bufferValue, stencilZFailOperation, CCW);
825 }
826
827 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
828 {
829 stencilOperation(r, fail, bufferValue, stencilFailOperation, CCW);
830 }
831
832 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
833 {
834 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
835 {
836 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
837 zFail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
838 pass |= zFail;
839 }
840
841 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
842 fail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
843 pass |= fail;
844 }
845 }
846
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400847 void PixelRoutine::stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400848 {
849 switch(operation)
850 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400851 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400852 output = bufferValue;
853 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400854 case OPERATION_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400855 output = Byte8(0x0000000000000000);
856 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400857 case OPERATION_REPLACE:
John Bauman89401822014-05-06 15:04:28 -0400858 output = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceQ));
859 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400860 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400861 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
862 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400863 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400864 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
865 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400866 case OPERATION_INVERT:
John Bauman89401822014-05-06 15:04:28 -0400867 output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
868 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400869 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400870 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
871 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400872 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400873 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
874 break;
875 default:
876 ASSERT(false);
877 }
878 }
879
Alexis Hetu96517182015-04-15 10:30:23 -0400880 void PixelRoutine::blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400881 {
882 switch(blendFactorActive)
883 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400884 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400885 // Optimized
886 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400887 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400888 // Optimized
889 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400890 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400891 blendFactor.x = current.x;
892 blendFactor.y = current.y;
893 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400894 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400895 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400896 blendFactor.x = Short4(0xFFFFu) - current.x;
897 blendFactor.y = Short4(0xFFFFu) - current.y;
898 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400899 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400900 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400901 blendFactor.x = pixel.x;
902 blendFactor.y = pixel.y;
903 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400904 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400905 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400906 blendFactor.x = Short4(0xFFFFu) - pixel.x;
907 blendFactor.y = Short4(0xFFFFu) - pixel.y;
908 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400909 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400910 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400911 blendFactor.x = current.w;
912 blendFactor.y = current.w;
913 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400914 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400915 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400916 blendFactor.x = Short4(0xFFFFu) - current.w;
917 blendFactor.y = Short4(0xFFFFu) - current.w;
918 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400919 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400920 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400921 blendFactor.x = pixel.w;
922 blendFactor.y = pixel.w;
923 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400925 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400926 blendFactor.x = Short4(0xFFFFu) - pixel.w;
927 blendFactor.y = Short4(0xFFFFu) - pixel.w;
928 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400929 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400930 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400931 blendFactor.x = Short4(0xFFFFu) - pixel.w;
932 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
933 blendFactor.y = blendFactor.x;
934 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400935 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400936 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -0400937 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0]));
938 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1]));
939 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400940 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400941 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -0400942 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
943 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
944 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400946 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400947 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
948 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
949 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400950 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400951 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400952 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
953 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
954 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400955 break;
956 default:
957 ASSERT(false);
958 }
959 }
960
Alexis Hetu96517182015-04-15 10:30:23 -0400961 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400962 {
963 switch(blendFactorAlphaActive)
964 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400965 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400966 // Optimized
967 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400968 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400969 // Optimized
970 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400971 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400972 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400973 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400974 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400975 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400976 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400977 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400978 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400979 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400980 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400981 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400982 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400983 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400984 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400985 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400986 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400987 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400988 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400989 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400990 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400991 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400992 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400993 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400994 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400995 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400996 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400997 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400998 case BLEND_CONSTANT:
999 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001000 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04001001 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001002 case BLEND_INVCONSTANT:
1003 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001004 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04001005 break;
1006 default:
1007 ASSERT(false);
1008 }
1009 }
1010
Nicolas Capens64750b52015-09-22 10:11:00 -04001011 void PixelRoutine::readPixel(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -04001012 {
John Bauman89401822014-05-06 15:04:28 -04001013 Short4 c01;
1014 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001015 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001016 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001017
John Bauman89401822014-05-06 15:04:28 -04001018 switch(state.targetFormat[index])
1019 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001020 case FORMAT_R5G6B5:
1021 buffer = cBuffer + 2 * x;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001022 buffer2 = buffer + *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
1023 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001024
1025 pixel.x = c01 & Short4(0xF800u);
1026 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1027 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1028 pixel.w = Short4(0xFFFFu);
1029 break;
John Bauman89401822014-05-06 15:04:28 -04001030 case FORMAT_A8R8G8B8:
1031 buffer = cBuffer + 4 * x;
1032 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001033 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001034 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001035 pixel.z = c01;
1036 pixel.y = c01;
1037 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1038 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1039 pixel.x = pixel.z;
1040 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1041 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1042 pixel.y = pixel.z;
1043 pixel.w = pixel.x;
1044 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1045 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1046 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1047 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001048 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001049 case FORMAT_A8B8G8R8:
1050 buffer = cBuffer + 4 * x;
1051 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001052 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001053 c23 = *Pointer<Short4>(buffer);
1054 pixel.z = c01;
1055 pixel.y = c01;
1056 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1057 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1058 pixel.x = pixel.z;
1059 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1060 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1061 pixel.y = pixel.z;
1062 pixel.w = pixel.x;
1063 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1064 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1065 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1066 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1067 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001068 case FORMAT_A8:
1069 buffer = cBuffer + 1 * x;
1070 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001071 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001072 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1073 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1074 pixel.x = Short4(0x0000);
1075 pixel.y = Short4(0x0000);
1076 pixel.z = Short4(0x0000);
1077 break;
John Bauman89401822014-05-06 15:04:28 -04001078 case FORMAT_X8R8G8B8:
1079 buffer = cBuffer + 4 * x;
1080 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001081 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001082 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001083 pixel.z = c01;
1084 pixel.y = c01;
1085 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1086 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1087 pixel.x = pixel.z;
1088 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1089 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1090 pixel.y = pixel.z;
1091 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1092 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1093 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1094 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001095 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001096 case FORMAT_X8B8G8R8:
1097 buffer = cBuffer + 4 * x;
1098 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001099 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001100 c23 = *Pointer<Short4>(buffer);
1101 pixel.z = c01;
1102 pixel.y = c01;
1103 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1104 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1105 pixel.x = pixel.z;
1106 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1107 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1108 pixel.y = pixel.z;
1109 pixel.w = pixel.x;
1110 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1111 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1112 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1113 pixel.w = Short4(0xFFFFu);
1114 break;
John Bauman89401822014-05-06 15:04:28 -04001115 case FORMAT_A8G8R8B8Q:
1116 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001117 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1118 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1119 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1120 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001121 break;
1122 case FORMAT_X8G8R8B8Q:
1123 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001124 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1125 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1126 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1127 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001128 break;
1129 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001130 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001131 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1132 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Maxime Grégoired9762742015-07-08 16:43:48 -04001133 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001134 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1135 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1136 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001137 break;
1138 case FORMAT_G16R16:
1139 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001140 pixel.x = *Pointer<Short4>(buffer + 4 * x);
1141 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
1142 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001143 pixel.z = pixel.x;
1144 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1145 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1146 pixel.y = pixel.z;
1147 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1148 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1149 pixel.z = Short4(0xFFFFu);
1150 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001151 break;
1152 default:
1153 ASSERT(false);
1154 }
1155
1156 if(postBlendSRGB && state.writeSRGB)
1157 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001158 sRGBtoLinear16_12_16(r, pixel);
John Bauman89401822014-05-06 15:04:28 -04001159 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001160 }
1161
1162 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1163 {
1164 if(!state.alphaBlendActive)
1165 {
1166 return;
1167 }
1168
1169 Vector4s pixel;
Nicolas Capens64750b52015-09-22 10:11:00 -04001170 readPixel(r, index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001171
1172 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001173 Vector4s sourceFactor;
1174 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001175
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001176 blendFactor(r, sourceFactor, current, pixel, state.sourceBlendFactor);
1177 blendFactor(r, destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001178
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001179 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001180 {
John Bauman19bac1e2014-05-06 15:23:49 -04001181 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1182 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1183 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001184 }
1185
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001186 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001187 {
John Bauman19bac1e2014-05-06 15:23:49 -04001188 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1189 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1190 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001191 }
1192
1193 switch(state.blendOperation)
1194 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001195 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001196 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1197 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1198 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001199 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001200 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001201 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1202 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1203 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001204 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001205 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001206 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1207 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1208 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001209 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001210 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001211 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1212 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1213 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001214 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001215 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001216 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1217 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1218 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001219 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001220 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001221 // No operation
1222 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001223 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001224 current.x = pixel.x;
1225 current.y = pixel.y;
1226 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001227 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001228 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001229 current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1230 current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1231 current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001232 break;
1233 default:
1234 ASSERT(false);
1235 }
1236
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001237 blendFactorAlpha(r, sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1238 blendFactorAlpha(r, destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001239
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001240 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001241 {
John Bauman19bac1e2014-05-06 15:23:49 -04001242 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001243 }
1244
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001245 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001246 {
John Bauman19bac1e2014-05-06 15:23:49 -04001247 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001248 }
1249
1250 switch(state.blendOperationAlpha)
1251 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001252 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001253 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001254 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001255 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001256 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001257 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001258 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001259 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001260 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001261 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001262 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001263 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001264 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001265 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001266 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001267 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001268 // No operation
1269 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001270 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001271 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001272 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001273 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001274 current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001275 break;
1276 default:
1277 ASSERT(false);
1278 }
1279 }
1280
Maxime Grégoired9762742015-07-08 16:43:48 -04001281 void PixelRoutine::logicOperation(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1282 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001283 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001284 {
1285 return;
1286 }
1287
1288 Vector4s pixel;
Nicolas Capens64750b52015-09-22 10:11:00 -04001289 readPixel(r, index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001290
1291 switch(state.logicalOperation)
1292 {
1293 case LOGICALOP_CLEAR:
1294 current.x = 0;
1295 current.y = 0;
1296 current.z = 0;
1297 break;
1298 case LOGICALOP_SET:
Nicolas Capens2afcc802015-08-04 10:34:43 -04001299 current.x = 0xFFFFu;
1300 current.y = 0xFFFFu;
1301 current.z = 0xFFFFu;
Maxime Grégoired9762742015-07-08 16:43:48 -04001302 break;
1303 case LOGICALOP_COPY:
1304 ASSERT(false); // Optimized out
1305 break;
1306 case LOGICALOP_COPY_INVERTED:
1307 current.x = ~current.x;
1308 current.y = ~current.y;
1309 current.z = ~current.z;
1310 break;
1311 case LOGICALOP_NOOP:
1312 current.x = pixel.x;
1313 current.y = pixel.y;
1314 current.z = pixel.z;
1315 break;
1316 case LOGICALOP_INVERT:
1317 current.x = ~pixel.x;
1318 current.y = ~pixel.y;
1319 current.z = ~pixel.z;
1320 break;
1321 case LOGICALOP_AND:
1322 current.x = pixel.x & current.x;
1323 current.y = pixel.y & current.y;
1324 current.z = pixel.z & current.z;
1325 break;
1326 case LOGICALOP_NAND:
1327 current.x = ~(pixel.x & current.x);
1328 current.y = ~(pixel.y & current.y);
1329 current.z = ~(pixel.z & current.z);
1330 break;
1331 case LOGICALOP_OR:
1332 current.x = pixel.x | current.x;
1333 current.y = pixel.y | current.y;
1334 current.z = pixel.z | current.z;
1335 break;
1336 case LOGICALOP_NOR:
1337 current.x = ~(pixel.x | current.x);
1338 current.y = ~(pixel.y | current.y);
1339 current.z = ~(pixel.z | current.z);
1340 break;
1341 case LOGICALOP_XOR:
1342 current.x = pixel.x ^ current.x;
1343 current.y = pixel.y ^ current.y;
1344 current.z = pixel.z ^ current.z;
1345 break;
1346 case LOGICALOP_EQUIV:
1347 current.x = ~(pixel.x ^ current.x);
1348 current.y = ~(pixel.y ^ current.y);
1349 current.z = ~(pixel.z ^ current.z);
1350 break;
1351 case LOGICALOP_AND_REVERSE:
1352 current.x = ~pixel.x & current.x;
1353 current.y = ~pixel.y & current.y;
1354 current.z = ~pixel.z & current.z;
1355 break;
1356 case LOGICALOP_AND_INVERTED:
1357 current.x = pixel.x & ~current.x;
1358 current.y = pixel.y & ~current.y;
1359 current.z = pixel.z & ~current.z;
1360 break;
1361 case LOGICALOP_OR_REVERSE:
1362 current.x = ~pixel.x | current.x;
1363 current.y = ~pixel.y | current.y;
1364 current.z = ~pixel.z | current.z;
1365 break;
1366 case LOGICALOP_OR_INVERTED:
1367 current.x = pixel.x | ~current.x;
1368 current.y = pixel.y | ~current.y;
1369 current.z = pixel.z | ~current.z;
1370 break;
1371 default:
1372 ASSERT(false);
1373 }
1374 }
1375
Alexis Hetu96517182015-04-15 10:30:23 -04001376 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001377 {
John Bauman89401822014-05-06 15:04:28 -04001378 if(postBlendSRGB && state.writeSRGB)
1379 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04001380 linearToSRGB16_12_16(r, current);
John Bauman89401822014-05-06 15:04:28 -04001381 }
1382
1383 if(exactColorRounding)
1384 {
1385 switch(state.targetFormat[index])
1386 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001387 case FORMAT_R5G6B5:
Nicolas Capens26f37222015-09-22 09:53:45 -04001388 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1389 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1390 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001391 break;
John Bauman89401822014-05-06 15:04:28 -04001392 case FORMAT_X8G8R8B8Q:
1393 case FORMAT_A8G8R8B8Q:
1394 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001395 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001396 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001397 case FORMAT_A8B8G8R8:
Nicolas Capens26f37222015-09-22 09:53:45 -04001398 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1399 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1400 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1401 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
John Bauman89401822014-05-06 15:04:28 -04001402 break;
1403 }
1404 }
1405
1406 int rgbaWriteMask = state.colorWriteActive(index);
1407 int bgraWriteMask = rgbaWriteMask & 0x0000000A | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1408 int brgaWriteMask = rgbaWriteMask & 0x00000008 | (rgbaWriteMask & 0x00000001) << 1 | (rgbaWriteMask & 0x00000002) << 1 | (rgbaWriteMask & 0x00000004) >> 2;
1409
1410 switch(state.targetFormat[index])
1411 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001412 case FORMAT_R5G6B5:
1413 {
1414 current.x = current.x & Short4(0xF800u);
1415 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1416 current.z = As<UShort4>(current.z) >> 11;
1417
1418 current.x = current.x | current.y | current.z;
1419 }
1420 break;
John Bauman89401822014-05-06 15:04:28 -04001421 case FORMAT_X8G8R8B8Q:
1422 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001423 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1424 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1425 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001426
John Bauman19bac1e2014-05-06 15:23:49 -04001427 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1428 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001429 break;
1430 case FORMAT_A8G8R8B8Q:
1431 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001432 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1433 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1434 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1435 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001436
John Bauman19bac1e2014-05-06 15:23:49 -04001437 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1438 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001439 break;
1440 case FORMAT_X8R8G8B8:
1441 case FORMAT_A8R8G8B8:
1442 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1443 {
John Bauman19bac1e2014-05-06 15:23:49 -04001444 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1445 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1446 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001447
John Bauman19bac1e2014-05-06 15:23:49 -04001448 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1449 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001450
John Bauman19bac1e2014-05-06 15:23:49 -04001451 current.x = current.z;
1452 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1453 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1454 current.y = current.z;
1455 current.z = As<Short4>(UnpackLow(current.z, current.x));
1456 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001457 }
1458 else
1459 {
John Bauman19bac1e2014-05-06 15:23:49 -04001460 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1461 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1462 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1463 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001464
John Bauman19bac1e2014-05-06 15:23:49 -04001465 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1466 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001467
John Bauman19bac1e2014-05-06 15:23:49 -04001468 current.x = current.z;
1469 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1470 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1471 current.y = current.z;
1472 current.z = As<Short4>(UnpackLow(current.z, current.x));
1473 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001474 }
1475 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001476 case FORMAT_X8B8G8R8:
1477 case FORMAT_A8B8G8R8:
1478 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || rgbaWriteMask == 0x7)
1479 {
1480 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1481 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1482 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1483
1484 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1485 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
1486
1487 current.x = current.z;
1488 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1489 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1490 current.y = current.z;
1491 current.z = As<Short4>(UnpackLow(current.z, current.x));
1492 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1493 }
1494 else
1495 {
1496 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1497 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1498 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1499 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1500
1501 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1502 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
1503
1504 current.x = current.z;
1505 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1506 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1507 current.y = current.z;
1508 current.z = As<Short4>(UnpackLow(current.z, current.x));
1509 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1510 }
1511 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001512 case FORMAT_A8:
1513 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1514 current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w)));
1515 break;
John Bauman89401822014-05-06 15:04:28 -04001516 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001517 current.z = current.x;
1518 current.x = As<Short4>(UnpackLow(current.x, current.y));
1519 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1520 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001521 break;
1522 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001523 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001524 break;
John Bauman89401822014-05-06 15:04:28 -04001525 default:
1526 ASSERT(false);
1527 }
1528
John Bauman19bac1e2014-05-06 15:23:49 -04001529 Short4 c01 = current.z;
1530 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001531
1532 Int xMask; // Combination of all masks
1533
1534 if(state.depthTestActive)
1535 {
1536 xMask = zMask;
1537 }
1538 else
1539 {
1540 xMask = cMask;
1541 }
1542
1543 if(state.stencilActive)
1544 {
1545 xMask &= sMask;
1546 }
1547
John Bauman89401822014-05-06 15:04:28 -04001548 switch(state.targetFormat[index])
1549 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001550 case FORMAT_R5G6B5:
1551 {
1552 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001553 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001554
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001555 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001556
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001557 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001558 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001559 Int masked = value;
1560 c01 &= *Pointer<Int>(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1561 masked &= *Pointer<Int>(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
1562 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001563 }
1564
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001565 c01 &= *Pointer<Int>(r.constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1566 value &= *Pointer<Int>(r.constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
1567 c01 |= value;
1568 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001569
1570 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001571 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001572
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001573 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001574
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001575 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001576 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001577 Int masked = value;
1578 c23 &= *Pointer<Int>(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1579 masked &= *Pointer<Int>(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
1580 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001581 }
1582
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001583 c23 &= *Pointer<Int>(r.constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1584 value &= *Pointer<Int>(r.constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
1585 c23 |= value;
1586 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001587 }
1588 break;
John Bauman89401822014-05-06 15:04:28 -04001589 case FORMAT_A8G8R8B8Q:
1590 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
1591 UNIMPLEMENTED();
1592 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1593
1594 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1595 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1596 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1597 // {
1598 // Short4 masked = value;
1599 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1600 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1601 // c01 |= masked;
1602 // }
1603
1604 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1605 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1606 // c01 |= value;
1607 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1608
1609 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1610
1611 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1612 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1613 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1614 // {
1615 // Short4 masked = value;
1616 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1617 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1618 // c23 |= masked;
1619 // }
1620
1621 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1622 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1623 // c23 |= value;
1624 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1625 break;
1626 case FORMAT_A8R8G8B8:
1627 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001628 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001629 Pointer<Byte> buffer = cBuffer + x * 4;
1630 Short4 value = *Pointer<Short4>(buffer);
1631
1632 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1633 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1634 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1635 {
1636 Short4 masked = value;
1637 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1638 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1639 c01 |= masked;
1640 }
1641
1642 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1643 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1644 c01 |= value;
1645 *Pointer<Short4>(buffer) = c01;
1646
1647 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1648 value = *Pointer<Short4>(buffer);
1649
1650 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1651 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1652 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1653 {
1654 Short4 masked = value;
1655 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1656 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1657 c23 |= masked;
1658 }
1659
1660 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1661 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1662 c23 |= value;
1663 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001664 }
John Bauman89401822014-05-06 15:04:28 -04001665 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001666 case FORMAT_A8B8G8R8:
1667 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001668 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001669 Pointer<Byte> buffer = cBuffer + x * 4;
1670 Short4 value = *Pointer<Short4>(buffer);
1671
1672 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
1673 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
1674 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
1675 {
1676 Short4 masked = value;
1677 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1678 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1679 c01 |= masked;
1680 }
1681
1682 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1683 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1684 c01 |= value;
1685 *Pointer<Short4>(buffer) = c01;
1686
1687 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1688 value = *Pointer<Short4>(buffer);
1689
1690 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
1691 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
1692 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
1693 {
1694 Short4 masked = value;
1695 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1696 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1697 c23 |= masked;
1698 }
1699
1700 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1701 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1702 c23 |= value;
1703 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001704 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001705 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001706 case FORMAT_A8:
1707 if(rgbaWriteMask & 0x00000008)
1708 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001709 Pointer<Byte> buffer = cBuffer + 1 * x;
1710 Short4 value;
John Bauman66b8ab22014-05-06 15:57:45 -04001711 Insert(value, *Pointer<Short>(buffer), 0);
1712 Int pitch = *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1713 Insert(value, *Pointer<Short>(buffer + pitch), 1);
1714 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1715
1716 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1717 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
1718 current.w |= value;
1719
1720 *Pointer<Short>(buffer) = Extract(current.w, 0);
1721 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1722 }
1723 break;
John Bauman89401822014-05-06 15:04:28 -04001724 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001725 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001726 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001727
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001728 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001729
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001730 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001731 {
1732 Short4 masked = value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001733 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1734 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001735 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001736 }
1737
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001738 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1739 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001740 current.x |= value;
1741 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001742
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001743 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001744
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001745 value = *Pointer<Short4>(buffer);
1746
1747 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001748 {
1749 Short4 masked = value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001750 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1751 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001752 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001753 }
1754
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001755 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1756 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001757 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001758 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001759 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001760 break;
1761 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001762 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001763 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001764
John Bauman89401822014-05-06 15:04:28 -04001765 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001766 Short4 value = *Pointer<Short4>(buffer);
1767
1768 if(rgbaWriteMask != 0x0000000F)
1769 {
1770 Short4 masked = value;
1771 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1772 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1773 current.x |= masked;
1774 }
1775
1776 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1777 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
1778 current.x |= value;
1779 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001780 }
1781
John Bauman89401822014-05-06 15:04:28 -04001782 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001783 Short4 value = *Pointer<Short4>(buffer + 8);
1784
1785 if(rgbaWriteMask != 0x0000000F)
1786 {
1787 Short4 masked = value;
1788 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1789 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1790 current.y |= masked;
1791 }
1792
1793 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1794 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
1795 current.y |= value;
1796 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001797 }
1798
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001799 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1800
1801 {
1802 Short4 value = *Pointer<Short4>(buffer);
1803
1804 if(rgbaWriteMask != 0x0000000F)
1805 {
1806 Short4 masked = value;
1807 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1808 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1809 current.z |= masked;
1810 }
1811
1812 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1813 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
1814 current.z |= value;
1815 *Pointer<Short4>(buffer) = current.z;
1816 }
1817
1818 {
1819 Short4 value = *Pointer<Short4>(buffer + 8);
1820
1821 if(rgbaWriteMask != 0x0000000F)
1822 {
1823 Short4 masked = value;
1824 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1825 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1826 current.w |= masked;
1827 }
1828
1829 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1830 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
1831 current.w |= value;
1832 *Pointer<Short4>(buffer + 8) = current.w;
1833 }
John Bauman89401822014-05-06 15:04:28 -04001834 }
1835 break;
1836 default:
1837 ASSERT(false);
1838 }
1839 }
1840
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001841 void PixelRoutine::blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001842 {
1843 switch(blendFactorActive)
1844 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001845 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001846 // Optimized
1847 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001848 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001849 // Optimized
1850 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001851 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001852 blendFactor.x = oC.x;
1853 blendFactor.y = oC.y;
1854 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001855 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001856 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001857 blendFactor.x = Float4(1.0f) - oC.x;
1858 blendFactor.y = Float4(1.0f) - oC.y;
1859 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001860 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001861 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001862 blendFactor.x = pixel.x;
1863 blendFactor.y = pixel.y;
1864 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001865 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001866 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001867 blendFactor.x = Float4(1.0f) - pixel.x;
1868 blendFactor.y = Float4(1.0f) - pixel.y;
1869 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001870 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001871 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001872 blendFactor.x = oC.w;
1873 blendFactor.y = oC.w;
1874 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001875 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001876 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001877 blendFactor.x = Float4(1.0f) - oC.w;
1878 blendFactor.y = Float4(1.0f) - oC.w;
1879 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001880 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001881 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001882 blendFactor.x = pixel.w;
1883 blendFactor.y = pixel.w;
1884 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001885 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001886 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001887 blendFactor.x = Float4(1.0f) - pixel.w;
1888 blendFactor.y = Float4(1.0f) - pixel.w;
1889 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001890 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001891 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001892 blendFactor.x = Float4(1.0f) - pixel.w;
1893 blendFactor.x = Min(blendFactor.x, oC.w);
1894 blendFactor.y = blendFactor.x;
1895 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001896 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001897 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001898 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0]));
1899 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1]));
1900 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001901 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001902 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001903 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1904 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1905 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001906 break;
1907 default:
1908 ASSERT(false);
1909 }
1910 }
1911
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001912 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001913 {
1914 switch(blendFactorAlphaActive)
1915 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001916 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001917 // Optimized
1918 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001919 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001920 // Optimized
1921 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001922 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001923 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001925 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001926 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001927 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001928 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001929 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001931 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001932 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001933 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001934 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001935 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001936 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001937 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001938 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001939 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001940 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001941 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001942 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001943 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001944 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001946 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001947 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001948 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001949 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001950 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001951 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001952 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001953 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001954 break;
1955 default:
1956 ASSERT(false);
1957 }
1958 }
1959
John Bauman19bac1e2014-05-06 15:23:49 -04001960 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04001961 {
1962 if(!state.alphaBlendActive)
1963 {
1964 return;
1965 }
1966
1967 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001968 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04001969
Alexis Hetu96517182015-04-15 10:30:23 -04001970 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04001971 Short4 c01;
1972 Short4 c23;
1973
John Bauman89401822014-05-06 15:04:28 -04001974 switch(state.targetFormat[index])
1975 {
John Bauman89401822014-05-06 15:04:28 -04001976 case FORMAT_R32F:
1977 buffer = cBuffer;
1978 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04001979 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
1980 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
John Bauman89401822014-05-06 15:04:28 -04001981 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1982 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04001983 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
1984 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
1985 pixel.y = Float4(1.0f);
1986 pixel.z = Float4(1.0f);
1987 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001988 break;
1989 case FORMAT_G32R32F:
1990 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001991 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
John Bauman89401822014-05-06 15:04:28 -04001992 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001993 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
1994 pixel.z = pixel.x;
1995 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
1996 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
1997 pixel.y = pixel.z;
1998 pixel.z = Float4(1.0f);
1999 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002000 break;
2001 case FORMAT_A32B32G32R32F:
2002 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002003 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2004 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002005 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002006 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2007 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2008 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002009 break;
2010 default:
2011 ASSERT(false);
2012 }
2013
2014 if(postBlendSRGB && state.writeSRGB)
2015 {
John Bauman19bac1e2014-05-06 15:23:49 -04002016 sRGBtoLinear(pixel.x);
2017 sRGBtoLinear(pixel.y);
2018 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002019 }
2020
2021 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002022 Vector4f sourceFactor;
2023 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002024
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002025 blendFactor(r, sourceFactor, oC, pixel, state.sourceBlendFactor);
2026 blendFactor(r, destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002027
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002028 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002029 {
John Bauman19bac1e2014-05-06 15:23:49 -04002030 oC.x *= sourceFactor.x;
2031 oC.y *= sourceFactor.y;
2032 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002033 }
2034
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002035 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002036 {
John Bauman19bac1e2014-05-06 15:23:49 -04002037 pixel.x *= destFactor.x;
2038 pixel.y *= destFactor.y;
2039 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002040 }
2041
2042 switch(state.blendOperation)
2043 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002044 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002045 oC.x += pixel.x;
2046 oC.y += pixel.y;
2047 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002048 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002049 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002050 oC.x -= pixel.x;
2051 oC.y -= pixel.y;
2052 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002053 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002054 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002055 oC.x = pixel.x - oC.x;
2056 oC.y = pixel.y - oC.y;
2057 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002058 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002059 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002060 oC.x = Min(oC.x, pixel.x);
2061 oC.y = Min(oC.y, pixel.y);
2062 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002063 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002064 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002065 oC.x = Max(oC.x, pixel.x);
2066 oC.y = Max(oC.y, pixel.y);
2067 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002068 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002069 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002070 // No operation
2071 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002072 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002073 oC.x = pixel.x;
2074 oC.y = pixel.y;
2075 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002076 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002077 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002078 oC.x = Float4(0.0f);
2079 oC.y = Float4(0.0f);
2080 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002081 break;
2082 default:
2083 ASSERT(false);
2084 }
2085
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002086 blendFactorAlpha(r, sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2087 blendFactorAlpha(r, destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002088
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002089 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002090 {
John Bauman19bac1e2014-05-06 15:23:49 -04002091 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002092 }
2093
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002094 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002095 {
John Bauman19bac1e2014-05-06 15:23:49 -04002096 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002097 }
2098
2099 switch(state.blendOperationAlpha)
2100 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002101 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002102 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002103 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002104 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002105 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002106 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002107 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002108 pixel.w -= oC.w;
2109 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002110 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002111 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002112 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002113 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002114 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002115 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002116 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002117 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002118 // No operation
2119 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002120 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002121 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002122 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002123 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002124 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002125 break;
2126 default:
2127 ASSERT(false);
2128 }
2129 }
2130
John Bauman19bac1e2014-05-06 15:23:49 -04002131 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002132 {
John Bauman89401822014-05-06 15:04:28 -04002133 switch(state.targetFormat[index])
2134 {
John Bauman89401822014-05-06 15:04:28 -04002135 case FORMAT_R32F:
2136 break;
2137 case FORMAT_G32R32F:
John Bauman19bac1e2014-05-06 15:23:49 -04002138 oC.z = oC.x;
2139 oC.x = UnpackLow(oC.x, oC.y);
2140 oC.z = UnpackHigh(oC.z, oC.y);
2141 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002142 break;
2143 case FORMAT_A32B32G32R32F:
John Bauman19bac1e2014-05-06 15:23:49 -04002144 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002145 break;
2146 default:
2147 ASSERT(false);
2148 }
2149
2150 int rgbaWriteMask = state.colorWriteActive(index);
2151
2152 Int xMask; // Combination of all masks
2153
2154 if(state.depthTestActive)
2155 {
2156 xMask = zMask;
2157 }
2158 else
2159 {
2160 xMask = cMask;
2161 }
2162
2163 if(state.stencilActive)
2164 {
2165 xMask &= sMask;
2166 }
2167
2168 Pointer<Byte> buffer;
2169 Float4 value;
2170
2171 switch(state.targetFormat[index])
2172 {
2173 case FORMAT_R32F:
2174 if(rgbaWriteMask & 0x00000001)
2175 {
2176 buffer = cBuffer + 4 * x;
2177
2178 // FIXME: movlps
2179 value.x = *Pointer<Float>(buffer + 0);
2180 value.y = *Pointer<Float>(buffer + 4);
2181
2182 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2183
2184 // FIXME: movhps
2185 value.z = *Pointer<Float>(buffer + 0);
2186 value.w = *Pointer<Float>(buffer + 4);
2187
John Bauman19bac1e2014-05-06 15:23:49 -04002188 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002189 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002190 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002191
2192 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002193 *Pointer<Float>(buffer + 0) = oC.x.z;
2194 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002195
2196 buffer -= *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2197
2198 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002199 *Pointer<Float>(buffer + 0) = oC.x.x;
2200 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002201 }
2202 break;
2203 case FORMAT_G32R32F:
2204 buffer = cBuffer + 8 * x;
2205
2206 value = *Pointer<Float4>(buffer);
2207
2208 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2209 {
2210 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002211 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04002212 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002213 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002214 }
2215
John Bauman19bac1e2014-05-06 15:23:49 -04002216 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002217 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002218 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2219 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002220
2221 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2222
2223 value = *Pointer<Float4>(buffer);
2224
2225 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2226 {
2227 Float4 masked;
2228
2229 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002230 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04002231 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002232 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002233 }
2234
John Bauman19bac1e2014-05-06 15:23:49 -04002235 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002236 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002237 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2238 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002239 break;
2240 case FORMAT_A32B32G32R32F:
2241 buffer = cBuffer + 16 * x;
2242
2243 {
2244 value = *Pointer<Float4>(buffer, 16);
2245
2246 if(rgbaWriteMask != 0x0000000F)
2247 {
2248 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002249 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002250 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002251 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002252 }
2253
John Bauman19bac1e2014-05-06 15:23:49 -04002254 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002255 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002256 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2257 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002258 }
2259
2260 {
2261 value = *Pointer<Float4>(buffer + 16, 16);
2262
2263 if(rgbaWriteMask != 0x0000000F)
2264 {
2265 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002266 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002267 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002268 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002269 }
2270
John Bauman19bac1e2014-05-06 15:23:49 -04002271 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002272 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002273 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2274 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002275 }
2276
2277 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2278
2279 {
2280 value = *Pointer<Float4>(buffer, 16);
2281
2282 if(rgbaWriteMask != 0x0000000F)
2283 {
2284 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002285 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002286 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002287 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002288 }
2289
John Bauman19bac1e2014-05-06 15:23:49 -04002290 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002291 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002292 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2293 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002294 }
2295
2296 {
2297 value = *Pointer<Float4>(buffer + 16, 16);
2298
2299 if(rgbaWriteMask != 0x0000000F)
2300 {
2301 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002302 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002303 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002304 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002305 }
2306
John Bauman19bac1e2014-05-06 15:23:49 -04002307 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002308 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002309 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2310 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002311 }
2312 break;
2313 default:
2314 ASSERT(false);
2315 }
2316 }
2317
John Bauman89401822014-05-06 15:04:28 -04002318 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2319 {
John Bauman19bac1e2014-05-06 15:23:49 -04002320 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002321 }
2322
Nicolas Capense1a50af2015-05-13 16:48:18 -04002323 void PixelRoutine::sRGBtoLinear16_12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002324 {
John Bauman19bac1e2014-05-06 15:23:49 -04002325 c.x = As<UShort4>(c.x) >> 4;
2326 c.y = As<UShort4>(c.y) >> 4;
2327 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002328
2329 sRGBtoLinear12_16(r, c);
2330 }
2331
Alexis Hetu96517182015-04-15 10:30:23 -04002332 void PixelRoutine::sRGBtoLinear12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002333 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04002334 Pointer<Byte> LUT = r.constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002335
John Bauman19bac1e2014-05-06 15:23:49 -04002336 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2337 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2338 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2339 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002340
John Bauman19bac1e2014-05-06 15:23:49 -04002341 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2342 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2343 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2344 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002345
John Bauman19bac1e2014-05-06 15:23:49 -04002346 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2347 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2348 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2349 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002350 }
2351
Nicolas Capense1a50af2015-05-13 16:48:18 -04002352 void PixelRoutine::linearToSRGB16_12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002353 {
John Bauman19bac1e2014-05-06 15:23:49 -04002354 c.x = As<UShort4>(c.x) >> 4;
2355 c.y = As<UShort4>(c.y) >> 4;
2356 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002357
2358 linearToSRGB12_16(r, c);
2359 }
2360
Alexis Hetu96517182015-04-15 10:30:23 -04002361 void PixelRoutine::linearToSRGB12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002362 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04002363 Pointer<Byte> LUT = r.constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002364
John Bauman19bac1e2014-05-06 15:23:49 -04002365 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2366 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2367 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2368 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002369
John Bauman19bac1e2014-05-06 15:23:49 -04002370 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2371 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2372 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2373 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002374
John Bauman19bac1e2014-05-06 15:23:49 -04002375 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2376 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2377 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2378 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002379 }
2380
John Bauman89401822014-05-06 15:04:28 -04002381 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2382 {
2383 Float4 linear = x * x;
2384 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2385
2386 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2387 }
2388
John Bauman19bac1e2014-05-06 15:23:49 -04002389 bool PixelRoutine::colorUsed()
2390 {
2391 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2392 }
John Bauman89401822014-05-06 15:04:28 -04002393}