blob: 4c2ce1bfcc0eb9ec2f298b9e14ee5e4319ccfb14 [file] [log] [blame]
// SwiftShader Software Renderer
//
// Copyright(c) 2005-2013 TransGaming Inc.
//
// All rights reserved. No part of this software may be copied, distributed, transmitted,
// transcribed, stored in a retrieval system, translated into any human or computer
// language by any means, or disclosed to third parties without the explicit written
// agreement of TransGaming Inc. Without such an agreement, no rights or licenses, express
// or implied, including but not limited to any patent rights, are granted to you.
//
11
12#include "PixelRoutine.hpp"
13
14#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040015#include "QuadRasterizer.hpp"
16#include "Surface.hpp"
17#include "Primitive.hpp"
18#include "CPUID.hpp"
19#include "SamplerCore.hpp"
20#include "Constants.hpp"
21#include "Debug.hpp"
22
John Bauman89401822014-05-06 15:04:28 -040023namespace sw
24{
25 extern bool complementaryDepthBuffer;
26 extern bool postBlendSRGB;
27 extern bool exactColorRounding;
Alexis Hetuf2a8c372015-07-13 11:08:41 -040028 extern bool forceClearRegisters;
John Bauman89401822014-05-06 15:04:28 -040029
Alexis Hetuf2a8c372015-07-13 11:08:41 -040030 PixelRoutine::Registers::Registers(const PixelShader *shader) :
31 QuadRasterizer::Registers(),
32 rf(shader && shader->dynamicallyIndexedTemporaries),
33 vf(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040034 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040035 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040036 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040037 for(int i = 0; i < 10; i++)
38 {
39 vf[i].x = Float4(0.0f);
40 vf[i].y = Float4(0.0f);
41 vf[i].z = Float4(0.0f);
42 vf[i].w = Float4(0.0f);
43 }
John Bauman89401822014-05-06 15:04:28 -040044 }
45 }
46
Alexis Hetuf2a8c372015-07-13 11:08:41 -040047 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader)
48 {
49 }
50
John Bauman89401822014-05-06 15:04:28 -040051 PixelRoutine::~PixelRoutine()
52 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040053 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040054 {
55 delete sampler[i];
56 }
57 }
58
Alexis Hetuf2a8c372015-07-13 11:08:41 -040059 void PixelRoutine::quad(QuadRasterizer::Registers &rBase, Pointer<Byte> cBuffer[4], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040060 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040061 Registers& r = *static_cast<Registers*>(&rBase);
62
John Bauman89401822014-05-06 15:04:28 -040063 #if PERF_PROFILE
64 Long pipeTime = Ticks();
65 #endif
66
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040067 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040068 {
69 sampler[i] = new SamplerCore(r.constants, state.sampler[i]);
70 }
71
72 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040073
74 Int zMask[4]; // Depth mask
75 Int sMask[4]; // Stencil mask
76
77 for(unsigned int q = 0; q < state.multiSample; q++)
78 {
79 zMask[q] = cMask[q];
80 sMask[q] = cMask[q];
81 }
82
83 for(unsigned int q = 0; q < state.multiSample; q++)
84 {
85 stencilTest(r, sBuffer, q, x, sMask[q], cMask[q]);
86 }
87
88 Float4 f;
89
John Bauman89401822014-05-06 15:04:28 -040090 Float4 (&z)[4] = r.z;
John Bauman19bac1e2014-05-06 15:23:49 -040091 Float4 &w = r.w;
John Bauman89401822014-05-06 15:04:28 -040092 Float4 &rhw = r.rhw;
93 Float4 rhwCentroid;
94
95 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040096
John Bauman19bac1e2014-05-06 15:23:49 -040097 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040098 {
99 for(unsigned int q = 0; q < state.multiSample; q++)
100 {
101 Float4 x = xxxx;
102
103 if(state.multiSample > 1)
104 {
105 x -= *Pointer<Float4>(r.constants + OFFSET(Constants,X) + q * sizeof(float4));
106 }
107
108 z[q] = interpolate(x, r.Dz[q], z[q], r.primitive + OFFSET(Primitive,z), false, false);
109 }
110 }
111
112 Bool depthPass = false;
113
114 if(earlyDepthTest)
115 {
116 for(unsigned int q = 0; q < state.multiSample; q++)
117 {
118 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
119 }
120 }
121
122 If(depthPass || Bool(!earlyDepthTest))
123 {
124 #if PERF_PROFILE
125 Long interpTime = Ticks();
126 #endif
127
Nicolas Capens66be2452015-01-27 14:58:57 -0500128 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(r.primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400129
John Bauman89401822014-05-06 15:04:28 -0400130 // Centroid locations
131 Float4 XXXX = Float4(0.0f);
132 Float4 YYYY = Float4(0.0f);
133
134 if(state.centroid)
135 {
136 Float4 WWWW(1.0e-9f);
137
138 for(unsigned int q = 0; q < state.multiSample; q++)
139 {
140 XXXX += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
141 YYYY += *Pointer<Float4>(r.constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
142 WWWW += *Pointer<Float4>(r.constants + OFFSET(Constants,weight) + 16 * cMask[q]);
143 }
144
145 WWWW = Rcp_pp(WWWW);
146 XXXX *= WWWW;
147 YYYY *= WWWW;
148
149 XXXX += xxxx;
150 YYYY += yyyy;
151 }
152
John Bauman19bac1e2014-05-06 15:23:49 -0400153 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400154 {
John Bauman19bac1e2014-05-06 15:23:49 -0400155 w = interpolate(xxxx, r.Dw, rhw, r.primitive + OFFSET(Primitive,w), false, false);
156 rhw = reciprocal(w);
John Bauman89401822014-05-06 15:04:28 -0400157
158 if(state.centroid)
159 {
160 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive,w), false, false));
161 }
162 }
163
164 for(int interpolant = 0; interpolant < 10; interpolant++)
165 {
166 for(int component = 0; component < 4; component++)
167 {
John Bauman89401822014-05-06 15:04:28 -0400168 if(state.interpolant[interpolant].component & (1 << component))
169 {
170 if(!state.interpolant[interpolant].centroid)
171 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400172 r.vf[interpolant][component] = interpolate(xxxx, r.Dv[interpolant][component], rhw, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400173 }
174 else
175 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400176 r.vf[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, r.primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400177 }
178 }
179 }
180
181 Float4 rcp;
182
183 switch(state.interpolant[interpolant].project)
184 {
185 case 0:
186 break;
187 case 1:
John Bauman19bac1e2014-05-06 15:23:49 -0400188 rcp = reciprocal(r.vf[interpolant].y);
189 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400190 break;
191 case 2:
John Bauman19bac1e2014-05-06 15:23:49 -0400192 rcp = reciprocal(r.vf[interpolant].z);
193 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
194 r.vf[interpolant].y = r.vf[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400195 break;
196 case 3:
John Bauman19bac1e2014-05-06 15:23:49 -0400197 rcp = reciprocal(r.vf[interpolant].w);
198 r.vf[interpolant].x = r.vf[interpolant].x * rcp;
199 r.vf[interpolant].y = r.vf[interpolant].y * rcp;
200 r.vf[interpolant].z = r.vf[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400201 break;
202 }
203 }
204
205 if(state.fog.component)
206 {
207 f = interpolate(xxxx, r.Df, rhw, r.primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
208 }
209
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400210 setBuiltins(r, x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400211
212 #if PERF_PROFILE
213 r.cycles[PERF_INTERP] += Ticks() - interpTime;
214 #endif
215
216 Bool alphaPass = true;
217
218 if(colorUsed())
219 {
220 #if PERF_PROFILE
221 Long shaderTime = Ticks();
222 #endif
223
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400224 applyShader(r, cMask);
John Bauman89401822014-05-06 15:04:28 -0400225
226 #if PERF_PROFILE
227 r.cycles[PERF_SHADER] += Ticks() - shaderTime;
228 #endif
229
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400230 alphaPass = alphaTest(r, cMask);
John Bauman89401822014-05-06 15:04:28 -0400231
John Bauman19bac1e2014-05-06 15:23:49 -0400232 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400233 {
234 for(unsigned int q = 0; q < state.multiSample; q++)
235 {
236 zMask[q] &= cMask[q];
237 sMask[q] &= cMask[q];
238 }
239 }
240 }
241
242 If(alphaPass)
243 {
244 if(!earlyDepthTest)
245 {
246 for(unsigned int q = 0; q < state.multiSample; q++)
247 {
248 depthPass = depthPass || depthTest(r, zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
249 }
250 }
251
252 #if PERF_PROFILE
253 Long ropTime = Ticks();
254 #endif
255
256 If(depthPass || Bool(earlyDepthTest))
257 {
258 for(unsigned int q = 0; q < state.multiSample; q++)
259 {
260 if(state.multiSampleMask & (1 << q))
261 {
262 writeDepth(r, zBuffer, q, x, z[q], zMask[q]);
263
264 if(state.occlusionEnabled)
265 {
266 r.occlusion += *Pointer<UInt>(r.constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
267 }
268 }
269 }
270
271 if(colorUsed())
272 {
273 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400274 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400275 #endif
276
Alexis Hetuf2a8c372015-07-13 11:08:41 -0400277 rasterOperation(r, f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400278 }
279 }
280
281 #if PERF_PROFILE
282 r.cycles[PERF_ROP] += Ticks() - ropTime;
283 #endif
284 }
285 }
286
287 for(unsigned int q = 0; q < state.multiSample; q++)
288 {
289 if(state.multiSampleMask & (1 << q))
290 {
291 writeStencil(r, sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
292 }
293 }
294
295 #if PERF_PROFILE
296 r.cycles[PERF_PIPE] += Ticks() - pipeTime;
297 #endif
298 }
299
John Bauman89401822014-05-06 15:04:28 -0400300 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
301 {
302 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
303
304 if(!flat)
305 {
306 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
307 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
308
309 if(perspective)
310 {
311 interpolant *= rhw;
312 }
313 }
314
315 return interpolant;
316 }
317
318 void PixelRoutine::stencilTest(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
319 {
320 if(!state.stencilActive)
321 {
322 return;
323 }
324
325 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
326
327 Pointer<Byte> buffer = sBuffer + 2 * x;
328
329 if(q > 0)
330 {
331 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
332 }
333
334 Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
335 Byte8 valueCCW = value;
336
337 if(!state.noStencilMask)
338 {
339 value &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].testMaskQ));
340 }
341
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400342 stencilTest(r, value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400343
344 if(state.twoSidedStencil)
345 {
346 if(!state.noStencilMaskCCW)
347 {
348 valueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].testMaskQ));
349 }
350
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400351 stencilTest(r, valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400352
353 value &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
354 valueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
355 value |= valueCCW;
356 }
357
358 sMask = SignMask(value) & cMask;
359 }
360
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400361 void PixelRoutine::stencilTest(Registers &r, Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400362 {
363 Byte8 equal;
364
365 switch(stencilCompareMode)
366 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400367 case STENCIL_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400368 value = Byte8(0xFFFFFFFFFFFFFFFF);
369 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400370 case STENCIL_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400371 value = Byte8(0x0000000000000000);
372 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400373 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400374 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
375 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
376 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400377 case STENCIL_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400378 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
379 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400380 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -0400381 value = CmpEQ(value, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
382 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
383 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400384 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400385 equal = value;
386 equal = CmpEQ(equal, *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
387 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
388 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
389 value |= equal;
390 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400391 case STENCIL_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -0400392 equal = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
393 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
394 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
395 value = equal;
396 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400397 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400398 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
399 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
400 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
401 break;
402 default:
403 ASSERT(false);
404 }
405 }
406
407 Bool PixelRoutine::depthTest(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
408 {
409 if(!state.depthTestActive)
410 {
411 return true;
412 }
413
414 Float4 Z = z;
415
John Bauman19bac1e2014-05-06 15:23:49 -0400416 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400417 {
418 if(complementaryDepthBuffer)
419 {
John Bauman19bac1e2014-05-06 15:23:49 -0400420 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -0400421 }
422 else
423 {
424 Z = r.oDepth;
425 }
426 }
427
428 Pointer<Byte> buffer;
429 Int pitch;
430
431 if(!state.quadLayoutDepthBuffer)
432 {
433 buffer = zBuffer + 4 * x;
434 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
435 }
436 else
437 {
438 buffer = zBuffer + 8 * x;
439 }
440
441 if(q > 0)
442 {
443 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
444 }
445
446 Float4 zValue;
447
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400448 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400449 {
450 if(!state.quadLayoutDepthBuffer)
451 {
452 // FIXME: Properly optimizes?
453 zValue.xy = *Pointer<Float4>(buffer);
454 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
455 }
456 else
457 {
458 zValue = *Pointer<Float4>(buffer, 16);
459 }
460 }
461
462 Int4 zTest;
463
464 switch(state.depthCompareMode)
465 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400466 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400467 // Optimized
468 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400469 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400470 // Optimized
471 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400472 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400473 zTest = CmpEQ(zValue, Z);
474 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400475 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400476 zTest = CmpNEQ(zValue, Z);
477 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400478 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400479 if(complementaryDepthBuffer)
480 {
481 zTest = CmpLT(zValue, Z);
482 }
483 else
484 {
485 zTest = CmpNLE(zValue, Z);
486 }
487 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400488 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400489 if(complementaryDepthBuffer)
490 {
491 zTest = CmpNLT(zValue, Z);
492 }
493 else
494 {
495 zTest = CmpLE(zValue, Z);
496 }
497 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400498 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400499 if(complementaryDepthBuffer)
500 {
501 zTest = CmpLE(zValue, Z);
502 }
503 else
504 {
505 zTest = CmpNLT(zValue, Z);
506 }
507 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400508 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400509 if(complementaryDepthBuffer)
510 {
511 zTest = CmpNLE(zValue, Z);
512 }
513 else
514 {
515 zTest = CmpLT(zValue, Z);
516 }
517 break;
518 default:
519 ASSERT(false);
520 }
521
522 switch(state.depthCompareMode)
523 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400524 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400525 zMask = cMask;
526 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400527 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400528 zMask = 0x0;
529 break;
530 default:
531 zMask = SignMask(zTest) & cMask;
532 break;
533 }
534
535 if(state.stencilActive)
536 {
537 zMask &= sMask;
538 }
539
540 return zMask != 0;
541 }
542
John Bauman89401822014-05-06 15:04:28 -0400543 void PixelRoutine::alphaTest(Registers &r, Int &aMask, Short4 &alpha)
544 {
545 Short4 cmp;
546 Short4 equal;
547
548 switch(state.alphaCompareMode)
549 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400550 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400551 aMask = 0xF;
552 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400553 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400554 aMask = 0x0;
555 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400556 case ALPHA_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400557 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
558 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
559 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400560 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
John Bauman89401822014-05-06 15:04:28 -0400561 cmp = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
562 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
563 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400564 case ALPHA_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400565 cmp = CmpGT(*Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)), alpha);
566 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
567 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400568 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
John Bauman89401822014-05-06 15:04:28 -0400569 equal = CmpEQ(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
570 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
571 cmp |= equal;
572 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
573 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400574 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
John Bauman89401822014-05-06 15:04:28 -0400575 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
576 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
577 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400578 case ALPHA_GREATER: // a > b
John Bauman89401822014-05-06 15:04:28 -0400579 cmp = CmpGT(alpha, *Pointer<Short4>(r.data + OFFSET(DrawData,factor.alphaReference4)));
580 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
581 break;
582 default:
583 ASSERT(false);
584 }
585 }
586
587 void PixelRoutine::alphaToCoverage(Registers &r, Int cMask[4], Float4 &alpha)
588 {
589 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c0)));
590 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c1)));
591 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c2)));
592 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(r.data + OFFSET(DrawData,a2c3)));
593
594 Int aMask0 = SignMask(coverage0);
595 Int aMask1 = SignMask(coverage1);
596 Int aMask2 = SignMask(coverage2);
597 Int aMask3 = SignMask(coverage3);
598
599 cMask[0] &= aMask0;
600 cMask[1] &= aMask1;
601 cMask[2] &= aMask2;
602 cMask[3] &= aMask3;
603 }
604
John Bauman19bac1e2014-05-06 15:23:49 -0400605 void PixelRoutine::fogBlend(Registers &r, Vector4f &c0, Float4 &fog, Float4 &z, Float4 &rhw)
John Bauman89401822014-05-06 15:04:28 -0400606 {
607 if(!state.fogActive)
608 {
609 return;
610 }
611
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400612 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400613 {
614 pixelFog(r, fog, z, rhw);
615
John Bauman19bac1e2014-05-06 15:23:49 -0400616 fog = Min(fog, Float4(1.0f));
617 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400618 }
619
John Bauman19bac1e2014-05-06 15:23:49 -0400620 c0.x -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
621 c0.y -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
622 c0.z -= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400623
John Bauman19bac1e2014-05-06 15:23:49 -0400624 c0.x *= fog;
625 c0.y *= fog;
626 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400627
John Bauman19bac1e2014-05-06 15:23:49 -0400628 c0.x += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[0]));
629 c0.y += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[1]));
630 c0.z += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400631 }
632
633 void PixelRoutine::pixelFog(Registers &r, Float4 &visibility, Float4 &z, Float4 &rhw)
634 {
635 Float4 &zw = visibility;
636
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400637 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400638 {
639 if(state.wBasedFog)
640 {
641 zw = rhw;
642 }
643 else
644 {
645 if(complementaryDepthBuffer)
646 {
John Bauman19bac1e2014-05-06 15:23:49 -0400647 zw = Float4(1.0f) - z;
John Bauman89401822014-05-06 15:04:28 -0400648 }
649 else
650 {
651 zw = z;
652 }
653 }
654 }
655
656 switch(state.pixelFogMode)
657 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400658 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400659 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400660 case FOG_LINEAR:
John Bauman89401822014-05-06 15:04:28 -0400661 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.scale));
662 zw += *Pointer<Float4>(r.data + OFFSET(DrawData,fog.offset));
663 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400664 case FOG_EXP:
John Bauman89401822014-05-06 15:04:28 -0400665 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400666 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400667 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400668 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400669 zw *= zw;
Nicolas Capensa36f3f92015-08-04 15:34:26 -0400670 zw *= *Pointer<Float4>(r.data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400671 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400672 break;
673 default:
674 ASSERT(false);
675 }
676 }
677
John Bauman89401822014-05-06 15:04:28 -0400678 void PixelRoutine::writeDepth(Registers &r, Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
679 {
680 if(!state.depthWriteEnable)
681 {
682 return;
683 }
684
685 Float4 Z = z;
686
John Bauman19bac1e2014-05-06 15:23:49 -0400687 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400688 {
689 if(complementaryDepthBuffer)
690 {
John Bauman19bac1e2014-05-06 15:23:49 -0400691 Z = Float4(1.0f) - r.oDepth;
John Bauman89401822014-05-06 15:04:28 -0400692 }
693 else
694 {
695 Z = r.oDepth;
696 }
697 }
698
699 Pointer<Byte> buffer;
700 Int pitch;
701
702 if(!state.quadLayoutDepthBuffer)
703 {
704 buffer = zBuffer + 4 * x;
705 pitch = *Pointer<Int>(r.data + OFFSET(DrawData,depthPitchB));
706 }
707 else
708 {
709 buffer = zBuffer + 8 * x;
710 }
711
712 if(q > 0)
713 {
714 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,depthSliceB));
715 }
716
717 Float4 zValue;
718
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400719 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400720 {
721 if(!state.quadLayoutDepthBuffer)
722 {
723 // FIXME: Properly optimizes?
724 zValue.xy = *Pointer<Float4>(buffer);
725 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
726 }
727 else
728 {
729 zValue = *Pointer<Float4>(buffer, 16);
730 }
731 }
732
733 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
734 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
735 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
736
737 if(!state.quadLayoutDepthBuffer)
738 {
739 // FIXME: Properly optimizes?
740 *Pointer<Float2>(buffer) = Float2(Z.xy);
741 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
742 }
743 else
744 {
745 *Pointer<Float4>(buffer, 16) = Z;
746 }
747 }
748
749 void PixelRoutine::writeStencil(Registers &r, Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
750 {
751 if(!state.stencilActive)
752 {
753 return;
754 }
755
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400756 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400757 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400758 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400759 {
760 return;
761 }
762 }
763
764 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
765 {
766 return;
767 }
768
769 Pointer<Byte> buffer = sBuffer + 2 * x;
770
771 if(q > 0)
772 {
773 buffer += q * *Pointer<Int>(r.data + OFFSET(DrawData,stencilSliceB));
774 }
775
776 Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
777
778 Byte8 newValue;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400779 stencilOperation(r, newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400780
781 if(!state.noStencilWriteMask)
782 {
783 Byte8 maskedValue = bufferValue;
784 newValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].writeMaskQ));
785 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
786 newValue |= maskedValue;
787 }
788
789 if(state.twoSidedStencil)
790 {
791 Byte8 newValueCCW;
792
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400793 stencilOperation(r, newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400794
795 if(!state.noStencilWriteMaskCCW)
796 {
797 Byte8 maskedValue = bufferValue;
798 newValueCCW &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].writeMaskQ));
799 maskedValue &= *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
800 newValueCCW |= maskedValue;
801 }
802
803 newValue &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,clockwiseMask));
804 newValueCCW &= *Pointer<Byte8>(r.primitive + OFFSET(Primitive,invClockwiseMask));
805 newValue |= newValueCCW;
806 }
807
808 newValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
809 bufferValue &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
810 newValue |= bufferValue;
811
812 *Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
813 }
814
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400815 void PixelRoutine::stencilOperation(Registers &r, Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400816 {
817 Byte8 &pass = newValue;
818 Byte8 fail;
819 Byte8 zFail;
820
821 stencilOperation(r, pass, bufferValue, stencilPassOperation, CCW);
822
823 if(stencilZFailOperation != stencilPassOperation)
824 {
825 stencilOperation(r, zFail, bufferValue, stencilZFailOperation, CCW);
826 }
827
828 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
829 {
830 stencilOperation(r, fail, bufferValue, stencilFailOperation, CCW);
831 }
832
833 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
834 {
835 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
836 {
837 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
838 zFail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
839 pass |= zFail;
840 }
841
842 pass &= *Pointer<Byte8>(r.constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
843 fail &= *Pointer<Byte8>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
844 pass |= fail;
845 }
846 }
847
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400848 void PixelRoutine::stencilOperation(Registers &r, Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400849 {
850 switch(operation)
851 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400852 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400853 output = bufferValue;
854 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400855 case OPERATION_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400856 output = Byte8(0x0000000000000000);
857 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400858 case OPERATION_REPLACE:
John Bauman89401822014-05-06 15:04:28 -0400859 output = *Pointer<Byte8>(r.data + OFFSET(DrawData,stencil[CCW].referenceQ));
860 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400861 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400862 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
863 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400864 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400865 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
866 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400867 case OPERATION_INVERT:
John Bauman89401822014-05-06 15:04:28 -0400868 output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
869 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400870 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400871 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
872 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400873 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400874 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
875 break;
876 default:
877 ASSERT(false);
878 }
879 }
880
Alexis Hetu96517182015-04-15 10:30:23 -0400881 void PixelRoutine::blendFactor(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400882 {
883 switch(blendFactorActive)
884 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400885 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400886 // Optimized
887 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400888 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400889 // Optimized
890 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400891 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400892 blendFactor.x = current.x;
893 blendFactor.y = current.y;
894 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400895 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400896 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400897 blendFactor.x = Short4(0xFFFFu) - current.x;
898 blendFactor.y = Short4(0xFFFFu) - current.y;
899 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400900 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400901 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400902 blendFactor.x = pixel.x;
903 blendFactor.y = pixel.y;
904 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400905 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400906 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400907 blendFactor.x = Short4(0xFFFFu) - pixel.x;
908 blendFactor.y = Short4(0xFFFFu) - pixel.y;
909 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400910 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400911 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400912 blendFactor.x = current.w;
913 blendFactor.y = current.w;
914 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400915 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400916 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400917 blendFactor.x = Short4(0xFFFFu) - current.w;
918 blendFactor.y = Short4(0xFFFFu) - current.w;
919 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400920 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400921 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400922 blendFactor.x = pixel.w;
923 blendFactor.y = pixel.w;
924 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400925 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400926 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400927 blendFactor.x = Short4(0xFFFFu) - pixel.w;
928 blendFactor.y = Short4(0xFFFFu) - pixel.w;
929 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400931 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400932 blendFactor.x = Short4(0xFFFFu) - pixel.w;
933 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
934 blendFactor.y = blendFactor.x;
935 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400936 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400937 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -0400938 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[0]));
939 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[1]));
940 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400941 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400942 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -0400943 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
944 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
945 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400946 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400947 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400948 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
949 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
950 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400951 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400952 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400953 blendFactor.x = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
954 blendFactor.y = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
955 blendFactor.z = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400956 break;
957 default:
958 ASSERT(false);
959 }
960 }
961
Alexis Hetu96517182015-04-15 10:30:23 -0400962 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400963 {
964 switch(blendFactorAlphaActive)
965 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400966 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400967 // Optimized
968 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400969 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400970 // Optimized
971 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400972 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400973 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400974 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400975 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400976 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400977 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400978 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400979 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400980 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400981 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400982 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400983 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400984 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400985 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400986 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400987 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400988 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400989 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400990 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400991 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400992 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400993 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400994 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400995 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400996 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400997 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400998 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400999 case BLEND_CONSTANT:
1000 case BLEND_CONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001001 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04001002 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001003 case BLEND_INVCONSTANT:
1004 case BLEND_INVCONSTANTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001005 blendFactor.w = *Pointer<Short4>(r.data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04001006 break;
1007 default:
1008 ASSERT(false);
1009 }
1010 }
1011
Maxime Grégoired9762742015-07-08 16:43:48 -04001012 void PixelRoutine::readPixel(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -04001013 {
John Bauman89401822014-05-06 15:04:28 -04001014 Short4 c01;
1015 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001016 Pointer<Byte> buffer;
John Bauman89401822014-05-06 15:04:28 -04001017
John Bauman89401822014-05-06 15:04:28 -04001018 switch(state.targetFormat[index])
1019 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001020 case FORMAT_R5G6B5:
1021 buffer = cBuffer + 2 * x;
1022 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
Maxime Grégoired9762742015-07-08 16:43:48 -04001023 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001024 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1025
1026 pixel.x = c01 & Short4(0xF800u);
1027 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1028 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1029 pixel.w = Short4(0xFFFFu);
1030 break;
John Bauman89401822014-05-06 15:04:28 -04001031 case FORMAT_A8R8G8B8:
1032 buffer = cBuffer + 4 * x;
1033 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001034 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001035 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001036 pixel.z = c01;
1037 pixel.y = c01;
1038 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1039 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1040 pixel.x = pixel.z;
1041 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1042 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1043 pixel.y = pixel.z;
1044 pixel.w = pixel.x;
1045 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1046 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1047 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1048 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001049 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001050 case FORMAT_A8B8G8R8:
1051 buffer = cBuffer + 4 * x;
1052 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001053 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001054 c23 = *Pointer<Short4>(buffer);
1055 pixel.z = c01;
1056 pixel.y = c01;
1057 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1058 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1059 pixel.x = pixel.z;
1060 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1061 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1062 pixel.y = pixel.z;
1063 pixel.w = pixel.x;
1064 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1065 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1066 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1067 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1068 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001069 case FORMAT_A8:
1070 buffer = cBuffer + 1 * x;
1071 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001072 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001073 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1074 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1075 pixel.x = Short4(0x0000);
1076 pixel.y = Short4(0x0000);
1077 pixel.z = Short4(0x0000);
1078 break;
John Bauman89401822014-05-06 15:04:28 -04001079 case FORMAT_X8R8G8B8:
1080 buffer = cBuffer + 4 * x;
1081 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001082 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001083 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001084 pixel.z = c01;
1085 pixel.y = c01;
1086 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1087 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1088 pixel.x = pixel.z;
1089 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1090 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1091 pixel.y = pixel.z;
1092 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1093 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1094 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1095 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001096 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001097 case FORMAT_X8B8G8R8:
1098 buffer = cBuffer + 4 * x;
1099 c01 = *Pointer<Short4>(buffer);
Maxime Grégoired9762742015-07-08 16:43:48 -04001100 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001101 c23 = *Pointer<Short4>(buffer);
1102 pixel.z = c01;
1103 pixel.y = c01;
1104 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1105 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1106 pixel.x = pixel.z;
1107 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1108 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1109 pixel.y = pixel.z;
1110 pixel.w = pixel.x;
1111 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1112 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1113 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1114 pixel.w = Short4(0xFFFFu);
1115 break;
John Bauman89401822014-05-06 15:04:28 -04001116 case FORMAT_A8G8R8B8Q:
1117 UNIMPLEMENTED();
Maxime Grégoired9762742015-07-08 16:43:48 -04001118 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1119 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1120 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1121 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001122 break;
1123 case FORMAT_X8G8R8B8Q:
1124 UNIMPLEMENTED();
Maxime Grégoired9762742015-07-08 16:43:48 -04001125 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1126 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1127 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1128 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001129 break;
1130 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001131 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001132 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1133 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Maxime Grégoired9762742015-07-08 16:43:48 -04001134 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001135 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1136 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1137 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001138 break;
1139 case FORMAT_G16R16:
1140 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001141 pixel.x = *Pointer<Short4>(buffer + 4 * x);
1142 buffer += *Pointer<Int>(r.data + OFFSET(DrawData, colorPitchB[index]));
1143 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001144 pixel.z = pixel.x;
1145 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1146 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1147 pixel.y = pixel.z;
1148 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1149 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1150 pixel.z = Short4(0xFFFFu);
1151 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001152 break;
1153 default:
1154 ASSERT(false);
1155 }
1156
1157 if(postBlendSRGB && state.writeSRGB)
1158 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001159 sRGBtoLinear16_12_16(r, pixel);
John Bauman89401822014-05-06 15:04:28 -04001160 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001161 }
1162
1163 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1164 {
1165 if(!state.alphaBlendActive)
1166 {
1167 return;
1168 }
1169
1170 Vector4s pixel;
1171 Short4 c01;
1172 Short4 c23;
1173
1174 readPixel(r, index, cBuffer, current, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001175
1176 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001177 Vector4s sourceFactor;
1178 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001179
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001180 blendFactor(r, sourceFactor, current, pixel, state.sourceBlendFactor);
1181 blendFactor(r, destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001182
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001183 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001184 {
John Bauman19bac1e2014-05-06 15:23:49 -04001185 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1186 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1187 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001188 }
1189
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001190 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001191 {
John Bauman19bac1e2014-05-06 15:23:49 -04001192 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1193 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1194 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001195 }
1196
1197 switch(state.blendOperation)
1198 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001199 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001200 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1201 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1202 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001203 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001204 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001205 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1206 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1207 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001208 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001209 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001210 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1211 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1212 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001213 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001214 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001215 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1216 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1217 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001218 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001219 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001220 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1221 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1222 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001223 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001224 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001225 // No operation
1226 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001227 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001228 current.x = pixel.x;
1229 current.y = pixel.y;
1230 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001231 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001232 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001233 current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1234 current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1235 current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001236 break;
1237 default:
1238 ASSERT(false);
1239 }
1240
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001241 blendFactorAlpha(r, sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1242 blendFactorAlpha(r, destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001243
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001244 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001245 {
John Bauman19bac1e2014-05-06 15:23:49 -04001246 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001247 }
1248
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001249 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001250 {
John Bauman19bac1e2014-05-06 15:23:49 -04001251 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001252 }
1253
1254 switch(state.blendOperationAlpha)
1255 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001256 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001257 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001258 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001259 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001260 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001261 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001262 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001263 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001264 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001265 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001266 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001267 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001268 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001269 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001270 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001271 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001272 // No operation
1273 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001274 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001275 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001276 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001277 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001278 current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001279 break;
1280 default:
1281 ASSERT(false);
1282 }
1283 }
1284
Maxime Grégoired9762742015-07-08 16:43:48 -04001285 void PixelRoutine::logicOperation(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
1286 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001287 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001288 {
1289 return;
1290 }
1291
1292 Vector4s pixel;
1293
1294 // Read pixel
1295 readPixel(r, index, cBuffer, current, x, pixel);
1296
1297 switch(state.logicalOperation)
1298 {
1299 case LOGICALOP_CLEAR:
1300 current.x = 0;
1301 current.y = 0;
1302 current.z = 0;
1303 break;
1304 case LOGICALOP_SET:
Nicolas Capens2afcc802015-08-04 10:34:43 -04001305 current.x = 0xFFFFu;
1306 current.y = 0xFFFFu;
1307 current.z = 0xFFFFu;
Maxime Grégoired9762742015-07-08 16:43:48 -04001308 break;
1309 case LOGICALOP_COPY:
1310 ASSERT(false); // Optimized out
1311 break;
1312 case LOGICALOP_COPY_INVERTED:
1313 current.x = ~current.x;
1314 current.y = ~current.y;
1315 current.z = ~current.z;
1316 break;
1317 case LOGICALOP_NOOP:
1318 current.x = pixel.x;
1319 current.y = pixel.y;
1320 current.z = pixel.z;
1321 break;
1322 case LOGICALOP_INVERT:
1323 current.x = ~pixel.x;
1324 current.y = ~pixel.y;
1325 current.z = ~pixel.z;
1326 break;
1327 case LOGICALOP_AND:
1328 current.x = pixel.x & current.x;
1329 current.y = pixel.y & current.y;
1330 current.z = pixel.z & current.z;
1331 break;
1332 case LOGICALOP_NAND:
1333 current.x = ~(pixel.x & current.x);
1334 current.y = ~(pixel.y & current.y);
1335 current.z = ~(pixel.z & current.z);
1336 break;
1337 case LOGICALOP_OR:
1338 current.x = pixel.x | current.x;
1339 current.y = pixel.y | current.y;
1340 current.z = pixel.z | current.z;
1341 break;
1342 case LOGICALOP_NOR:
1343 current.x = ~(pixel.x | current.x);
1344 current.y = ~(pixel.y | current.y);
1345 current.z = ~(pixel.z | current.z);
1346 break;
1347 case LOGICALOP_XOR:
1348 current.x = pixel.x ^ current.x;
1349 current.y = pixel.y ^ current.y;
1350 current.z = pixel.z ^ current.z;
1351 break;
1352 case LOGICALOP_EQUIV:
1353 current.x = ~(pixel.x ^ current.x);
1354 current.y = ~(pixel.y ^ current.y);
1355 current.z = ~(pixel.z ^ current.z);
1356 break;
1357 case LOGICALOP_AND_REVERSE:
1358 current.x = ~pixel.x & current.x;
1359 current.y = ~pixel.y & current.y;
1360 current.z = ~pixel.z & current.z;
1361 break;
1362 case LOGICALOP_AND_INVERTED:
1363 current.x = pixel.x & ~current.x;
1364 current.y = pixel.y & ~current.y;
1365 current.z = pixel.z & ~current.z;
1366 break;
1367 case LOGICALOP_OR_REVERSE:
1368 current.x = ~pixel.x | current.x;
1369 current.y = ~pixel.y | current.y;
1370 current.z = ~pixel.z | current.z;
1371 break;
1372 case LOGICALOP_OR_INVERTED:
1373 current.x = pixel.x | ~current.x;
1374 current.y = pixel.y | ~current.y;
1375 current.z = pixel.z | ~current.z;
1376 break;
1377 default:
1378 ASSERT(false);
1379 }
1380 }
1381
Alexis Hetu96517182015-04-15 10:30:23 -04001382 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001383 {
John Bauman89401822014-05-06 15:04:28 -04001384 if(postBlendSRGB && state.writeSRGB)
1385 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04001386 linearToSRGB16_12_16(r, current);
John Bauman89401822014-05-06 15:04:28 -04001387 }
1388
1389 if(exactColorRounding)
1390 {
1391 switch(state.targetFormat[index])
1392 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001393 case FORMAT_R5G6B5:
1394 // UNIMPLEMENTED(); // FIXME
1395 break;
John Bauman89401822014-05-06 15:04:28 -04001396 case FORMAT_X8G8R8B8Q:
1397 case FORMAT_A8G8R8B8Q:
1398 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001399 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001400 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001401 case FORMAT_A8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001402 {
John Bauman19bac1e2014-05-06 15:23:49 -04001403 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1404 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1405 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1406 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
John Bauman89401822014-05-06 15:04:28 -04001407 }
1408 break;
1409 }
1410 }
1411
1412 int rgbaWriteMask = state.colorWriteActive(index);
1413 int bgraWriteMask = rgbaWriteMask & 0x0000000A | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
1414 int brgaWriteMask = rgbaWriteMask & 0x00000008 | (rgbaWriteMask & 0x00000001) << 1 | (rgbaWriteMask & 0x00000002) << 1 | (rgbaWriteMask & 0x00000004) >> 2;
1415
1416 switch(state.targetFormat[index])
1417 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001418 case FORMAT_R5G6B5:
1419 {
1420 current.x = current.x & Short4(0xF800u);
1421 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1422 current.z = As<UShort4>(current.z) >> 11;
1423
1424 current.x = current.x | current.y | current.z;
1425 }
1426 break;
John Bauman89401822014-05-06 15:04:28 -04001427 case FORMAT_X8G8R8B8Q:
1428 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001429 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1430 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1431 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001432
John Bauman19bac1e2014-05-06 15:23:49 -04001433 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1434 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001435 break;
1436 case FORMAT_A8G8R8B8Q:
1437 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001438 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1439 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1440 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1441 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001442
John Bauman19bac1e2014-05-06 15:23:49 -04001443 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1444 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001445 break;
1446 case FORMAT_X8R8G8B8:
1447 case FORMAT_A8R8G8B8:
1448 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1449 {
John Bauman19bac1e2014-05-06 15:23:49 -04001450 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1451 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1452 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001453
John Bauman19bac1e2014-05-06 15:23:49 -04001454 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1455 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001456
John Bauman19bac1e2014-05-06 15:23:49 -04001457 current.x = current.z;
1458 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1459 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1460 current.y = current.z;
1461 current.z = As<Short4>(UnpackLow(current.z, current.x));
1462 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001463 }
1464 else
1465 {
John Bauman19bac1e2014-05-06 15:23:49 -04001466 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1467 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1468 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1469 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001470
John Bauman19bac1e2014-05-06 15:23:49 -04001471 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1472 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001473
John Bauman19bac1e2014-05-06 15:23:49 -04001474 current.x = current.z;
1475 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1476 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1477 current.y = current.z;
1478 current.z = As<Short4>(UnpackLow(current.z, current.x));
1479 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001480 }
1481 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001482 case FORMAT_X8B8G8R8:
1483 case FORMAT_A8B8G8R8:
1484 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || rgbaWriteMask == 0x7)
1485 {
1486 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1487 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1488 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1489
1490 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1491 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
1492
1493 current.x = current.z;
1494 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1495 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1496 current.y = current.z;
1497 current.z = As<Short4>(UnpackLow(current.z, current.x));
1498 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1499 }
1500 else
1501 {
1502 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1503 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1504 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1505 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1506
1507 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1508 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
1509
1510 current.x = current.z;
1511 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1512 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1513 current.y = current.z;
1514 current.z = As<Short4>(UnpackLow(current.z, current.x));
1515 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1516 }
1517 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001518 case FORMAT_A8:
1519 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1520 current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w)));
1521 break;
John Bauman89401822014-05-06 15:04:28 -04001522 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001523 current.z = current.x;
1524 current.x = As<Short4>(UnpackLow(current.x, current.y));
1525 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1526 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001527 break;
1528 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001529 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001530 break;
John Bauman89401822014-05-06 15:04:28 -04001531 default:
1532 ASSERT(false);
1533 }
1534
John Bauman19bac1e2014-05-06 15:23:49 -04001535 Short4 c01 = current.z;
1536 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001537
1538 Int xMask; // Combination of all masks
1539
1540 if(state.depthTestActive)
1541 {
1542 xMask = zMask;
1543 }
1544 else
1545 {
1546 xMask = cMask;
1547 }
1548
1549 if(state.stencilActive)
1550 {
1551 xMask &= sMask;
1552 }
1553
John Bauman89401822014-05-06 15:04:28 -04001554 switch(state.targetFormat[index])
1555 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001556 case FORMAT_R5G6B5:
1557 {
1558 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001559 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001560
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001561 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001562
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001563 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001564 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001565 Int masked = value;
1566 c01 &= *Pointer<Int>(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1567 masked &= *Pointer<Int>(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
1568 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001569 }
1570
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001571 c01 &= *Pointer<Int>(r.constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1572 value &= *Pointer<Int>(r.constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
1573 c01 |= value;
1574 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001575
1576 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001577 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001578
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001579 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001580
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001581 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001582 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001583 Int masked = value;
1584 c23 &= *Pointer<Int>(r.constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1585 masked &= *Pointer<Int>(r.constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
1586 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001587 }
1588
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001589 c23 &= *Pointer<Int>(r.constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1590 value &= *Pointer<Int>(r.constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
1591 c23 |= value;
1592 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001593 }
1594 break;
John Bauman89401822014-05-06 15:04:28 -04001595 case FORMAT_A8G8R8B8Q:
1596 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
1597 UNIMPLEMENTED();
1598 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1599
1600 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1601 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1602 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1603 // {
1604 // Short4 masked = value;
1605 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1606 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1607 // c01 |= masked;
1608 // }
1609
1610 // c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1611 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1612 // c01 |= value;
1613 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1614
1615 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1616
1617 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1618 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1619 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1620 // {
1621 // Short4 masked = value;
1622 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1623 // masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1624 // c23 |= masked;
1625 // }
1626
1627 // c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1628 // value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1629 // c23 |= value;
1630 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1631 break;
1632 case FORMAT_A8R8G8B8:
1633 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001634 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001635 Pointer<Byte> buffer = cBuffer + x * 4;
1636 Short4 value = *Pointer<Short4>(buffer);
1637
1638 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1639 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1640 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1641 {
1642 Short4 masked = value;
1643 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1644 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1645 c01 |= masked;
1646 }
1647
1648 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1649 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1650 c01 |= value;
1651 *Pointer<Short4>(buffer) = c01;
1652
1653 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1654 value = *Pointer<Short4>(buffer);
1655
1656 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1657 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1658 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1659 {
1660 Short4 masked = value;
1661 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1662 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
1663 c23 |= masked;
1664 }
1665
1666 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1667 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1668 c23 |= value;
1669 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001670 }
John Bauman89401822014-05-06 15:04:28 -04001671 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001672 case FORMAT_A8B8G8R8:
1673 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001674 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001675 Pointer<Byte> buffer = cBuffer + x * 4;
1676 Short4 value = *Pointer<Short4>(buffer);
1677
1678 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
1679 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
1680 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
1681 {
1682 Short4 masked = value;
1683 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1684 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1685 c01 |= masked;
1686 }
1687
1688 c01 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1689 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
1690 c01 |= value;
1691 *Pointer<Short4>(buffer) = c01;
1692
1693 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1694 value = *Pointer<Short4>(buffer);
1695
1696 if((state.targetFormat[index] == FORMAT_A8B8G8R8 && rgbaWriteMask != 0x0000000F) ||
1697 ((state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x00000007) &&
1698 (state.targetFormat[index] == FORMAT_X8B8G8R8 && rgbaWriteMask != 0x0000000F))) // FIXME: Need for masking when XBGR && Fh?
1699 {
1700 Short4 masked = value;
1701 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1702 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
1703 c23 |= masked;
1704 }
1705
1706 c23 &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1707 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
1708 c23 |= value;
1709 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001710 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001711 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001712 case FORMAT_A8:
1713 if(rgbaWriteMask & 0x00000008)
1714 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001715 Pointer<Byte> buffer = cBuffer + 1 * x;
1716 Short4 value;
John Bauman66b8ab22014-05-06 15:57:45 -04001717 Insert(value, *Pointer<Short>(buffer), 0);
1718 Int pitch = *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1719 Insert(value, *Pointer<Short>(buffer + pitch), 1);
1720 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1721
1722 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1723 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
1724 current.w |= value;
1725
1726 *Pointer<Short>(buffer) = Extract(current.w, 0);
1727 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1728 }
1729 break;
John Bauman89401822014-05-06 15:04:28 -04001730 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001731 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001732 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001733
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001734 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001735
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001736 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001737 {
1738 Short4 masked = value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001739 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1740 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001741 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001742 }
1743
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001744 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1745 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001746 current.x |= value;
1747 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001748
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001749 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001750
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001751 value = *Pointer<Short4>(buffer);
1752
1753 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001754 {
1755 Short4 masked = value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001756 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1757 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001758 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001759 }
1760
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001761 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1762 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001763 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001764 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001765 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001766 break;
1767 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001768 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001769 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001770
John Bauman89401822014-05-06 15:04:28 -04001771 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001772 Short4 value = *Pointer<Short4>(buffer);
1773
1774 if(rgbaWriteMask != 0x0000000F)
1775 {
1776 Short4 masked = value;
1777 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1778 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1779 current.x |= masked;
1780 }
1781
1782 current.x &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1783 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
1784 current.x |= value;
1785 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001786 }
1787
John Bauman89401822014-05-06 15:04:28 -04001788 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001789 Short4 value = *Pointer<Short4>(buffer + 8);
1790
1791 if(rgbaWriteMask != 0x0000000F)
1792 {
1793 Short4 masked = value;
1794 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1795 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1796 current.y |= masked;
1797 }
1798
1799 current.y &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1800 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
1801 current.y |= value;
1802 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001803 }
1804
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001805 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1806
1807 {
1808 Short4 value = *Pointer<Short4>(buffer);
1809
1810 if(rgbaWriteMask != 0x0000000F)
1811 {
1812 Short4 masked = value;
1813 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1814 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1815 current.z |= masked;
1816 }
1817
1818 current.z &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1819 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
1820 current.z |= value;
1821 *Pointer<Short4>(buffer) = current.z;
1822 }
1823
1824 {
1825 Short4 value = *Pointer<Short4>(buffer + 8);
1826
1827 if(rgbaWriteMask != 0x0000000F)
1828 {
1829 Short4 masked = value;
1830 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1831 masked &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
1832 current.w |= masked;
1833 }
1834
1835 current.w &= *Pointer<Short4>(r.constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1836 value &= *Pointer<Short4>(r.constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
1837 current.w |= value;
1838 *Pointer<Short4>(buffer + 8) = current.w;
1839 }
John Bauman89401822014-05-06 15:04:28 -04001840 }
1841 break;
1842 default:
1843 ASSERT(false);
1844 }
1845 }
1846
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001847 void PixelRoutine::blendFactor(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001848 {
1849 switch(blendFactorActive)
1850 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001851 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001852 // Optimized
1853 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001854 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001855 // Optimized
1856 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001857 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001858 blendFactor.x = oC.x;
1859 blendFactor.y = oC.y;
1860 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001861 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001862 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001863 blendFactor.x = Float4(1.0f) - oC.x;
1864 blendFactor.y = Float4(1.0f) - oC.y;
1865 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001866 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001867 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001868 blendFactor.x = pixel.x;
1869 blendFactor.y = pixel.y;
1870 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001871 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001872 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001873 blendFactor.x = Float4(1.0f) - pixel.x;
1874 blendFactor.y = Float4(1.0f) - pixel.y;
1875 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001876 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001877 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001878 blendFactor.x = oC.w;
1879 blendFactor.y = oC.w;
1880 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001881 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001882 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001883 blendFactor.x = Float4(1.0f) - oC.w;
1884 blendFactor.y = Float4(1.0f) - oC.w;
1885 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001886 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001887 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001888 blendFactor.x = pixel.w;
1889 blendFactor.y = pixel.w;
1890 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001891 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001892 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001893 blendFactor.x = Float4(1.0f) - pixel.w;
1894 blendFactor.y = Float4(1.0f) - pixel.w;
1895 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001896 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001897 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001898 blendFactor.x = Float4(1.0f) - pixel.w;
1899 blendFactor.x = Min(blendFactor.x, oC.w);
1900 blendFactor.y = blendFactor.x;
1901 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001902 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001903 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001904 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[0]));
1905 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[1]));
1906 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001907 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001908 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001909 blendFactor.x = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1910 blendFactor.y = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1911 blendFactor.z = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001912 break;
1913 default:
1914 ASSERT(false);
1915 }
1916 }
1917
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001918 void PixelRoutine::blendFactorAlpha(Registers &r, const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001919 {
1920 switch(blendFactorAlphaActive)
1921 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001922 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001923 // Optimized
1924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001925 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001926 // Optimized
1927 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001928 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001929 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001931 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001932 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001933 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001934 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001935 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001936 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001937 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001938 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001939 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001940 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001941 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001942 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001943 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001944 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001946 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001947 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001948 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001949 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001950 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001951 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001952 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001953 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001954 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001955 case BLEND_CONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001956 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001957 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001958 case BLEND_INVCONSTANT:
John Bauman19bac1e2014-05-06 15:23:49 -04001959 blendFactor.w = *Pointer<Float4>(r.data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001960 break;
1961 default:
1962 ASSERT(false);
1963 }
1964 }
1965
John Bauman19bac1e2014-05-06 15:23:49 -04001966 void PixelRoutine::alphaBlend(Registers &r, int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04001967 {
1968 if(!state.alphaBlendActive)
1969 {
1970 return;
1971 }
1972
1973 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001974 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04001975
Alexis Hetu96517182015-04-15 10:30:23 -04001976 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04001977 Short4 c01;
1978 Short4 c23;
1979
John Bauman89401822014-05-06 15:04:28 -04001980 switch(state.targetFormat[index])
1981 {
John Bauman89401822014-05-06 15:04:28 -04001982 case FORMAT_R32F:
1983 buffer = cBuffer;
1984 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04001985 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
1986 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
John Bauman89401822014-05-06 15:04:28 -04001987 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
1988 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04001989 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
1990 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
1991 pixel.y = Float4(1.0f);
1992 pixel.z = Float4(1.0f);
1993 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001994 break;
1995 case FORMAT_G32R32F:
1996 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001997 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
John Bauman89401822014-05-06 15:04:28 -04001998 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001999 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2000 pixel.z = pixel.x;
2001 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2002 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2003 pixel.y = pixel.z;
2004 pixel.z = Float4(1.0f);
2005 pixel.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002006 break;
2007 case FORMAT_A32B32G32R32F:
2008 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002009 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2010 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002011 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002012 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2013 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2014 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002015 break;
2016 default:
2017 ASSERT(false);
2018 }
2019
2020 if(postBlendSRGB && state.writeSRGB)
2021 {
John Bauman19bac1e2014-05-06 15:23:49 -04002022 sRGBtoLinear(pixel.x);
2023 sRGBtoLinear(pixel.y);
2024 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002025 }
2026
2027 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002028 Vector4f sourceFactor;
2029 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002030
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002031 blendFactor(r, sourceFactor, oC, pixel, state.sourceBlendFactor);
2032 blendFactor(r, destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002033
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002034 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002035 {
John Bauman19bac1e2014-05-06 15:23:49 -04002036 oC.x *= sourceFactor.x;
2037 oC.y *= sourceFactor.y;
2038 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002039 }
2040
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002041 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002042 {
John Bauman19bac1e2014-05-06 15:23:49 -04002043 pixel.x *= destFactor.x;
2044 pixel.y *= destFactor.y;
2045 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002046 }
2047
2048 switch(state.blendOperation)
2049 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002050 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002051 oC.x += pixel.x;
2052 oC.y += pixel.y;
2053 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002054 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002055 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002056 oC.x -= pixel.x;
2057 oC.y -= pixel.y;
2058 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002059 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002060 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002061 oC.x = pixel.x - oC.x;
2062 oC.y = pixel.y - oC.y;
2063 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002064 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002065 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002066 oC.x = Min(oC.x, pixel.x);
2067 oC.y = Min(oC.y, pixel.y);
2068 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002069 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002070 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002071 oC.x = Max(oC.x, pixel.x);
2072 oC.y = Max(oC.y, pixel.y);
2073 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002074 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002075 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002076 // No operation
2077 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002078 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002079 oC.x = pixel.x;
2080 oC.y = pixel.y;
2081 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002082 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002083 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002084 oC.x = Float4(0.0f);
2085 oC.y = Float4(0.0f);
2086 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002087 break;
2088 default:
2089 ASSERT(false);
2090 }
2091
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002092 blendFactorAlpha(r, sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2093 blendFactorAlpha(r, destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002094
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002095 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002096 {
John Bauman19bac1e2014-05-06 15:23:49 -04002097 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002098 }
2099
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002100 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002101 {
John Bauman19bac1e2014-05-06 15:23:49 -04002102 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002103 }
2104
2105 switch(state.blendOperationAlpha)
2106 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002107 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002108 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002109 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002110 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002111 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002112 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002113 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002114 pixel.w -= oC.w;
2115 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002116 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002117 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002118 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002119 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002120 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002121 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002122 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002123 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002124 // No operation
2125 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002126 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002127 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002128 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002129 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002130 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002131 break;
2132 default:
2133 ASSERT(false);
2134 }
2135 }
2136
// Stores the floating-point color held in oC to render target 'index'.
//
// The routine touches a 2x2 block of pixels: two adjacent pixels on the row
// addressed by cBuffer (at horizontal position x) and two on the following row,
// reached by adding DrawData::colorPitchB[index] bytes. Existing buffer contents
// are read first and merged back in, so that
//   - color channels disabled in state.colorWriteActive(index), and
//   - pixels rejected by the combined coverage/depth/stencil mask
// keep their previous values.
//
// r      - register block; supplies r.data (DrawData) and r.constants (mask tables).
// index  - render target index, used for targetFormat, the write mask and the pitch.
// oC     - color to write; it is rearranged and masked in place, so its contents
//          are not preserved for the caller.
// sMask, zMask, cMask - stencil, depth and coverage masks; only read here.
//
// Only FORMAT_R32F, FORMAT_G32R32F and FORMAT_A32B32G32R32F are handled; any other
// target format hits ASSERT(false).
John Bauman19bac1e2014-05-06 15:23:49 -04002137 void PixelRoutine::writeColor(Registers &r, int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002138 {
// Step 1: rearrange oC into the order in which it is stored for this format.
John Bauman89401822014-05-06 15:04:28 -04002139 switch(state.targetFormat[index])
2140 {
John Bauman89401822014-05-06 15:04:28 -04002141 case FORMAT_R32F:
// Single channel: oC.x is stored as-is, one lane per pixel.
2142 break;
2143 case FORMAT_G32R32F:
// Two channels: combine oC.x and oC.y so that afterwards
// oC.x = UnpackLow(x, y) (written to the first row) and
// oC.y = UnpackHigh(x, y) (written to the second row). oC.z is used as scratch.
John Bauman19bac1e2014-05-06 15:23:49 -04002144 oC.z = oC.x;
2145 oC.x = UnpackLow(oC.x, oC.y);
2146 oC.z = UnpackHigh(oC.z, oC.y);
2147 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002148 break;
2149 case FORMAT_A32B32G32R32F:
// Four channels: after the transpose each of oC.x..oC.w is stored below as one
// complete 16-byte pixel.
John Bauman19bac1e2014-05-06 15:23:49 -04002150 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002151 break;
2152 default:
2153 ASSERT(false);
2154 }
2155
// Step 2: gather the masks. rgbaWriteMask is a routine-generation-time constant
// (plain int); xMask is a run-time value used to index the per-pixel mask tables.
2156 int rgbaWriteMask = state.colorWriteActive(index);
2157
2158 Int xMask; // Combination of all masks
2159
2160 if(state.depthTestActive)
2161 {
2162 xMask = zMask;
2163 }
2164 else
2165 {
2166 xMask = cMask;
2167 }
2168
2169 if(state.stencilActive)
2170 {
2171 xMask &= sMask;
2172 }
2173
2174 Pointer<Byte> buffer;
2175 Float4 value;
2176
// Step 3: read-modify-write the destination, per format.
2177 switch(state.targetFormat[index])
2178 {
2179 case FORMAT_R32F:
// Nothing is emitted at all when bit 0 of the write mask is clear.
2180 if(rgbaWriteMask & 0x00000001)
2181 {
// 4 bytes per pixel.
2182 buffer = cBuffer + 4 * x;
2183
// Load the current destination: two floats from the first row...
2184 // FIXME: movlps
2185 value.x = *Pointer<Float>(buffer + 0);
2186 value.y = *Pointer<Float>(buffer + 4);
2187
2188 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2189
// ...and two floats from the second row.
2190 // FIXME: movhps
2191 value.z = *Pointer<Float>(buffer + 0);
2192 value.w = *Pointer<Float>(buffer + 4);
2193
// Merge: take oC.x where the maskD4X entry selected by xMask is set and the old
// destination where invMaskD4X is set (table entries are 16 bytes apart).
John Bauman19bac1e2014-05-06 15:23:49 -04002194 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002195 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002196 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002197
// buffer still points at the second row, so that row is stored first...
2198 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002199 *Pointer<Float>(buffer + 0) = oC.x.z;
2200 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002201
// ...then the pointer is rewound by one pitch to store the first row.
2202 buffer -= *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2203
2204 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002205 *Pointer<Float>(buffer + 0) = oC.x.x;
2206 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002207 }
2208 break;
2209 case FORMAT_G32R32F:
// 8 bytes per pixel; one Float4 covers the two pixels of a row.
2210 buffer = cBuffer + 8 * x;
2211
// First row.
2212 value = *Pointer<Float4>(buffer);
2213
// Channel masking is only emitted when not both of the two low write-mask bits
// are set: disabled channels are taken from the destination via the
// maskD01X / invMaskD01X entries selected by the low two write-mask bits.
2214 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2215 {
2216 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002217 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04002218 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002219 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002220 }
2221
// Per-pixel masking for the first row uses the maskQ01X / invMaskQ01X tables,
// indexed by xMask.
John Bauman19bac1e2014-05-06 15:23:49 -04002222 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002223 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002224 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2225 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002226
// Second row: same sequence with oC.y and the maskQ23X / invMaskQ23X tables.
2227 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2228
2229 value = *Pointer<Float4>(buffer);
2230
2231 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2232 {
2233 Float4 masked;
2234
2235 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002236 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
John Bauman89401822014-05-06 15:04:28 -04002237 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002238 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002239 }
2240
John Bauman19bac1e2014-05-06 15:23:49 -04002241 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002242 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002243 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2244 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002245 break;
2246 case FORMAT_A32B32G32R32F:
// 16 bytes per pixel; each of the four blocks below handles one pixel:
//   oC.x -> first row,  offset 0    (maskX0X)
//   oC.y -> first row,  offset 16   (maskX1X)
//   oC.z -> second row, offset 0    (maskX2X)
//   oC.w -> second row, offset 16   (maskX3X)
// In every block the channel write mask (maskD4X[rgbaWriteMask]) is applied
// first, only when some channel is disabled, followed by the per-pixel mask
// selected by xMask. All loads and stores here request 16-byte alignment.
2247 buffer = cBuffer + 16 * x;
2248
2249 {
2250 value = *Pointer<Float4>(buffer, 16);
2251
2252 if(rgbaWriteMask != 0x0000000F)
2253 {
2254 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002255 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002256 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002257 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002258 }
2259
John Bauman19bac1e2014-05-06 15:23:49 -04002260 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002261 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002262 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2263 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002264 }
2265
2266 {
2267 value = *Pointer<Float4>(buffer + 16, 16);
2268
2269 if(rgbaWriteMask != 0x0000000F)
2270 {
2271 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002272 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002273 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002274 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002275 }
2276
John Bauman19bac1e2014-05-06 15:23:49 -04002277 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002278 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002279 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2280 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002281 }
2282
// Advance to the second row.
2283 buffer += *Pointer<Int>(r.data + OFFSET(DrawData,colorPitchB[index]));
2284
2285 {
2286 value = *Pointer<Float4>(buffer, 16);
2287
2288 if(rgbaWriteMask != 0x0000000F)
2289 {
2290 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002291 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002292 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002293 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002294 }
2295
John Bauman19bac1e2014-05-06 15:23:49 -04002296 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002297 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002298 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2299 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002300 }
2301
2302 {
2303 value = *Pointer<Float4>(buffer + 16, 16);
2304
2305 if(rgbaWriteMask != 0x0000000F)
2306 {
2307 Float4 masked = value;
John Bauman19bac1e2014-05-06 15:23:49 -04002308 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
John Bauman89401822014-05-06 15:04:28 -04002309 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002310 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002311 }
2312
John Bauman19bac1e2014-05-06 15:23:49 -04002313 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(r.constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -04002314 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(r.constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002315 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2316 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002317 }
2318 break;
2319 default:
2320 ASSERT(false);
2321 }
2322 }
2323
John Bauman89401822014-05-06 15:04:28 -04002324 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2325 {
John Bauman19bac1e2014-05-06 15:23:49 -04002326 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002327 }
2328
Nicolas Capense1a50af2015-05-13 16:48:18 -04002329 void PixelRoutine::sRGBtoLinear16_12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002330 {
John Bauman19bac1e2014-05-06 15:23:49 -04002331 c.x = As<UShort4>(c.x) >> 4;
2332 c.y = As<UShort4>(c.y) >> 4;
2333 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002334
2335 sRGBtoLinear12_16(r, c);
2336 }
2337
Alexis Hetu96517182015-04-15 10:30:23 -04002338 void PixelRoutine::sRGBtoLinear12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002339 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04002340 Pointer<Byte> LUT = r.constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002341
John Bauman19bac1e2014-05-06 15:23:49 -04002342 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2343 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2344 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2345 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002346
John Bauman19bac1e2014-05-06 15:23:49 -04002347 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2348 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2349 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2350 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002351
John Bauman19bac1e2014-05-06 15:23:49 -04002352 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2353 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2354 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2355 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002356 }
2357
Nicolas Capense1a50af2015-05-13 16:48:18 -04002358 void PixelRoutine::linearToSRGB16_12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002359 {
John Bauman19bac1e2014-05-06 15:23:49 -04002360 c.x = As<UShort4>(c.x) >> 4;
2361 c.y = As<UShort4>(c.y) >> 4;
2362 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002363
2364 linearToSRGB12_16(r, c);
2365 }
2366
Alexis Hetu96517182015-04-15 10:30:23 -04002367 void PixelRoutine::linearToSRGB12_16(Registers &r, Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002368 {
Nicolas Capense1a50af2015-05-13 16:48:18 -04002369 Pointer<Byte> LUT = r.constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002370
John Bauman19bac1e2014-05-06 15:23:49 -04002371 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2372 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2373 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2374 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002375
John Bauman19bac1e2014-05-06 15:23:49 -04002376 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2377 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2378 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2379 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002380
John Bauman19bac1e2014-05-06 15:23:49 -04002381 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2382 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2383 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2384 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002385 }
2386
John Bauman89401822014-05-06 15:04:28 -04002387 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2388 {
2389 Float4 linear = x * x;
2390 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2391
2392 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2393 }
2394
John Bauman19bac1e2014-05-06 15:23:49 -04002395 bool PixelRoutine::colorUsed()
2396 {
2397 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2398 }
John Bauman89401822014-05-06 15:04:28 -04002399}