// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "PixelRoutine.hpp"
16
17#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040018#include "QuadRasterizer.hpp"
19#include "Surface.hpp"
20#include "Primitive.hpp"
John Bauman89401822014-05-06 15:04:28 -040021#include "SamplerCore.hpp"
22#include "Constants.hpp"
23#include "Debug.hpp"
24
John Bauman89401822014-05-06 15:04:28 -040025namespace sw
26{
// Global configuration flags defined in another translation unit.
extern bool complementaryDepthBuffer;   // When set, depth is handled as (1 - z) and depth comparisons are mirrored
extern bool postBlendSRGB;              // Not referenced in the visible part of this file
extern bool exactColorRounding;         // Not referenced in the visible part of this file
extern bool forceClearRegisters;        // Forces the constructor to zero all input registers
John Bauman89401822014-05-06 15:04:28 -040031
Nicolas Capens4f172c72016-01-13 08:34:30 -050032 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040033 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040034 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040035 {
Nicolas Capens3b4c93f2016-05-18 12:51:37 -040036 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
Alexis Hetuf2a8c372015-07-13 11:08:41 -040037 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040038 v[i].x = Float4(0.0f);
39 v[i].y = Float4(0.0f);
40 v[i].z = Float4(0.0f);
41 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040042 }
John Bauman89401822014-05-06 15:04:28 -040043 }
44 }
45
46 PixelRoutine::~PixelRoutine()
47 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040048 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040049 {
50 delete sampler[i];
51 }
52 }
53
Nicolas Capens4f172c72016-01-13 08:34:30 -050054 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040055 {
56 #if PERF_PROFILE
57 Long pipeTime = Ticks();
58 #endif
59
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040060 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040061 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050062 sampler[i] = new SamplerCore(constants, state.sampler[i]);
John Bauman89401822014-05-06 15:04:28 -040063 }
64
65 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040066
67 Int zMask[4]; // Depth mask
68 Int sMask[4]; // Stencil mask
69
70 for(unsigned int q = 0; q < state.multiSample; q++)
71 {
72 zMask[q] = cMask[q];
73 sMask[q] = cMask[q];
74 }
75
76 for(unsigned int q = 0; q < state.multiSample; q++)
77 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050078 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040079 }
80
81 Float4 f;
John Bauman89401822014-05-06 15:04:28 -040082 Float4 rhwCentroid;
83
Nicolas Capens4f172c72016-01-13 08:34:30 -050084 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040085
John Bauman19bac1e2014-05-06 15:23:49 -040086 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040087 {
88 for(unsigned int q = 0; q < state.multiSample; q++)
89 {
90 Float4 x = xxxx;
Nicolas Capens4f172c72016-01-13 08:34:30 -050091
John Bauman89401822014-05-06 15:04:28 -040092 if(state.multiSample > 1)
93 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050094 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
John Bauman89401822014-05-06 15:04:28 -040095 }
96
Nicolas Capens4f172c72016-01-13 08:34:30 -050097 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false);
Nicolas Capens3cbeac52017-09-15 11:49:31 -040098
99 if(state.depthClamp)
100 {
101 z[q] = Min(Max(z[q], Float4(0.0f)), Float4(1.0f));
102 }
John Bauman89401822014-05-06 15:04:28 -0400103 }
104 }
105
106 Bool depthPass = false;
107
108 if(earlyDepthTest)
109 {
110 for(unsigned int q = 0; q < state.multiSample; q++)
111 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500112 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400113 }
114 }
115
116 If(depthPass || Bool(!earlyDepthTest))
117 {
118 #if PERF_PROFILE
119 Long interpTime = Ticks();
120 #endif
121
Nicolas Capens4f172c72016-01-13 08:34:30 -0500122 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400123
John Bauman89401822014-05-06 15:04:28 -0400124 // Centroid locations
125 Float4 XXXX = Float4(0.0f);
126 Float4 YYYY = Float4(0.0f);
127
128 if(state.centroid)
129 {
130 Float4 WWWW(1.0e-9f);
131
132 for(unsigned int q = 0; q < state.multiSample; q++)
133 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500134 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
135 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
136 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400137 }
138
139 WWWW = Rcp_pp(WWWW);
140 XXXX *= WWWW;
141 YYYY *= WWWW;
142
143 XXXX += xxxx;
144 YYYY += yyyy;
145 }
146
John Bauman19bac1e2014-05-06 15:23:49 -0400147 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400148 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500149 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500150 rhw = reciprocal(w, false, false, true);
John Bauman89401822014-05-06 15:04:28 -0400151
152 if(state.centroid)
153 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500154 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
John Bauman89401822014-05-06 15:04:28 -0400155 }
156 }
157
Nicolas Capens3b4c93f2016-05-18 12:51:37 -0400158 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
John Bauman89401822014-05-06 15:04:28 -0400159 {
160 for(int component = 0; component < 4; component++)
161 {
John Bauman89401822014-05-06 15:04:28 -0400162 if(state.interpolant[interpolant].component & (1 << component))
163 {
164 if(!state.interpolant[interpolant].centroid)
165 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500166 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400167 }
168 else
169 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500170 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400171 }
172 }
173 }
174
175 Float4 rcp;
176
177 switch(state.interpolant[interpolant].project)
178 {
179 case 0:
180 break;
181 case 1:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500182 rcp = reciprocal(v[interpolant].y);
183 v[interpolant].x = v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400184 break;
185 case 2:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500186 rcp = reciprocal(v[interpolant].z);
187 v[interpolant].x = v[interpolant].x * rcp;
188 v[interpolant].y = v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400189 break;
190 case 3:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500191 rcp = reciprocal(v[interpolant].w);
192 v[interpolant].x = v[interpolant].x * rcp;
193 v[interpolant].y = v[interpolant].y * rcp;
194 v[interpolant].z = v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400195 break;
196 }
197 }
198
199 if(state.fog.component)
200 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500201 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400202 }
203
Nicolas Capens4f172c72016-01-13 08:34:30 -0500204 setBuiltins(x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400205
206 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500207 cycles[PERF_INTERP] += Ticks() - interpTime;
John Bauman89401822014-05-06 15:04:28 -0400208 #endif
209
210 Bool alphaPass = true;
211
212 if(colorUsed())
213 {
214 #if PERF_PROFILE
215 Long shaderTime = Ticks();
216 #endif
217
Nicolas Capens4f172c72016-01-13 08:34:30 -0500218 applyShader(cMask);
John Bauman89401822014-05-06 15:04:28 -0400219
220 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500221 cycles[PERF_SHADER] += Ticks() - shaderTime;
John Bauman89401822014-05-06 15:04:28 -0400222 #endif
223
Nicolas Capens4f172c72016-01-13 08:34:30 -0500224 alphaPass = alphaTest(cMask);
John Bauman89401822014-05-06 15:04:28 -0400225
John Bauman19bac1e2014-05-06 15:23:49 -0400226 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400227 {
228 for(unsigned int q = 0; q < state.multiSample; q++)
229 {
230 zMask[q] &= cMask[q];
231 sMask[q] &= cMask[q];
232 }
233 }
234 }
235
236 If(alphaPass)
237 {
238 if(!earlyDepthTest)
239 {
240 for(unsigned int q = 0; q < state.multiSample; q++)
241 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500242 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400243 }
244 }
245
246 #if PERF_PROFILE
247 Long ropTime = Ticks();
248 #endif
249
250 If(depthPass || Bool(earlyDepthTest))
251 {
252 for(unsigned int q = 0; q < state.multiSample; q++)
253 {
254 if(state.multiSampleMask & (1 << q))
255 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500256 writeDepth(zBuffer, q, x, z[q], zMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400257
258 if(state.occlusionEnabled)
259 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500260 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
John Bauman89401822014-05-06 15:04:28 -0400261 }
262 }
263 }
264
265 if(colorUsed())
266 {
267 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400268 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400269 #endif
270
Nicolas Capens4f172c72016-01-13 08:34:30 -0500271 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400272 }
273 }
274
275 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500276 cycles[PERF_ROP] += Ticks() - ropTime;
John Bauman89401822014-05-06 15:04:28 -0400277 #endif
278 }
279 }
280
281 for(unsigned int q = 0; q < state.multiSample; q++)
282 {
283 if(state.multiSampleMask & (1 << q))
284 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500285 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400286 }
287 }
288
289 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500290 cycles[PERF_PIPE] += Ticks() - pipeTime;
John Bauman89401822014-05-06 15:04:28 -0400291 #endif
292 }
293
John Bauman89401822014-05-06 15:04:28 -0400294 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
295 {
296 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
297
298 if(!flat)
299 {
300 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
301 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
302
303 if(perspective)
304 {
305 interpolant *= rhw;
306 }
307 }
308
309 return interpolant;
310 }
311
Nicolas Capens4f172c72016-01-13 08:34:30 -0500312 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400313 {
314 if(!state.stencilActive)
315 {
316 return;
317 }
318
319 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
320
321 Pointer<Byte> buffer = sBuffer + 2 * x;
322
323 if(q > 0)
324 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500325 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400326 }
327
Nicolas Capens48ef1252016-11-07 15:30:33 -0500328 Byte8 value = *Pointer<Byte8>(buffer);
John Bauman89401822014-05-06 15:04:28 -0400329 Byte8 valueCCW = value;
330
331 if(!state.noStencilMask)
332 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500333 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400334 }
335
Nicolas Capens4f172c72016-01-13 08:34:30 -0500336 stencilTest(value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400337
338 if(state.twoSidedStencil)
339 {
340 if(!state.noStencilMaskCCW)
341 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500342 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400343 }
344
Nicolas Capens4f172c72016-01-13 08:34:30 -0500345 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400346
Nicolas Capens4f172c72016-01-13 08:34:30 -0500347 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
348 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400349 value |= valueCCW;
350 }
351
352 sMask = SignMask(value) & cMask;
353 }
354
Nicolas Capens4f172c72016-01-13 08:34:30 -0500355 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400356 {
357 Byte8 equal;
358
359 switch(stencilCompareMode)
360 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400361 case STENCIL_ALWAYS:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400362 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400363 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400364 case STENCIL_NEVER:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400365 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400366 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400367 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400368 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500369 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400370 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400371 case STENCIL_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500372 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400373 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400374 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500375 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400376 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400377 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400378 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400379 equal = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500380 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400381 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500382 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400383 value |= equal;
384 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400385 case STENCIL_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500386 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
John Bauman89401822014-05-06 15:04:28 -0400387 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
388 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
389 value = equal;
390 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400391 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400392 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500393 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400394 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400395 break;
396 default:
397 ASSERT(false);
398 }
399 }
400
Nicolas Capens4f172c72016-01-13 08:34:30 -0500401 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400402 {
403 if(!state.depthTestActive)
404 {
405 return true;
406 }
407
408 Float4 Z = z;
409
John Bauman19bac1e2014-05-06 15:23:49 -0400410 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400411 {
412 if(complementaryDepthBuffer)
413 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500414 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400415 }
416 else
417 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500418 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400419 }
420 }
421
422 Pointer<Byte> buffer;
423 Int pitch;
424
425 if(!state.quadLayoutDepthBuffer)
426 {
427 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500428 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400429 }
430 else
431 {
432 buffer = zBuffer + 8 * x;
433 }
434
435 if(q > 0)
436 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500437 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400438 }
439
440 Float4 zValue;
441
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400442 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400443 {
444 if(!state.quadLayoutDepthBuffer)
445 {
446 // FIXME: Properly optimizes?
447 zValue.xy = *Pointer<Float4>(buffer);
448 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
449 }
450 else
451 {
452 zValue = *Pointer<Float4>(buffer, 16);
453 }
454 }
455
456 Int4 zTest;
457
458 switch(state.depthCompareMode)
459 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400460 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400461 // Optimized
462 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400463 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400464 // Optimized
465 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400466 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400467 zTest = CmpEQ(zValue, Z);
468 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400469 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400470 zTest = CmpNEQ(zValue, Z);
471 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400472 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400473 if(complementaryDepthBuffer)
474 {
475 zTest = CmpLT(zValue, Z);
476 }
477 else
478 {
479 zTest = CmpNLE(zValue, Z);
480 }
481 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400482 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400483 if(complementaryDepthBuffer)
484 {
485 zTest = CmpNLT(zValue, Z);
486 }
487 else
488 {
489 zTest = CmpLE(zValue, Z);
490 }
491 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400492 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400493 if(complementaryDepthBuffer)
494 {
495 zTest = CmpLE(zValue, Z);
496 }
497 else
498 {
499 zTest = CmpNLT(zValue, Z);
500 }
501 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400502 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400503 if(complementaryDepthBuffer)
504 {
505 zTest = CmpNLE(zValue, Z);
506 }
507 else
508 {
509 zTest = CmpLT(zValue, Z);
510 }
511 break;
512 default:
513 ASSERT(false);
514 }
515
516 switch(state.depthCompareMode)
517 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400518 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400519 zMask = cMask;
520 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400521 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400522 zMask = 0x0;
523 break;
524 default:
525 zMask = SignMask(zTest) & cMask;
526 break;
527 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500528
John Bauman89401822014-05-06 15:04:28 -0400529 if(state.stencilActive)
530 {
531 zMask &= sMask;
532 }
533
534 return zMask != 0;
535 }
536
Nicolas Capens4f172c72016-01-13 08:34:30 -0500537 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400538 {
539 Short4 cmp;
540 Short4 equal;
541
542 switch(state.alphaCompareMode)
543 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400544 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400545 aMask = 0xF;
546 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400547 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400548 aMask = 0x0;
549 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400550 case ALPHA_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500551 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400552 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400553 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400554 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
555 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400556 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400557 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400558 case ALPHA_LESS: // a < b ~ b > a
Nicolas Capens4f172c72016-01-13 08:34:30 -0500559 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
Nicolas Capens33438a62017-09-27 11:47:35 -0400560 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400561 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400562 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
Nicolas Capens4f172c72016-01-13 08:34:30 -0500563 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
564 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400565 cmp |= equal;
Nicolas Capens33438a62017-09-27 11:47:35 -0400566 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400567 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400568 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
569 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400570 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400571 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400572 case ALPHA_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500573 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400574 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400575 break;
576 default:
577 ASSERT(false);
578 }
579 }
580
Nicolas Capens4f172c72016-01-13 08:34:30 -0500581 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400582 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500583 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
584 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
585 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
586 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
John Bauman89401822014-05-06 15:04:28 -0400587
588 Int aMask0 = SignMask(coverage0);
589 Int aMask1 = SignMask(coverage1);
590 Int aMask2 = SignMask(coverage2);
591 Int aMask3 = SignMask(coverage3);
592
593 cMask[0] &= aMask0;
594 cMask[1] &= aMask1;
595 cMask[2] &= aMask2;
596 cMask[3] &= aMask3;
597 }
598
Nicolas Capens4f172c72016-01-13 08:34:30 -0500599 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400600 {
601 if(!state.fogActive)
602 {
603 return;
604 }
605
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400606 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400607 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500608 pixelFog(fog);
John Bauman89401822014-05-06 15:04:28 -0400609
John Bauman19bac1e2014-05-06 15:23:49 -0400610 fog = Min(fog, Float4(1.0f));
611 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400612 }
613
Nicolas Capens4f172c72016-01-13 08:34:30 -0500614 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
615 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
616 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400617
John Bauman19bac1e2014-05-06 15:23:49 -0400618 c0.x *= fog;
619 c0.y *= fog;
620 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400621
Nicolas Capens4f172c72016-01-13 08:34:30 -0500622 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
623 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
624 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400625 }
626
Nicolas Capens4f172c72016-01-13 08:34:30 -0500627 void PixelRoutine::pixelFog(Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400628 {
629 Float4 &zw = visibility;
630
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400631 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400632 {
633 if(state.wBasedFog)
634 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500635 zw = rhw;
John Bauman89401822014-05-06 15:04:28 -0400636 }
637 else
638 {
639 if(complementaryDepthBuffer)
640 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500641 zw = Float4(1.0f) - z[0];
John Bauman89401822014-05-06 15:04:28 -0400642 }
643 else
644 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500645 zw = z[0];
John Bauman89401822014-05-06 15:04:28 -0400646 }
647 }
648 }
649
650 switch(state.pixelFogMode)
651 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400652 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400653 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400654 case FOG_LINEAR:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500655 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale));
656 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
John Bauman89401822014-05-06 15:04:28 -0400657 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400658 case FOG_EXP:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500659 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400660 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400661 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400662 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400663 zw *= zw;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500664 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400665 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400666 break;
667 default:
668 ASSERT(false);
669 }
670 }
671
Nicolas Capens4f172c72016-01-13 08:34:30 -0500672 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
John Bauman89401822014-05-06 15:04:28 -0400673 {
674 if(!state.depthWriteEnable)
675 {
676 return;
677 }
678
679 Float4 Z = z;
680
John Bauman19bac1e2014-05-06 15:23:49 -0400681 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400682 {
683 if(complementaryDepthBuffer)
684 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500685 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400686 }
687 else
688 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500689 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400690 }
691 }
692
693 Pointer<Byte> buffer;
694 Int pitch;
695
696 if(!state.quadLayoutDepthBuffer)
Nicolas Capens05b3d662016-02-25 23:58:33 -0500697 {
John Bauman89401822014-05-06 15:04:28 -0400698 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500699 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400700 }
701 else
Nicolas Capens05b3d662016-02-25 23:58:33 -0500702 {
John Bauman89401822014-05-06 15:04:28 -0400703 buffer = zBuffer + 8 * x;
704 }
705
706 if(q > 0)
707 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500708 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400709 }
710
711 Float4 zValue;
712
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400713 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400714 {
715 if(!state.quadLayoutDepthBuffer)
716 {
717 // FIXME: Properly optimizes?
718 zValue.xy = *Pointer<Float4>(buffer);
719 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
720 }
721 else
722 {
723 zValue = *Pointer<Float4>(buffer, 16);
724 }
725 }
726
Nicolas Capens4f172c72016-01-13 08:34:30 -0500727 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
728 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -0400729 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
730
731 if(!state.quadLayoutDepthBuffer)
732 {
733 // FIXME: Properly optimizes?
734 *Pointer<Float2>(buffer) = Float2(Z.xy);
735 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
736 }
737 else
738 {
739 *Pointer<Float4>(buffer, 16) = Z;
740 }
741 }
742
Nicolas Capens4f172c72016-01-13 08:34:30 -0500743 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400744 {
745 if(!state.stencilActive)
746 {
747 return;
748 }
749
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400750 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400751 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400752 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400753 {
754 return;
755 }
756 }
757
758 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
759 {
760 return;
761 }
762
763 Pointer<Byte> buffer = sBuffer + 2 * x;
764
765 if(q > 0)
766 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500767 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400768 }
769
Nicolas Capens48ef1252016-11-07 15:30:33 -0500770 Byte8 bufferValue = *Pointer<Byte8>(buffer);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500771
John Bauman89401822014-05-06 15:04:28 -0400772 Byte8 newValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500773 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400774
775 if(!state.noStencilWriteMask)
776 {
777 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500778 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
779 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400780 newValue |= maskedValue;
781 }
782
783 if(state.twoSidedStencil)
784 {
785 Byte8 newValueCCW;
786
Nicolas Capens4f172c72016-01-13 08:34:30 -0500787 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400788
789 if(!state.noStencilWriteMaskCCW)
790 {
791 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500792 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
793 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400794 newValueCCW |= maskedValue;
795 }
796
Nicolas Capens4f172c72016-01-13 08:34:30 -0500797 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
798 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400799 newValue |= newValueCCW;
800 }
801
Nicolas Capens4f172c72016-01-13 08:34:30 -0500802 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
803 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
John Bauman89401822014-05-06 15:04:28 -0400804 newValue |= bufferValue;
805
Nicolas Capens16b5f152016-10-13 13:39:01 -0400806 *Pointer<Byte4>(buffer) = Byte4(newValue);
John Bauman89401822014-05-06 15:04:28 -0400807 }
808
Nicolas Capens4f172c72016-01-13 08:34:30 -0500809 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400810 {
811 Byte8 &pass = newValue;
812 Byte8 fail;
813 Byte8 zFail;
814
Nicolas Capens4f172c72016-01-13 08:34:30 -0500815 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400816
817 if(stencilZFailOperation != stencilPassOperation)
818 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500819 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400820 }
821
822 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
823 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500824 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400825 }
826
827 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
828 {
829 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
830 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500831 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
832 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
John Bauman89401822014-05-06 15:04:28 -0400833 pass |= zFail;
834 }
835
Nicolas Capens4f172c72016-01-13 08:34:30 -0500836 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
837 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
John Bauman89401822014-05-06 15:04:28 -0400838 pass |= fail;
839 }
840 }
841
Nicolas Capens4f172c72016-01-13 08:34:30 -0500842 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400843 {
844 switch(operation)
845 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400846 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400847 output = bufferValue;
848 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400849 case OPERATION_ZERO:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400850 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400851 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400852 case OPERATION_REPLACE:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500853 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
John Bauman89401822014-05-06 15:04:28 -0400854 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400855 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400856 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
857 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400858 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400859 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
860 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400861 case OPERATION_INVERT:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400862 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400863 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400864 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400865 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
866 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400867 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400868 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
869 break;
870 default:
871 ASSERT(false);
872 }
873 }
874
Nicolas Capens96d4e092016-11-18 14:22:38 -0500875 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400876 {
877 switch(blendFactorActive)
878 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400879 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400880 // Optimized
881 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400882 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400883 // Optimized
884 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400885 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400886 blendFactor.x = current.x;
887 blendFactor.y = current.y;
888 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400889 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400890 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400891 blendFactor.x = Short4(0xFFFFu) - current.x;
892 blendFactor.y = Short4(0xFFFFu) - current.y;
893 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400894 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400895 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400896 blendFactor.x = pixel.x;
897 blendFactor.y = pixel.y;
898 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400899 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400900 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400901 blendFactor.x = Short4(0xFFFFu) - pixel.x;
902 blendFactor.y = Short4(0xFFFFu) - pixel.y;
903 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400904 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400905 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400906 blendFactor.x = current.w;
907 blendFactor.y = current.w;
908 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400909 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400910 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400911 blendFactor.x = Short4(0xFFFFu) - current.w;
912 blendFactor.y = Short4(0xFFFFu) - current.w;
913 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400914 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400915 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400916 blendFactor.x = pixel.w;
917 blendFactor.y = pixel.w;
918 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400919 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400920 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400921 blendFactor.x = Short4(0xFFFFu) - pixel.w;
922 blendFactor.y = Short4(0xFFFFu) - pixel.w;
923 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400925 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400926 blendFactor.x = Short4(0xFFFFu) - pixel.w;
927 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
928 blendFactor.y = blendFactor.x;
929 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400931 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500932 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
933 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
934 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400935 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400936 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500937 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
938 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
939 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400940 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400941 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500942 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
943 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
944 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400946 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500947 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
948 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
949 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400950 break;
951 default:
952 ASSERT(false);
953 }
954 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500955
Nicolas Capens96d4e092016-11-18 14:22:38 -0500956 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400957 {
958 switch(blendFactorAlphaActive)
959 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400960 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400961 // Optimized
962 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400963 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400964 // Optimized
965 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400966 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400967 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400968 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400969 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400970 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400971 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400972 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400973 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400974 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400975 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400976 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400977 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400978 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400979 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400980 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400981 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400982 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400983 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400984 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400985 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400986 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400987 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400988 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400989 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400990 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400991 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400992 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400993 case BLEND_CONSTANT:
994 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500995 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400996 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400997 case BLEND_INVCONSTANT:
998 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500999 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -04001000 break;
1001 default:
1002 ASSERT(false);
1003 }
1004 }
1005
Alexis Hetu049a1872016-04-25 16:59:58 -04001006 bool PixelRoutine::isSRGB(int index) const
1007 {
1008 return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8;
1009 }
1010
Nicolas Capens4f172c72016-01-13 08:34:30 -05001011 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -04001012 {
John Bauman89401822014-05-06 15:04:28 -04001013 Short4 c01;
1014 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001015 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001016 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001017
John Bauman89401822014-05-06 15:04:28 -04001018 switch(state.targetFormat[index])
1019 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001020 case FORMAT_R5G6B5:
1021 buffer = cBuffer + 2 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001022 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensb40a2562016-01-05 00:08:45 -05001023 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001024
1025 pixel.x = c01 & Short4(0xF800u);
1026 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1027 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1028 pixel.w = Short4(0xFFFFu);
1029 break;
John Bauman89401822014-05-06 15:04:28 -04001030 case FORMAT_A8R8G8B8:
1031 buffer = cBuffer + 4 * x;
1032 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001033 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001034 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001035 pixel.z = c01;
1036 pixel.y = c01;
1037 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1038 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1039 pixel.x = pixel.z;
1040 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1041 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1042 pixel.y = pixel.z;
1043 pixel.w = pixel.x;
1044 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1045 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1046 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1047 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001048 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001049 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001050 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001051 buffer = cBuffer + 4 * x;
1052 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001053 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001054 c23 = *Pointer<Short4>(buffer);
1055 pixel.z = c01;
1056 pixel.y = c01;
1057 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1058 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1059 pixel.x = pixel.z;
1060 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1061 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1062 pixel.y = pixel.z;
1063 pixel.w = pixel.x;
1064 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1065 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1066 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1067 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1068 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001069 case FORMAT_A8:
1070 buffer = cBuffer + 1 * x;
1071 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001072 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001073 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1074 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1075 pixel.x = Short4(0x0000);
1076 pixel.y = Short4(0x0000);
1077 pixel.z = Short4(0x0000);
1078 break;
John Bauman89401822014-05-06 15:04:28 -04001079 case FORMAT_X8R8G8B8:
1080 buffer = cBuffer + 4 * x;
1081 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001082 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001083 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001084 pixel.z = c01;
1085 pixel.y = c01;
1086 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1087 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1088 pixel.x = pixel.z;
1089 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1090 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1091 pixel.y = pixel.z;
1092 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1093 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1094 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1095 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001096 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001097 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001098 case FORMAT_SRGB8_X8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001099 buffer = cBuffer + 4 * x;
1100 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001101 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001102 c23 = *Pointer<Short4>(buffer);
1103 pixel.z = c01;
1104 pixel.y = c01;
1105 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1106 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1107 pixel.x = pixel.z;
1108 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1109 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1110 pixel.y = pixel.z;
1111 pixel.w = pixel.x;
1112 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1113 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1114 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1115 pixel.w = Short4(0xFFFFu);
1116 break;
John Bauman89401822014-05-06 15:04:28 -04001117 case FORMAT_A8G8R8B8Q:
1118 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001119 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1120 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1121 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1122 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001123 break;
1124 case FORMAT_X8G8R8B8Q:
1125 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001126 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1127 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1128 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1129 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001130 break;
1131 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001132 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001133 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1134 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001135 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001136 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1137 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1138 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001139 break;
1140 case FORMAT_G16R16:
1141 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001142 pixel.x = *Pointer<Short4>(buffer + 4 * x);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001143 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Maxime Grégoired9762742015-07-08 16:43:48 -04001144 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001145 pixel.z = pixel.x;
1146 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1147 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1148 pixel.y = pixel.z;
1149 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1150 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1151 pixel.z = Short4(0xFFFFu);
1152 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001153 break;
1154 default:
1155 ASSERT(false);
1156 }
1157
Alexis Hetu049a1872016-04-25 16:59:58 -04001158 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001159 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001160 sRGBtoLinear16_12_16(pixel);
John Bauman89401822014-05-06 15:04:28 -04001161 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001162 }
1163
Nicolas Capens4f172c72016-01-13 08:34:30 -05001164 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001165 {
1166 if(!state.alphaBlendActive)
1167 {
1168 return;
1169 }
1170
1171 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001172 readPixel(index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001173
1174 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001175 Vector4s sourceFactor;
1176 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001177
Nicolas Capens4f172c72016-01-13 08:34:30 -05001178 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1179 blendFactor(destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001180
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001181 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001182 {
John Bauman19bac1e2014-05-06 15:23:49 -04001183 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1184 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1185 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001186 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001187
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001188 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001189 {
John Bauman19bac1e2014-05-06 15:23:49 -04001190 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1191 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1192 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001193 }
1194
1195 switch(state.blendOperation)
1196 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001197 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001198 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1199 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1200 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001201 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001202 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001203 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1204 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1205 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001206 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001207 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001208 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1209 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1210 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001211 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001212 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001213 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1214 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1215 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001216 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001217 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001218 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1219 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1220 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001221 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001222 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001223 // No operation
1224 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001225 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001226 current.x = pixel.x;
1227 current.y = pixel.y;
1228 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001229 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001230 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001231 current.x = Short4(0x0000);
1232 current.y = Short4(0x0000);
1233 current.z = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001234 break;
1235 default:
1236 ASSERT(false);
1237 }
1238
Nicolas Capens4f172c72016-01-13 08:34:30 -05001239 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1240 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001241
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001242 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001243 {
John Bauman19bac1e2014-05-06 15:23:49 -04001244 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001245 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001246
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001247 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001248 {
John Bauman19bac1e2014-05-06 15:23:49 -04001249 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001250 }
1251
1252 switch(state.blendOperationAlpha)
1253 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001254 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001255 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001256 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001257 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001258 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001259 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001260 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001261 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001262 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001263 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001264 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001265 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001266 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001267 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001268 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001269 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001270 // No operation
1271 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001272 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001273 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001274 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001275 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001276 current.w = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001277 break;
1278 default:
1279 ASSERT(false);
1280 }
1281 }
1282
Nicolas Capens4f172c72016-01-13 08:34:30 -05001283 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001284 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001285 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001286 {
1287 return;
1288 }
1289
1290 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001291 readPixel(index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001292
1293 switch(state.logicalOperation)
1294 {
1295 case LOGICALOP_CLEAR:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001296 current.x = UShort4(0);
1297 current.y = UShort4(0);
1298 current.z = UShort4(0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001299 break;
1300 case LOGICALOP_SET:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001301 current.x = UShort4(0xFFFFu);
1302 current.y = UShort4(0xFFFFu);
1303 current.z = UShort4(0xFFFFu);
Maxime Grégoired9762742015-07-08 16:43:48 -04001304 break;
1305 case LOGICALOP_COPY:
1306 ASSERT(false); // Optimized out
1307 break;
1308 case LOGICALOP_COPY_INVERTED:
1309 current.x = ~current.x;
1310 current.y = ~current.y;
1311 current.z = ~current.z;
1312 break;
1313 case LOGICALOP_NOOP:
1314 current.x = pixel.x;
1315 current.y = pixel.y;
1316 current.z = pixel.z;
1317 break;
1318 case LOGICALOP_INVERT:
1319 current.x = ~pixel.x;
1320 current.y = ~pixel.y;
1321 current.z = ~pixel.z;
1322 break;
1323 case LOGICALOP_AND:
1324 current.x = pixel.x & current.x;
1325 current.y = pixel.y & current.y;
1326 current.z = pixel.z & current.z;
1327 break;
1328 case LOGICALOP_NAND:
1329 current.x = ~(pixel.x & current.x);
1330 current.y = ~(pixel.y & current.y);
1331 current.z = ~(pixel.z & current.z);
1332 break;
1333 case LOGICALOP_OR:
1334 current.x = pixel.x | current.x;
1335 current.y = pixel.y | current.y;
1336 current.z = pixel.z | current.z;
1337 break;
1338 case LOGICALOP_NOR:
1339 current.x = ~(pixel.x | current.x);
1340 current.y = ~(pixel.y | current.y);
1341 current.z = ~(pixel.z | current.z);
1342 break;
1343 case LOGICALOP_XOR:
1344 current.x = pixel.x ^ current.x;
1345 current.y = pixel.y ^ current.y;
1346 current.z = pixel.z ^ current.z;
1347 break;
1348 case LOGICALOP_EQUIV:
1349 current.x = ~(pixel.x ^ current.x);
1350 current.y = ~(pixel.y ^ current.y);
1351 current.z = ~(pixel.z ^ current.z);
1352 break;
1353 case LOGICALOP_AND_REVERSE:
1354 current.x = ~pixel.x & current.x;
1355 current.y = ~pixel.y & current.y;
1356 current.z = ~pixel.z & current.z;
1357 break;
1358 case LOGICALOP_AND_INVERTED:
1359 current.x = pixel.x & ~current.x;
1360 current.y = pixel.y & ~current.y;
1361 current.z = pixel.z & ~current.z;
1362 break;
1363 case LOGICALOP_OR_REVERSE:
1364 current.x = ~pixel.x | current.x;
1365 current.y = ~pixel.y | current.y;
1366 current.z = ~pixel.z | current.z;
1367 break;
1368 case LOGICALOP_OR_INVERTED:
1369 current.x = pixel.x | ~current.x;
1370 current.y = pixel.y | ~current.y;
1371 current.z = pixel.z | ~current.z;
1372 break;
1373 default:
1374 ASSERT(false);
1375 }
1376 }
1377
Nicolas Capens4f172c72016-01-13 08:34:30 -05001378 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001379 {
Alexis Hetu049a1872016-04-25 16:59:58 -04001380 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001381 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001382 linearToSRGB16_12_16(current);
John Bauman89401822014-05-06 15:04:28 -04001383 }
1384
1385 if(exactColorRounding)
1386 {
1387 switch(state.targetFormat[index])
1388 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001389 case FORMAT_R5G6B5:
Nicolas Capens26f37222015-09-22 09:53:45 -04001390 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1391 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1392 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001393 break;
John Bauman89401822014-05-06 15:04:28 -04001394 case FORMAT_X8G8R8B8Q:
1395 case FORMAT_A8G8R8B8Q:
1396 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001397 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001398 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001399 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001400 case FORMAT_SRGB8_X8:
1401 case FORMAT_SRGB8_A8:
Alexis Hetu143dfc72016-09-13 18:41:27 -04001402 case FORMAT_G8R8:
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001403 case FORMAT_R8:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001404 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1405 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1406 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1407 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
John Bauman89401822014-05-06 15:04:28 -04001408 break;
Nicolas Capensb69aa272016-01-02 00:06:41 -05001409 default:
1410 break;
John Bauman89401822014-05-06 15:04:28 -04001411 }
1412 }
1413
1414 int rgbaWriteMask = state.colorWriteActive(index);
Nicolas Capens3b396462016-01-02 00:23:53 -05001415 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
John Bauman89401822014-05-06 15:04:28 -04001416
1417 switch(state.targetFormat[index])
1418 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001419 case FORMAT_R5G6B5:
1420 {
1421 current.x = current.x & Short4(0xF800u);
1422 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1423 current.z = As<UShort4>(current.z) >> 11;
1424
1425 current.x = current.x | current.y | current.z;
1426 }
1427 break;
John Bauman89401822014-05-06 15:04:28 -04001428 case FORMAT_X8G8R8B8Q:
1429 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001430 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1431 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1432 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001433
John Bauman19bac1e2014-05-06 15:23:49 -04001434 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1435 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001436 break;
1437 case FORMAT_A8G8R8B8Q:
1438 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001439 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1440 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1441 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1442 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001443
John Bauman19bac1e2014-05-06 15:23:49 -04001444 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1445 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001446 break;
1447 case FORMAT_X8R8G8B8:
1448 case FORMAT_A8R8G8B8:
1449 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1450 {
John Bauman19bac1e2014-05-06 15:23:49 -04001451 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1452 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1453 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001454
Nicolas Capens33438a62017-09-27 11:47:35 -04001455 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1456 current.y = As<Short4>(PackUnsigned(current.y, current.y));
John Bauman89401822014-05-06 15:04:28 -04001457
John Bauman19bac1e2014-05-06 15:23:49 -04001458 current.x = current.z;
1459 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1460 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1461 current.y = current.z;
1462 current.z = As<Short4>(UnpackLow(current.z, current.x));
1463 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001464 }
1465 else
1466 {
John Bauman19bac1e2014-05-06 15:23:49 -04001467 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1468 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1469 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1470 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001471
Nicolas Capens33438a62017-09-27 11:47:35 -04001472 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1473 current.y = As<Short4>(PackUnsigned(current.y, current.w));
John Bauman89401822014-05-06 15:04:28 -04001474
John Bauman19bac1e2014-05-06 15:23:49 -04001475 current.x = current.z;
1476 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1477 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1478 current.y = current.z;
1479 current.z = As<Short4>(UnpackLow(current.z, current.x));
1480 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001481 }
1482 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001483 case FORMAT_X8B8G8R8:
1484 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001485 case FORMAT_SRGB8_X8:
1486 case FORMAT_SRGB8_A8:
1487 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001488 {
1489 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1490 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1491 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1492
Nicolas Capens33438a62017-09-27 11:47:35 -04001493 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1494 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001495
1496 current.x = current.z;
1497 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1498 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1499 current.y = current.z;
1500 current.z = As<Short4>(UnpackLow(current.z, current.x));
1501 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1502 }
1503 else
1504 {
1505 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1506 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1507 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1508 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1509
Nicolas Capens33438a62017-09-27 11:47:35 -04001510 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1511 current.y = As<Short4>(PackUnsigned(current.y, current.w));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001512
1513 current.x = current.z;
1514 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1515 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1516 current.y = current.z;
1517 current.z = As<Short4>(UnpackLow(current.z, current.x));
1518 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1519 }
1520 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001521 case FORMAT_G8R8:
1522 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1523 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001524 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1525 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Alexis Hetu143dfc72016-09-13 18:41:27 -04001526 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1527 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001528 case FORMAT_R8:
1529 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001530 current.x = As<Short4>(PackUnsigned(current.x, current.x));
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001531 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001532 case FORMAT_A8:
1533 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001534 current.w = As<Short4>(PackUnsigned(current.w, current.w));
John Bauman66b8ab22014-05-06 15:57:45 -04001535 break;
John Bauman89401822014-05-06 15:04:28 -04001536 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001537 current.z = current.x;
1538 current.x = As<Short4>(UnpackLow(current.x, current.y));
1539 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1540 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001541 break;
1542 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001543 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001544 break;
John Bauman89401822014-05-06 15:04:28 -04001545 default:
1546 ASSERT(false);
1547 }
1548
John Bauman19bac1e2014-05-06 15:23:49 -04001549 Short4 c01 = current.z;
1550 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001551
1552 Int xMask; // Combination of all masks
1553
1554 if(state.depthTestActive)
1555 {
1556 xMask = zMask;
1557 }
1558 else
1559 {
1560 xMask = cMask;
1561 }
1562
1563 if(state.stencilActive)
1564 {
1565 xMask &= sMask;
1566 }
1567
John Bauman89401822014-05-06 15:04:28 -04001568 switch(state.targetFormat[index])
1569 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001570 case FORMAT_R5G6B5:
1571 {
1572 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001573 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001574
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001575 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001576
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001577 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001578 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001579 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001580 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001581 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001582 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001583 }
1584
Nicolas Capens4f172c72016-01-13 08:34:30 -05001585 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1586 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001587 c01 |= value;
1588 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001589
Nicolas Capens4f172c72016-01-13 08:34:30 -05001590 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001591 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001592
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001593 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001594
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001595 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001596 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001597 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001598 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001599 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001600 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001601 }
1602
Nicolas Capens4f172c72016-01-13 08:34:30 -05001603 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1604 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001605 c23 |= value;
1606 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001607 }
1608 break;
John Bauman89401822014-05-06 15:04:28 -04001609 case FORMAT_A8G8R8B8Q:
1610 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
1611 UNIMPLEMENTED();
1612 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1613
1614 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1615 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1616 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1617 // {
1618 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001619 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1620 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001621 // c01 |= masked;
1622 // }
1623
Nicolas Capens4f172c72016-01-13 08:34:30 -05001624 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1625 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001626 // c01 |= value;
1627 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1628
1629 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1630
1631 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1632 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1633 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1634 // {
1635 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001636 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1637 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001638 // c23 |= masked;
1639 // }
1640
Nicolas Capens4f172c72016-01-13 08:34:30 -05001641 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1642 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001643 // c23 |= value;
1644 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1645 break;
1646 case FORMAT_A8R8G8B8:
1647 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001648 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001649 Pointer<Byte> buffer = cBuffer + x * 4;
1650 Short4 value = *Pointer<Short4>(buffer);
1651
1652 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1653 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1654 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1655 {
1656 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001657 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1658 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001659 c01 |= masked;
1660 }
1661
Nicolas Capens4f172c72016-01-13 08:34:30 -05001662 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1663 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001664 c01 |= value;
1665 *Pointer<Short4>(buffer) = c01;
1666
Nicolas Capens4f172c72016-01-13 08:34:30 -05001667 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001668 value = *Pointer<Short4>(buffer);
1669
1670 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1671 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1672 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1673 {
1674 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001675 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1676 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001677 c23 |= masked;
1678 }
1679
Nicolas Capens4f172c72016-01-13 08:34:30 -05001680 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1681 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001682 c23 |= value;
1683 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001684 }
John Bauman89401822014-05-06 15:04:28 -04001685 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001686 case FORMAT_A8B8G8R8:
1687 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Alexis Hetu049a1872016-04-25 16:59:58 -04001688 case FORMAT_SRGB8_X8:
1689 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001690 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001691 Pointer<Byte> buffer = cBuffer + x * 4;
1692 Short4 value = *Pointer<Short4>(buffer);
1693
Alexis Hetu049a1872016-04-25 16:59:58 -04001694 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
1695 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
1696 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
1697
1698 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001699 {
1700 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001701 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1702 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001703 c01 |= masked;
1704 }
1705
Nicolas Capens4f172c72016-01-13 08:34:30 -05001706 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1707 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001708 c01 |= value;
1709 *Pointer<Short4>(buffer) = c01;
1710
Nicolas Capens4f172c72016-01-13 08:34:30 -05001711 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001712 value = *Pointer<Short4>(buffer);
1713
Alexis Hetu049a1872016-04-25 16:59:58 -04001714 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001715 {
1716 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001717 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1718 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001719 c23 |= masked;
1720 }
1721
Nicolas Capens4f172c72016-01-13 08:34:30 -05001722 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1723 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001724 c23 |= value;
1725 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001726 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001727 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001728 case FORMAT_G8R8:
1729 if((rgbaWriteMask & 0x00000003) != 0x0)
1730 {
1731 Pointer<Byte> buffer = cBuffer + 2 * x;
1732 Int2 value;
1733 value = Insert(value, *Pointer<Int>(buffer), 0);
1734 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1735 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1736
1737 Int2 packedCol = As<Int2>(current.x);
1738
1739 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1740 if((rgbaWriteMask & 0x3) != 0x3)
1741 {
1742 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1743 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1744 mergedMask &= rgbaMask;
1745 }
1746
1747 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1748
1749 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1750 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1751 }
1752 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001753 case FORMAT_R8:
1754 if(rgbaWriteMask & 0x00000001)
1755 {
1756 Pointer<Byte> buffer = cBuffer + 1 * x;
1757 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001758 value = Insert(value, *Pointer<Short>(buffer), 0);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001759 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001760 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001761 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1762
1763 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1764 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1765 current.x |= value;
1766
1767 *Pointer<Short>(buffer) = Extract(current.x, 0);
1768 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1769 }
1770 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001771 case FORMAT_A8:
1772 if(rgbaWriteMask & 0x00000008)
1773 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001774 Pointer<Byte> buffer = cBuffer + 1 * x;
1775 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001776 value = Insert(value, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001777 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001778 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
John Bauman66b8ab22014-05-06 15:57:45 -04001779 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1780
Nicolas Capens4f172c72016-01-13 08:34:30 -05001781 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1782 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
John Bauman66b8ab22014-05-06 15:57:45 -04001783 current.w |= value;
1784
1785 *Pointer<Short>(buffer) = Extract(current.w, 0);
1786 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1787 }
1788 break;
John Bauman89401822014-05-06 15:04:28 -04001789 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001790 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001791 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001792
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001793 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001794
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001795 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001796 {
1797 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001798 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001799 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001800 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001801 }
1802
Nicolas Capens4f172c72016-01-13 08:34:30 -05001803 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1804 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001805 current.x |= value;
1806 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001807
Nicolas Capens4f172c72016-01-13 08:34:30 -05001808 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001809
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001810 value = *Pointer<Short4>(buffer);
1811
1812 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001813 {
1814 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001815 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001816 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001817 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001818 }
1819
Nicolas Capens4f172c72016-01-13 08:34:30 -05001820 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1821 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001822 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001823 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001824 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001825 break;
1826 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001827 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001828 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001829
John Bauman89401822014-05-06 15:04:28 -04001830 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001831 Short4 value = *Pointer<Short4>(buffer);
1832
1833 if(rgbaWriteMask != 0x0000000F)
1834 {
1835 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001836 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1837 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001838 current.x |= masked;
1839 }
1840
Nicolas Capens4f172c72016-01-13 08:34:30 -05001841 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1842 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001843 current.x |= value;
1844 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001845 }
1846
John Bauman89401822014-05-06 15:04:28 -04001847 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001848 Short4 value = *Pointer<Short4>(buffer + 8);
1849
1850 if(rgbaWriteMask != 0x0000000F)
1851 {
1852 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001853 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1854 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001855 current.y |= masked;
1856 }
1857
Nicolas Capens4f172c72016-01-13 08:34:30 -05001858 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1859 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001860 current.y |= value;
1861 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001862 }
1863
Nicolas Capens4f172c72016-01-13 08:34:30 -05001864 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001865
1866 {
1867 Short4 value = *Pointer<Short4>(buffer);
1868
1869 if(rgbaWriteMask != 0x0000000F)
1870 {
1871 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001872 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1873 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001874 current.z |= masked;
1875 }
1876
Nicolas Capens4f172c72016-01-13 08:34:30 -05001877 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1878 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001879 current.z |= value;
1880 *Pointer<Short4>(buffer) = current.z;
1881 }
1882
1883 {
1884 Short4 value = *Pointer<Short4>(buffer + 8);
1885
1886 if(rgbaWriteMask != 0x0000000F)
1887 {
1888 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001889 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1890 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001891 current.w |= masked;
1892 }
1893
Nicolas Capens4f172c72016-01-13 08:34:30 -05001894 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1895 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001896 current.w |= value;
1897 *Pointer<Short4>(buffer + 8) = current.w;
1898 }
John Bauman89401822014-05-06 15:04:28 -04001899 }
1900 break;
1901 default:
1902 ASSERT(false);
1903 }
1904 }
1905
// Computes the color (x/y/z) blend factor selected by blendFactorActive.
//   blendFactor       - output; only x, y and z are written, and only for factors
//                       other than BLEND_ZERO / BLEND_ONE.
//   oC                - source (object) color being blended; read only.
//   pixel             - color read back from the render target; read only.
//   blendFactorActive - which factor to produce.
// The w component is never touched here; blendFactorAlpha() handles it.
// An unrecognized factor trips ASSERT(false).
Nicolas Capens96d4e092016-11-18 14:22:38 -05001906 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001907 {
1908 switch(blendFactorActive)
1909 {
// BLEND_ZERO and BLEND_ONE write nothing: alphaBlend() does not multiply by the
// factor for these two values.
// NOTE(review): presumably the zero case is folded into state.blendOperation
// (BLENDOP_SOURCE / BLENDOP_DEST / BLENDOP_NULL) by the caller's state setup - confirm.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001910 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001911 // Optimized
1912 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001913 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001914 // Optimized
1915 break;
// Per-channel factors: source color, destination color, and their complements.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001916 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001917 blendFactor.x = oC.x;
1918 blendFactor.y = oC.y;
1919 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001920 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001921 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001922 blendFactor.x = Float4(1.0f) - oC.x;
1923 blendFactor.y = Float4(1.0f) - oC.y;
1924 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001925 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001926 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001927 blendFactor.x = pixel.x;
1928 blendFactor.y = pixel.y;
1929 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001931 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001932 blendFactor.x = Float4(1.0f) - pixel.x;
1933 blendFactor.y = Float4(1.0f) - pixel.y;
1934 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001935 break;
// Alpha-derived factors: the same w value is replicated to x, y and z.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001936 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001937 blendFactor.x = oC.w;
1938 blendFactor.y = oC.w;
1939 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001940 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001941 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001942 blendFactor.x = Float4(1.0f) - oC.w;
1943 blendFactor.y = Float4(1.0f) - oC.w;
1944 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001945 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001946 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001947 blendFactor.x = pixel.w;
1948 blendFactor.y = pixel.w;
1949 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001950 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001951 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001952 blendFactor.x = Float4(1.0f) - pixel.w;
1953 blendFactor.y = Float4(1.0f) - pixel.w;
1954 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001955 break;
// Saturated source alpha: min(source alpha, 1 - destination alpha), replicated to x, y and z.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001956 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001957 blendFactor.x = Float4(1.0f) - pixel.w;
1958 blendFactor.x = Min(blendFactor.x, oC.w);
1959 blendFactor.y = blendFactor.x;
1960 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001961 break;
// Constant factors are loaded from the draw data: entries [0], [1] and [2] of
// factor.blendConstant4F / factor.invBlendConstant4F map to x, y and z.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001962 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001963 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1964 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1965 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001966 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001967 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001968 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1969 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1970 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001971 break;
1972 default:
1973 ASSERT(false);
1974 }
1975 }
1976
// Computes the alpha (w) blend factor selected by blendFactorAlphaActive.
//   blendFactor            - output; only w is written, and only for factors other
//                            than BLEND_ZERO / BLEND_ONE.
//   oC                     - source (object) color; only its w component is read.
//   pixel                  - color read back from the render target; only w is read.
//   blendFactorAlphaActive - which factor to produce.
// An unrecognized factor trips ASSERT(false).
Nicolas Capens96d4e092016-11-18 14:22:38 -05001977 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001978 {
1979 switch(blendFactorAlphaActive)
1980 {
// BLEND_ZERO and BLEND_ONE write nothing: alphaBlend() does not multiply by the
// factor for these two values.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001981 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001982 // Optimized
1983 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001984 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001985 // Optimized
1986 break;
// For the alpha channel the "color" and "alpha" variants coincide:
// BLEND_SOURCE == BLEND_SOURCEALPHA and BLEND_DEST == BLEND_DESTALPHA (likewise the inverses).
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001987 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001988 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001989 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001990 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001991 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001992 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001993 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001994 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001995 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001996 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001997 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001998 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001999 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002000 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002001 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002002 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002003 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04002004 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002005 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002006 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002007 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002008 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002009 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002010 break;
// Unlike the color version, the saturated factor for alpha is the constant 1.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002011 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002012 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002013 break;
// Constant alpha comes from entry [3] of the blend constant stored in the draw data.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002014 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002015 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002016 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002017 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002018 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002019 break;
2020 default:
2021 ASSERT(false);
2022 }
2023 }
2024
// Blends the floating-point source color oC with the color already stored in
// render target `index`, leaving the blended result in oC.
//   index   - render target index; selects state.targetFormat[index] and the row
//             pitch colorPitchB[index].
//   cBuffer - base pointer of the color buffer row; only read by this function.
//   oC      - in/out: source color on entry, blended color on exit.
//   x       - horizontal position, scaled by the per-pixel byte size (4, 8 or 16)
//             to address the buffer.
// Does nothing when state.alphaBlendActive is false. Nothing is written to the
// color buffer here. Only the 32-bit-per-channel float/integer formats listed in
// the switch below are handled; any other format trips ASSERT(false).
Nicolas Capens4f172c72016-01-13 08:34:30 -05002025 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04002026 {
2027 if(!state.alphaBlendActive)
2028 {
2029 return;
2030 }
2031
2032 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002033 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04002034
// NOTE(review): color, c01 and c23 are declared but never referenced in this function.
Alexis Hetu96517182015-04-15 10:30:23 -04002035 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04002036 Short4 c01;
2037 Short4 c23;
2038
// `one` is the value substituted for channels the target format does not store:
// 1.0f for float formats; for non-normalized integer formats the bit pattern
// 0xFFFFFFFF (unsigned component 0) or 0x7FFFFFFF (signed), reinterpreted as float.
// It is left unassigned if neither predicate holds.
// NOTE(review): presumably every format accepted by the switch below satisfies one of them - confirm.
Alexis Hetu1abb6382016-02-08 11:21:16 -05002039 Float4 one;
Alexis Hetu7208e932016-06-02 11:19:24 -04002040 if(Surface::isFloatFormat(state.targetFormat[index]))
John Bauman89401822014-05-06 15:04:28 -04002041 {
Alexis Hetu1abb6382016-02-08 11:21:16 -05002042 one = Float4(1.0f);
Alexis Hetu7208e932016-06-02 11:19:24 -04002043 }
2044 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
2045 {
2046 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Alexis Hetu1abb6382016-02-08 11:21:16 -05002047 }
2048
// Read the destination color into `pixel`. Each case reads from two consecutive
// rows (the second reached by adding colorPitchB[index]) and arranges the data so
// that pixel.x/y/z/w each hold one channel.
2049 switch(state.targetFormat[index])
2050 {
// One 32-bit channel: two values from the first row fill pixel.x.x/.y, two from
// the next row fill pixel.x.z/.w; the remaining channels are set to `one`.
2051 case FORMAT_R32I:
2052 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002053 case FORMAT_R32F:
2054 buffer = cBuffer;
2055 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002056 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
2057 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002058 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002059 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002060 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
2061 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
Alexis Hetu1abb6382016-02-08 11:21:16 -05002062 pixel.y = pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002063 break;
// Two 32-bit channels: one 16-byte load per row, then two shuffles split the
// loaded data between pixel.x and pixel.y (presumably de-interleaving the two
// channels; see ShuffleLowHigh). z and w are set to `one`.
Alexis Hetu1abb6382016-02-08 11:21:16 -05002064 case FORMAT_G32R32I:
2065 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002066 case FORMAT_G32R32F:
2067 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002068 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002069 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002070 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2071 pixel.z = pixel.x;
2072 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2073 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2074 pixel.y = pixel.z;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002075 pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002076 break;
// Four 32-bit channels: two 16-byte loads per row, then a 4x4 transpose so each
// of pixel.x/y/z/w holds a single channel. The X (no alpha) format forces w to 1.0.
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002077 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002078 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002079 case FORMAT_A32B32G32R32I:
2080 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002081 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002082 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2083 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002084 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002085 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2086 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2087 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002088 if(state.targetFormat[index] == FORMAT_X32B32G32R32F)
2089 {
2090 pixel.w = Float4(1.0f);
2091 }
John Bauman89401822014-05-06 15:04:28 -04002092 break;
2093 default:
2094 ASSERT(false);
2095 }
2096
// Convert the destination color channels (not alpha) from sRGB to linear before
// blending when sRGB output is in effect for this target.
Alexis Hetu049a1872016-04-25 16:59:58 -04002097 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04002098 {
John Bauman19bac1e2014-05-06 15:23:49 -04002099 sRGBtoLinear(pixel.x);
2100 sRGBtoLinear(pixel.y);
2101 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002102 }
2103
2104 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002105 Vector4f sourceFactor;
2106 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002107
// Both color factors are computed before either oC or pixel is scaled, so each
// factor is based on the unmodified source and destination colors.
Nicolas Capens4f172c72016-01-13 08:34:30 -05002108 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
2109 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002110
// blendFactor() leaves the factor unwritten for BLEND_ONE / BLEND_ZERO, so the
// multiply is skipped for those.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002111 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002112 {
John Bauman19bac1e2014-05-06 15:23:49 -04002113 oC.x *= sourceFactor.x;
2114 oC.y *= sourceFactor.y;
2115 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002116 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002117
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002118 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002119 {
John Bauman19bac1e2014-05-06 15:23:49 -04002120 pixel.x *= destFactor.x;
2121 pixel.y *= destFactor.y;
2122 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002123 }
2124
// Combine the scaled source and destination colors (x/y/z only).
2125 switch(state.blendOperation)
2126 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002127 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002128 oC.x += pixel.x;
2129 oC.y += pixel.y;
2130 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002131 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002132 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002133 oC.x -= pixel.x;
2134 oC.y -= pixel.y;
2135 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002136 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002137 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002138 oC.x = pixel.x - oC.x;
2139 oC.y = pixel.y - oC.y;
2140 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002141 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002142 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002143 oC.x = Min(oC.x, pixel.x);
2144 oC.y = Min(oC.y, pixel.y);
2145 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002146 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002147 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002148 oC.x = Max(oC.x, pixel.x);
2149 oC.y = Max(oC.y, pixel.y);
2150 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002151 break;
// BLENDOP_SOURCE keeps the (scaled) source, BLENDOP_DEST takes the (scaled)
// destination, and BLENDOP_NULL yields zero.
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002152 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002153 // No operation
2154 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002155 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002156 oC.x = pixel.x;
2157 oC.y = pixel.y;
2158 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002159 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002160 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002161 oC.x = Float4(0.0f);
2162 oC.y = Float4(0.0f);
2163 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002164 break;
2165 default:
2166 ASSERT(false);
2167 }
2168
// Alpha pass. The same factor vectors are reused, but only their w component is
// written and read from here on. oC.w and pixel.w are still the original alphas,
// because the color pass above modified only x, y and z.
Nicolas Capens4f172c72016-01-13 08:34:30 -05002169 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2170 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002171
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002172 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002173 {
John Bauman19bac1e2014-05-06 15:23:49 -04002174 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002175 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002176
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002177 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002178 {
John Bauman19bac1e2014-05-06 15:23:49 -04002179 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002180 }
2181
// Combine the scaled source and destination alpha; mirrors the color switch above.
2182 switch(state.blendOperationAlpha)
2183 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002184 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002185 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002186 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002187 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002188 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002189 break;
// Reverse subtract: destination minus source (pixel.w is clobbered as scratch).
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002190 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002191 pixel.w -= oC.w;
2192 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002193 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002194 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002195 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002196 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002197 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002198 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002199 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002200 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002201 // No operation
2202 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002203 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002204 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002205 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002206 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002207 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002208 break;
2209 default:
2210 ASSERT(false);
2211 }
2212 }
2213
Nicolas Capens4f172c72016-01-13 08:34:30 -05002214 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002215 {
John Bauman89401822014-05-06 15:04:28 -04002216 switch(state.targetFormat[index])
2217 {
John Bauman89401822014-05-06 15:04:28 -04002218 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002219 case FORMAT_R32I:
2220 case FORMAT_R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002221 case FORMAT_R16I:
2222 case FORMAT_R16UI:
2223 case FORMAT_R8I:
2224 case FORMAT_R8UI:
John Bauman89401822014-05-06 15:04:28 -04002225 break;
2226 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002227 case FORMAT_G32R32I:
2228 case FORMAT_G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002229 case FORMAT_G16R16I:
2230 case FORMAT_G16R16UI:
2231 case FORMAT_G8R8I:
2232 case FORMAT_G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002233 oC.z = oC.x;
2234 oC.x = UnpackLow(oC.x, oC.y);
2235 oC.z = UnpackHigh(oC.z, oC.y);
2236 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002237 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002238 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002239 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002240 case FORMAT_A32B32G32R32I:
2241 case FORMAT_A32B32G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002242 case FORMAT_A16B16G16R16I:
2243 case FORMAT_A16B16G16R16UI:
2244 case FORMAT_A8B8G8R8I:
2245 case FORMAT_A8B8G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002246 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002247 break;
2248 default:
2249 ASSERT(false);
2250 }
2251
2252 int rgbaWriteMask = state.colorWriteActive(index);
2253
2254 Int xMask; // Combination of all masks
2255
2256 if(state.depthTestActive)
2257 {
2258 xMask = zMask;
2259 }
2260 else
2261 {
2262 xMask = cMask;
2263 }
2264
2265 if(state.stencilActive)
2266 {
2267 xMask &= sMask;
2268 }
2269
2270 Pointer<Byte> buffer;
2271 Float4 value;
2272
2273 switch(state.targetFormat[index])
2274 {
2275 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002276 case FORMAT_R32I:
2277 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002278 if(rgbaWriteMask & 0x00000001)
2279 {
2280 buffer = cBuffer + 4 * x;
2281
2282 // FIXME: movlps
2283 value.x = *Pointer<Float>(buffer + 0);
2284 value.y = *Pointer<Float>(buffer + 4);
2285
Nicolas Capens4f172c72016-01-13 08:34:30 -05002286 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002287
2288 // FIXME: movhps
2289 value.z = *Pointer<Float>(buffer + 0);
2290 value.w = *Pointer<Float>(buffer + 4);
2291
Nicolas Capens4f172c72016-01-13 08:34:30 -05002292 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2293 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002294 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002295
2296 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002297 *Pointer<Float>(buffer + 0) = oC.x.z;
2298 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002299
Nicolas Capens4f172c72016-01-13 08:34:30 -05002300 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002301
2302 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002303 *Pointer<Float>(buffer + 0) = oC.x.x;
2304 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002305 }
2306 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002307 case FORMAT_R16I:
2308 case FORMAT_R16UI:
2309 if(rgbaWriteMask & 0x00000001)
2310 {
2311 buffer = cBuffer + 2 * x;
2312
2313 UShort4 xyzw;
2314 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2315
2316 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2317
2318 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2319 value = As<Float4>(Int4(xyzw));
2320
2321 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2322 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2323 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2324
2325 if(state.targetFormat[index] == FORMAT_R16I)
2326 {
2327 Float component = oC.x.z;
2328 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2329 component = oC.x.w;
2330 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2331
2332 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2333
2334 component = oC.x.x;
2335 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2336 component = oC.x.y;
2337 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2338 }
2339 else // FORMAT_R16UI
2340 {
2341 Float component = oC.x.z;
2342 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2343 component = oC.x.w;
2344 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2345
2346 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2347
2348 component = oC.x.x;
2349 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2350 component = oC.x.y;
2351 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2352 }
2353 }
2354 break;
2355 case FORMAT_R8I:
2356 case FORMAT_R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002357 if(rgbaWriteMask & 0x00000001)
2358 {
2359 buffer = cBuffer + x;
2360
2361 UInt xyzw, packedCol;
2362
Alexis Hetu827d07a2016-09-15 17:54:05 -04002363 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002364 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetu827d07a2016-09-15 17:54:05 -04002365 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002366
2367 Short4 tmpCol = Short4(As<Int4>(oC.x));
2368 if(state.targetFormat[index] == FORMAT_R8I)
2369 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002370 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002371 }
2372 else
2373 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002374 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002375 }
2376 packedCol = Extract(As<Int2>(tmpCol), 0);
2377
2378 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2379 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2380
2381 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2382 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2383 *Pointer<UShort>(buffer) = UShort(packedCol);
2384 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002385 break;
John Bauman89401822014-05-06 15:04:28 -04002386 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002387 case FORMAT_G32R32I:
2388 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002389 buffer = cBuffer + 8 * x;
2390
2391 value = *Pointer<Float4>(buffer);
2392
2393 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2394 {
2395 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002396 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002397 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002398 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002399 }
2400
Nicolas Capens4f172c72016-01-13 08:34:30 -05002401 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2402 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002403 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2404 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002405
Nicolas Capens4f172c72016-01-13 08:34:30 -05002406 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002407
2408 value = *Pointer<Float4>(buffer);
2409
2410 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2411 {
2412 Float4 masked;
2413
2414 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002415 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002416 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002417 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002418 }
2419
Nicolas Capens4f172c72016-01-13 08:34:30 -05002420 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2421 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002422 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2423 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002424 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002425 case FORMAT_G16R16I:
2426 case FORMAT_G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002427 if((rgbaWriteMask & 0x00000003) != 0x0)
2428 {
2429 buffer = cBuffer + 4 * x;
2430
2431 UInt2 rgbaMask;
2432 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2433 UShort4 value = *Pointer<UShort4>(buffer);
2434 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2435 if((rgbaWriteMask & 0x3) != 0x3)
2436 {
2437 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2438 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2439 mergedMask &= rgbaMask;
2440 }
2441 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2442
2443 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2444
2445 packedCol = UShort4(As<Int4>(oC.y));
2446 value = *Pointer<UShort4>(buffer);
2447 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2448 if((rgbaWriteMask & 0x3) != 0x3)
2449 {
2450 mergedMask &= rgbaMask;
2451 }
2452 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2453 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002454 break;
2455 case FORMAT_G8R8I:
2456 case FORMAT_G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002457 if((rgbaWriteMask & 0x00000003) != 0x0)
2458 {
2459 buffer = cBuffer + 2 * x;
2460
2461 Int2 xyzw, packedCol;
2462
2463 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2464 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2465 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2466
2467 if(state.targetFormat[index] == FORMAT_G8R8I)
2468 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002469 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002470 }
2471 else
2472 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002473 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002474 }
2475
2476 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2477 if((rgbaWriteMask & 0x3) != 0x3)
2478 {
2479 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2480 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2481 mergedMask &= rgbaMask;
2482 }
2483
2484 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2485
2486 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2487 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2488 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2489 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002490 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002491 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002492 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002493 case FORMAT_A32B32G32R32I:
2494 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002495 buffer = cBuffer + 16 * x;
2496
2497 {
2498 value = *Pointer<Float4>(buffer, 16);
2499
2500 if(rgbaWriteMask != 0x0000000F)
2501 {
2502 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002503 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2504 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002505 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002506 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002507
Nicolas Capens4f172c72016-01-13 08:34:30 -05002508 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2509 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002510 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2511 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002512 }
2513
2514 {
2515 value = *Pointer<Float4>(buffer + 16, 16);
2516
2517 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens05b3d662016-02-25 23:58:33 -05002518 {
John Bauman89401822014-05-06 15:04:28 -04002519 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002520 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2521 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002522 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002523 }
2524
Nicolas Capens4f172c72016-01-13 08:34:30 -05002525 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2526 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002527 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2528 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002529 }
2530
Nicolas Capens4f172c72016-01-13 08:34:30 -05002531 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002532
2533 {
2534 value = *Pointer<Float4>(buffer, 16);
2535
2536 if(rgbaWriteMask != 0x0000000F)
2537 {
2538 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002539 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2540 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002541 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002542 }
2543
Nicolas Capens4f172c72016-01-13 08:34:30 -05002544 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2545 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002546 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2547 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002548 }
2549
2550 {
Nicolas Capens400667e2017-03-29 14:40:14 -04002551 value = *Pointer<Float4>(buffer + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002552
2553 if(rgbaWriteMask != 0x0000000F)
2554 {
2555 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002556 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2557 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002558 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002559 }
2560
Nicolas Capens4f172c72016-01-13 08:34:30 -05002561 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2562 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002563 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2564 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002565 }
2566 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002567 case FORMAT_A16B16G16R16I:
2568 case FORMAT_A16B16G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002569 if((rgbaWriteMask & 0x0000000F) != 0x0)
2570 {
2571 buffer = cBuffer + 8 * x;
2572
2573 UInt4 rgbaMask;
2574 UShort8 value = *Pointer<UShort8>(buffer);
2575 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2576 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2577 if((rgbaWriteMask & 0xF) != 0xF)
2578 {
2579 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2580 rgbaMask = UInt4(tmpMask, tmpMask);
2581 mergedMask &= rgbaMask;
2582 }
2583 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2584
2585 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2586
2587 value = *Pointer<UShort8>(buffer);
2588 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2589 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2590 if((rgbaWriteMask & 0xF) != 0xF)
2591 {
2592 mergedMask &= rgbaMask;
2593 }
2594 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2595 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002596 break;
2597 case FORMAT_A8B8G8R8I:
2598 case FORMAT_A8B8G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002599 if((rgbaWriteMask & 0x0000000F) != 0x0)
2600 {
2601 UInt2 value, packedCol, mergedMask;
2602
2603 buffer = cBuffer + 4 * x;
2604
2605 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2606 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002607 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002608 }
2609 else
2610 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002611 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002612 }
2613 value = *Pointer<UInt2>(buffer, 16);
2614 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2615 if(rgbaWriteMask != 0xF)
2616 {
2617 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2618 }
2619 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2620
2621 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2622
2623 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2624 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002625 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002626 }
2627 else
2628 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002629 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002630 }
2631 value = *Pointer<UInt2>(buffer, 16);
2632 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2633 if(rgbaWriteMask != 0xF)
2634 {
2635 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2636 }
2637 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2638 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002639 break;
John Bauman89401822014-05-06 15:04:28 -04002640 default:
2641 ASSERT(false);
2642 }
2643 }
2644
John Bauman89401822014-05-06 15:04:28 -04002645 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2646 {
John Bauman19bac1e2014-05-06 15:23:49 -04002647 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002648 }
2649
Nicolas Capens4f172c72016-01-13 08:34:30 -05002650 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002651 {
John Bauman19bac1e2014-05-06 15:23:49 -04002652 c.x = As<UShort4>(c.x) >> 4;
2653 c.y = As<UShort4>(c.y) >> 4;
2654 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002655
Nicolas Capens4f172c72016-01-13 08:34:30 -05002656 sRGBtoLinear12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002657 }
2658
Nicolas Capens4f172c72016-01-13 08:34:30 -05002659 void PixelRoutine::sRGBtoLinear12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002660 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002661 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002662
John Bauman19bac1e2014-05-06 15:23:49 -04002663 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2664 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2665 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2666 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002667
John Bauman19bac1e2014-05-06 15:23:49 -04002668 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2669 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2670 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2671 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002672
John Bauman19bac1e2014-05-06 15:23:49 -04002673 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2674 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2675 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2676 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002677 }
2678
Nicolas Capens4f172c72016-01-13 08:34:30 -05002679 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002680 {
John Bauman19bac1e2014-05-06 15:23:49 -04002681 c.x = As<UShort4>(c.x) >> 4;
2682 c.y = As<UShort4>(c.y) >> 4;
2683 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002684
Nicolas Capens4f172c72016-01-13 08:34:30 -05002685 linearToSRGB12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002686 }
2687
Nicolas Capens4f172c72016-01-13 08:34:30 -05002688 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002689 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002690 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002691
John Bauman19bac1e2014-05-06 15:23:49 -04002692 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2693 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2694 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2695 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002696
John Bauman19bac1e2014-05-06 15:23:49 -04002697 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2698 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2699 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2700 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002701
John Bauman19bac1e2014-05-06 15:23:49 -04002702 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2703 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2704 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2705 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002706 }
2707
John Bauman89401822014-05-06 15:04:28 -04002708 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2709 {
2710 Float4 linear = x * x;
2711 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2712
2713 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2714 }
2715
John Bauman19bac1e2014-05-06 15:23:49 -04002716 bool PixelRoutine::colorUsed()
2717 {
2718 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2719 }
John Bauman89401822014-05-06 15:04:28 -04002720}