// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

15#include "PixelRoutine.hpp"
16
17#include "Renderer.hpp"
John Bauman89401822014-05-06 15:04:28 -040018#include "QuadRasterizer.hpp"
19#include "Surface.hpp"
20#include "Primitive.hpp"
21#include "CPUID.hpp"
22#include "SamplerCore.hpp"
23#include "Constants.hpp"
24#include "Debug.hpp"
25
John Bauman89401822014-05-06 15:04:28 -040026namespace sw
27{
// Global configuration switches; they are only declared here and are
// defined in another translation unit.
extern bool complementaryDepthBuffer;   // When set, depth is handled as (1 - z) in the depth and fog paths below
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern bool forceClearRegisters;        // When set, the constructor zeroes the varying registers for every shader
John Bauman89401822014-05-06 15:04:28 -040032
Nicolas Capens4f172c72016-01-13 08:34:30 -050033 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040034 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040035 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040036 {
Nicolas Capens3b4c93f2016-05-18 12:51:37 -040037 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
Alexis Hetuf2a8c372015-07-13 11:08:41 -040038 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040039 v[i].x = Float4(0.0f);
40 v[i].y = Float4(0.0f);
41 v[i].z = Float4(0.0f);
42 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040043 }
John Bauman89401822014-05-06 15:04:28 -040044 }
45 }
46
47 PixelRoutine::~PixelRoutine()
48 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040049 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040050 {
51 delete sampler[i];
52 }
53 }
54
Nicolas Capens4f172c72016-01-13 08:34:30 -050055 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040056 {
57 #if PERF_PROFILE
58 Long pipeTime = Ticks();
59 #endif
60
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040061 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040062 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050063 sampler[i] = new SamplerCore(constants, state.sampler[i]);
John Bauman89401822014-05-06 15:04:28 -040064 }
65
66 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040067
68 Int zMask[4]; // Depth mask
69 Int sMask[4]; // Stencil mask
70
71 for(unsigned int q = 0; q < state.multiSample; q++)
72 {
73 zMask[q] = cMask[q];
74 sMask[q] = cMask[q];
75 }
76
77 for(unsigned int q = 0; q < state.multiSample; q++)
78 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050079 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040080 }
81
82 Float4 f;
John Bauman89401822014-05-06 15:04:28 -040083 Float4 rhwCentroid;
84
Nicolas Capens4f172c72016-01-13 08:34:30 -050085 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040086
John Bauman19bac1e2014-05-06 15:23:49 -040087 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040088 {
89 for(unsigned int q = 0; q < state.multiSample; q++)
90 {
91 Float4 x = xxxx;
Nicolas Capens4f172c72016-01-13 08:34:30 -050092
John Bauman89401822014-05-06 15:04:28 -040093 if(state.multiSample > 1)
94 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050095 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
John Bauman89401822014-05-06 15:04:28 -040096 }
97
Nicolas Capens4f172c72016-01-13 08:34:30 -050098 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false);
John Bauman89401822014-05-06 15:04:28 -040099 }
100 }
101
102 Bool depthPass = false;
103
104 if(earlyDepthTest)
105 {
106 for(unsigned int q = 0; q < state.multiSample; q++)
107 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500108 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400109 }
110 }
111
112 If(depthPass || Bool(!earlyDepthTest))
113 {
114 #if PERF_PROFILE
115 Long interpTime = Ticks();
116 #endif
117
Nicolas Capens4f172c72016-01-13 08:34:30 -0500118 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400119
John Bauman89401822014-05-06 15:04:28 -0400120 // Centroid locations
121 Float4 XXXX = Float4(0.0f);
122 Float4 YYYY = Float4(0.0f);
123
124 if(state.centroid)
125 {
126 Float4 WWWW(1.0e-9f);
127
128 for(unsigned int q = 0; q < state.multiSample; q++)
129 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500130 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
131 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
132 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400133 }
134
135 WWWW = Rcp_pp(WWWW);
136 XXXX *= WWWW;
137 YYYY *= WWWW;
138
139 XXXX += xxxx;
140 YYYY += yyyy;
141 }
142
John Bauman19bac1e2014-05-06 15:23:49 -0400143 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400144 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500145 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500146 rhw = reciprocal(w, false, false, true);
John Bauman89401822014-05-06 15:04:28 -0400147
148 if(state.centroid)
149 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500150 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
John Bauman89401822014-05-06 15:04:28 -0400151 }
152 }
153
Nicolas Capens3b4c93f2016-05-18 12:51:37 -0400154 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
John Bauman89401822014-05-06 15:04:28 -0400155 {
156 for(int component = 0; component < 4; component++)
157 {
John Bauman89401822014-05-06 15:04:28 -0400158 if(state.interpolant[interpolant].component & (1 << component))
159 {
160 if(!state.interpolant[interpolant].centroid)
161 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500162 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400163 }
164 else
165 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500166 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400167 }
168 }
169 }
170
171 Float4 rcp;
172
173 switch(state.interpolant[interpolant].project)
174 {
175 case 0:
176 break;
177 case 1:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500178 rcp = reciprocal(v[interpolant].y);
179 v[interpolant].x = v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400180 break;
181 case 2:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500182 rcp = reciprocal(v[interpolant].z);
183 v[interpolant].x = v[interpolant].x * rcp;
184 v[interpolant].y = v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400185 break;
186 case 3:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500187 rcp = reciprocal(v[interpolant].w);
188 v[interpolant].x = v[interpolant].x * rcp;
189 v[interpolant].y = v[interpolant].y * rcp;
190 v[interpolant].z = v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400191 break;
192 }
193 }
194
195 if(state.fog.component)
196 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500197 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400198 }
199
Nicolas Capens4f172c72016-01-13 08:34:30 -0500200 setBuiltins(x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400201
202 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500203 cycles[PERF_INTERP] += Ticks() - interpTime;
John Bauman89401822014-05-06 15:04:28 -0400204 #endif
205
206 Bool alphaPass = true;
207
208 if(colorUsed())
209 {
210 #if PERF_PROFILE
211 Long shaderTime = Ticks();
212 #endif
213
Nicolas Capens4f172c72016-01-13 08:34:30 -0500214 applyShader(cMask);
John Bauman89401822014-05-06 15:04:28 -0400215
216 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500217 cycles[PERF_SHADER] += Ticks() - shaderTime;
John Bauman89401822014-05-06 15:04:28 -0400218 #endif
219
Nicolas Capens4f172c72016-01-13 08:34:30 -0500220 alphaPass = alphaTest(cMask);
John Bauman89401822014-05-06 15:04:28 -0400221
John Bauman19bac1e2014-05-06 15:23:49 -0400222 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400223 {
224 for(unsigned int q = 0; q < state.multiSample; q++)
225 {
226 zMask[q] &= cMask[q];
227 sMask[q] &= cMask[q];
228 }
229 }
230 }
231
232 If(alphaPass)
233 {
234 if(!earlyDepthTest)
235 {
236 for(unsigned int q = 0; q < state.multiSample; q++)
237 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500238 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400239 }
240 }
241
242 #if PERF_PROFILE
243 Long ropTime = Ticks();
244 #endif
245
246 If(depthPass || Bool(earlyDepthTest))
247 {
248 for(unsigned int q = 0; q < state.multiSample; q++)
249 {
250 if(state.multiSampleMask & (1 << q))
251 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500252 writeDepth(zBuffer, q, x, z[q], zMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400253
254 if(state.occlusionEnabled)
255 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500256 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
John Bauman89401822014-05-06 15:04:28 -0400257 }
258 }
259 }
260
261 if(colorUsed())
262 {
263 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400264 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400265 #endif
266
Nicolas Capens4f172c72016-01-13 08:34:30 -0500267 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400268 }
269 }
270
271 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500272 cycles[PERF_ROP] += Ticks() - ropTime;
John Bauman89401822014-05-06 15:04:28 -0400273 #endif
274 }
275 }
276
277 for(unsigned int q = 0; q < state.multiSample; q++)
278 {
279 if(state.multiSampleMask & (1 << q))
280 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500281 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400282 }
283 }
284
285 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500286 cycles[PERF_PIPE] += Ticks() - pipeTime;
John Bauman89401822014-05-06 15:04:28 -0400287 #endif
288 }
289
John Bauman89401822014-05-06 15:04:28 -0400290 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
291 {
292 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
293
294 if(!flat)
295 {
296 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
297 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
298
299 if(perspective)
300 {
301 interpolant *= rhw;
302 }
303 }
304
305 return interpolant;
306 }
307
Nicolas Capens4f172c72016-01-13 08:34:30 -0500308 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400309 {
310 if(!state.stencilActive)
311 {
312 return;
313 }
314
315 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
316
317 Pointer<Byte> buffer = sBuffer + 2 * x;
318
319 if(q > 0)
320 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500321 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400322 }
323
324 Byte8 value = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
325 Byte8 valueCCW = value;
326
327 if(!state.noStencilMask)
328 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500329 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400330 }
331
Nicolas Capens4f172c72016-01-13 08:34:30 -0500332 stencilTest(value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400333
334 if(state.twoSidedStencil)
335 {
336 if(!state.noStencilMaskCCW)
337 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500338 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400339 }
340
Nicolas Capens4f172c72016-01-13 08:34:30 -0500341 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400342
Nicolas Capens4f172c72016-01-13 08:34:30 -0500343 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
344 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400345 value |= valueCCW;
346 }
347
348 sMask = SignMask(value) & cMask;
349 }
350
Nicolas Capens4f172c72016-01-13 08:34:30 -0500351 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400352 {
353 Byte8 equal;
354
355 switch(stencilCompareMode)
356 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400357 case STENCIL_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400358 value = Byte8(0xFFFFFFFFFFFFFFFF);
359 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400360 case STENCIL_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400361 value = Byte8(0x0000000000000000);
362 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400363 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400364 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500365 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400366 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400367 case STENCIL_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500368 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400369 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400370 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500371 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400372 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
373 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400374 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400375 equal = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500376 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400377 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500378 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400379 value |= equal;
380 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400381 case STENCIL_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500382 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
John Bauman89401822014-05-06 15:04:28 -0400383 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
384 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
385 value = equal;
386 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400387 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400388 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500389 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400390 value ^= Byte8(0xFFFFFFFFFFFFFFFF);
391 break;
392 default:
393 ASSERT(false);
394 }
395 }
396
Nicolas Capens4f172c72016-01-13 08:34:30 -0500397 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400398 {
399 if(!state.depthTestActive)
400 {
401 return true;
402 }
403
404 Float4 Z = z;
405
John Bauman19bac1e2014-05-06 15:23:49 -0400406 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400407 {
408 if(complementaryDepthBuffer)
409 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500410 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400411 }
412 else
413 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500414 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400415 }
416 }
417
418 Pointer<Byte> buffer;
419 Int pitch;
420
421 if(!state.quadLayoutDepthBuffer)
422 {
423 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500424 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400425 }
426 else
427 {
428 buffer = zBuffer + 8 * x;
429 }
430
431 if(q > 0)
432 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500433 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400434 }
435
436 Float4 zValue;
437
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400438 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400439 {
440 if(!state.quadLayoutDepthBuffer)
441 {
442 // FIXME: Properly optimizes?
443 zValue.xy = *Pointer<Float4>(buffer);
444 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
445 }
446 else
447 {
448 zValue = *Pointer<Float4>(buffer, 16);
449 }
450 }
451
452 Int4 zTest;
453
454 switch(state.depthCompareMode)
455 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400456 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400457 // Optimized
458 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400459 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400460 // Optimized
461 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400462 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400463 zTest = CmpEQ(zValue, Z);
464 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400465 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400466 zTest = CmpNEQ(zValue, Z);
467 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400468 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400469 if(complementaryDepthBuffer)
470 {
471 zTest = CmpLT(zValue, Z);
472 }
473 else
474 {
475 zTest = CmpNLE(zValue, Z);
476 }
477 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400478 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400479 if(complementaryDepthBuffer)
480 {
481 zTest = CmpNLT(zValue, Z);
482 }
483 else
484 {
485 zTest = CmpLE(zValue, Z);
486 }
487 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400488 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400489 if(complementaryDepthBuffer)
490 {
491 zTest = CmpLE(zValue, Z);
492 }
493 else
494 {
495 zTest = CmpNLT(zValue, Z);
496 }
497 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400498 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400499 if(complementaryDepthBuffer)
500 {
501 zTest = CmpNLE(zValue, Z);
502 }
503 else
504 {
505 zTest = CmpLT(zValue, Z);
506 }
507 break;
508 default:
509 ASSERT(false);
510 }
511
512 switch(state.depthCompareMode)
513 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400514 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400515 zMask = cMask;
516 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400517 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400518 zMask = 0x0;
519 break;
520 default:
521 zMask = SignMask(zTest) & cMask;
522 break;
523 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500524
John Bauman89401822014-05-06 15:04:28 -0400525 if(state.stencilActive)
526 {
527 zMask &= sMask;
528 }
529
530 return zMask != 0;
531 }
532
Nicolas Capens4f172c72016-01-13 08:34:30 -0500533 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400534 {
535 Short4 cmp;
536 Short4 equal;
537
538 switch(state.alphaCompareMode)
539 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400540 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400541 aMask = 0xF;
542 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400543 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400544 aMask = 0x0;
545 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400546 case ALPHA_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500547 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400548 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
549 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400550 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500551 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
John Bauman89401822014-05-06 15:04:28 -0400552 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
553 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400554 case ALPHA_LESS: // a < b ~ b > a
Nicolas Capens4f172c72016-01-13 08:34:30 -0500555 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
John Bauman89401822014-05-06 15:04:28 -0400556 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
557 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400558 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
Nicolas Capens4f172c72016-01-13 08:34:30 -0500559 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
560 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400561 cmp |= equal;
562 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
563 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400564 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500565 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF); // FIXME
John Bauman89401822014-05-06 15:04:28 -0400566 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
567 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400568 case ALPHA_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500569 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400570 aMask = SignMask(Pack(cmp, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
571 break;
572 default:
573 ASSERT(false);
574 }
575 }
576
Nicolas Capens4f172c72016-01-13 08:34:30 -0500577 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400578 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500579 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
580 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
581 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
582 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
John Bauman89401822014-05-06 15:04:28 -0400583
584 Int aMask0 = SignMask(coverage0);
585 Int aMask1 = SignMask(coverage1);
586 Int aMask2 = SignMask(coverage2);
587 Int aMask3 = SignMask(coverage3);
588
589 cMask[0] &= aMask0;
590 cMask[1] &= aMask1;
591 cMask[2] &= aMask2;
592 cMask[3] &= aMask3;
593 }
594
Nicolas Capens4f172c72016-01-13 08:34:30 -0500595 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400596 {
597 if(!state.fogActive)
598 {
599 return;
600 }
601
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400602 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400603 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500604 pixelFog(fog);
John Bauman89401822014-05-06 15:04:28 -0400605
John Bauman19bac1e2014-05-06 15:23:49 -0400606 fog = Min(fog, Float4(1.0f));
607 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400608 }
609
Nicolas Capens4f172c72016-01-13 08:34:30 -0500610 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
611 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
612 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400613
John Bauman19bac1e2014-05-06 15:23:49 -0400614 c0.x *= fog;
615 c0.y *= fog;
616 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400617
Nicolas Capens4f172c72016-01-13 08:34:30 -0500618 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
619 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
620 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400621 }
622
Nicolas Capens4f172c72016-01-13 08:34:30 -0500623 void PixelRoutine::pixelFog(Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400624 {
625 Float4 &zw = visibility;
626
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400627 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400628 {
629 if(state.wBasedFog)
630 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500631 zw = rhw;
John Bauman89401822014-05-06 15:04:28 -0400632 }
633 else
634 {
635 if(complementaryDepthBuffer)
636 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500637 zw = Float4(1.0f) - z[0];
John Bauman89401822014-05-06 15:04:28 -0400638 }
639 else
640 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500641 zw = z[0];
John Bauman89401822014-05-06 15:04:28 -0400642 }
643 }
644 }
645
646 switch(state.pixelFogMode)
647 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400648 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400649 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400650 case FOG_LINEAR:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500651 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale));
652 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
John Bauman89401822014-05-06 15:04:28 -0400653 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400654 case FOG_EXP:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500655 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400656 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400657 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400658 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400659 zw *= zw;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500660 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400661 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400662 break;
663 default:
664 ASSERT(false);
665 }
666 }
667
Nicolas Capens4f172c72016-01-13 08:34:30 -0500668 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
John Bauman89401822014-05-06 15:04:28 -0400669 {
670 if(!state.depthWriteEnable)
671 {
672 return;
673 }
674
675 Float4 Z = z;
676
John Bauman19bac1e2014-05-06 15:23:49 -0400677 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400678 {
679 if(complementaryDepthBuffer)
680 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500681 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400682 }
683 else
684 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500685 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400686 }
687 }
688
689 Pointer<Byte> buffer;
690 Int pitch;
691
692 if(!state.quadLayoutDepthBuffer)
Nicolas Capens05b3d662016-02-25 23:58:33 -0500693 {
John Bauman89401822014-05-06 15:04:28 -0400694 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500695 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400696 }
697 else
Nicolas Capens05b3d662016-02-25 23:58:33 -0500698 {
John Bauman89401822014-05-06 15:04:28 -0400699 buffer = zBuffer + 8 * x;
700 }
701
702 if(q > 0)
703 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500704 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400705 }
706
707 Float4 zValue;
708
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400709 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400710 {
711 if(!state.quadLayoutDepthBuffer)
712 {
713 // FIXME: Properly optimizes?
714 zValue.xy = *Pointer<Float4>(buffer);
715 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
716 }
717 else
718 {
719 zValue = *Pointer<Float4>(buffer, 16);
720 }
721 }
722
Nicolas Capens4f172c72016-01-13 08:34:30 -0500723 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
724 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -0400725 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
726
727 if(!state.quadLayoutDepthBuffer)
728 {
729 // FIXME: Properly optimizes?
730 *Pointer<Float2>(buffer) = Float2(Z.xy);
731 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
732 }
733 else
734 {
735 *Pointer<Float4>(buffer, 16) = Z;
736 }
737 }
738
Nicolas Capens4f172c72016-01-13 08:34:30 -0500739 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400740 {
741 if(!state.stencilActive)
742 {
743 return;
744 }
745
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400746 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400747 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400748 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400749 {
750 return;
751 }
752 }
753
754 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
755 {
756 return;
757 }
758
759 Pointer<Byte> buffer = sBuffer + 2 * x;
760
761 if(q > 0)
762 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500763 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400764 }
765
766 Byte8 bufferValue = As<Byte8>(Long1(*Pointer<UInt>(buffer)));
Nicolas Capens05b3d662016-02-25 23:58:33 -0500767
John Bauman89401822014-05-06 15:04:28 -0400768 Byte8 newValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500769 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400770
771 if(!state.noStencilWriteMask)
772 {
773 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500774 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
775 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400776 newValue |= maskedValue;
777 }
778
779 if(state.twoSidedStencil)
780 {
781 Byte8 newValueCCW;
782
Nicolas Capens4f172c72016-01-13 08:34:30 -0500783 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400784
785 if(!state.noStencilWriteMaskCCW)
786 {
787 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500788 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
789 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400790 newValueCCW |= maskedValue;
791 }
792
Nicolas Capens4f172c72016-01-13 08:34:30 -0500793 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
794 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400795 newValue |= newValueCCW;
796 }
797
Nicolas Capens4f172c72016-01-13 08:34:30 -0500798 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
799 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
John Bauman89401822014-05-06 15:04:28 -0400800 newValue |= bufferValue;
801
802 *Pointer<UInt>(buffer) = UInt(As<Long>(newValue));
803 }
804
Nicolas Capens4f172c72016-01-13 08:34:30 -0500805 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400806 {
807 Byte8 &pass = newValue;
808 Byte8 fail;
809 Byte8 zFail;
810
Nicolas Capens4f172c72016-01-13 08:34:30 -0500811 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400812
813 if(stencilZFailOperation != stencilPassOperation)
814 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500815 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400816 }
817
818 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
819 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500820 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400821 }
822
823 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
824 {
825 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
826 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500827 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
828 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
John Bauman89401822014-05-06 15:04:28 -0400829 pass |= zFail;
830 }
831
Nicolas Capens4f172c72016-01-13 08:34:30 -0500832 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
833 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
John Bauman89401822014-05-06 15:04:28 -0400834 pass |= fail;
835 }
836 }
837
Nicolas Capens4f172c72016-01-13 08:34:30 -0500838 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400839 {
840 switch(operation)
841 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400842 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400843 output = bufferValue;
844 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400845 case OPERATION_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400846 output = Byte8(0x0000000000000000);
847 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400848 case OPERATION_REPLACE:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500849 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
John Bauman89401822014-05-06 15:04:28 -0400850 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400851 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400852 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
853 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400854 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400855 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
856 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400857 case OPERATION_INVERT:
John Bauman89401822014-05-06 15:04:28 -0400858 output = bufferValue ^ Byte8(0xFFFFFFFFFFFFFFFF);
859 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400860 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400861 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
862 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400863 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400864 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
865 break;
866 default:
867 ASSERT(false);
868 }
869 }
870
Nicolas Capens4f172c72016-01-13 08:34:30 -0500871 void PixelRoutine::blendFactor(const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400872 {
873 switch(blendFactorActive)
874 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400875 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400876 // Optimized
877 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400878 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400879 // Optimized
880 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400881 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400882 blendFactor.x = current.x;
883 blendFactor.y = current.y;
884 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400885 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400886 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400887 blendFactor.x = Short4(0xFFFFu) - current.x;
888 blendFactor.y = Short4(0xFFFFu) - current.y;
889 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400890 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400891 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400892 blendFactor.x = pixel.x;
893 blendFactor.y = pixel.y;
894 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400895 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400896 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400897 blendFactor.x = Short4(0xFFFFu) - pixel.x;
898 blendFactor.y = Short4(0xFFFFu) - pixel.y;
899 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400900 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400901 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400902 blendFactor.x = current.w;
903 blendFactor.y = current.w;
904 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400905 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400906 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400907 blendFactor.x = Short4(0xFFFFu) - current.w;
908 blendFactor.y = Short4(0xFFFFu) - current.w;
909 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400910 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400911 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400912 blendFactor.x = pixel.w;
913 blendFactor.y = pixel.w;
914 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400915 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400916 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400917 blendFactor.x = Short4(0xFFFFu) - pixel.w;
918 blendFactor.y = Short4(0xFFFFu) - pixel.w;
919 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400920 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400921 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400922 blendFactor.x = Short4(0xFFFFu) - pixel.w;
923 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
924 blendFactor.y = blendFactor.x;
925 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400926 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400927 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500928 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
929 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
930 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400931 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400932 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500933 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
934 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
935 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400936 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400937 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500938 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
939 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
940 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400941 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400942 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500943 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
944 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
945 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400946 break;
947 default:
948 ASSERT(false);
949 }
950 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500951
Nicolas Capens4f172c72016-01-13 08:34:30 -0500952 void PixelRoutine::blendFactorAlpha(const Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400953 {
954 switch(blendFactorAlphaActive)
955 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400956 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400957 // Optimized
958 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400959 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400960 // Optimized
961 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400962 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400963 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400964 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400965 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400966 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400967 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400968 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400969 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400970 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400971 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400972 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400973 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400974 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400975 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400976 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400977 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400978 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400979 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400980 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400981 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400982 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400983 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400984 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400985 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400986 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400987 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400988 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400989 case BLEND_CONSTANT:
990 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500991 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400992 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400993 case BLEND_INVCONSTANT:
994 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500995 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400996 break;
997 default:
998 ASSERT(false);
999 }
1000 }
1001
Alexis Hetu049a1872016-04-25 16:59:58 -04001002 bool PixelRoutine::isSRGB(int index) const
1003 {
1004 return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8;
1005 }
1006
Nicolas Capens4f172c72016-01-13 08:34:30 -05001007 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -04001008 {
John Bauman89401822014-05-06 15:04:28 -04001009 Short4 c01;
1010 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001011 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001012 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001013
John Bauman89401822014-05-06 15:04:28 -04001014 switch(state.targetFormat[index])
1015 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001016 case FORMAT_R5G6B5:
1017 buffer = cBuffer + 2 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001018 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensb40a2562016-01-05 00:08:45 -05001019 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001020
1021 pixel.x = c01 & Short4(0xF800u);
1022 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1023 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1024 pixel.w = Short4(0xFFFFu);
1025 break;
John Bauman89401822014-05-06 15:04:28 -04001026 case FORMAT_A8R8G8B8:
1027 buffer = cBuffer + 4 * x;
1028 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001029 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001030 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001031 pixel.z = c01;
1032 pixel.y = c01;
1033 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1034 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1035 pixel.x = pixel.z;
1036 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1037 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1038 pixel.y = pixel.z;
1039 pixel.w = pixel.x;
1040 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1041 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1042 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1043 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001044 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001045 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001046 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001047 buffer = cBuffer + 4 * x;
1048 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001049 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001050 c23 = *Pointer<Short4>(buffer);
1051 pixel.z = c01;
1052 pixel.y = c01;
1053 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1054 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1055 pixel.x = pixel.z;
1056 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1057 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1058 pixel.y = pixel.z;
1059 pixel.w = pixel.x;
1060 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1061 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1062 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1063 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1064 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001065 case FORMAT_A8:
1066 buffer = cBuffer + 1 * x;
1067 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001068 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001069 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1070 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1071 pixel.x = Short4(0x0000);
1072 pixel.y = Short4(0x0000);
1073 pixel.z = Short4(0x0000);
1074 break;
John Bauman89401822014-05-06 15:04:28 -04001075 case FORMAT_X8R8G8B8:
1076 buffer = cBuffer + 4 * x;
1077 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001078 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001079 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001080 pixel.z = c01;
1081 pixel.y = c01;
1082 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1083 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1084 pixel.x = pixel.z;
1085 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1086 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1087 pixel.y = pixel.z;
1088 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1089 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1090 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1091 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001092 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001093 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001094 case FORMAT_SRGB8_X8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001095 buffer = cBuffer + 4 * x;
1096 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001097 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001098 c23 = *Pointer<Short4>(buffer);
1099 pixel.z = c01;
1100 pixel.y = c01;
1101 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1102 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1103 pixel.x = pixel.z;
1104 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1105 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1106 pixel.y = pixel.z;
1107 pixel.w = pixel.x;
1108 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1109 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1110 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1111 pixel.w = Short4(0xFFFFu);
1112 break;
John Bauman89401822014-05-06 15:04:28 -04001113 case FORMAT_A8G8R8B8Q:
1114 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001115 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1116 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1117 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1118 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001119 break;
1120 case FORMAT_X8G8R8B8Q:
1121 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001122 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1123 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1124 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1125 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001126 break;
1127 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001128 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001129 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1130 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001131 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001132 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1133 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1134 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001135 break;
1136 case FORMAT_G16R16:
1137 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001138 pixel.x = *Pointer<Short4>(buffer + 4 * x);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001139 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Maxime Grégoired9762742015-07-08 16:43:48 -04001140 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001141 pixel.z = pixel.x;
1142 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1143 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1144 pixel.y = pixel.z;
1145 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1146 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1147 pixel.z = Short4(0xFFFFu);
1148 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001149 break;
1150 default:
1151 ASSERT(false);
1152 }
1153
Alexis Hetu049a1872016-04-25 16:59:58 -04001154 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001155 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001156 sRGBtoLinear16_12_16(pixel);
John Bauman89401822014-05-06 15:04:28 -04001157 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001158 }
1159
Nicolas Capens4f172c72016-01-13 08:34:30 -05001160 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001161 {
1162 if(!state.alphaBlendActive)
1163 {
1164 return;
1165 }
1166
1167 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001168 readPixel(index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001169
1170 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001171 Vector4s sourceFactor;
1172 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001173
Nicolas Capens4f172c72016-01-13 08:34:30 -05001174 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1175 blendFactor(destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001176
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001177 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001178 {
John Bauman19bac1e2014-05-06 15:23:49 -04001179 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1180 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1181 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001182 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001183
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001184 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001185 {
John Bauman19bac1e2014-05-06 15:23:49 -04001186 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1187 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1188 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001189 }
1190
1191 switch(state.blendOperation)
1192 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001193 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001194 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1195 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1196 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001197 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001198 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001199 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1200 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1201 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001202 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001203 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001204 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1205 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1206 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001207 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001208 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001209 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1210 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1211 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001212 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001213 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001214 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1215 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1216 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001217 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001218 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001219 // No operation
1220 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001221 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001222 current.x = pixel.x;
1223 current.y = pixel.y;
1224 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001225 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001226 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001227 current.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1228 current.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
1229 current.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001230 break;
1231 default:
1232 ASSERT(false);
1233 }
1234
Nicolas Capens4f172c72016-01-13 08:34:30 -05001235 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1236 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001237
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001238 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001239 {
John Bauman19bac1e2014-05-06 15:23:49 -04001240 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001241 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001242
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001243 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001244 {
John Bauman19bac1e2014-05-06 15:23:49 -04001245 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001246 }
1247
1248 switch(state.blendOperationAlpha)
1249 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001250 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001251 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001252 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001253 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001254 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001255 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001256 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001257 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001258 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001259 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001260 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001261 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001262 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001263 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001264 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001265 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001266 // No operation
1267 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001268 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001269 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001270 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001271 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04001272 current.w = Short4(0x0000, 0x0000, 0x0000, 0x0000);
John Bauman89401822014-05-06 15:04:28 -04001273 break;
1274 default:
1275 ASSERT(false);
1276 }
1277 }
1278
Nicolas Capens4f172c72016-01-13 08:34:30 -05001279 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001280 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001281 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001282 {
1283 return;
1284 }
1285
1286 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001287 readPixel(index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001288
1289 switch(state.logicalOperation)
1290 {
1291 case LOGICALOP_CLEAR:
1292 current.x = 0;
1293 current.y = 0;
1294 current.z = 0;
1295 break;
1296 case LOGICALOP_SET:
Nicolas Capens2afcc802015-08-04 10:34:43 -04001297 current.x = 0xFFFFu;
1298 current.y = 0xFFFFu;
1299 current.z = 0xFFFFu;
Maxime Grégoired9762742015-07-08 16:43:48 -04001300 break;
1301 case LOGICALOP_COPY:
1302 ASSERT(false); // Optimized out
1303 break;
1304 case LOGICALOP_COPY_INVERTED:
1305 current.x = ~current.x;
1306 current.y = ~current.y;
1307 current.z = ~current.z;
1308 break;
1309 case LOGICALOP_NOOP:
1310 current.x = pixel.x;
1311 current.y = pixel.y;
1312 current.z = pixel.z;
1313 break;
1314 case LOGICALOP_INVERT:
1315 current.x = ~pixel.x;
1316 current.y = ~pixel.y;
1317 current.z = ~pixel.z;
1318 break;
1319 case LOGICALOP_AND:
1320 current.x = pixel.x & current.x;
1321 current.y = pixel.y & current.y;
1322 current.z = pixel.z & current.z;
1323 break;
1324 case LOGICALOP_NAND:
1325 current.x = ~(pixel.x & current.x);
1326 current.y = ~(pixel.y & current.y);
1327 current.z = ~(pixel.z & current.z);
1328 break;
1329 case LOGICALOP_OR:
1330 current.x = pixel.x | current.x;
1331 current.y = pixel.y | current.y;
1332 current.z = pixel.z | current.z;
1333 break;
1334 case LOGICALOP_NOR:
1335 current.x = ~(pixel.x | current.x);
1336 current.y = ~(pixel.y | current.y);
1337 current.z = ~(pixel.z | current.z);
1338 break;
1339 case LOGICALOP_XOR:
1340 current.x = pixel.x ^ current.x;
1341 current.y = pixel.y ^ current.y;
1342 current.z = pixel.z ^ current.z;
1343 break;
1344 case LOGICALOP_EQUIV:
1345 current.x = ~(pixel.x ^ current.x);
1346 current.y = ~(pixel.y ^ current.y);
1347 current.z = ~(pixel.z ^ current.z);
1348 break;
1349 case LOGICALOP_AND_REVERSE:
1350 current.x = ~pixel.x & current.x;
1351 current.y = ~pixel.y & current.y;
1352 current.z = ~pixel.z & current.z;
1353 break;
1354 case LOGICALOP_AND_INVERTED:
1355 current.x = pixel.x & ~current.x;
1356 current.y = pixel.y & ~current.y;
1357 current.z = pixel.z & ~current.z;
1358 break;
1359 case LOGICALOP_OR_REVERSE:
1360 current.x = ~pixel.x | current.x;
1361 current.y = ~pixel.y | current.y;
1362 current.z = ~pixel.z | current.z;
1363 break;
1364 case LOGICALOP_OR_INVERTED:
1365 current.x = pixel.x | ~current.x;
1366 current.y = pixel.y | ~current.y;
1367 current.z = pixel.z | ~current.z;
1368 break;
1369 default:
1370 ASSERT(false);
1371 }
1372 }
1373
// Stores the color held in 'current' into render target 'index'.
//
// The function works in stages:
//   1. optional linear -> sRGB conversion,
//   2. optional rounding bias (exactColorRounding),
//   3. packing of the channels into the layout of state.targetFormat[index],
//   4. a read-modify-write of the color buffer that honours the color write
//      mask and the combined per-pixel mask (xMask).
//
// Two rows are written: one at cBuffer + x * (bytes per pixel) and one
// colorPitchB[index] bytes further on.
//
// index   - render target index, used for the format, write mask and pitch.
// cBuffer - base pointer of the color buffer row being written.
// x       - horizontal pixel position; scaled per format to form the address.
// current - color channels to write; modified in place while packing.
// sMask, zMask, cMask - stencil, depth and coverage masks; see the xMask
//                       computation below for how they are combined.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001374 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001375 {
// Convert to sRGB when post-blend sRGB writing is requested or when
// isSRGB(index) reports true for this target.
Alexis Hetu049a1872016-04-25 16:59:58 -04001376 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001377 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001378 linearToSRGB16_12_16(current);
John Bauman89401822014-05-06 15:04:28 -04001379 }
1380
// Pre-bias the channels ahead of the truncating shifts/masks in the packing
// switch below (presumably so that truncation rounds to nearest - confirm).
// Formats not listed here are left unbiased.
1381 if(exactColorRounding)
1382 {
1383 switch(state.targetFormat[index])
1384 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001385 case FORMAT_R5G6B5:
// Saturating add of half of the lowest kept bit: 5-bit x/z keep bit 11 and up, 6-bit y keeps bit 10 and up.
Nicolas Capens26f37222015-09-22 09:53:45 -04001386 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1387 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1388 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001389 break;
John Bauman89401822014-05-06 15:04:28 -04001390 case FORMAT_X8G8R8B8Q:
1391 case FORMAT_A8G8R8B8Q:
1392 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001393 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001394 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001395 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001396 case FORMAT_SRGB8_X8:
1397 case FORMAT_SRGB8_A8:
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001398 case FORMAT_R8:
// c - (c >> 8) + 0x80 for every channel; the packing switch later keeps only the upper 8 bits.
Nicolas Capens26f37222015-09-22 09:53:45 -04001399 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1400 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1401 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
1402 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080, 0x0080, 0x0080, 0x0080);
John Bauman89401822014-05-06 15:04:28 -04001403 break;
Nicolas Capensb69aa272016-01-02 00:06:41 -05001404 default:
1405 break;
John Bauman89401822014-05-06 15:04:28 -04001406 }
1407 }
1408
// bgraWriteMask is rgbaWriteMask with bits 0 and 2 exchanged; bits 1 and 3 keep their position.
1409 int rgbaWriteMask = state.colorWriteActive(index);
Nicolas Capens3b396462016-01-02 00:23:53 -05001410 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
John Bauman89401822014-05-06 15:04:28 -04001411
// Packing stage: rearrange the channels of 'current' into the memory layout of the target format.
// Which members of 'current' carry the packed result differs per format; see the store stage below.
1412 switch(state.targetFormat[index])
1413 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001414 case FORMAT_R5G6B5:
1415 {
// Keep the top 5/6/5 bits of x/y/z and OR them into a single 16-bit value per lane in current.x.
1416 current.x = current.x & Short4(0xF800u);
1417 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1418 current.z = As<UShort4>(current.z) >> 11;
1419
1420 current.x = current.x | current.y | current.z;
1421 }
1422 break;
John Bauman89401822014-05-06 15:04:28 -04001423 case FORMAT_X8G8R8B8Q:
1424 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001425 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1426 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1427 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001428
John Bauman19bac1e2014-05-06 15:23:49 -04001429 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1430 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001431 break;
1432 case FORMAT_A8G8R8B8Q:
1433 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001434 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1435 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1436 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1437 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001438
John Bauman19bac1e2014-05-06 15:23:49 -04001439 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1440 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001441 break;
1442 case FORMAT_X8R8G8B8:
1443 case FORMAT_A8R8G8B8:
// When alpha is not going to be written (X8 format, or write mask exactly 0x7) current.w is not
// shifted and current.y is packed with itself instead of with current.w.
// The packed result ends up in current.z and current.y.
1444 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1445 {
John Bauman19bac1e2014-05-06 15:23:49 -04001446 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1447 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1448 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001449
John Bauman19bac1e2014-05-06 15:23:49 -04001450 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1451 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001452
John Bauman19bac1e2014-05-06 15:23:49 -04001453 current.x = current.z;
1454 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1455 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1456 current.y = current.z;
1457 current.z = As<Short4>(UnpackLow(current.z, current.x));
1458 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001459 }
1460 else
1461 {
John Bauman19bac1e2014-05-06 15:23:49 -04001462 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1463 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1464 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1465 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001466
John Bauman19bac1e2014-05-06 15:23:49 -04001467 current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1468 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001469
John Bauman19bac1e2014-05-06 15:23:49 -04001470 current.x = current.z;
1471 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1472 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1473 current.y = current.z;
1474 current.z = As<Short4>(UnpackLow(current.z, current.x));
1475 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001476 }
1477 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001478 case FORMAT_X8B8G8R8:
1479 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001480 case FORMAT_SRGB8_X8:
1481 case FORMAT_SRGB8_A8:
// Same sequence as the X8R8G8B8/A8R8G8B8 case, except that the first Pack takes (x, z) instead of (z, x).
1482 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001483 {
1484 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1485 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1486 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1487
1488 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1489 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
1490
1491 current.x = current.z;
1492 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1493 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1494 current.y = current.z;
1495 current.z = As<Short4>(UnpackLow(current.z, current.x));
1496 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1497 }
1498 else
1499 {
1500 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1501 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1502 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1503 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1504
1505 current.z = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.z)));
1506 current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
1507
1508 current.x = current.z;
1509 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1510 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1511 current.y = current.z;
1512 current.z = As<Short4>(UnpackLow(current.z, current.x));
1513 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1514 }
1515 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001516 case FORMAT_R8:
// Single-channel 8-bit targets: shift the channel down by 8 and Pack it with itself.
1517 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1518 current.x = As<Short4>(Pack(As<UShort4>(current.x), As<UShort4>(current.x)));
1519 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001520 case FORMAT_A8:
1521 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1522 current.w = As<Short4>(Pack(As<UShort4>(current.w), As<UShort4>(current.w)));
1523 break;
John Bauman89401822014-05-06 15:04:28 -04001524 case FORMAT_G16R16:
// Interleave x and y; afterwards current.x and current.y hold the data stored to the first and second row.
John Bauman19bac1e2014-05-06 15:23:49 -04001525 current.z = current.x;
1526 current.x = As<Short4>(UnpackLow(current.x, current.y));
1527 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1528 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001529 break;
1530 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001531 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001532 break;
John Bauman89401822014-05-06 15:04:28 -04001533 default:
1534 ASSERT(false);
1535 }
1536
// c01/c23 are the values stored to the first/second row by the 8-bit-per-channel four-component cases below.
// The other formats store from 'current' (or from their own locals) directly.
John Bauman19bac1e2014-05-06 15:23:49 -04001537 Short4 c01 = current.z;
1538 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001539
1540 Int xMask; // Combination of all masks
1541
// Start from the depth mask when depth testing is active, otherwise from the coverage mask,
// then intersect with the stencil mask when stenciling is active.
1542 if(state.depthTestActive)
1543 {
1544 xMask = zMask;
1545 }
1546 else
1547 {
1548 xMask = cMask;
1549 }
1550
1551 if(state.stencilActive)
1552 {
1553 xMask &= sMask;
1554 }
1555
// Store stage. Each case follows the same read-modify-write pattern per row:
//   - load the existing buffer contents into 'value',
//   - if not all channels are enabled, merge new and old data using the per-channel mask tables,
//   - merge again using the table entry selected by xMask, then store.
John Bauman89401822014-05-06 15:04:28 -04001556 switch(state.targetFormat[index])
1557 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001558 case FORMAT_R5G6B5:
1559 {
1560 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001561 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001562
// NOTE: this Int c01 (and c23 further down) shadows the Short4 of the same name declared above.
// Element 0 of current.x viewed as Int2 goes to the first row, element 1 to the second row.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001563 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001564
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001565 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001566 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001567 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001568 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1569 masked &= *Pointer<Int>(constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001570 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001571 }
1572
Nicolas Capens4f172c72016-01-13 08:34:30 -05001573 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1574 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001575 c01 |= value;
1576 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001577
// Advance to the second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001578 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001579 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001580
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001581 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001582
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001583 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001584 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001585 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001586 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
1587 masked &= *Pointer<Int>(constants + OFFSET(Constants,invMask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001588 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001589 }
1590
Nicolas Capens4f172c72016-01-13 08:34:30 -05001591 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1592 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001593 c23 |= value;
1594 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001595 }
1596 break;
John Bauman89401822014-05-06 15:04:28 -04001597 case FORMAT_A8G8R8B8Q:
1598 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
// Not implemented; the commented-out code below is kept from the original.
1599 UNIMPLEMENTED();
1600 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1601
1602 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1603 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1604 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1605 // {
1606 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001607 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1608 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001609 // c01 |= masked;
1610 // }
1611
Nicolas Capens4f172c72016-01-13 08:34:30 -05001612 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1613 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001614 // c01 |= value;
1615 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1616
1617 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1618
1619 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1620 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1621 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1622 // {
1623 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001624 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1625 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001626 // c23 |= masked;
1627 // }
1628
Nicolas Capens4f172c72016-01-13 08:34:30 -05001629 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1630 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001631 // c23 |= value;
1632 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1633 break;
1634 case FORMAT_A8R8G8B8:
1635 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001636 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001637 Pointer<Byte> buffer = cBuffer + x * 4;
1638 Short4 value = *Pointer<Short4>(buffer);
1639
// Channel masking is applied for A8R8G8B8 unless the mask is 0xF, and for X8R8G8B8 unless it is 0x7 or 0xF.
// This case indexes the mask tables with bgraWriteMask.
1640 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1641 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1642 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1643 {
1644 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001645 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1646 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001647 c01 |= masked;
1648 }
1649
Nicolas Capens4f172c72016-01-13 08:34:30 -05001650 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1651 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001652 c01 |= value;
1653 *Pointer<Short4>(buffer) = c01;
1654
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001655 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001656 value = *Pointer<Short4>(buffer);
1657
1658 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1659 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1660 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1661 {
1662 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001663 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1664 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001665 c23 |= masked;
1666 }
1667
Nicolas Capens4f172c72016-01-13 08:34:30 -05001668 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1669 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001670 c23 |= value;
1671 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001672 }
John Bauman89401822014-05-06 15:04:28 -04001673 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001674 case FORMAT_A8B8G8R8:
1675 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Alexis Hetu049a1872016-04-25 16:59:58 -04001676 case FORMAT_SRGB8_X8:
1677 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001678 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001679 Pointer<Byte> buffer = cBuffer + x * 4;
1680 Short4 value = *Pointer<Short4>(buffer);
1681
// Same structure as the A8R8G8B8/X8R8G8B8 case, but the mask tables are indexed with rgbaWriteMask.
// NOTE: the Short4 'masked' declared inside the two if-bodies below shadows this bool of the same name.
Alexis Hetu049a1872016-04-25 16:59:58 -04001682 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
1683 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
1684 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
1685
1686 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001687 {
1688 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001689 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1690 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001691 c01 |= masked;
1692 }
1693
Nicolas Capens4f172c72016-01-13 08:34:30 -05001694 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1695 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001696 c01 |= value;
1697 *Pointer<Short4>(buffer) = c01;
1698
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001699 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001700 value = *Pointer<Short4>(buffer);
1701
Alexis Hetu049a1872016-04-25 16:59:58 -04001702 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001703 {
1704 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001705 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1706 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001707 c23 |= masked;
1708 }
1709
Nicolas Capens4f172c72016-01-13 08:34:30 -05001710 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1711 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001712 c23 |= value;
1713 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001714 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001715 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001716 case FORMAT_R8:
// Nothing is written unless bit 0 of the write mask is set.
1717 if(rgbaWriteMask & 0x00000001)
1718 {
1719 Pointer<Byte> buffer = cBuffer + 1 * x;
1720 Short4 value;
// Gather one Short from each of the two rows into elements 0 and 1 of 'value'.
1721 Insert(value, *Pointer<Short>(buffer), 0);
1722 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1723 Insert(value, *Pointer<Short>(buffer + pitch), 1);
1724 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1725
1726 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1727 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1728 current.x |= value;
1729
// Write elements 0 and 1 back to the first and second row respectively.
1730 *Pointer<Short>(buffer) = Extract(current.x, 0);
1731 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1732 }
1733 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001734 case FORMAT_A8:
// Same sequence as FORMAT_R8, using current.w and bit 3 of the write mask.
1735 if(rgbaWriteMask & 0x00000008)
1736 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001737 Pointer<Byte> buffer = cBuffer + 1 * x;
1738 Short4 value;
John Bauman66b8ab22014-05-06 15:57:45 -04001739 Insert(value, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001740 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001741 Insert(value, *Pointer<Short>(buffer + pitch), 1);
1742 value = UnpackLow(As<Byte8>(value), As<Byte8>(value));
1743
Nicolas Capens4f172c72016-01-13 08:34:30 -05001744 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1745 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
John Bauman66b8ab22014-05-06 15:57:45 -04001746 current.w |= value;
1747
1748 *Pointer<Short>(buffer) = Extract(current.w, 0);
1749 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1750 }
1751 break;
John Bauman89401822014-05-06 15:04:28 -04001752 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001753 {
// current.x is stored to the first row and current.y to the second row.
// Only the low two bits of the write mask are considered.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001754 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001755
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001756 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001757
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001758 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001759 {
1760 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001761 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1762 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001763 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001764 }
1765
Nicolas Capens4f172c72016-01-13 08:34:30 -05001766 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1767 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001768 current.x |= value;
1769 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001770
Nicolas Capens4f172c72016-01-13 08:34:30 -05001771 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001772
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001773 value = *Pointer<Short4>(buffer);
1774
1775 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001776 {
1777 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001778 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
1779 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW01Q[rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001780 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001781 }
1782
Nicolas Capens4f172c72016-01-13 08:34:30 -05001783 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1784 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001785 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001786 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001787 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001788 break;
1789 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001790 {
// After the transpose in the packing stage each of current.x/y/z/w is one 8-byte store:
// x and y go to the first row (offsets 0 and 8), z and w to the second row (offsets 0 and 8).
// Each store uses its own per-pixel mask table (maskQ0Q .. maskQ3Q).
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001791 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001792
John Bauman89401822014-05-06 15:04:28 -04001793 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001794 Short4 value = *Pointer<Short4>(buffer);
1795
1796 if(rgbaWriteMask != 0x0000000F)
1797 {
1798 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001799 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1800 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001801 current.x |= masked;
1802 }
1803
Nicolas Capens4f172c72016-01-13 08:34:30 -05001804 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1805 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001806 current.x |= value;
1807 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001808 }
1809
John Bauman89401822014-05-06 15:04:28 -04001810 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001811 Short4 value = *Pointer<Short4>(buffer + 8);
1812
1813 if(rgbaWriteMask != 0x0000000F)
1814 {
1815 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001816 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1817 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001818 current.y |= masked;
1819 }
1820
Nicolas Capens4f172c72016-01-13 08:34:30 -05001821 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1822 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001823 current.y |= value;
1824 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001825 }
1826
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001827 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001828
1829 {
1830 Short4 value = *Pointer<Short4>(buffer);
1831
1832 if(rgbaWriteMask != 0x0000000F)
1833 {
1834 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001835 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1836 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001837 current.z |= masked;
1838 }
1839
Nicolas Capens4f172c72016-01-13 08:34:30 -05001840 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1841 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001842 current.z |= value;
1843 *Pointer<Short4>(buffer) = current.z;
1844 }
1845
1846 {
1847 Short4 value = *Pointer<Short4>(buffer + 8);
1848
1849 if(rgbaWriteMask != 0x0000000F)
1850 {
1851 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001852 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1853 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001854 current.w |= masked;
1855 }
1856
Nicolas Capens4f172c72016-01-13 08:34:30 -05001857 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1858 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001859 current.w |= value;
1860 *Pointer<Short4>(buffer + 8) = current.w;
1861 }
John Bauman89401822014-05-06 15:04:28 -04001862 }
1863 break;
1864 default:
1865 ASSERT(false);
1866 }
1867 }
1868
Nicolas Capens05b3d662016-02-25 23:58:33 -05001869 void PixelRoutine::blendFactor(const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001870 {
1871 switch(blendFactorActive)
1872 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001873 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001874 // Optimized
1875 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001876 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001877 // Optimized
1878 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001879 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001880 blendFactor.x = oC.x;
1881 blendFactor.y = oC.y;
1882 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001883 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001884 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001885 blendFactor.x = Float4(1.0f) - oC.x;
1886 blendFactor.y = Float4(1.0f) - oC.y;
1887 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001888 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001889 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001890 blendFactor.x = pixel.x;
1891 blendFactor.y = pixel.y;
1892 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001893 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001894 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001895 blendFactor.x = Float4(1.0f) - pixel.x;
1896 blendFactor.y = Float4(1.0f) - pixel.y;
1897 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001898 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001899 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001900 blendFactor.x = oC.w;
1901 blendFactor.y = oC.w;
1902 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001903 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001904 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001905 blendFactor.x = Float4(1.0f) - oC.w;
1906 blendFactor.y = Float4(1.0f) - oC.w;
1907 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001908 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001909 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001910 blendFactor.x = pixel.w;
1911 blendFactor.y = pixel.w;
1912 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001913 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001914 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001915 blendFactor.x = Float4(1.0f) - pixel.w;
1916 blendFactor.y = Float4(1.0f) - pixel.w;
1917 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001918 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001919 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001920 blendFactor.x = Float4(1.0f) - pixel.w;
1921 blendFactor.x = Min(blendFactor.x, oC.w);
1922 blendFactor.y = blendFactor.x;
1923 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001925 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001926 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1927 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1928 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001929 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001930 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001931 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1932 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1933 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001934 break;
1935 default:
1936 ASSERT(false);
1937 }
1938 }
1939
Nicolas Capens05b3d662016-02-25 23:58:33 -05001940 void PixelRoutine::blendFactorAlpha(const Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001941 {
1942 switch(blendFactorAlphaActive)
1943 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001944 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001945 // Optimized
1946 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001947 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001948 // Optimized
1949 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001950 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001951 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001952 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001953 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001954 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001955 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001956 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001957 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001958 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001959 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001960 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001961 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001962 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001963 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001964 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001965 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001966 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001967 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001968 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001969 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001970 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001971 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001972 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001973 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001974 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001975 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04001976 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001977 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001978 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001979 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001980 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001981 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04001982 break;
1983 default:
1984 ASSERT(false);
1985 }
1986 }
1987
Nicolas Capens4f172c72016-01-13 08:34:30 -05001988 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04001989 {
1990 if(!state.alphaBlendActive)
1991 {
1992 return;
1993 }
1994
1995 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001996 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04001997
Alexis Hetu96517182015-04-15 10:30:23 -04001998 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04001999 Short4 c01;
2000 Short4 c23;
2001
Alexis Hetu1abb6382016-02-08 11:21:16 -05002002 Float4 one;
Alexis Hetu7208e932016-06-02 11:19:24 -04002003 if(Surface::isFloatFormat(state.targetFormat[index]))
John Bauman89401822014-05-06 15:04:28 -04002004 {
Alexis Hetu1abb6382016-02-08 11:21:16 -05002005 one = Float4(1.0f);
Alexis Hetu7208e932016-06-02 11:19:24 -04002006 }
2007 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
2008 {
2009 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Alexis Hetu1abb6382016-02-08 11:21:16 -05002010 }
2011
2012 switch(state.targetFormat[index])
2013 {
2014 case FORMAT_R32I:
2015 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002016 case FORMAT_R32F:
2017 buffer = cBuffer;
2018 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002019 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
2020 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002021 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002022 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002023 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
2024 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
Alexis Hetu1abb6382016-02-08 11:21:16 -05002025 pixel.y = pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002026 break;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002027 case FORMAT_G32R32I:
2028 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002029 case FORMAT_G32R32F:
2030 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002031 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002032 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002033 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2034 pixel.z = pixel.x;
2035 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2036 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2037 pixel.y = pixel.z;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002038 pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002039 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002040 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002041 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002042 case FORMAT_A32B32G32R32I:
2043 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002044 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002045 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2046 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002047 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002048 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2049 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2050 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002051 if(state.targetFormat[index] == FORMAT_X32B32G32R32F)
2052 {
2053 pixel.w = Float4(1.0f);
2054 }
John Bauman89401822014-05-06 15:04:28 -04002055 break;
2056 default:
2057 ASSERT(false);
2058 }
2059
Alexis Hetu049a1872016-04-25 16:59:58 -04002060 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04002061 {
John Bauman19bac1e2014-05-06 15:23:49 -04002062 sRGBtoLinear(pixel.x);
2063 sRGBtoLinear(pixel.y);
2064 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002065 }
2066
2067 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002068 Vector4f sourceFactor;
2069 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002070
Nicolas Capens4f172c72016-01-13 08:34:30 -05002071 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
2072 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002073
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002074 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002075 {
John Bauman19bac1e2014-05-06 15:23:49 -04002076 oC.x *= sourceFactor.x;
2077 oC.y *= sourceFactor.y;
2078 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002079 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002080
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002081 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002082 {
John Bauman19bac1e2014-05-06 15:23:49 -04002083 pixel.x *= destFactor.x;
2084 pixel.y *= destFactor.y;
2085 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002086 }
2087
2088 switch(state.blendOperation)
2089 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002090 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002091 oC.x += pixel.x;
2092 oC.y += pixel.y;
2093 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002094 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002095 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002096 oC.x -= pixel.x;
2097 oC.y -= pixel.y;
2098 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002099 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002100 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002101 oC.x = pixel.x - oC.x;
2102 oC.y = pixel.y - oC.y;
2103 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002104 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002105 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002106 oC.x = Min(oC.x, pixel.x);
2107 oC.y = Min(oC.y, pixel.y);
2108 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002109 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002110 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002111 oC.x = Max(oC.x, pixel.x);
2112 oC.y = Max(oC.y, pixel.y);
2113 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002114 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002115 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002116 // No operation
2117 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002118 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002119 oC.x = pixel.x;
2120 oC.y = pixel.y;
2121 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002122 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002123 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002124 oC.x = Float4(0.0f);
2125 oC.y = Float4(0.0f);
2126 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002127 break;
2128 default:
2129 ASSERT(false);
2130 }
2131
Nicolas Capens4f172c72016-01-13 08:34:30 -05002132 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2133 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002134
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002135 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002136 {
John Bauman19bac1e2014-05-06 15:23:49 -04002137 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002138 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002139
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002140 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002141 {
John Bauman19bac1e2014-05-06 15:23:49 -04002142 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002143 }
2144
2145 switch(state.blendOperationAlpha)
2146 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002147 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002148 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002149 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002150 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002151 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002152 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002153 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002154 pixel.w -= oC.w;
2155 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002156 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002157 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002158 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002159 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002160 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002161 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002162 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002163 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002164 // No operation
2165 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002166 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002167 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002168 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002169 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002170 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002171 break;
2172 default:
2173 ASSERT(false);
2174 }
2175 }
2176
Nicolas Capens4f172c72016-01-13 08:34:30 -05002177 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002178 {
John Bauman89401822014-05-06 15:04:28 -04002179 switch(state.targetFormat[index])
2180 {
John Bauman89401822014-05-06 15:04:28 -04002181 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002182 case FORMAT_R32I:
2183 case FORMAT_R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002184 case FORMAT_R16I:
2185 case FORMAT_R16UI:
2186 case FORMAT_R8I:
2187 case FORMAT_R8UI:
John Bauman89401822014-05-06 15:04:28 -04002188 break;
2189 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002190 case FORMAT_G32R32I:
2191 case FORMAT_G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002192 case FORMAT_G16R16I:
2193 case FORMAT_G16R16UI:
2194 case FORMAT_G8R8I:
2195 case FORMAT_G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002196 oC.z = oC.x;
2197 oC.x = UnpackLow(oC.x, oC.y);
2198 oC.z = UnpackHigh(oC.z, oC.y);
2199 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002200 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002201 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002202 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002203 case FORMAT_A32B32G32R32I:
2204 case FORMAT_A32B32G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002205 case FORMAT_A16B16G16R16I:
2206 case FORMAT_A16B16G16R16UI:
2207 case FORMAT_A8B8G8R8I:
2208 case FORMAT_A8B8G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002209 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002210 break;
2211 default:
2212 ASSERT(false);
2213 }
2214
2215 int rgbaWriteMask = state.colorWriteActive(index);
2216
2217 Int xMask; // Combination of all masks
2218
2219 if(state.depthTestActive)
2220 {
2221 xMask = zMask;
2222 }
2223 else
2224 {
2225 xMask = cMask;
2226 }
2227
2228 if(state.stencilActive)
2229 {
2230 xMask &= sMask;
2231 }
2232
2233 Pointer<Byte> buffer;
2234 Float4 value;
2235
2236 switch(state.targetFormat[index])
2237 {
2238 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002239 case FORMAT_R32I:
2240 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002241 if(rgbaWriteMask & 0x00000001)
2242 {
2243 buffer = cBuffer + 4 * x;
2244
2245 // FIXME: movlps
2246 value.x = *Pointer<Float>(buffer + 0);
2247 value.y = *Pointer<Float>(buffer + 4);
2248
Nicolas Capens4f172c72016-01-13 08:34:30 -05002249 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002250
2251 // FIXME: movhps
2252 value.z = *Pointer<Float>(buffer + 0);
2253 value.w = *Pointer<Float>(buffer + 4);
2254
Nicolas Capens4f172c72016-01-13 08:34:30 -05002255 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2256 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002257 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002258
2259 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002260 *Pointer<Float>(buffer + 0) = oC.x.z;
2261 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002262
Nicolas Capens4f172c72016-01-13 08:34:30 -05002263 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002264
2265 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002266 *Pointer<Float>(buffer + 0) = oC.x.x;
2267 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002268 }
2269 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002270 case FORMAT_R16I:
2271 case FORMAT_R16UI:
2272 if(rgbaWriteMask & 0x00000001)
2273 {
2274 buffer = cBuffer + 2 * x;
2275
2276 UShort4 xyzw;
2277 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2278
2279 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2280
2281 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2282 value = As<Float4>(Int4(xyzw));
2283
2284 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2285 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2286 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2287
2288 if(state.targetFormat[index] == FORMAT_R16I)
2289 {
2290 Float component = oC.x.z;
2291 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2292 component = oC.x.w;
2293 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2294
2295 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2296
2297 component = oC.x.x;
2298 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2299 component = oC.x.y;
2300 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2301 }
2302 else // FORMAT_R16UI
2303 {
2304 Float component = oC.x.z;
2305 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2306 component = oC.x.w;
2307 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2308
2309 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2310
2311 component = oC.x.x;
2312 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2313 component = oC.x.y;
2314 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2315 }
2316 }
2317 break;
2318 case FORMAT_R8I:
2319 case FORMAT_R8UI:
2320 ASSERT(false);
2321 break;
John Bauman89401822014-05-06 15:04:28 -04002322 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002323 case FORMAT_G32R32I:
2324 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002325 buffer = cBuffer + 8 * x;
2326
2327 value = *Pointer<Float4>(buffer);
2328
2329 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2330 {
2331 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002332 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2333 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002334 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002335 }
2336
Nicolas Capens4f172c72016-01-13 08:34:30 -05002337 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2338 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002339 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2340 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002341
Nicolas Capens4f172c72016-01-13 08:34:30 -05002342 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002343
2344 value = *Pointer<Float4>(buffer);
2345
2346 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2347 {
2348 Float4 masked;
2349
2350 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002351 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
2352 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD01X[rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002353 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002354 }
2355
Nicolas Capens4f172c72016-01-13 08:34:30 -05002356 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2357 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002358 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2359 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002360 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002361 case FORMAT_G16R16I:
2362 case FORMAT_G16R16UI:
2363 ASSERT(false);
2364 break;
2365 case FORMAT_G8R8I:
2366 case FORMAT_G8R8UI:
2367 ASSERT(false);
2368 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002369 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002370 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002371 case FORMAT_A32B32G32R32I:
2372 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002373 buffer = cBuffer + 16 * x;
2374
2375 {
2376 value = *Pointer<Float4>(buffer, 16);
2377
2378 if(rgbaWriteMask != 0x0000000F)
2379 {
2380 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002381 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2382 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002383 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002384 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002385
Nicolas Capens4f172c72016-01-13 08:34:30 -05002386 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2387 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002388 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2389 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002390 }
2391
2392 {
2393 value = *Pointer<Float4>(buffer + 16, 16);
2394
2395 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens05b3d662016-02-25 23:58:33 -05002396 {
John Bauman89401822014-05-06 15:04:28 -04002397 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002398 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2399 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002400 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002401 }
2402
Nicolas Capens4f172c72016-01-13 08:34:30 -05002403 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2404 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002405 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2406 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002407 }
2408
Nicolas Capens4f172c72016-01-13 08:34:30 -05002409 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002410
2411 {
2412 value = *Pointer<Float4>(buffer, 16);
2413
2414 if(rgbaWriteMask != 0x0000000F)
2415 {
2416 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002417 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2418 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002419 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002420 }
2421
Nicolas Capens4f172c72016-01-13 08:34:30 -05002422 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2423 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002424 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2425 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002426 }
2427
2428 {
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002429 value = (state.targetFormat[index] == FORMAT_X32B32G32R32F) ? Float4(1.0f) : *Pointer<Float4>(buffer + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002430
2431 if(rgbaWriteMask != 0x0000000F)
2432 {
2433 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002434 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2435 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002436 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002437 }
2438
Nicolas Capens4f172c72016-01-13 08:34:30 -05002439 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2440 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002441 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2442 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002443 }
2444 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002445 case FORMAT_A16B16G16R16I:
2446 case FORMAT_A16B16G16R16UI:
2447 ASSERT(false);
2448 break;
2449 case FORMAT_A8B8G8R8I:
2450 case FORMAT_A8B8G8R8UI:
2451 ASSERT(false);
2452 break;
John Bauman89401822014-05-06 15:04:28 -04002453 default:
2454 ASSERT(false);
2455 }
2456 }
2457
John Bauman89401822014-05-06 15:04:28 -04002458 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2459 {
John Bauman19bac1e2014-05-06 15:23:49 -04002460 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002461 }
2462
Nicolas Capens4f172c72016-01-13 08:34:30 -05002463 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002464 {
John Bauman19bac1e2014-05-06 15:23:49 -04002465 c.x = As<UShort4>(c.x) >> 4;
2466 c.y = As<UShort4>(c.y) >> 4;
2467 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002468
Nicolas Capens4f172c72016-01-13 08:34:30 -05002469 sRGBtoLinear12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002470 }
2471
Nicolas Capens4f172c72016-01-13 08:34:30 -05002472 void PixelRoutine::sRGBtoLinear12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002473 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002474 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002475
John Bauman19bac1e2014-05-06 15:23:49 -04002476 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2477 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2478 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2479 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002480
John Bauman19bac1e2014-05-06 15:23:49 -04002481 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2482 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2483 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2484 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002485
John Bauman19bac1e2014-05-06 15:23:49 -04002486 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2487 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2488 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2489 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002490 }
2491
Nicolas Capens4f172c72016-01-13 08:34:30 -05002492 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002493 {
John Bauman19bac1e2014-05-06 15:23:49 -04002494 c.x = As<UShort4>(c.x) >> 4;
2495 c.y = As<UShort4>(c.y) >> 4;
2496 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002497
Nicolas Capens4f172c72016-01-13 08:34:30 -05002498 linearToSRGB12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002499 }
2500
Nicolas Capens4f172c72016-01-13 08:34:30 -05002501 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002502 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002503 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002504
John Bauman19bac1e2014-05-06 15:23:49 -04002505 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2506 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2507 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2508 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002509
John Bauman19bac1e2014-05-06 15:23:49 -04002510 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2511 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2512 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2513 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002514
John Bauman19bac1e2014-05-06 15:23:49 -04002515 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2516 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2517 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2518 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002519 }
2520
John Bauman89401822014-05-06 15:04:28 -04002521 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2522 {
2523 Float4 linear = x * x;
2524 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2525
2526 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2527 }
2528
John Bauman19bac1e2014-05-06 15:23:49 -04002529 bool PixelRoutine::colorUsed()
2530 {
2531 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2532 }
John Bauman89401822014-05-06 15:04:28 -04002533}