blob: 949fbfc531655cf044b7c10cde9265f7b974d735 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "PixelRoutine.hpp"
16
John Bauman89401822014-05-06 15:04:28 -040017#include "SamplerCore.hpp"
18#include "Constants.hpp"
Nicolas Capens708c24b2017-10-26 13:07:10 -040019#include "Renderer/Renderer.hpp"
20#include "Renderer/QuadRasterizer.hpp"
21#include "Renderer/Surface.hpp"
22#include "Renderer/Primitive.hpp"
23#include "Common/Debug.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
John Bauman89401822014-05-06 15:04:28 -040025namespace sw
26{
// Global configuration switches that are defined outside this file.
//
// complementaryDepthBuffer: when set, the depth routines below use (1 - depth)
//                           for shader-written depth and flip the direction of
//                           the ordered depth comparisons.
// forceClearRegisters:      when set, the constructor zero-initializes all
//                           fragment input registers.
// postBlendSRGB / exactColorRounding are declared here but not referenced in
// the visible part of this file.
extern bool complementaryDepthBuffer;
extern bool postBlendSRGB;
extern bool exactColorRounding;
extern bool forceClearRegisters;
John Bauman89401822014-05-06 15:04:28 -040031
Nicolas Capens4f172c72016-01-13 08:34:30 -050032 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040033 {
Alexis Hetu53ad4af2017-12-06 14:49:07 -050034 if(!shader || shader->getShaderModel() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040035 {
Nicolas Capens3b4c93f2016-05-18 12:51:37 -040036 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
Alexis Hetuf2a8c372015-07-13 11:08:41 -040037 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040038 v[i].x = Float4(0.0f);
39 v[i].y = Float4(0.0f);
40 v[i].z = Float4(0.0f);
41 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040042 }
John Bauman89401822014-05-06 15:04:28 -040043 }
44 }
45
46 PixelRoutine::~PixelRoutine()
47 {
John Bauman89401822014-05-06 15:04:28 -040048 }
49
Nicolas Capens4f172c72016-01-13 08:34:30 -050050 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040051 {
52 #if PERF_PROFILE
53 Long pipeTime = Ticks();
54 #endif
55
John Bauman89401822014-05-06 15:04:28 -040056 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040057
58 Int zMask[4]; // Depth mask
59 Int sMask[4]; // Stencil mask
60
61 for(unsigned int q = 0; q < state.multiSample; q++)
62 {
63 zMask[q] = cMask[q];
64 sMask[q] = cMask[q];
65 }
66
67 for(unsigned int q = 0; q < state.multiSample; q++)
68 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050069 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040070 }
71
72 Float4 f;
John Bauman89401822014-05-06 15:04:28 -040073 Float4 rhwCentroid;
74
Nicolas Capens4f172c72016-01-13 08:34:30 -050075 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040076
John Bauman19bac1e2014-05-06 15:23:49 -040077 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040078 {
79 for(unsigned int q = 0; q < state.multiSample; q++)
80 {
81 Float4 x = xxxx;
Nicolas Capens4f172c72016-01-13 08:34:30 -050082
John Bauman89401822014-05-06 15:04:28 -040083 if(state.multiSample > 1)
84 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050085 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
John Bauman89401822014-05-06 15:04:28 -040086 }
87
Nicolas Capens5ba372f2017-10-05 16:05:47 -040088 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
John Bauman89401822014-05-06 15:04:28 -040089 }
90 }
91
92 Bool depthPass = false;
93
94 if(earlyDepthTest)
95 {
96 for(unsigned int q = 0; q < state.multiSample; q++)
97 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050098 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040099 }
100 }
101
102 If(depthPass || Bool(!earlyDepthTest))
103 {
104 #if PERF_PROFILE
105 Long interpTime = Ticks();
106 #endif
107
Nicolas Capens4f172c72016-01-13 08:34:30 -0500108 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400109
John Bauman89401822014-05-06 15:04:28 -0400110 // Centroid locations
111 Float4 XXXX = Float4(0.0f);
112 Float4 YYYY = Float4(0.0f);
113
114 if(state.centroid)
115 {
116 Float4 WWWW(1.0e-9f);
117
118 for(unsigned int q = 0; q < state.multiSample; q++)
119 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500120 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
121 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
122 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400123 }
124
125 WWWW = Rcp_pp(WWWW);
126 XXXX *= WWWW;
127 YYYY *= WWWW;
128
129 XXXX += xxxx;
130 YYYY += yyyy;
131 }
132
John Bauman19bac1e2014-05-06 15:23:49 -0400133 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400134 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400135 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500136 rhw = reciprocal(w, false, false, true);
John Bauman89401822014-05-06 15:04:28 -0400137
138 if(state.centroid)
139 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500140 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
John Bauman89401822014-05-06 15:04:28 -0400141 }
142 }
143
Nicolas Capens3b4c93f2016-05-18 12:51:37 -0400144 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
John Bauman89401822014-05-06 15:04:28 -0400145 {
146 for(int component = 0; component < 4; component++)
147 {
John Bauman89401822014-05-06 15:04:28 -0400148 if(state.interpolant[interpolant].component & (1 << component))
149 {
150 if(!state.interpolant[interpolant].centroid)
151 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400152 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400153 }
154 else
155 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500156 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400157 }
158 }
159 }
160
161 Float4 rcp;
162
163 switch(state.interpolant[interpolant].project)
164 {
165 case 0:
166 break;
167 case 1:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500168 rcp = reciprocal(v[interpolant].y);
169 v[interpolant].x = v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400170 break;
171 case 2:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500172 rcp = reciprocal(v[interpolant].z);
173 v[interpolant].x = v[interpolant].x * rcp;
174 v[interpolant].y = v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400175 break;
176 case 3:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500177 rcp = reciprocal(v[interpolant].w);
178 v[interpolant].x = v[interpolant].x * rcp;
179 v[interpolant].y = v[interpolant].y * rcp;
180 v[interpolant].z = v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400181 break;
182 }
183 }
184
185 if(state.fog.component)
186 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400187 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400188 }
189
Nicolas Capens4f172c72016-01-13 08:34:30 -0500190 setBuiltins(x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400191
192 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500193 cycles[PERF_INTERP] += Ticks() - interpTime;
John Bauman89401822014-05-06 15:04:28 -0400194 #endif
195
196 Bool alphaPass = true;
197
198 if(colorUsed())
199 {
200 #if PERF_PROFILE
201 Long shaderTime = Ticks();
202 #endif
203
Nicolas Capens4f172c72016-01-13 08:34:30 -0500204 applyShader(cMask);
John Bauman89401822014-05-06 15:04:28 -0400205
206 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500207 cycles[PERF_SHADER] += Ticks() - shaderTime;
John Bauman89401822014-05-06 15:04:28 -0400208 #endif
209
Nicolas Capens4f172c72016-01-13 08:34:30 -0500210 alphaPass = alphaTest(cMask);
John Bauman89401822014-05-06 15:04:28 -0400211
John Bauman19bac1e2014-05-06 15:23:49 -0400212 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400213 {
214 for(unsigned int q = 0; q < state.multiSample; q++)
215 {
216 zMask[q] &= cMask[q];
217 sMask[q] &= cMask[q];
218 }
219 }
220 }
221
222 If(alphaPass)
223 {
224 if(!earlyDepthTest)
225 {
226 for(unsigned int q = 0; q < state.multiSample; q++)
227 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500228 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400229 }
230 }
231
232 #if PERF_PROFILE
233 Long ropTime = Ticks();
234 #endif
235
236 If(depthPass || Bool(earlyDepthTest))
237 {
238 for(unsigned int q = 0; q < state.multiSample; q++)
239 {
240 if(state.multiSampleMask & (1 << q))
241 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500242 writeDepth(zBuffer, q, x, z[q], zMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400243
244 if(state.occlusionEnabled)
245 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500246 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
John Bauman89401822014-05-06 15:04:28 -0400247 }
248 }
249 }
250
251 if(colorUsed())
252 {
253 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400254 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400255 #endif
256
Nicolas Capens4f172c72016-01-13 08:34:30 -0500257 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400258 }
259 }
260
261 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500262 cycles[PERF_ROP] += Ticks() - ropTime;
John Bauman89401822014-05-06 15:04:28 -0400263 #endif
264 }
265 }
266
267 for(unsigned int q = 0; q < state.multiSample; q++)
268 {
269 if(state.multiSampleMask & (1 << q))
270 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500271 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400272 }
273 }
274
275 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500276 cycles[PERF_PIPE] += Ticks() - pipeTime;
John Bauman89401822014-05-06 15:04:28 -0400277 #endif
278 }
279
John Bauman89401822014-05-06 15:04:28 -0400280 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
281 {
282 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
283
284 if(!flat)
285 {
286 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
287 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
288
289 if(perspective)
290 {
291 interpolant *= rhw;
292 }
293 }
294
295 return interpolant;
296 }
297
Nicolas Capens4f172c72016-01-13 08:34:30 -0500298 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400299 {
300 if(!state.stencilActive)
301 {
302 return;
303 }
304
305 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
306
307 Pointer<Byte> buffer = sBuffer + 2 * x;
308
309 if(q > 0)
310 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500311 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400312 }
313
Nicolas Capens48ef1252016-11-07 15:30:33 -0500314 Byte8 value = *Pointer<Byte8>(buffer);
John Bauman89401822014-05-06 15:04:28 -0400315 Byte8 valueCCW = value;
316
317 if(!state.noStencilMask)
318 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500319 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400320 }
321
Nicolas Capens4f172c72016-01-13 08:34:30 -0500322 stencilTest(value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400323
324 if(state.twoSidedStencil)
325 {
326 if(!state.noStencilMaskCCW)
327 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500328 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400329 }
330
Nicolas Capens4f172c72016-01-13 08:34:30 -0500331 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400332
Nicolas Capens4f172c72016-01-13 08:34:30 -0500333 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
334 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400335 value |= valueCCW;
336 }
337
338 sMask = SignMask(value) & cMask;
339 }
340
Nicolas Capens4f172c72016-01-13 08:34:30 -0500341 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400342 {
343 Byte8 equal;
344
345 switch(stencilCompareMode)
346 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400347 case STENCIL_ALWAYS:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400348 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400349 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400350 case STENCIL_NEVER:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400351 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400352 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400353 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400354 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500355 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400356 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400357 case STENCIL_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500358 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400359 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400360 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500361 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400362 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400363 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400364 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400365 equal = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500366 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400367 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500368 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400369 value |= equal;
370 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400371 case STENCIL_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500372 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
John Bauman89401822014-05-06 15:04:28 -0400373 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
374 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
375 value = equal;
376 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400377 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400378 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500379 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400380 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400381 break;
382 default:
383 ASSERT(false);
384 }
385 }
386
Nicolas Capens4f172c72016-01-13 08:34:30 -0500387 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400388 {
389 if(!state.depthTestActive)
390 {
391 return true;
392 }
393
394 Float4 Z = z;
395
John Bauman19bac1e2014-05-06 15:23:49 -0400396 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400397 {
398 if(complementaryDepthBuffer)
399 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500400 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400401 }
402 else
403 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500404 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400405 }
406 }
407
408 Pointer<Byte> buffer;
409 Int pitch;
410
411 if(!state.quadLayoutDepthBuffer)
412 {
413 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500414 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400415 }
416 else
417 {
418 buffer = zBuffer + 8 * x;
419 }
420
421 if(q > 0)
422 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500423 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400424 }
425
426 Float4 zValue;
427
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400428 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400429 {
430 if(!state.quadLayoutDepthBuffer)
431 {
432 // FIXME: Properly optimizes?
433 zValue.xy = *Pointer<Float4>(buffer);
434 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
435 }
436 else
437 {
438 zValue = *Pointer<Float4>(buffer, 16);
439 }
440 }
441
442 Int4 zTest;
443
444 switch(state.depthCompareMode)
445 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400446 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400447 // Optimized
448 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400449 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400450 // Optimized
451 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400452 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400453 zTest = CmpEQ(zValue, Z);
454 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400455 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400456 zTest = CmpNEQ(zValue, Z);
457 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400458 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400459 if(complementaryDepthBuffer)
460 {
461 zTest = CmpLT(zValue, Z);
462 }
463 else
464 {
465 zTest = CmpNLE(zValue, Z);
466 }
467 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400468 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400469 if(complementaryDepthBuffer)
470 {
471 zTest = CmpNLT(zValue, Z);
472 }
473 else
474 {
475 zTest = CmpLE(zValue, Z);
476 }
477 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400478 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400479 if(complementaryDepthBuffer)
480 {
481 zTest = CmpLE(zValue, Z);
482 }
483 else
484 {
485 zTest = CmpNLT(zValue, Z);
486 }
487 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400488 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400489 if(complementaryDepthBuffer)
490 {
491 zTest = CmpNLE(zValue, Z);
492 }
493 else
494 {
495 zTest = CmpLT(zValue, Z);
496 }
497 break;
498 default:
499 ASSERT(false);
500 }
501
502 switch(state.depthCompareMode)
503 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400504 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400505 zMask = cMask;
506 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400507 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400508 zMask = 0x0;
509 break;
510 default:
511 zMask = SignMask(zTest) & cMask;
512 break;
513 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500514
John Bauman89401822014-05-06 15:04:28 -0400515 if(state.stencilActive)
516 {
517 zMask &= sMask;
518 }
519
520 return zMask != 0;
521 }
522
Nicolas Capens4f172c72016-01-13 08:34:30 -0500523 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400524 {
525 Short4 cmp;
526 Short4 equal;
527
528 switch(state.alphaCompareMode)
529 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400530 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400531 aMask = 0xF;
532 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400533 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400534 aMask = 0x0;
535 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400536 case ALPHA_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500537 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400538 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400539 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400540 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
541 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400542 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400543 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400544 case ALPHA_LESS: // a < b ~ b > a
Nicolas Capens4f172c72016-01-13 08:34:30 -0500545 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
Nicolas Capens33438a62017-09-27 11:47:35 -0400546 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400547 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400548 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
Nicolas Capens4f172c72016-01-13 08:34:30 -0500549 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
550 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400551 cmp |= equal;
Nicolas Capens33438a62017-09-27 11:47:35 -0400552 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400553 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400554 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
555 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400556 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400557 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400558 case ALPHA_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500559 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400560 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400561 break;
562 default:
563 ASSERT(false);
564 }
565 }
566
Nicolas Capens4f172c72016-01-13 08:34:30 -0500567 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400568 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500569 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
570 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
571 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
572 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
John Bauman89401822014-05-06 15:04:28 -0400573
574 Int aMask0 = SignMask(coverage0);
575 Int aMask1 = SignMask(coverage1);
576 Int aMask2 = SignMask(coverage2);
577 Int aMask3 = SignMask(coverage3);
578
579 cMask[0] &= aMask0;
580 cMask[1] &= aMask1;
581 cMask[2] &= aMask2;
582 cMask[3] &= aMask3;
583 }
584
Nicolas Capens4f172c72016-01-13 08:34:30 -0500585 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400586 {
587 if(!state.fogActive)
588 {
589 return;
590 }
591
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400592 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400593 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500594 pixelFog(fog);
John Bauman89401822014-05-06 15:04:28 -0400595
John Bauman19bac1e2014-05-06 15:23:49 -0400596 fog = Min(fog, Float4(1.0f));
597 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400598 }
599
Nicolas Capens4f172c72016-01-13 08:34:30 -0500600 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
601 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
602 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400603
John Bauman19bac1e2014-05-06 15:23:49 -0400604 c0.x *= fog;
605 c0.y *= fog;
606 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400607
Nicolas Capens4f172c72016-01-13 08:34:30 -0500608 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
609 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
610 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400611 }
612
Nicolas Capens4f172c72016-01-13 08:34:30 -0500613 void PixelRoutine::pixelFog(Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400614 {
615 Float4 &zw = visibility;
616
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400617 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400618 {
619 if(state.wBasedFog)
620 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500621 zw = rhw;
John Bauman89401822014-05-06 15:04:28 -0400622 }
623 else
624 {
625 if(complementaryDepthBuffer)
626 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500627 zw = Float4(1.0f) - z[0];
John Bauman89401822014-05-06 15:04:28 -0400628 }
629 else
630 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500631 zw = z[0];
John Bauman89401822014-05-06 15:04:28 -0400632 }
633 }
634 }
635
636 switch(state.pixelFogMode)
637 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400638 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400639 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400640 case FOG_LINEAR:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500641 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale));
642 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
John Bauman89401822014-05-06 15:04:28 -0400643 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400644 case FOG_EXP:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500645 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400646 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400647 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400648 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400649 zw *= zw;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500650 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400651 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400652 break;
653 default:
654 ASSERT(false);
655 }
656 }
657
Nicolas Capens4f172c72016-01-13 08:34:30 -0500658 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
John Bauman89401822014-05-06 15:04:28 -0400659 {
660 if(!state.depthWriteEnable)
661 {
662 return;
663 }
664
665 Float4 Z = z;
666
John Bauman19bac1e2014-05-06 15:23:49 -0400667 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400668 {
669 if(complementaryDepthBuffer)
670 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500671 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400672 }
673 else
674 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500675 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400676 }
677 }
678
679 Pointer<Byte> buffer;
680 Int pitch;
681
682 if(!state.quadLayoutDepthBuffer)
Nicolas Capens05b3d662016-02-25 23:58:33 -0500683 {
John Bauman89401822014-05-06 15:04:28 -0400684 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500685 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400686 }
687 else
Nicolas Capens05b3d662016-02-25 23:58:33 -0500688 {
John Bauman89401822014-05-06 15:04:28 -0400689 buffer = zBuffer + 8 * x;
690 }
691
692 if(q > 0)
693 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500694 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400695 }
696
697 Float4 zValue;
698
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400699 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400700 {
701 if(!state.quadLayoutDepthBuffer)
702 {
703 // FIXME: Properly optimizes?
704 zValue.xy = *Pointer<Float4>(buffer);
705 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
706 }
707 else
708 {
709 zValue = *Pointer<Float4>(buffer, 16);
710 }
711 }
712
Nicolas Capens4f172c72016-01-13 08:34:30 -0500713 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
714 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -0400715 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
716
717 if(!state.quadLayoutDepthBuffer)
718 {
719 // FIXME: Properly optimizes?
720 *Pointer<Float2>(buffer) = Float2(Z.xy);
721 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
722 }
723 else
724 {
725 *Pointer<Float4>(buffer, 16) = Z;
726 }
727 }
728
Nicolas Capens4f172c72016-01-13 08:34:30 -0500729 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400730 {
731 if(!state.stencilActive)
732 {
733 return;
734 }
735
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400736 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400737 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400738 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400739 {
740 return;
741 }
742 }
743
744 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
745 {
746 return;
747 }
748
749 Pointer<Byte> buffer = sBuffer + 2 * x;
750
751 if(q > 0)
752 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500753 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400754 }
755
Nicolas Capens48ef1252016-11-07 15:30:33 -0500756 Byte8 bufferValue = *Pointer<Byte8>(buffer);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500757
John Bauman89401822014-05-06 15:04:28 -0400758 Byte8 newValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500759 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400760
761 if(!state.noStencilWriteMask)
762 {
763 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500764 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
765 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400766 newValue |= maskedValue;
767 }
768
769 if(state.twoSidedStencil)
770 {
771 Byte8 newValueCCW;
772
Nicolas Capens4f172c72016-01-13 08:34:30 -0500773 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400774
775 if(!state.noStencilWriteMaskCCW)
776 {
777 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500778 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
779 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400780 newValueCCW |= maskedValue;
781 }
782
Nicolas Capens4f172c72016-01-13 08:34:30 -0500783 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
784 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400785 newValue |= newValueCCW;
786 }
787
Nicolas Capens4f172c72016-01-13 08:34:30 -0500788 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
789 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
John Bauman89401822014-05-06 15:04:28 -0400790 newValue |= bufferValue;
791
Nicolas Capens16b5f152016-10-13 13:39:01 -0400792 *Pointer<Byte4>(buffer) = Byte4(newValue);
John Bauman89401822014-05-06 15:04:28 -0400793 }
794
Nicolas Capens4f172c72016-01-13 08:34:30 -0500795 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400796 {
797 Byte8 &pass = newValue;
798 Byte8 fail;
799 Byte8 zFail;
800
Nicolas Capens4f172c72016-01-13 08:34:30 -0500801 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400802
803 if(stencilZFailOperation != stencilPassOperation)
804 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500805 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400806 }
807
808 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
809 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500810 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400811 }
812
813 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
814 {
815 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
816 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500817 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
818 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
John Bauman89401822014-05-06 15:04:28 -0400819 pass |= zFail;
820 }
821
Nicolas Capens4f172c72016-01-13 08:34:30 -0500822 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
823 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
John Bauman89401822014-05-06 15:04:28 -0400824 pass |= fail;
825 }
826 }
827
Nicolas Capens4f172c72016-01-13 08:34:30 -0500828 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400829 {
830 switch(operation)
831 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400832 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400833 output = bufferValue;
834 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400835 case OPERATION_ZERO:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400836 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400837 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400838 case OPERATION_REPLACE:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500839 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
John Bauman89401822014-05-06 15:04:28 -0400840 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400841 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400842 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
843 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400844 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400845 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
846 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400847 case OPERATION_INVERT:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400848 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400849 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400850 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400851 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
852 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400853 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400854 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
855 break;
856 default:
857 ASSERT(false);
858 }
859 }
860
Nicolas Capens96d4e092016-11-18 14:22:38 -0500861 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400862 {
863 switch(blendFactorActive)
864 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400865 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400866 // Optimized
867 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400868 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400869 // Optimized
870 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400871 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400872 blendFactor.x = current.x;
873 blendFactor.y = current.y;
874 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400875 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400876 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400877 blendFactor.x = Short4(0xFFFFu) - current.x;
878 blendFactor.y = Short4(0xFFFFu) - current.y;
879 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400880 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400881 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400882 blendFactor.x = pixel.x;
883 blendFactor.y = pixel.y;
884 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400885 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400886 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400887 blendFactor.x = Short4(0xFFFFu) - pixel.x;
888 blendFactor.y = Short4(0xFFFFu) - pixel.y;
889 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400890 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400891 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400892 blendFactor.x = current.w;
893 blendFactor.y = current.w;
894 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400895 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400896 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400897 blendFactor.x = Short4(0xFFFFu) - current.w;
898 blendFactor.y = Short4(0xFFFFu) - current.w;
899 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400900 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400901 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400902 blendFactor.x = pixel.w;
903 blendFactor.y = pixel.w;
904 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400905 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400906 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400907 blendFactor.x = Short4(0xFFFFu) - pixel.w;
908 blendFactor.y = Short4(0xFFFFu) - pixel.w;
909 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400910 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400911 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400912 blendFactor.x = Short4(0xFFFFu) - pixel.w;
913 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
914 blendFactor.y = blendFactor.x;
915 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400916 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400917 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500918 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
919 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
920 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400921 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400922 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500923 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
924 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
925 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400926 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400927 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500928 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
929 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
930 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400931 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400932 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500933 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
934 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
935 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400936 break;
937 default:
938 ASSERT(false);
939 }
940 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500941
Nicolas Capens96d4e092016-11-18 14:22:38 -0500942 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400943 {
944 switch(blendFactorAlphaActive)
945 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400946 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400947 // Optimized
948 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400949 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400950 // Optimized
951 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400952 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400953 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400954 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400955 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400956 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400957 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400958 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400959 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400960 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400961 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400962 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400963 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400964 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400965 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400966 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400967 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400968 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400969 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400970 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400971 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400972 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400973 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400974 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400975 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400976 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400977 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400978 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400979 case BLEND_CONSTANT:
980 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500981 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400982 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400983 case BLEND_INVCONSTANT:
984 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500985 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400986 break;
987 default:
988 ASSERT(false);
989 }
990 }
991
Alexis Hetu049a1872016-04-25 16:59:58 -0400992 bool PixelRoutine::isSRGB(int index) const
993 {
994 return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8;
995 }
996
Nicolas Capens4f172c72016-01-13 08:34:30 -0500997 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -0400998 {
John Bauman89401822014-05-06 15:04:28 -0400999 Short4 c01;
1000 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001001 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001002 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001003
John Bauman89401822014-05-06 15:04:28 -04001004 switch(state.targetFormat[index])
1005 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001006 case FORMAT_R5G6B5:
1007 buffer = cBuffer + 2 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001008 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensb40a2562016-01-05 00:08:45 -05001009 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001010
1011 pixel.x = c01 & Short4(0xF800u);
1012 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1013 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1014 pixel.w = Short4(0xFFFFu);
1015 break;
John Bauman89401822014-05-06 15:04:28 -04001016 case FORMAT_A8R8G8B8:
1017 buffer = cBuffer + 4 * x;
1018 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001019 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001020 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001021 pixel.z = c01;
1022 pixel.y = c01;
1023 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1024 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1025 pixel.x = pixel.z;
1026 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1027 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1028 pixel.y = pixel.z;
1029 pixel.w = pixel.x;
1030 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1031 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1032 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1033 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001034 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001035 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001036 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001037 buffer = cBuffer + 4 * x;
1038 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001039 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001040 c23 = *Pointer<Short4>(buffer);
1041 pixel.z = c01;
1042 pixel.y = c01;
1043 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1044 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1045 pixel.x = pixel.z;
1046 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1047 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1048 pixel.y = pixel.z;
1049 pixel.w = pixel.x;
1050 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1051 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1052 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1053 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1054 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001055 case FORMAT_A8:
1056 buffer = cBuffer + 1 * x;
1057 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001058 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001059 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1060 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1061 pixel.x = Short4(0x0000);
1062 pixel.y = Short4(0x0000);
1063 pixel.z = Short4(0x0000);
1064 break;
Nicolas Capens7a473b72017-10-25 17:18:55 -04001065 case FORMAT_R8:
1066 buffer = cBuffer + 1 * x;
1067 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1068 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1069 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1070 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1071 pixel.y = Short4(0x0000);
1072 pixel.z = Short4(0x0000);
1073 pixel.w = Short4(0xFFFFu);
1074 break;
John Bauman89401822014-05-06 15:04:28 -04001075 case FORMAT_X8R8G8B8:
1076 buffer = cBuffer + 4 * x;
1077 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001078 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001079 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001080 pixel.z = c01;
1081 pixel.y = c01;
1082 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1083 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1084 pixel.x = pixel.z;
1085 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1086 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1087 pixel.y = pixel.z;
1088 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1089 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1090 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1091 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001092 break;
Nicolas Capense4bdbc32017-12-07 20:46:49 -05001093 case FORMAT_G8R8:
1094 buffer = cBuffer + 2 * x;
1095 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 0));
1096 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1097 c01 = As<Short4>(Insert(As<Int2>(c01), *Pointer<Int>(buffer), 1));
1098 pixel.x = (c01 & Short4(0x00FFu)) | (c01 << 8);
1099 pixel.y = (c01 & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c01) >> 8);
1100 pixel.z = Short4(0x0000u);
1101 pixel.w = Short4(0xFFFFu);
1102 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001103 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001104 case FORMAT_SRGB8_X8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001105 buffer = cBuffer + 4 * x;
1106 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001107 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001108 c23 = *Pointer<Short4>(buffer);
1109 pixel.z = c01;
1110 pixel.y = c01;
1111 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1112 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1113 pixel.x = pixel.z;
1114 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1115 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1116 pixel.y = pixel.z;
1117 pixel.w = pixel.x;
1118 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1119 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1120 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1121 pixel.w = Short4(0xFFFFu);
1122 break;
John Bauman89401822014-05-06 15:04:28 -04001123 case FORMAT_A8G8R8B8Q:
1124 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001125 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1126 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1127 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1128 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001129 break;
1130 case FORMAT_X8G8R8B8Q:
1131 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001132 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1133 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1134 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1135 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001136 break;
1137 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001138 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001139 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1140 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001141 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001142 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1143 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1144 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001145 break;
1146 case FORMAT_G16R16:
1147 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001148 pixel.x = *Pointer<Short4>(buffer + 4 * x);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001149 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Maxime Grégoired9762742015-07-08 16:43:48 -04001150 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001151 pixel.z = pixel.x;
1152 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1153 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1154 pixel.y = pixel.z;
1155 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1156 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1157 pixel.z = Short4(0xFFFFu);
1158 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001159 break;
1160 default:
1161 ASSERT(false);
1162 }
1163
Alexis Hetu049a1872016-04-25 16:59:58 -04001164 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001165 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001166 sRGBtoLinear16_12_16(pixel);
John Bauman89401822014-05-06 15:04:28 -04001167 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001168 }
1169
Nicolas Capens4f172c72016-01-13 08:34:30 -05001170 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001171 {
1172 if(!state.alphaBlendActive)
1173 {
1174 return;
1175 }
1176
1177 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001178 readPixel(index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001179
1180 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001181 Vector4s sourceFactor;
1182 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001183
Nicolas Capens4f172c72016-01-13 08:34:30 -05001184 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1185 blendFactor(destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001186
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001187 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001188 {
John Bauman19bac1e2014-05-06 15:23:49 -04001189 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1190 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1191 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001192 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001193
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001194 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001195 {
John Bauman19bac1e2014-05-06 15:23:49 -04001196 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1197 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1198 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001199 }
1200
1201 switch(state.blendOperation)
1202 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001203 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001204 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1205 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1206 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001207 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001208 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001209 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1210 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1211 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001212 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001213 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001214 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1215 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1216 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001217 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001218 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001219 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1220 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1221 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001222 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001223 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001224 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1225 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1226 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001227 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001228 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001229 // No operation
1230 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001231 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001232 current.x = pixel.x;
1233 current.y = pixel.y;
1234 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001235 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001236 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001237 current.x = Short4(0x0000);
1238 current.y = Short4(0x0000);
1239 current.z = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001240 break;
1241 default:
1242 ASSERT(false);
1243 }
1244
Nicolas Capens4f172c72016-01-13 08:34:30 -05001245 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1246 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001247
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001248 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001249 {
John Bauman19bac1e2014-05-06 15:23:49 -04001250 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001251 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001252
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001253 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001254 {
John Bauman19bac1e2014-05-06 15:23:49 -04001255 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001256 }
1257
1258 switch(state.blendOperationAlpha)
1259 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001260 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001261 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001262 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001263 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001264 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001265 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001266 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001267 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001268 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001269 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001270 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001271 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001272 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001273 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001274 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001275 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001276 // No operation
1277 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001278 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001279 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001280 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001281 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001282 current.w = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001283 break;
1284 default:
1285 ASSERT(false);
1286 }
1287 }
1288
Nicolas Capens4f172c72016-01-13 08:34:30 -05001289 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001290 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001291 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001292 {
1293 return;
1294 }
1295
1296 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001297 readPixel(index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001298
1299 switch(state.logicalOperation)
1300 {
1301 case LOGICALOP_CLEAR:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001302 current.x = UShort4(0);
1303 current.y = UShort4(0);
1304 current.z = UShort4(0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001305 break;
1306 case LOGICALOP_SET:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001307 current.x = UShort4(0xFFFFu);
1308 current.y = UShort4(0xFFFFu);
1309 current.z = UShort4(0xFFFFu);
Maxime Grégoired9762742015-07-08 16:43:48 -04001310 break;
1311 case LOGICALOP_COPY:
1312 ASSERT(false); // Optimized out
1313 break;
1314 case LOGICALOP_COPY_INVERTED:
1315 current.x = ~current.x;
1316 current.y = ~current.y;
1317 current.z = ~current.z;
1318 break;
1319 case LOGICALOP_NOOP:
1320 current.x = pixel.x;
1321 current.y = pixel.y;
1322 current.z = pixel.z;
1323 break;
1324 case LOGICALOP_INVERT:
1325 current.x = ~pixel.x;
1326 current.y = ~pixel.y;
1327 current.z = ~pixel.z;
1328 break;
1329 case LOGICALOP_AND:
1330 current.x = pixel.x & current.x;
1331 current.y = pixel.y & current.y;
1332 current.z = pixel.z & current.z;
1333 break;
1334 case LOGICALOP_NAND:
1335 current.x = ~(pixel.x & current.x);
1336 current.y = ~(pixel.y & current.y);
1337 current.z = ~(pixel.z & current.z);
1338 break;
1339 case LOGICALOP_OR:
1340 current.x = pixel.x | current.x;
1341 current.y = pixel.y | current.y;
1342 current.z = pixel.z | current.z;
1343 break;
1344 case LOGICALOP_NOR:
1345 current.x = ~(pixel.x | current.x);
1346 current.y = ~(pixel.y | current.y);
1347 current.z = ~(pixel.z | current.z);
1348 break;
1349 case LOGICALOP_XOR:
1350 current.x = pixel.x ^ current.x;
1351 current.y = pixel.y ^ current.y;
1352 current.z = pixel.z ^ current.z;
1353 break;
1354 case LOGICALOP_EQUIV:
1355 current.x = ~(pixel.x ^ current.x);
1356 current.y = ~(pixel.y ^ current.y);
1357 current.z = ~(pixel.z ^ current.z);
1358 break;
1359 case LOGICALOP_AND_REVERSE:
1360 current.x = ~pixel.x & current.x;
1361 current.y = ~pixel.y & current.y;
1362 current.z = ~pixel.z & current.z;
1363 break;
1364 case LOGICALOP_AND_INVERTED:
1365 current.x = pixel.x & ~current.x;
1366 current.y = pixel.y & ~current.y;
1367 current.z = pixel.z & ~current.z;
1368 break;
1369 case LOGICALOP_OR_REVERSE:
1370 current.x = ~pixel.x | current.x;
1371 current.y = ~pixel.y | current.y;
1372 current.z = ~pixel.z | current.z;
1373 break;
1374 case LOGICALOP_OR_INVERTED:
1375 current.x = pixel.x | ~current.x;
1376 current.y = pixel.y | ~current.y;
1377 current.z = pixel.z | ~current.z;
1378 break;
1379 default:
1380 ASSERT(false);
1381 }
1382 }
1383
// Converts the color held in 'current' to the memory layout of render target
// 'index' and merges it into the color buffer.
//
// Two rows are written: one at 'cBuffer' and one that is
// DrawData::colorPitchB[index] bytes further. Every store is a
// read-modify-write: the existing buffer contents are loaded, the lanes
// selected by the color write mask and by the combined pixel mask are
// replaced, and the merged value is stored back.
//
// index   - render target index; selects state.targetFormat[index],
//           state.colorWriteActive(index) and the row pitch.
// cBuffer - address of the first of the two rows.
// x       - pixel position within the row; scaled below by the byte size of
//           one pixel of the target format.
// current - color to write, one Short4 per channel. It is modified in place
//           (sRGB conversion, rounding, packing, masking).
// sMask, zMask, cMask - pixel masks. zMask is used when depth testing is
//           active, cMask otherwise; the result is ANDed with sMask when
//           stencil is active.
//           NOTE(review): presumably stencil / depth / coverage masks with one
//           bit per pixel of a 2x2 quad - confirm against the caller.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001384 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001385 {
// Convert to sRGB (linearToSRGB16_12_16) when the global post-blend sRGB
// setting and the state request it, or when isSRGB(index) reports true.
Alexis Hetu049a1872016-04-25 16:59:58 -04001386 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001387 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001388 linearToSRGB16_12_16(current);
John Bauman89401822014-05-06 15:04:28 -04001389 }
1390
// Optional rounding: bias the 16-bit channels before the truncating shifts and
// masks of the packing stage below.
1391 if(exactColorRounding)
1392 {
1393 switch(state.targetFormat[index])
1394 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001395 case FORMAT_R5G6B5:
// Saturating add of half of the lowest bit that survives the packing masks
// below (0xF800 keeps 5 bits, 0xFC00 keeps 6 bits).
Nicolas Capens26f37222015-09-22 09:53:45 -04001396 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1397 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1398 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001399 break;
John Bauman89401822014-05-06 15:04:28 -04001400 case FORMAT_X8G8R8B8Q:
1401 case FORMAT_A8G8R8B8Q:
1402 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001403 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001404 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001405 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001406 case FORMAT_SRGB8_X8:
1407 case FORMAT_SRGB8_A8:
Alexis Hetu143dfc72016-09-13 18:41:27 -04001408 case FORMAT_G8R8:
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001409 case FORMAT_R8:
// c - (c >> 8) + 0x80: subtracts 1/256th of the value and adds half of the
// 8-bit LSB, ahead of the truncating ">> 8" in the packing stage.
// All four channels are biased, including w for formats without alpha.
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001410 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1411 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1412 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1413 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
John Bauman89401822014-05-06 15:04:28 -04001414 break;
Nicolas Capensb69aa272016-01-02 00:06:41 -05001415 default:
1416 break;
John Bauman89401822014-05-06 15:04:28 -04001417 }
1418 }
1419
// Color write mask in two channel orders. bgraWriteMask keeps bits 1 and 3
// (0xA) and exchanges bit 0 with bit 2.
1420 int rgbaWriteMask = state.colorWriteActive(index);
Nicolas Capens3b396462016-01-02 00:23:53 -05001421 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
John Bauman89401822014-05-06 15:04:28 -04001422
// Stage 1: pack the channels of 'current' into the layout of the target format.
1423 switch(state.targetFormat[index])
1424 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001425 case FORMAT_R5G6B5:
1426 {
// x keeps its top 5 bits in place, y's top 6 bits move down by 5, z's top
// 5 bits move down to bits 0-4; the three are ORed into current.x.
1427 current.x = current.x & Short4(0xF800u);
1428 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1429 current.z = As<UShort4>(current.z) >> 11;
1430
1431 current.x = current.x | current.y | current.z;
1432 }
1433 break;
John Bauman89401822014-05-06 15:04:28 -04001434 case FORMAT_X8G8R8B8Q:
1435 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001436 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1437 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1438 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001439
John Bauman19bac1e2014-05-06 15:23:49 -04001440 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1441 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001442 break;
1443 case FORMAT_A8G8R8B8Q:
1444 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001445 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1446 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1447 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1448 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001449
John Bauman19bac1e2014-05-06 15:23:49 -04001450 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1451 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001452 break;
1453 case FORMAT_X8R8G8B8:
1454 case FORMAT_A8R8G8B8:
// When alpha is not needed (X8 format, or write mask exactly 0x7) current.w is
// never read: y is packed with itself instead of with w.
1455 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1456 {
// Reduce each channel to 8 bits.
John Bauman19bac1e2014-05-06 15:23:49 -04001457 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1458 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1459 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001460
// NOTE(review): PackUnsigned presumably narrows the 16-bit lanes of both
// operands to unsigned bytes - confirm in Reactor.
Nicolas Capens33438a62017-09-27 11:47:35 -04001461 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1462 current.y = As<Short4>(PackUnsigned(current.y, current.y));
John Bauman89401822014-05-06 15:04:28 -04001463
// Interleave bytes, then 16-bit lanes; the packed result ends up in current.z
// and current.y, which are picked up as c01 and c23 after this switch.
John Bauman19bac1e2014-05-06 15:23:49 -04001464 current.x = current.z;
1465 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1466 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1467 current.y = current.z;
1468 current.z = As<Short4>(UnpackLow(current.z, current.x));
1469 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001470 }
1471 else
1472 {
// Same sequence as above, but with w reduced to 8 bits and packed next to y.
John Bauman19bac1e2014-05-06 15:23:49 -04001473 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1474 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1475 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1476 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001477
Nicolas Capens33438a62017-09-27 11:47:35 -04001478 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1479 current.y = As<Short4>(PackUnsigned(current.y, current.w));
John Bauman89401822014-05-06 15:04:28 -04001480
John Bauman19bac1e2014-05-06 15:23:49 -04001481 current.x = current.z;
1482 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1483 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1484 current.y = current.z;
1485 current.z = As<Short4>(UnpackLow(current.z, current.x));
1486 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001487 }
1488 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001489 case FORMAT_X8B8G8R8:
1490 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001491 case FORMAT_SRGB8_X8:
1492 case FORMAT_SRGB8_A8:
// Same packing as the X8R8G8B8/A8R8G8B8 case, except that x and z swap places
// in the first PackUnsigned call.
1493 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001494 {
1495 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1496 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1497 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1498
Nicolas Capens33438a62017-09-27 11:47:35 -04001499 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1500 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001501
1502 current.x = current.z;
1503 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1504 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1505 current.y = current.z;
1506 current.z = As<Short4>(UnpackLow(current.z, current.x));
1507 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1508 }
1509 else
1510 {
1511 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1512 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1513 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1514 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1515
Nicolas Capens33438a62017-09-27 11:47:35 -04001516 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1517 current.y = As<Short4>(PackUnsigned(current.y, current.w));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001518
1519 current.x = current.z;
1520 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1521 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1522 current.y = current.z;
1523 current.z = As<Short4>(UnpackLow(current.z, current.x));
1524 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1525 }
1526 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001527 case FORMAT_G8R8:
// Reduce x and y to 8 bits and interleave their bytes into current.x.
1528 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1529 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001530 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1531 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Alexis Hetu143dfc72016-09-13 18:41:27 -04001532 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1533 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001534 case FORMAT_R8:
// Single channel: x reduced to 8 bits and packed to bytes.
1535 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001536 current.x = As<Short4>(PackUnsigned(current.x, current.x));
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001537 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001538 case FORMAT_A8:
// Single channel: w reduced to 8 bits and packed to bytes.
1539 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001540 current.w = As<Short4>(PackUnsigned(current.w, current.w));
John Bauman66b8ab22014-05-06 15:57:45 -04001541 break;
John Bauman89401822014-05-06 15:04:28 -04001542 case FORMAT_G16R16:
// Interleave the 16-bit lanes of x and y: the low half goes to current.x and
// the high half to current.y (current.z is used as a temporary).
John Bauman19bac1e2014-05-06 15:23:49 -04001543 current.z = current.x;
1544 current.x = As<Short4>(UnpackLow(current.x, current.y));
1545 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1546 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001547 break;
1548 case FORMAT_A16B16G16R16:
// NOTE(review): presumably turns the four per-channel vectors into four
// per-pixel vectors - confirm against transpose4x4.
John Bauman19bac1e2014-05-06 15:23:49 -04001549 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001550 break;
John Bauman89401822014-05-06 15:04:28 -04001551 default:
1552 ASSERT(false);
1553 }
1554
// Used by the 32-bit (8888) cases below: c01 is stored to the first row and
// c23 to the row one pitch further.
John Bauman19bac1e2014-05-06 15:23:49 -04001555 Short4 c01 = current.z;
1556 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001557
1558 Int xMask; // Combination of all masks
1559
1560 if(state.depthTestActive)
1561 {
1562 xMask = zMask;
1563 }
1564 else
1565 {
1566 xMask = cMask;
1567 }
1568
1569 if(state.stencilActive)
1570 {
1571 xMask &= sMask;
1572 }
1573
// Stage 2: merge the packed color into the buffer. In every case the mask
// tables in 'constants' are addressed with a byte offset of xMask * 8.
John Bauman89401822014-05-06 15:04:28 -04001574 switch(state.targetFormat[index])
1575 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001576 case FORMAT_R5G6B5:
1577 {
// 2 bytes per pixel; each row is handled as one 32-bit Int.
1578 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001579 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001580
// NOTE: this Int c01 shadows the Short4 c01 declared before the switch.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001581 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001582
// Partial color write mask: keep the enabled channel bits of the new color and
// the disabled channel bits of the existing value. The inverse mask comes from
// the same table, indexed with the complemented write mask.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001583 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001584 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001585 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001586 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001587 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001588 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001589 }
1590
// Pixel mask merge for the first row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001591 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1592 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001593 c01 |= value;
1594 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001595
// Advance to the second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001596 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001597 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001598
// NOTE: this Int c23 shadows the Short4 c23 declared before the switch.
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001599 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001600
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001601 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001602 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001603 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001604 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001605 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001606 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001607 }
1608
// Second row reads the table entries starting at element [0][2] rather than
// [0][0].
Nicolas Capens4f172c72016-01-13 08:34:30 -05001609 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1610 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001611 c23 |= value;
1612 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001613 }
1614 break;
John Bauman89401822014-05-06 15:04:28 -04001615 case FORMAT_A8G8R8B8Q:
1616 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
1617 UNIMPLEMENTED();
1618 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1619
1620 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1621 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1622 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1623 // {
1624 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001625 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1626 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001627 // c01 |= masked;
1628 // }
1629
Nicolas Capens4f172c72016-01-13 08:34:30 -05001630 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1631 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001632 // c01 |= value;
1633 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1634
1635 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1636
1637 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1638 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1639 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1640 // {
1641 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001642 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1643 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001644 // c23 |= masked;
1645 // }
1646
Nicolas Capens4f172c72016-01-13 08:34:30 -05001647 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1648 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001649 // c23 |= value;
1650 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1651 break;
1652 case FORMAT_A8R8G8B8:
1653 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001654 {
// 4 bytes per pixel; each row is handled as one 8-byte Short4.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001655 Pointer<Byte> buffer = cBuffer + x * 4;
1656 Short4 value = *Pointer<Short4>(buffer);
1657
// Channel masking is needed when A8R8G8B8 is not written with mask 0xF, or
// X8R8G8B8 with a mask other than 0x7 or 0xF.
// NOTE(review): the two X8R8G8B8 sub-conditions repeat the same format test.
1658 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1659 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1660 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1661 {
1662 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001663 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1664 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001665 c01 |= masked;
1666 }
1667
// Pixel mask merge for the first row, then store.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001668 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1669 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001670 c01 |= value;
1671 *Pointer<Short4>(buffer) = c01;
1672
// Second row: same steps with c23 and the maskD23Q tables.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001673 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001674 value = *Pointer<Short4>(buffer);
1675
1676 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1677 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1678 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1679 {
1680 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001681 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1682 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001683 c23 |= masked;
1684 }
1685
Nicolas Capens4f172c72016-01-13 08:34:30 -05001686 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1687 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001688 c23 |= value;
1689 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001690 }
John Bauman89401822014-05-06 15:04:28 -04001691 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001692 case FORMAT_A8B8G8R8:
1693 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Alexis Hetu049a1872016-04-25 16:59:58 -04001694 case FORMAT_SRGB8_X8:
1695 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001696 {
// Same scheme as the A8R8G8B8/X8R8G8B8 case, but the mask tables are indexed
// with rgbaWriteMask instead of bgraWriteMask.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001697 Pointer<Byte> buffer = cBuffer + x * 4;
1698 Short4 value = *Pointer<Short4>(buffer);
1699
// Decided once and reused for both rows: alpha formats need channel masking
// unless the mask is 0xF; X8 formats unless it is 0x7 or 0xF.
Alexis Hetu049a1872016-04-25 16:59:58 -04001700 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
1701 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
1702 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
1703
1704 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001705 {
// NOTE: this Short4 'masked' shadows the bool 'masked' inside this scope.
1706 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001707 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1708 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001709 c01 |= masked;
1710 }
1711
Nicolas Capens4f172c72016-01-13 08:34:30 -05001712 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1713 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001714 c01 |= value;
1715 *Pointer<Short4>(buffer) = c01;
1716
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001717 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001718 value = *Pointer<Short4>(buffer);
1719
Alexis Hetu049a1872016-04-25 16:59:58 -04001720 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001721 {
1722 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001723 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1724 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001725 c23 |= masked;
1726 }
1727
Nicolas Capens4f172c72016-01-13 08:34:30 -05001728 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1729 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001730 c23 |= value;
1731 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001732 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001733 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001734 case FORMAT_G8R8:
// Nothing is read or written when neither of the two low write mask bits is set.
1735 if((rgbaWriteMask & 0x00000003) != 0x0)
1736 {
// 2 bytes per pixel; both rows are gathered into one Int2 (element 0 = first
// row, element 1 = the row one pitch further).
1737 Pointer<Byte> buffer = cBuffer + 2 * x;
1738 Int2 value;
1739 value = Insert(value, *Pointer<Int>(buffer), 0);
1740 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1741 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1742
1743 Int2 packedCol = As<Int2>(current.x);
1744
// Start from the pixel mask; if only one of the two channels is enabled,
// narrow it with a byte mask. 5 * m repeats the 2-bit mask m in bits 2-3
// (1 -> 0101b, 2 -> 1010b) to index the 4-bit maskB4Q table.
1745 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1746 if((rgbaWriteMask & 0x3) != 0x3)
1747 {
1748 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1749 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1750 mergedMask &= rgbaMask;
1751 }
1752
// Select new bits where the mask is set and existing bits elsewhere.
1753 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1754
1755 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1756 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1757 }
1758 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001759 case FORMAT_R8:
// Written only when bit 0 of the write mask is set.
1760 if(rgbaWriteMask & 0x00000001)
1761 {
// 1 byte per pixel; each row is read and written as one 16-bit Short, placed
// in lanes 0 and 1 of 'value'.
1762 Pointer<Byte> buffer = cBuffer + 1 * x;
1763 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001764 value = Insert(value, *Pointer<Short>(buffer), 0);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001765 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001766 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001767
// Pixel mask merge using the byte mask tables.
1768 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1769 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1770 current.x |= value;
1771
1772 *Pointer<Short>(buffer) = Extract(current.x, 0);
1773 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1774 }
1775 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001776 case FORMAT_A8:
// Same as FORMAT_R8, but driven by bit 3 of the write mask and using current.w.
1777 if(rgbaWriteMask & 0x00000008)
1778 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001779 Pointer<Byte> buffer = cBuffer + 1 * x;
1780 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001781 value = Insert(value, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001782 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001783 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
John Bauman66b8ab22014-05-06 15:57:45 -04001784
Nicolas Capens4f172c72016-01-13 08:34:30 -05001785 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1786 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
John Bauman66b8ab22014-05-06 15:57:45 -04001787 current.w |= value;
1788
1789 *Pointer<Short>(buffer) = Extract(current.w, 0);
1790 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1791 }
1792 break;
John Bauman89401822014-05-06 15:04:28 -04001793 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001794 {
// 4 bytes per pixel; current.x goes to the first row and current.y to the
// row one pitch further, each as an 8-byte Short4.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001795 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001796
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001797 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001798
// Partial write of the two channels: the inverse mask comes from the same
// maskW01Q table, indexed with the complemented 2-bit write mask.
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001799 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001800 {
1801 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001802 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001803 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001804 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001805 }
1806
Nicolas Capens4f172c72016-01-13 08:34:30 -05001807 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1808 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001809 current.x |= value;
1810 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001811
// Second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001812 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001813
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001814 value = *Pointer<Short4>(buffer);
1815
1816 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001817 {
1818 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001819 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001820 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001821 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001822 }
1823
Nicolas Capens4f172c72016-01-13 08:34:30 -05001824 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1825 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001826 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001827 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001828 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001829 break;
1830 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001831 {
// 8 bytes per pixel; four separate 8-byte stores. First row: current.x at
// +0 and current.y at +8. Second row: current.z at +0 and current.w at +8.
// Each store uses its own pixel mask table (maskQ0Q .. maskQ3Q).
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001832 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001833
John Bauman89401822014-05-06 15:04:28 -04001834 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001835 Short4 value = *Pointer<Short4>(buffer);
1836
// Partial channel write: keep disabled channels from the existing value.
1837 if(rgbaWriteMask != 0x0000000F)
1838 {
1839 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001840 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1841 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001842 current.x |= masked;
1843 }
1844
Nicolas Capens4f172c72016-01-13 08:34:30 -05001845 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1846 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001847 current.x |= value;
1848 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001849 }
1850
John Bauman89401822014-05-06 15:04:28 -04001851 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001852 Short4 value = *Pointer<Short4>(buffer + 8);
1853
1854 if(rgbaWriteMask != 0x0000000F)
1855 {
1856 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001857 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1858 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001859 current.y |= masked;
1860 }
1861
Nicolas Capens4f172c72016-01-13 08:34:30 -05001862 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1863 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001864 current.y |= value;
1865 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001866 }
1867
// Advance to the second row.
Nicolas Capens4f172c72016-01-13 08:34:30 -05001868 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001869
1870 {
1871 Short4 value = *Pointer<Short4>(buffer);
1872
1873 if(rgbaWriteMask != 0x0000000F)
1874 {
1875 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001876 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1877 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001878 current.z |= masked;
1879 }
1880
Nicolas Capens4f172c72016-01-13 08:34:30 -05001881 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1882 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001883 current.z |= value;
1884 *Pointer<Short4>(buffer) = current.z;
1885 }
1886
1887 {
1888 Short4 value = *Pointer<Short4>(buffer + 8);
1889
1890 if(rgbaWriteMask != 0x0000000F)
1891 {
1892 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001893 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1894 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001895 current.w |= masked;
1896 }
1897
Nicolas Capens4f172c72016-01-13 08:34:30 -05001898 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1899 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001900 current.w |= value;
1901 *Pointer<Short4>(buffer + 8) = current.w;
1902 }
John Bauman89401822014-05-06 15:04:28 -04001903 }
1904 break;
1905 default:
1906 ASSERT(false);
1907 }
1908 }
1909
Nicolas Capens96d4e092016-11-18 14:22:38 -05001910 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001911 {
1912 switch(blendFactorActive)
1913 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001914 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001915 // Optimized
1916 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001917 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001918 // Optimized
1919 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001920 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001921 blendFactor.x = oC.x;
1922 blendFactor.y = oC.y;
1923 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001924 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001925 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001926 blendFactor.x = Float4(1.0f) - oC.x;
1927 blendFactor.y = Float4(1.0f) - oC.y;
1928 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001929 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001930 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001931 blendFactor.x = pixel.x;
1932 blendFactor.y = pixel.y;
1933 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001934 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001935 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001936 blendFactor.x = Float4(1.0f) - pixel.x;
1937 blendFactor.y = Float4(1.0f) - pixel.y;
1938 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001939 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001940 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001941 blendFactor.x = oC.w;
1942 blendFactor.y = oC.w;
1943 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001944 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001945 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001946 blendFactor.x = Float4(1.0f) - oC.w;
1947 blendFactor.y = Float4(1.0f) - oC.w;
1948 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001949 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001950 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001951 blendFactor.x = pixel.w;
1952 blendFactor.y = pixel.w;
1953 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001954 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001955 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001956 blendFactor.x = Float4(1.0f) - pixel.w;
1957 blendFactor.y = Float4(1.0f) - pixel.w;
1958 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001959 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001960 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001961 blendFactor.x = Float4(1.0f) - pixel.w;
1962 blendFactor.x = Min(blendFactor.x, oC.w);
1963 blendFactor.y = blendFactor.x;
1964 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001965 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001966 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001967 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1968 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1969 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001970 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001971 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001972 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1973 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1974 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001975 break;
1976 default:
1977 ASSERT(false);
1978 }
1979 }
1980
Nicolas Capens96d4e092016-11-18 14:22:38 -05001981 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001982 {
1983 switch(blendFactorAlphaActive)
1984 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001985 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001986 // Optimized
1987 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001988 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001989 // Optimized
1990 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001991 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001992 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001993 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001994 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001995 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001996 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001997 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001998 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001999 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002000 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002001 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002002 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002003 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002004 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002005 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002006 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002007 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04002008 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002009 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002010 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002011 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002012 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002013 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002014 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002015 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002016 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002017 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002018 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002019 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002020 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002021 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002022 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002023 break;
2024 default:
2025 ASSERT(false);
2026 }
2027 }
2028
Nicolas Capens4f172c72016-01-13 08:34:30 -05002029 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04002030 {
2031 if(!state.alphaBlendActive)
2032 {
2033 return;
2034 }
2035
2036 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002037 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04002038
Alexis Hetu96517182015-04-15 10:30:23 -04002039 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04002040 Short4 c01;
2041 Short4 c23;
2042
Alexis Hetu1abb6382016-02-08 11:21:16 -05002043 Float4 one;
Alexis Hetu7208e932016-06-02 11:19:24 -04002044 if(Surface::isFloatFormat(state.targetFormat[index]))
John Bauman89401822014-05-06 15:04:28 -04002045 {
Alexis Hetu1abb6382016-02-08 11:21:16 -05002046 one = Float4(1.0f);
Alexis Hetu7208e932016-06-02 11:19:24 -04002047 }
2048 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
2049 {
2050 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Alexis Hetu1abb6382016-02-08 11:21:16 -05002051 }
2052
2053 switch(state.targetFormat[index])
2054 {
2055 case FORMAT_R32I:
2056 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002057 case FORMAT_R32F:
2058 buffer = cBuffer;
2059 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002060 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
2061 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002062 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002063 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002064 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
2065 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
Alexis Hetu1abb6382016-02-08 11:21:16 -05002066 pixel.y = pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002067 break;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002068 case FORMAT_G32R32I:
2069 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002070 case FORMAT_G32R32F:
2071 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002072 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002073 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002074 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2075 pixel.z = pixel.x;
2076 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2077 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2078 pixel.y = pixel.z;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002079 pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002080 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002081 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002082 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002083 case FORMAT_A32B32G32R32I:
2084 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002085 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002086 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2087 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002088 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002089 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2090 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2091 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002092 if(state.targetFormat[index] == FORMAT_X32B32G32R32F)
2093 {
2094 pixel.w = Float4(1.0f);
2095 }
John Bauman89401822014-05-06 15:04:28 -04002096 break;
2097 default:
2098 ASSERT(false);
2099 }
2100
Alexis Hetu049a1872016-04-25 16:59:58 -04002101 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04002102 {
John Bauman19bac1e2014-05-06 15:23:49 -04002103 sRGBtoLinear(pixel.x);
2104 sRGBtoLinear(pixel.y);
2105 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002106 }
2107
2108 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002109 Vector4f sourceFactor;
2110 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002111
Nicolas Capens4f172c72016-01-13 08:34:30 -05002112 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
2113 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002114
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002115 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002116 {
John Bauman19bac1e2014-05-06 15:23:49 -04002117 oC.x *= sourceFactor.x;
2118 oC.y *= sourceFactor.y;
2119 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002120 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002121
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002122 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002123 {
John Bauman19bac1e2014-05-06 15:23:49 -04002124 pixel.x *= destFactor.x;
2125 pixel.y *= destFactor.y;
2126 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002127 }
2128
2129 switch(state.blendOperation)
2130 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002131 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002132 oC.x += pixel.x;
2133 oC.y += pixel.y;
2134 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002135 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002136 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002137 oC.x -= pixel.x;
2138 oC.y -= pixel.y;
2139 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002140 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002141 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002142 oC.x = pixel.x - oC.x;
2143 oC.y = pixel.y - oC.y;
2144 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002145 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002146 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002147 oC.x = Min(oC.x, pixel.x);
2148 oC.y = Min(oC.y, pixel.y);
2149 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002150 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002151 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002152 oC.x = Max(oC.x, pixel.x);
2153 oC.y = Max(oC.y, pixel.y);
2154 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002155 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002156 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002157 // No operation
2158 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002159 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002160 oC.x = pixel.x;
2161 oC.y = pixel.y;
2162 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002163 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002164 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002165 oC.x = Float4(0.0f);
2166 oC.y = Float4(0.0f);
2167 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002168 break;
2169 default:
2170 ASSERT(false);
2171 }
2172
Nicolas Capens4f172c72016-01-13 08:34:30 -05002173 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2174 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002175
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002176 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002177 {
John Bauman19bac1e2014-05-06 15:23:49 -04002178 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002179 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002180
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002181 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002182 {
John Bauman19bac1e2014-05-06 15:23:49 -04002183 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002184 }
2185
2186 switch(state.blendOperationAlpha)
2187 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002188 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002189 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002190 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002191 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002192 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002193 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002194 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002195 pixel.w -= oC.w;
2196 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002197 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002198 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002199 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002200 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002201 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002202 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002203 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002204 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002205 // No operation
2206 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002207 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002208 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002209 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002210 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002211 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002212 break;
2213 default:
2214 ASSERT(false);
2215 }
2216 }
2217
Nicolas Capens4f172c72016-01-13 08:34:30 -05002218 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002219 {
John Bauman89401822014-05-06 15:04:28 -04002220 switch(state.targetFormat[index])
2221 {
John Bauman89401822014-05-06 15:04:28 -04002222 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002223 case FORMAT_R32I:
2224 case FORMAT_R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002225 case FORMAT_R16I:
2226 case FORMAT_R16UI:
2227 case FORMAT_R8I:
2228 case FORMAT_R8UI:
John Bauman89401822014-05-06 15:04:28 -04002229 break;
2230 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002231 case FORMAT_G32R32I:
2232 case FORMAT_G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002233 case FORMAT_G16R16I:
2234 case FORMAT_G16R16UI:
2235 case FORMAT_G8R8I:
2236 case FORMAT_G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002237 oC.z = oC.x;
2238 oC.x = UnpackLow(oC.x, oC.y);
2239 oC.z = UnpackHigh(oC.z, oC.y);
2240 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002241 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002242 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002243 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002244 case FORMAT_A32B32G32R32I:
2245 case FORMAT_A32B32G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002246 case FORMAT_A16B16G16R16I:
2247 case FORMAT_A16B16G16R16UI:
2248 case FORMAT_A8B8G8R8I:
2249 case FORMAT_A8B8G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002250 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002251 break;
2252 default:
2253 ASSERT(false);
2254 }
2255
2256 int rgbaWriteMask = state.colorWriteActive(index);
2257
2258 Int xMask; // Combination of all masks
2259
2260 if(state.depthTestActive)
2261 {
2262 xMask = zMask;
2263 }
2264 else
2265 {
2266 xMask = cMask;
2267 }
2268
2269 if(state.stencilActive)
2270 {
2271 xMask &= sMask;
2272 }
2273
2274 Pointer<Byte> buffer;
2275 Float4 value;
2276
2277 switch(state.targetFormat[index])
2278 {
2279 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002280 case FORMAT_R32I:
2281 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002282 if(rgbaWriteMask & 0x00000001)
2283 {
2284 buffer = cBuffer + 4 * x;
2285
2286 // FIXME: movlps
2287 value.x = *Pointer<Float>(buffer + 0);
2288 value.y = *Pointer<Float>(buffer + 4);
2289
Nicolas Capens4f172c72016-01-13 08:34:30 -05002290 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002291
2292 // FIXME: movhps
2293 value.z = *Pointer<Float>(buffer + 0);
2294 value.w = *Pointer<Float>(buffer + 4);
2295
Nicolas Capens4f172c72016-01-13 08:34:30 -05002296 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2297 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002298 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002299
2300 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002301 *Pointer<Float>(buffer + 0) = oC.x.z;
2302 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002303
Nicolas Capens4f172c72016-01-13 08:34:30 -05002304 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002305
2306 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002307 *Pointer<Float>(buffer + 0) = oC.x.x;
2308 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002309 }
2310 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002311 case FORMAT_R16I:
2312 case FORMAT_R16UI:
2313 if(rgbaWriteMask & 0x00000001)
2314 {
2315 buffer = cBuffer + 2 * x;
2316
2317 UShort4 xyzw;
2318 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2319
2320 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2321
2322 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2323 value = As<Float4>(Int4(xyzw));
2324
2325 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2326 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2327 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2328
2329 if(state.targetFormat[index] == FORMAT_R16I)
2330 {
2331 Float component = oC.x.z;
2332 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2333 component = oC.x.w;
2334 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2335
2336 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2337
2338 component = oC.x.x;
2339 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2340 component = oC.x.y;
2341 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2342 }
2343 else // FORMAT_R16UI
2344 {
2345 Float component = oC.x.z;
2346 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2347 component = oC.x.w;
2348 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2349
2350 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2351
2352 component = oC.x.x;
2353 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2354 component = oC.x.y;
2355 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2356 }
2357 }
2358 break;
2359 case FORMAT_R8I:
2360 case FORMAT_R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002361 if(rgbaWriteMask & 0x00000001)
2362 {
2363 buffer = cBuffer + x;
2364
2365 UInt xyzw, packedCol;
2366
Alexis Hetu827d07a2016-09-15 17:54:05 -04002367 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002368 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetu827d07a2016-09-15 17:54:05 -04002369 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002370
2371 Short4 tmpCol = Short4(As<Int4>(oC.x));
2372 if(state.targetFormat[index] == FORMAT_R8I)
2373 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002374 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002375 }
2376 else
2377 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002378 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002379 }
2380 packedCol = Extract(As<Int2>(tmpCol), 0);
2381
2382 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2383 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2384
2385 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2386 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2387 *Pointer<UShort>(buffer) = UShort(packedCol);
2388 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002389 break;
John Bauman89401822014-05-06 15:04:28 -04002390 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002391 case FORMAT_G32R32I:
2392 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002393 buffer = cBuffer + 8 * x;
2394
2395 value = *Pointer<Float4>(buffer);
2396
2397 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2398 {
2399 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002400 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002401 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002402 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002403 }
2404
Nicolas Capens4f172c72016-01-13 08:34:30 -05002405 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2406 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002407 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2408 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002409
Nicolas Capens4f172c72016-01-13 08:34:30 -05002410 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002411
2412 value = *Pointer<Float4>(buffer);
2413
2414 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2415 {
2416 Float4 masked;
2417
2418 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002419 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002420 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002421 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002422 }
2423
Nicolas Capens4f172c72016-01-13 08:34:30 -05002424 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2425 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002426 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2427 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002428 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002429 case FORMAT_G16R16I:
2430 case FORMAT_G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002431 if((rgbaWriteMask & 0x00000003) != 0x0)
2432 {
2433 buffer = cBuffer + 4 * x;
2434
2435 UInt2 rgbaMask;
2436 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2437 UShort4 value = *Pointer<UShort4>(buffer);
2438 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2439 if((rgbaWriteMask & 0x3) != 0x3)
2440 {
2441 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2442 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2443 mergedMask &= rgbaMask;
2444 }
2445 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2446
2447 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2448
2449 packedCol = UShort4(As<Int4>(oC.y));
2450 value = *Pointer<UShort4>(buffer);
2451 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2452 if((rgbaWriteMask & 0x3) != 0x3)
2453 {
2454 mergedMask &= rgbaMask;
2455 }
2456 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2457 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002458 break;
2459 case FORMAT_G8R8I:
2460 case FORMAT_G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002461 if((rgbaWriteMask & 0x00000003) != 0x0)
2462 {
2463 buffer = cBuffer + 2 * x;
2464
2465 Int2 xyzw, packedCol;
2466
2467 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2468 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2469 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2470
2471 if(state.targetFormat[index] == FORMAT_G8R8I)
2472 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002473 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002474 }
2475 else
2476 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002477 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002478 }
2479
2480 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2481 if((rgbaWriteMask & 0x3) != 0x3)
2482 {
2483 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2484 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2485 mergedMask &= rgbaMask;
2486 }
2487
2488 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2489
2490 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2491 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2492 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2493 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002494 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002495 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002496 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002497 case FORMAT_A32B32G32R32I:
2498 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002499 buffer = cBuffer + 16 * x;
2500
2501 {
2502 value = *Pointer<Float4>(buffer, 16);
2503
2504 if(rgbaWriteMask != 0x0000000F)
2505 {
2506 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002507 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2508 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002509 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002510 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002511
Nicolas Capens4f172c72016-01-13 08:34:30 -05002512 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2513 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002514 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2515 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002516 }
2517
2518 {
2519 value = *Pointer<Float4>(buffer + 16, 16);
2520
2521 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens05b3d662016-02-25 23:58:33 -05002522 {
John Bauman89401822014-05-06 15:04:28 -04002523 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002524 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2525 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002526 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002527 }
2528
Nicolas Capens4f172c72016-01-13 08:34:30 -05002529 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2530 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002531 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2532 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002533 }
2534
Nicolas Capens4f172c72016-01-13 08:34:30 -05002535 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002536
2537 {
2538 value = *Pointer<Float4>(buffer, 16);
2539
2540 if(rgbaWriteMask != 0x0000000F)
2541 {
2542 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002543 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2544 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002545 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002546 }
2547
Nicolas Capens4f172c72016-01-13 08:34:30 -05002548 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2549 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002550 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2551 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002552 }
2553
2554 {
Nicolas Capens400667e2017-03-29 14:40:14 -04002555 value = *Pointer<Float4>(buffer + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002556
2557 if(rgbaWriteMask != 0x0000000F)
2558 {
2559 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002560 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2561 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002562 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002563 }
2564
Nicolas Capens4f172c72016-01-13 08:34:30 -05002565 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2566 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002567 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2568 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002569 }
2570 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002571 case FORMAT_A16B16G16R16I:
2572 case FORMAT_A16B16G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002573 if((rgbaWriteMask & 0x0000000F) != 0x0)
2574 {
2575 buffer = cBuffer + 8 * x;
2576
2577 UInt4 rgbaMask;
2578 UShort8 value = *Pointer<UShort8>(buffer);
2579 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2580 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2581 if((rgbaWriteMask & 0xF) != 0xF)
2582 {
2583 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2584 rgbaMask = UInt4(tmpMask, tmpMask);
2585 mergedMask &= rgbaMask;
2586 }
2587 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2588
2589 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2590
2591 value = *Pointer<UShort8>(buffer);
2592 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2593 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2594 if((rgbaWriteMask & 0xF) != 0xF)
2595 {
2596 mergedMask &= rgbaMask;
2597 }
2598 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2599 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002600 break;
2601 case FORMAT_A8B8G8R8I:
2602 case FORMAT_A8B8G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002603 if((rgbaWriteMask & 0x0000000F) != 0x0)
2604 {
2605 UInt2 value, packedCol, mergedMask;
2606
2607 buffer = cBuffer + 4 * x;
2608
2609 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2610 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002611 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002612 }
2613 else
2614 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002615 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002616 }
2617 value = *Pointer<UInt2>(buffer, 16);
2618 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2619 if(rgbaWriteMask != 0xF)
2620 {
2621 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2622 }
2623 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2624
2625 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2626
2627 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2628 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002629 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002630 }
2631 else
2632 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002633 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002634 }
2635 value = *Pointer<UInt2>(buffer, 16);
2636 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2637 if(rgbaWriteMask != 0xF)
2638 {
2639 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2640 }
2641 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2642 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002643 break;
John Bauman89401822014-05-06 15:04:28 -04002644 default:
2645 ASSERT(false);
2646 }
2647 }
2648
John Bauman89401822014-05-06 15:04:28 -04002649 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2650 {
John Bauman19bac1e2014-05-06 15:23:49 -04002651 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002652 }
2653
Nicolas Capens4f172c72016-01-13 08:34:30 -05002654 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002655 {
John Bauman19bac1e2014-05-06 15:23:49 -04002656 c.x = As<UShort4>(c.x) >> 4;
2657 c.y = As<UShort4>(c.y) >> 4;
2658 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002659
Nicolas Capens4f172c72016-01-13 08:34:30 -05002660 sRGBtoLinear12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002661 }
2662
Nicolas Capens4f172c72016-01-13 08:34:30 -05002663 void PixelRoutine::sRGBtoLinear12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002664 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002665 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002666
John Bauman19bac1e2014-05-06 15:23:49 -04002667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2669 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2670 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002671
John Bauman19bac1e2014-05-06 15:23:49 -04002672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2674 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2675 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002676
John Bauman19bac1e2014-05-06 15:23:49 -04002677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2679 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2680 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002681 }
2682
Nicolas Capens4f172c72016-01-13 08:34:30 -05002683 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002684 {
John Bauman19bac1e2014-05-06 15:23:49 -04002685 c.x = As<UShort4>(c.x) >> 4;
2686 c.y = As<UShort4>(c.y) >> 4;
2687 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002688
Nicolas Capens4f172c72016-01-13 08:34:30 -05002689 linearToSRGB12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002690 }
2691
Nicolas Capens4f172c72016-01-13 08:34:30 -05002692 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002693 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002694 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002695
John Bauman19bac1e2014-05-06 15:23:49 -04002696 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2697 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2698 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2699 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002700
John Bauman19bac1e2014-05-06 15:23:49 -04002701 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2702 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2703 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2704 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002705
John Bauman19bac1e2014-05-06 15:23:49 -04002706 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2707 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2708 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2709 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002710 }
2711
John Bauman89401822014-05-06 15:04:28 -04002712 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2713 {
2714 Float4 linear = x * x;
2715 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2716
2717 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2718 }
2719
John Bauman19bac1e2014-05-06 15:23:49 -04002720 bool PixelRoutine::colorUsed()
2721 {
2722 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2723 }
John Bauman89401822014-05-06 15:04:28 -04002724}