blob: 975d3be04648be5bef6a0b9b7e1e3e3c01d09a49 [file] [log] [blame]
// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
John Bauman89401822014-05-06 15:04:28 -040014
15#include "PixelRoutine.hpp"
16
John Bauman89401822014-05-06 15:04:28 -040017#include "SamplerCore.hpp"
18#include "Constants.hpp"
Nicolas Capens708c24b2017-10-26 13:07:10 -040019#include "Renderer/Renderer.hpp"
20#include "Renderer/QuadRasterizer.hpp"
21#include "Renderer/Surface.hpp"
22#include "Renderer/Primitive.hpp"
23#include "Common/Debug.hpp"
John Bauman89401822014-05-06 15:04:28 -040024
John Bauman89401822014-05-06 15:04:28 -040025namespace sw
26{
27 extern bool complementaryDepthBuffer;
28 extern bool postBlendSRGB;
29 extern bool exactColorRounding;
Alexis Hetuf2a8c372015-07-13 11:08:41 -040030 extern bool forceClearRegisters;
John Bauman89401822014-05-06 15:04:28 -040031
Nicolas Capens4f172c72016-01-13 08:34:30 -050032 PixelRoutine::PixelRoutine(const PixelProcessor::State &state, const PixelShader *shader) : QuadRasterizer(state, shader), v(shader && shader->dynamicallyIndexedInput)
John Bauman89401822014-05-06 15:04:28 -040033 {
Alexis Hetuf2a8c372015-07-13 11:08:41 -040034 if(!shader || shader->getVersion() < 0x0200 || forceClearRegisters)
John Bauman89401822014-05-06 15:04:28 -040035 {
Nicolas Capens3b4c93f2016-05-18 12:51:37 -040036 for(int i = 0; i < MAX_FRAGMENT_INPUTS; i++)
Alexis Hetuf2a8c372015-07-13 11:08:41 -040037 {
Alexis Hetu3e1fd3a2015-08-11 10:15:34 -040038 v[i].x = Float4(0.0f);
39 v[i].y = Float4(0.0f);
40 v[i].z = Float4(0.0f);
41 v[i].w = Float4(0.0f);
Alexis Hetuf2a8c372015-07-13 11:08:41 -040042 }
John Bauman89401822014-05-06 15:04:28 -040043 }
44 }
45
46 PixelRoutine::~PixelRoutine()
47 {
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040048 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040049 {
50 delete sampler[i];
51 }
52 }
53
Nicolas Capens4f172c72016-01-13 08:34:30 -050054 void PixelRoutine::quad(Pointer<Byte> cBuffer[RENDERTARGETS], Pointer<Byte> &zBuffer, Pointer<Byte> &sBuffer, Int cMask[4], Int &x, Int &y)
John Bauman89401822014-05-06 15:04:28 -040055 {
56 #if PERF_PROFILE
57 Long pipeTime = Ticks();
58 #endif
59
Alexis Hetu0b65c5e2015-03-31 11:48:57 -040060 for(int i = 0; i < TEXTURE_IMAGE_UNITS; i++)
John Bauman89401822014-05-06 15:04:28 -040061 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050062 sampler[i] = new SamplerCore(constants, state.sampler[i]);
John Bauman89401822014-05-06 15:04:28 -040063 }
64
65 const bool earlyDepthTest = !state.depthOverride && !state.alphaTestActive();
John Bauman89401822014-05-06 15:04:28 -040066
67 Int zMask[4]; // Depth mask
68 Int sMask[4]; // Stencil mask
69
70 for(unsigned int q = 0; q < state.multiSample; q++)
71 {
72 zMask[q] = cMask[q];
73 sMask[q] = cMask[q];
74 }
75
76 for(unsigned int q = 0; q < state.multiSample; q++)
77 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050078 stencilTest(sBuffer, q, x, sMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -040079 }
80
81 Float4 f;
John Bauman89401822014-05-06 15:04:28 -040082 Float4 rhwCentroid;
83
Nicolas Capens4f172c72016-01-13 08:34:30 -050084 Float4 xxxx = Float4(Float(x)) + *Pointer<Float4>(primitive + OFFSET(Primitive,xQuad), 16);
John Bauman89401822014-05-06 15:04:28 -040085
John Bauman19bac1e2014-05-06 15:23:49 -040086 if(interpolateZ())
John Bauman89401822014-05-06 15:04:28 -040087 {
88 for(unsigned int q = 0; q < state.multiSample; q++)
89 {
90 Float4 x = xxxx;
Nicolas Capens4f172c72016-01-13 08:34:30 -050091
John Bauman89401822014-05-06 15:04:28 -040092 if(state.multiSample > 1)
93 {
Nicolas Capens4f172c72016-01-13 08:34:30 -050094 x -= *Pointer<Float4>(constants + OFFSET(Constants,X) + q * sizeof(float4));
John Bauman89401822014-05-06 15:04:28 -040095 }
96
Nicolas Capens5ba372f2017-10-05 16:05:47 -040097 z[q] = interpolate(x, Dz[q], z[q], primitive + OFFSET(Primitive,z), false, false, state.depthClamp);
John Bauman89401822014-05-06 15:04:28 -040098 }
99 }
100
101 Bool depthPass = false;
102
103 if(earlyDepthTest)
104 {
105 for(unsigned int q = 0; q < state.multiSample; q++)
106 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500107 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400108 }
109 }
110
111 If(depthPass || Bool(!earlyDepthTest))
112 {
113 #if PERF_PROFILE
114 Long interpTime = Ticks();
115 #endif
116
Nicolas Capens4f172c72016-01-13 08:34:30 -0500117 Float4 yyyy = Float4(Float(y)) + *Pointer<Float4>(primitive + OFFSET(Primitive,yQuad), 16);
Nicolas Capenscbefe532014-10-16 00:16:01 -0400118
John Bauman89401822014-05-06 15:04:28 -0400119 // Centroid locations
120 Float4 XXXX = Float4(0.0f);
121 Float4 YYYY = Float4(0.0f);
122
123 if(state.centroid)
124 {
125 Float4 WWWW(1.0e-9f);
126
127 for(unsigned int q = 0; q < state.multiSample; q++)
128 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500129 XXXX += *Pointer<Float4>(constants + OFFSET(Constants,sampleX[q]) + 16 * cMask[q]);
130 YYYY += *Pointer<Float4>(constants + OFFSET(Constants,sampleY[q]) + 16 * cMask[q]);
131 WWWW += *Pointer<Float4>(constants + OFFSET(Constants,weight) + 16 * cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400132 }
133
134 WWWW = Rcp_pp(WWWW);
135 XXXX *= WWWW;
136 YYYY *= WWWW;
137
138 XXXX += xxxx;
139 YYYY += yyyy;
140 }
141
John Bauman19bac1e2014-05-06 15:23:49 -0400142 if(interpolateW())
John Bauman89401822014-05-06 15:04:28 -0400143 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400144 w = interpolate(xxxx, Dw, rhw, primitive + OFFSET(Primitive,w), false, false, false);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500145 rhw = reciprocal(w, false, false, true);
John Bauman89401822014-05-06 15:04:28 -0400146
147 if(state.centroid)
148 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500149 rhwCentroid = reciprocal(interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive,w), false, false));
John Bauman89401822014-05-06 15:04:28 -0400150 }
151 }
152
Nicolas Capens3b4c93f2016-05-18 12:51:37 -0400153 for(int interpolant = 0; interpolant < MAX_FRAGMENT_INPUTS; interpolant++)
John Bauman89401822014-05-06 15:04:28 -0400154 {
155 for(int component = 0; component < 4; component++)
156 {
John Bauman89401822014-05-06 15:04:28 -0400157 if(state.interpolant[interpolant].component & (1 << component))
158 {
159 if(!state.interpolant[interpolant].centroid)
160 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400161 v[interpolant][component] = interpolate(xxxx, Dv[interpolant][component], rhw, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400162 }
163 else
164 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500165 v[interpolant][component] = interpolateCentroid(XXXX, YYYY, rhwCentroid, primitive + OFFSET(Primitive, V[interpolant][component]), (state.interpolant[interpolant].flat & (1 << component)) != 0, state.perspective);
John Bauman89401822014-05-06 15:04:28 -0400166 }
167 }
168 }
169
170 Float4 rcp;
171
172 switch(state.interpolant[interpolant].project)
173 {
174 case 0:
175 break;
176 case 1:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500177 rcp = reciprocal(v[interpolant].y);
178 v[interpolant].x = v[interpolant].x * rcp;
John Bauman89401822014-05-06 15:04:28 -0400179 break;
180 case 2:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500181 rcp = reciprocal(v[interpolant].z);
182 v[interpolant].x = v[interpolant].x * rcp;
183 v[interpolant].y = v[interpolant].y * rcp;
John Bauman89401822014-05-06 15:04:28 -0400184 break;
185 case 3:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500186 rcp = reciprocal(v[interpolant].w);
187 v[interpolant].x = v[interpolant].x * rcp;
188 v[interpolant].y = v[interpolant].y * rcp;
189 v[interpolant].z = v[interpolant].z * rcp;
John Bauman89401822014-05-06 15:04:28 -0400190 break;
191 }
192 }
193
194 if(state.fog.component)
195 {
Nicolas Capens5ba372f2017-10-05 16:05:47 -0400196 f = interpolate(xxxx, Df, rhw, primitive + OFFSET(Primitive,f), state.fog.flat & 0x01, state.perspective, false);
John Bauman89401822014-05-06 15:04:28 -0400197 }
198
Nicolas Capens4f172c72016-01-13 08:34:30 -0500199 setBuiltins(x, y, z, w);
John Bauman89401822014-05-06 15:04:28 -0400200
201 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500202 cycles[PERF_INTERP] += Ticks() - interpTime;
John Bauman89401822014-05-06 15:04:28 -0400203 #endif
204
205 Bool alphaPass = true;
206
207 if(colorUsed())
208 {
209 #if PERF_PROFILE
210 Long shaderTime = Ticks();
211 #endif
212
Nicolas Capens4f172c72016-01-13 08:34:30 -0500213 applyShader(cMask);
John Bauman89401822014-05-06 15:04:28 -0400214
215 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500216 cycles[PERF_SHADER] += Ticks() - shaderTime;
John Bauman89401822014-05-06 15:04:28 -0400217 #endif
218
Nicolas Capens4f172c72016-01-13 08:34:30 -0500219 alphaPass = alphaTest(cMask);
John Bauman89401822014-05-06 15:04:28 -0400220
John Bauman19bac1e2014-05-06 15:23:49 -0400221 if((shader && shader->containsKill()) || state.alphaTestActive())
John Bauman89401822014-05-06 15:04:28 -0400222 {
223 for(unsigned int q = 0; q < state.multiSample; q++)
224 {
225 zMask[q] &= cMask[q];
226 sMask[q] &= cMask[q];
227 }
228 }
229 }
230
231 If(alphaPass)
232 {
233 if(!earlyDepthTest)
234 {
235 for(unsigned int q = 0; q < state.multiSample; q++)
236 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500237 depthPass = depthPass || depthTest(zBuffer, q, x, z[q], sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400238 }
239 }
240
241 #if PERF_PROFILE
242 Long ropTime = Ticks();
243 #endif
244
245 If(depthPass || Bool(earlyDepthTest))
246 {
247 for(unsigned int q = 0; q < state.multiSample; q++)
248 {
249 if(state.multiSampleMask & (1 << q))
250 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500251 writeDepth(zBuffer, q, x, z[q], zMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400252
253 if(state.occlusionEnabled)
254 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500255 occlusion += *Pointer<UInt>(constants + OFFSET(Constants,occlusionCount) + 4 * (zMask[q] & sMask[q]));
John Bauman89401822014-05-06 15:04:28 -0400256 }
257 }
258 }
259
260 if(colorUsed())
261 {
262 #if PERF_PROFILE
John Bauman66b8ab22014-05-06 15:57:45 -0400263 AddAtomic(Pointer<Long>(&profiler.ropOperations), 4);
John Bauman89401822014-05-06 15:04:28 -0400264 #endif
265
Nicolas Capens4f172c72016-01-13 08:34:30 -0500266 rasterOperation(f, cBuffer, x, sMask, zMask, cMask);
John Bauman89401822014-05-06 15:04:28 -0400267 }
268 }
269
270 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500271 cycles[PERF_ROP] += Ticks() - ropTime;
John Bauman89401822014-05-06 15:04:28 -0400272 #endif
273 }
274 }
275
276 for(unsigned int q = 0; q < state.multiSample; q++)
277 {
278 if(state.multiSampleMask & (1 << q))
279 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500280 writeStencil(sBuffer, q, x, sMask[q], zMask[q], cMask[q]);
John Bauman89401822014-05-06 15:04:28 -0400281 }
282 }
283
284 #if PERF_PROFILE
Nicolas Capens4f172c72016-01-13 08:34:30 -0500285 cycles[PERF_PIPE] += Ticks() - pipeTime;
John Bauman89401822014-05-06 15:04:28 -0400286 #endif
287 }
288
John Bauman89401822014-05-06 15:04:28 -0400289 Float4 PixelRoutine::interpolateCentroid(Float4 &x, Float4 &y, Float4 &rhw, Pointer<Byte> planeEquation, bool flat, bool perspective)
290 {
291 Float4 interpolant = *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,C), 16);
292
293 if(!flat)
294 {
295 interpolant += x * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,A), 16) +
296 y * *Pointer<Float4>(planeEquation + OFFSET(PlaneEquation,B), 16);
297
298 if(perspective)
299 {
300 interpolant *= rhw;
301 }
302 }
303
304 return interpolant;
305 }
306
Nicolas Capens4f172c72016-01-13 08:34:30 -0500307 void PixelRoutine::stencilTest(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400308 {
309 if(!state.stencilActive)
310 {
311 return;
312 }
313
314 // (StencilRef & StencilMask) CompFunc (StencilBufferValue & StencilMask)
315
316 Pointer<Byte> buffer = sBuffer + 2 * x;
317
318 if(q > 0)
319 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500320 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400321 }
322
Nicolas Capens48ef1252016-11-07 15:30:33 -0500323 Byte8 value = *Pointer<Byte8>(buffer);
John Bauman89401822014-05-06 15:04:28 -0400324 Byte8 valueCCW = value;
325
326 if(!state.noStencilMask)
327 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500328 value &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400329 }
330
Nicolas Capens4f172c72016-01-13 08:34:30 -0500331 stencilTest(value, state.stencilCompareMode, false);
John Bauman89401822014-05-06 15:04:28 -0400332
333 if(state.twoSidedStencil)
334 {
335 if(!state.noStencilMaskCCW)
336 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500337 valueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].testMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400338 }
339
Nicolas Capens4f172c72016-01-13 08:34:30 -0500340 stencilTest(valueCCW, state.stencilCompareModeCCW, true);
John Bauman89401822014-05-06 15:04:28 -0400341
Nicolas Capens4f172c72016-01-13 08:34:30 -0500342 value &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
343 valueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400344 value |= valueCCW;
345 }
346
347 sMask = SignMask(value) & cMask;
348 }
349
Nicolas Capens4f172c72016-01-13 08:34:30 -0500350 void PixelRoutine::stencilTest(Byte8 &value, StencilCompareMode stencilCompareMode, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400351 {
352 Byte8 equal;
353
354 switch(stencilCompareMode)
355 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400356 case STENCIL_ALWAYS:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400357 value = Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400358 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400359 case STENCIL_NEVER:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400360 value = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400361 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400362 case STENCIL_LESS: // a < b ~ b > a
John Bauman89401822014-05-06 15:04:28 -0400363 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500364 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400365 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400366 case STENCIL_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500367 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400368 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400369 case STENCIL_NOTEQUAL: // a != b ~ !(a == b)
Nicolas Capens4f172c72016-01-13 08:34:30 -0500370 value = CmpEQ(value, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400371 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400372 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400373 case STENCIL_LESSEQUAL: // a <= b ~ (b > a) || (a == b)
John Bauman89401822014-05-06 15:04:28 -0400374 equal = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500375 equal = CmpEQ(equal, *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedQ)));
John Bauman89401822014-05-06 15:04:28 -0400376 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500377 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
John Bauman89401822014-05-06 15:04:28 -0400378 value |= equal;
379 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400380 case STENCIL_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500381 equal = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ));
John Bauman89401822014-05-06 15:04:28 -0400382 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
383 equal = CmpGT(As<SByte8>(equal), As<SByte8>(value));
384 value = equal;
385 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400386 case STENCIL_GREATEREQUAL: // a >= b ~ !(a < b) ~ !(b > a)
John Bauman89401822014-05-06 15:04:28 -0400387 value += Byte8(0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
Nicolas Capens4f172c72016-01-13 08:34:30 -0500388 value = CmpGT(As<SByte8>(value), *Pointer<SByte8>(data + OFFSET(DrawData,stencil[CCW].referenceMaskedSignedQ)));
Nicolas Capens16b5f152016-10-13 13:39:01 -0400389 value ^= Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400390 break;
391 default:
392 ASSERT(false);
393 }
394 }
395
Nicolas Capens4f172c72016-01-13 08:34:30 -0500396 Bool PixelRoutine::depthTest(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400397 {
398 if(!state.depthTestActive)
399 {
400 return true;
401 }
402
403 Float4 Z = z;
404
John Bauman19bac1e2014-05-06 15:23:49 -0400405 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400406 {
407 if(complementaryDepthBuffer)
408 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500409 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400410 }
411 else
412 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500413 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400414 }
415 }
416
417 Pointer<Byte> buffer;
418 Int pitch;
419
420 if(!state.quadLayoutDepthBuffer)
421 {
422 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500423 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400424 }
425 else
426 {
427 buffer = zBuffer + 8 * x;
428 }
429
430 if(q > 0)
431 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500432 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400433 }
434
435 Float4 zValue;
436
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400437 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400438 {
439 if(!state.quadLayoutDepthBuffer)
440 {
441 // FIXME: Properly optimizes?
442 zValue.xy = *Pointer<Float4>(buffer);
443 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
444 }
445 else
446 {
447 zValue = *Pointer<Float4>(buffer, 16);
448 }
449 }
450
451 Int4 zTest;
452
453 switch(state.depthCompareMode)
454 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400455 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400456 // Optimized
457 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400458 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400459 // Optimized
460 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400461 case DEPTH_EQUAL:
John Bauman89401822014-05-06 15:04:28 -0400462 zTest = CmpEQ(zValue, Z);
463 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400464 case DEPTH_NOTEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400465 zTest = CmpNEQ(zValue, Z);
466 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400467 case DEPTH_LESS:
John Bauman89401822014-05-06 15:04:28 -0400468 if(complementaryDepthBuffer)
469 {
470 zTest = CmpLT(zValue, Z);
471 }
472 else
473 {
474 zTest = CmpNLE(zValue, Z);
475 }
476 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400477 case DEPTH_GREATEREQUAL:
John Bauman89401822014-05-06 15:04:28 -0400478 if(complementaryDepthBuffer)
479 {
480 zTest = CmpNLT(zValue, Z);
481 }
482 else
483 {
484 zTest = CmpLE(zValue, Z);
485 }
486 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400487 case DEPTH_LESSEQUAL:
John Bauman89401822014-05-06 15:04:28 -0400488 if(complementaryDepthBuffer)
489 {
490 zTest = CmpLE(zValue, Z);
491 }
492 else
493 {
494 zTest = CmpNLT(zValue, Z);
495 }
496 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400497 case DEPTH_GREATER:
John Bauman89401822014-05-06 15:04:28 -0400498 if(complementaryDepthBuffer)
499 {
500 zTest = CmpNLE(zValue, Z);
501 }
502 else
503 {
504 zTest = CmpLT(zValue, Z);
505 }
506 break;
507 default:
508 ASSERT(false);
509 }
510
511 switch(state.depthCompareMode)
512 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400513 case DEPTH_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400514 zMask = cMask;
515 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400516 case DEPTH_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400517 zMask = 0x0;
518 break;
519 default:
520 zMask = SignMask(zTest) & cMask;
521 break;
522 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500523
John Bauman89401822014-05-06 15:04:28 -0400524 if(state.stencilActive)
525 {
526 zMask &= sMask;
527 }
528
529 return zMask != 0;
530 }
531
Nicolas Capens4f172c72016-01-13 08:34:30 -0500532 void PixelRoutine::alphaTest(Int &aMask, Short4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400533 {
534 Short4 cmp;
535 Short4 equal;
536
537 switch(state.alphaCompareMode)
538 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400539 case ALPHA_ALWAYS:
John Bauman89401822014-05-06 15:04:28 -0400540 aMask = 0xF;
541 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400542 case ALPHA_NEVER:
John Bauman89401822014-05-06 15:04:28 -0400543 aMask = 0x0;
544 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400545 case ALPHA_EQUAL:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500546 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400547 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400548 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400549 case ALPHA_NOTEQUAL: // a != b ~ !(a == b)
550 cmp = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400551 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400552 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400553 case ALPHA_LESS: // a < b ~ b > a
Nicolas Capens4f172c72016-01-13 08:34:30 -0500554 cmp = CmpGT(*Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)), alpha);
Nicolas Capens33438a62017-09-27 11:47:35 -0400555 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400556 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400557 case ALPHA_GREATEREQUAL: // a >= b ~ (a > b) || (a == b) ~ !(b > a) // TODO: Approximate
Nicolas Capens4f172c72016-01-13 08:34:30 -0500558 equal = CmpEQ(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
559 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
John Bauman89401822014-05-06 15:04:28 -0400560 cmp |= equal;
Nicolas Capens33438a62017-09-27 11:47:35 -0400561 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400562 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400563 case ALPHA_LESSEQUAL: // a <= b ~ !(a > b)
564 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4))) ^ Short4(0xFFFFu); // FIXME
Nicolas Capens33438a62017-09-27 11:47:35 -0400565 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400566 break;
Alexis Hetu90c7ad62016-06-27 11:50:40 -0400567 case ALPHA_GREATER: // a > b
Nicolas Capens4f172c72016-01-13 08:34:30 -0500568 cmp = CmpGT(alpha, *Pointer<Short4>(data + OFFSET(DrawData,factor.alphaReference4)));
Nicolas Capens33438a62017-09-27 11:47:35 -0400569 aMask = SignMask(PackSigned(cmp, Short4(0x0000)));
John Bauman89401822014-05-06 15:04:28 -0400570 break;
571 default:
572 ASSERT(false);
573 }
574 }
575
Nicolas Capens4f172c72016-01-13 08:34:30 -0500576 void PixelRoutine::alphaToCoverage(Int cMask[4], Float4 &alpha)
John Bauman89401822014-05-06 15:04:28 -0400577 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500578 Int4 coverage0 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c0)));
579 Int4 coverage1 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c1)));
580 Int4 coverage2 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c2)));
581 Int4 coverage3 = CmpNLT(alpha, *Pointer<Float4>(data + OFFSET(DrawData,a2c3)));
John Bauman89401822014-05-06 15:04:28 -0400582
583 Int aMask0 = SignMask(coverage0);
584 Int aMask1 = SignMask(coverage1);
585 Int aMask2 = SignMask(coverage2);
586 Int aMask3 = SignMask(coverage3);
587
588 cMask[0] &= aMask0;
589 cMask[1] &= aMask1;
590 cMask[2] &= aMask2;
591 cMask[3] &= aMask3;
592 }
593
Nicolas Capens4f172c72016-01-13 08:34:30 -0500594 void PixelRoutine::fogBlend(Vector4f &c0, Float4 &fog)
John Bauman89401822014-05-06 15:04:28 -0400595 {
596 if(!state.fogActive)
597 {
598 return;
599 }
600
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400601 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400602 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500603 pixelFog(fog);
John Bauman89401822014-05-06 15:04:28 -0400604
John Bauman19bac1e2014-05-06 15:23:49 -0400605 fog = Min(fog, Float4(1.0f));
606 fog = Max(fog, Float4(0.0f));
John Bauman89401822014-05-06 15:04:28 -0400607 }
608
Nicolas Capens4f172c72016-01-13 08:34:30 -0500609 c0.x -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
610 c0.y -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
611 c0.z -= *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400612
John Bauman19bac1e2014-05-06 15:23:49 -0400613 c0.x *= fog;
614 c0.y *= fog;
615 c0.z *= fog;
John Bauman89401822014-05-06 15:04:28 -0400616
Nicolas Capens4f172c72016-01-13 08:34:30 -0500617 c0.x += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[0]));
618 c0.y += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[1]));
619 c0.z += *Pointer<Float4>(data + OFFSET(DrawData,fog.colorF[2]));
John Bauman89401822014-05-06 15:04:28 -0400620 }
621
Nicolas Capens4f172c72016-01-13 08:34:30 -0500622 void PixelRoutine::pixelFog(Float4 &visibility)
John Bauman89401822014-05-06 15:04:28 -0400623 {
624 Float4 &zw = visibility;
625
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400626 if(state.pixelFogMode != FOG_NONE)
John Bauman89401822014-05-06 15:04:28 -0400627 {
628 if(state.wBasedFog)
629 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500630 zw = rhw;
John Bauman89401822014-05-06 15:04:28 -0400631 }
632 else
633 {
634 if(complementaryDepthBuffer)
635 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500636 zw = Float4(1.0f) - z[0];
John Bauman89401822014-05-06 15:04:28 -0400637 }
638 else
639 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500640 zw = z[0];
John Bauman89401822014-05-06 15:04:28 -0400641 }
642 }
643 }
644
645 switch(state.pixelFogMode)
646 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400647 case FOG_NONE:
John Bauman89401822014-05-06 15:04:28 -0400648 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400649 case FOG_LINEAR:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500650 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.scale));
651 zw += *Pointer<Float4>(data + OFFSET(DrawData,fog.offset));
John Bauman89401822014-05-06 15:04:28 -0400652 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400653 case FOG_EXP:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500654 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.densityE));
John Bauman19bac1e2014-05-06 15:23:49 -0400655 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400656 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400657 case FOG_EXP2:
John Bauman89401822014-05-06 15:04:28 -0400658 zw *= zw;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500659 zw *= *Pointer<Float4>(data + OFFSET(DrawData,fog.density2E));
John Bauman19bac1e2014-05-06 15:23:49 -0400660 zw = exponential2(zw, true);
John Bauman89401822014-05-06 15:04:28 -0400661 break;
662 default:
663 ASSERT(false);
664 }
665 }
666
Nicolas Capens4f172c72016-01-13 08:34:30 -0500667 void PixelRoutine::writeDepth(Pointer<Byte> &zBuffer, int q, Int &x, Float4 &z, Int &zMask)
John Bauman89401822014-05-06 15:04:28 -0400668 {
669 if(!state.depthWriteEnable)
670 {
671 return;
672 }
673
674 Float4 Z = z;
675
John Bauman19bac1e2014-05-06 15:23:49 -0400676 if(shader && shader->depthOverride())
John Bauman89401822014-05-06 15:04:28 -0400677 {
678 if(complementaryDepthBuffer)
679 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500680 Z = Float4(1.0f) - oDepth;
John Bauman89401822014-05-06 15:04:28 -0400681 }
682 else
683 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500684 Z = oDepth;
John Bauman89401822014-05-06 15:04:28 -0400685 }
686 }
687
688 Pointer<Byte> buffer;
689 Int pitch;
690
691 if(!state.quadLayoutDepthBuffer)
Nicolas Capens05b3d662016-02-25 23:58:33 -0500692 {
John Bauman89401822014-05-06 15:04:28 -0400693 buffer = zBuffer + 4 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500694 pitch = *Pointer<Int>(data + OFFSET(DrawData,depthPitchB));
John Bauman89401822014-05-06 15:04:28 -0400695 }
696 else
Nicolas Capens05b3d662016-02-25 23:58:33 -0500697 {
John Bauman89401822014-05-06 15:04:28 -0400698 buffer = zBuffer + 8 * x;
699 }
700
701 if(q > 0)
702 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500703 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,depthSliceB));
John Bauman89401822014-05-06 15:04:28 -0400704 }
705
706 Float4 zValue;
707
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400708 if(state.depthCompareMode != DEPTH_NEVER || (state.depthCompareMode != DEPTH_ALWAYS && !state.depthWriteEnable))
John Bauman89401822014-05-06 15:04:28 -0400709 {
710 if(!state.quadLayoutDepthBuffer)
711 {
712 // FIXME: Properly optimizes?
713 zValue.xy = *Pointer<Float4>(buffer);
714 zValue.zw = *Pointer<Float4>(buffer + pitch - 8);
715 }
716 else
717 {
718 zValue = *Pointer<Float4>(buffer, 16);
719 }
720 }
721
Nicolas Capens4f172c72016-01-13 08:34:30 -0500722 Z = As<Float4>(As<Int4>(Z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + zMask * 16, 16));
723 zValue = As<Float4>(As<Int4>(zValue) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + zMask * 16, 16));
John Bauman89401822014-05-06 15:04:28 -0400724 Z = As<Float4>(As<Int4>(Z) | As<Int4>(zValue));
725
726 if(!state.quadLayoutDepthBuffer)
727 {
728 // FIXME: Properly optimizes?
729 *Pointer<Float2>(buffer) = Float2(Z.xy);
730 *Pointer<Float2>(buffer + pitch) = Float2(Z.zw);
731 }
732 else
733 {
734 *Pointer<Float4>(buffer, 16) = Z;
735 }
736 }
737
Nicolas Capens4f172c72016-01-13 08:34:30 -0500738 void PixelRoutine::writeStencil(Pointer<Byte> &sBuffer, int q, Int &x, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -0400739 {
740 if(!state.stencilActive)
741 {
742 return;
743 }
744
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400745 if(state.stencilPassOperation == OPERATION_KEEP && state.stencilZFailOperation == OPERATION_KEEP && state.stencilFailOperation == OPERATION_KEEP)
John Bauman89401822014-05-06 15:04:28 -0400746 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400747 if(!state.twoSidedStencil || (state.stencilPassOperationCCW == OPERATION_KEEP && state.stencilZFailOperationCCW == OPERATION_KEEP && state.stencilFailOperationCCW == OPERATION_KEEP))
John Bauman89401822014-05-06 15:04:28 -0400748 {
749 return;
750 }
751 }
752
753 if(state.stencilWriteMasked && (!state.twoSidedStencil || state.stencilWriteMaskedCCW))
754 {
755 return;
756 }
757
758 Pointer<Byte> buffer = sBuffer + 2 * x;
759
760 if(q > 0)
761 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500762 buffer += q * *Pointer<Int>(data + OFFSET(DrawData,stencilSliceB));
John Bauman89401822014-05-06 15:04:28 -0400763 }
764
Nicolas Capens48ef1252016-11-07 15:30:33 -0500765 Byte8 bufferValue = *Pointer<Byte8>(buffer);
Nicolas Capens05b3d662016-02-25 23:58:33 -0500766
John Bauman89401822014-05-06 15:04:28 -0400767 Byte8 newValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500768 stencilOperation(newValue, bufferValue, state.stencilPassOperation, state.stencilZFailOperation, state.stencilFailOperation, false, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400769
770 if(!state.noStencilWriteMask)
771 {
772 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500773 newValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].writeMaskQ));
774 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[0].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400775 newValue |= maskedValue;
776 }
777
778 if(state.twoSidedStencil)
779 {
780 Byte8 newValueCCW;
781
Nicolas Capens4f172c72016-01-13 08:34:30 -0500782 stencilOperation(newValueCCW, bufferValue, state.stencilPassOperationCCW, state.stencilZFailOperationCCW, state.stencilFailOperationCCW, true, zMask, sMask);
John Bauman89401822014-05-06 15:04:28 -0400783
784 if(!state.noStencilWriteMaskCCW)
785 {
786 Byte8 maskedValue = bufferValue;
Nicolas Capens4f172c72016-01-13 08:34:30 -0500787 newValueCCW &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].writeMaskQ));
788 maskedValue &= *Pointer<Byte8>(data + OFFSET(DrawData,stencil[1].invWriteMaskQ));
John Bauman89401822014-05-06 15:04:28 -0400789 newValueCCW |= maskedValue;
790 }
791
Nicolas Capens4f172c72016-01-13 08:34:30 -0500792 newValue &= *Pointer<Byte8>(primitive + OFFSET(Primitive,clockwiseMask));
793 newValueCCW &= *Pointer<Byte8>(primitive + OFFSET(Primitive,invClockwiseMask));
John Bauman89401822014-05-06 15:04:28 -0400794 newValue |= newValueCCW;
795 }
796
Nicolas Capens4f172c72016-01-13 08:34:30 -0500797 newValue &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * cMask);
798 bufferValue &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * cMask);
John Bauman89401822014-05-06 15:04:28 -0400799 newValue |= bufferValue;
800
Nicolas Capens16b5f152016-10-13 13:39:01 -0400801 *Pointer<Byte4>(buffer) = Byte4(newValue);
John Bauman89401822014-05-06 15:04:28 -0400802 }
803
Nicolas Capens4f172c72016-01-13 08:34:30 -0500804 void PixelRoutine::stencilOperation(Byte8 &newValue, Byte8 &bufferValue, StencilOperation stencilPassOperation, StencilOperation stencilZFailOperation, StencilOperation stencilFailOperation, bool CCW, Int &zMask, Int &sMask)
John Bauman89401822014-05-06 15:04:28 -0400805 {
806 Byte8 &pass = newValue;
807 Byte8 fail;
808 Byte8 zFail;
809
Nicolas Capens4f172c72016-01-13 08:34:30 -0500810 stencilOperation(pass, bufferValue, stencilPassOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400811
812 if(stencilZFailOperation != stencilPassOperation)
813 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500814 stencilOperation(zFail, bufferValue, stencilZFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400815 }
816
817 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
818 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500819 stencilOperation(fail, bufferValue, stencilFailOperation, CCW);
John Bauman89401822014-05-06 15:04:28 -0400820 }
821
822 if(stencilFailOperation != stencilPassOperation || stencilFailOperation != stencilZFailOperation)
823 {
824 if(state.depthTestActive && stencilZFailOperation != stencilPassOperation) // zMask valid and values not the same
825 {
Nicolas Capens4f172c72016-01-13 08:34:30 -0500826 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * zMask);
827 zFail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * zMask);
John Bauman89401822014-05-06 15:04:28 -0400828 pass |= zFail;
829 }
830
Nicolas Capens4f172c72016-01-13 08:34:30 -0500831 pass &= *Pointer<Byte8>(constants + OFFSET(Constants,maskB4Q) + 8 * sMask);
832 fail &= *Pointer<Byte8>(constants + OFFSET(Constants,invMaskB4Q) + 8 * sMask);
John Bauman89401822014-05-06 15:04:28 -0400833 pass |= fail;
834 }
835 }
836
Nicolas Capens4f172c72016-01-13 08:34:30 -0500837 void PixelRoutine::stencilOperation(Byte8 &output, Byte8 &bufferValue, StencilOperation operation, bool CCW)
John Bauman89401822014-05-06 15:04:28 -0400838 {
839 switch(operation)
840 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400841 case OPERATION_KEEP:
John Bauman89401822014-05-06 15:04:28 -0400842 output = bufferValue;
843 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400844 case OPERATION_ZERO:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400845 output = Byte8(0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00);
John Bauman89401822014-05-06 15:04:28 -0400846 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400847 case OPERATION_REPLACE:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500848 output = *Pointer<Byte8>(data + OFFSET(DrawData,stencil[CCW].referenceQ));
John Bauman89401822014-05-06 15:04:28 -0400849 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400850 case OPERATION_INCRSAT:
John Bauman89401822014-05-06 15:04:28 -0400851 output = AddSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
852 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400853 case OPERATION_DECRSAT:
John Bauman89401822014-05-06 15:04:28 -0400854 output = SubSat(bufferValue, Byte8(1, 1, 1, 1, 1, 1, 1, 1));
855 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400856 case OPERATION_INVERT:
Nicolas Capens16b5f152016-10-13 13:39:01 -0400857 output = bufferValue ^ Byte8(0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF);
John Bauman89401822014-05-06 15:04:28 -0400858 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400859 case OPERATION_INCR:
John Bauman89401822014-05-06 15:04:28 -0400860 output = bufferValue + Byte8(1, 1, 1, 1, 1, 1, 1, 1);
861 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400862 case OPERATION_DECR:
John Bauman89401822014-05-06 15:04:28 -0400863 output = bufferValue - Byte8(1, 1, 1, 1, 1, 1, 1, 1);
864 break;
865 default:
866 ASSERT(false);
867 }
868 }
869
Nicolas Capens96d4e092016-11-18 14:22:38 -0500870 void PixelRoutine::blendFactor(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -0400871 {
872 switch(blendFactorActive)
873 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400874 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400875 // Optimized
876 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400877 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400878 // Optimized
879 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400880 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400881 blendFactor.x = current.x;
882 blendFactor.y = current.y;
883 blendFactor.z = current.z;
John Bauman89401822014-05-06 15:04:28 -0400884 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400885 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400886 blendFactor.x = Short4(0xFFFFu) - current.x;
887 blendFactor.y = Short4(0xFFFFu) - current.y;
888 blendFactor.z = Short4(0xFFFFu) - current.z;
John Bauman89401822014-05-06 15:04:28 -0400889 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400890 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400891 blendFactor.x = pixel.x;
892 blendFactor.y = pixel.y;
893 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400894 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400895 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400896 blendFactor.x = Short4(0xFFFFu) - pixel.x;
897 blendFactor.y = Short4(0xFFFFu) - pixel.y;
898 blendFactor.z = Short4(0xFFFFu) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -0400899 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400900 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400901 blendFactor.x = current.w;
902 blendFactor.y = current.w;
903 blendFactor.z = current.w;
John Bauman89401822014-05-06 15:04:28 -0400904 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400905 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400906 blendFactor.x = Short4(0xFFFFu) - current.w;
907 blendFactor.y = Short4(0xFFFFu) - current.w;
908 blendFactor.z = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400909 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400910 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400911 blendFactor.x = pixel.w;
912 blendFactor.y = pixel.w;
913 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400914 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400915 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400916 blendFactor.x = Short4(0xFFFFu) - pixel.w;
917 blendFactor.y = Short4(0xFFFFu) - pixel.w;
918 blendFactor.z = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400919 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400920 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400921 blendFactor.x = Short4(0xFFFFu) - pixel.w;
922 blendFactor.x = Min(As<UShort4>(blendFactor.x), As<UShort4>(current.w));
923 blendFactor.y = blendFactor.x;
924 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -0400925 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400926 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500927 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[0]));
928 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[1]));
929 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400930 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400931 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500932 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[0]));
933 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[1]));
934 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[2]));
John Bauman89401822014-05-06 15:04:28 -0400935 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400936 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500937 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
938 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
939 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400940 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400941 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500942 blendFactor.x = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
943 blendFactor.y = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
944 blendFactor.z = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400945 break;
946 default:
947 ASSERT(false);
948 }
949 }
Nicolas Capens05b3d662016-02-25 23:58:33 -0500950
Nicolas Capens96d4e092016-11-18 14:22:38 -0500951 void PixelRoutine::blendFactorAlpha(Vector4s &blendFactor, const Vector4s &current, const Vector4s &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -0400952 {
953 switch(blendFactorAlphaActive)
954 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400955 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -0400956 // Optimized
957 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400958 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -0400959 // Optimized
960 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400961 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400962 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400963 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400964 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -0400965 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400966 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400967 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400968 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400969 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400970 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -0400971 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400972 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400973 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400974 blendFactor.w = current.w;
John Bauman89401822014-05-06 15:04:28 -0400975 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400976 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400977 blendFactor.w = Short4(0xFFFFu) - current.w;
John Bauman89401822014-05-06 15:04:28 -0400978 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400979 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400980 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400981 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400982 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -0400983 blendFactor.w = Short4(0xFFFFu) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -0400984 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400985 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -0400986 blendFactor.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -0400987 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400988 case BLEND_CONSTANT:
989 case BLEND_CONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500990 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.blendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400991 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -0400992 case BLEND_INVCONSTANT:
993 case BLEND_INVCONSTANTALPHA:
Nicolas Capens4f172c72016-01-13 08:34:30 -0500994 blendFactor.w = *Pointer<Short4>(data + OFFSET(DrawData,factor.invBlendConstant4W[3]));
John Bauman89401822014-05-06 15:04:28 -0400995 break;
996 default:
997 ASSERT(false);
998 }
999 }
1000
Alexis Hetu049a1872016-04-25 16:59:58 -04001001 bool PixelRoutine::isSRGB(int index) const
1002 {
1003 return state.targetFormat[index] == FORMAT_SRGB8_A8 || state.targetFormat[index] == FORMAT_SRGB8_X8;
1004 }
1005
Nicolas Capens4f172c72016-01-13 08:34:30 -05001006 void PixelRoutine::readPixel(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &pixel)
John Bauman89401822014-05-06 15:04:28 -04001007 {
John Bauman89401822014-05-06 15:04:28 -04001008 Short4 c01;
1009 Short4 c23;
Maxime Grégoired9762742015-07-08 16:43:48 -04001010 Pointer<Byte> buffer;
Nicolas Capensb40a2562016-01-05 00:08:45 -05001011 Pointer<Byte> buffer2;
John Bauman89401822014-05-06 15:04:28 -04001012
John Bauman89401822014-05-06 15:04:28 -04001013 switch(state.targetFormat[index])
1014 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001015 case FORMAT_R5G6B5:
1016 buffer = cBuffer + 2 * x;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001017 buffer2 = buffer + *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capensb40a2562016-01-05 00:08:45 -05001018 c01 = As<Short4>(Int2(*Pointer<Int>(buffer), *Pointer<Int>(buffer2)));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001019
1020 pixel.x = c01 & Short4(0xF800u);
1021 pixel.y = (c01 & Short4(0x07E0u)) << 5;
1022 pixel.z = (c01 & Short4(0x001Fu)) << 11;
1023 pixel.w = Short4(0xFFFFu);
1024 break;
John Bauman89401822014-05-06 15:04:28 -04001025 case FORMAT_A8R8G8B8:
1026 buffer = cBuffer + 4 * x;
1027 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001028 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001029 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001030 pixel.z = c01;
1031 pixel.y = c01;
1032 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1033 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1034 pixel.x = pixel.z;
1035 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1036 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1037 pixel.y = pixel.z;
1038 pixel.w = pixel.x;
1039 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1040 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1041 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1042 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001043 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001044 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001045 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001046 buffer = cBuffer + 4 * x;
1047 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001048 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001049 c23 = *Pointer<Short4>(buffer);
1050 pixel.z = c01;
1051 pixel.y = c01;
1052 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1053 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1054 pixel.x = pixel.z;
1055 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1056 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1057 pixel.y = pixel.z;
1058 pixel.w = pixel.x;
1059 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1060 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1061 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1062 pixel.w = UnpackHigh(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1063 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001064 case FORMAT_A8:
1065 buffer = cBuffer + 1 * x;
1066 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001067 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman66b8ab22014-05-06 15:57:45 -04001068 pixel.w = Insert(pixel.w, *Pointer<Short>(buffer), 1);
1069 pixel.w = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1070 pixel.x = Short4(0x0000);
1071 pixel.y = Short4(0x0000);
1072 pixel.z = Short4(0x0000);
1073 break;
Nicolas Capens7a473b72017-10-25 17:18:55 -04001074 case FORMAT_R8:
1075 buffer = cBuffer + 1 * x;
1076 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 0);
1077 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1078 pixel.x = Insert(pixel.x, *Pointer<Short>(buffer), 1);
1079 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1080 pixel.y = Short4(0x0000);
1081 pixel.z = Short4(0x0000);
1082 pixel.w = Short4(0xFFFFu);
1083 break;
John Bauman89401822014-05-06 15:04:28 -04001084 case FORMAT_X8R8G8B8:
1085 buffer = cBuffer + 4 * x;
1086 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001087 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001088 c23 = *Pointer<Short4>(buffer);
John Bauman19bac1e2014-05-06 15:23:49 -04001089 pixel.z = c01;
1090 pixel.y = c01;
1091 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1092 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1093 pixel.x = pixel.z;
1094 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1095 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1096 pixel.y = pixel.z;
1097 pixel.x = UnpackLow(As<Byte8>(pixel.x), As<Byte8>(pixel.x));
1098 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1099 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1100 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001101 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001102 case FORMAT_X8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001103 case FORMAT_SRGB8_X8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001104 buffer = cBuffer + 4 * x;
1105 c01 = *Pointer<Short4>(buffer);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001106 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001107 c23 = *Pointer<Short4>(buffer);
1108 pixel.z = c01;
1109 pixel.y = c01;
1110 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(c23));
1111 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(c23));
1112 pixel.x = pixel.z;
1113 pixel.z = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.y));
1114 pixel.x = UnpackHigh(As<Byte8>(pixel.x), As<Byte8>(pixel.y));
1115 pixel.y = pixel.z;
1116 pixel.w = pixel.x;
1117 pixel.x = UnpackLow(As<Byte8>(pixel.z), As<Byte8>(pixel.z));
1118 pixel.y = UnpackHigh(As<Byte8>(pixel.y), As<Byte8>(pixel.y));
1119 pixel.z = UnpackLow(As<Byte8>(pixel.w), As<Byte8>(pixel.w));
1120 pixel.w = Short4(0xFFFFu);
1121 break;
John Bauman89401822014-05-06 15:04:28 -04001122 case FORMAT_A8G8R8B8Q:
1123 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001124 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1125 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1126 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1127 // pixel.w = UnpackHigh(As<Byte8>(pixel.w), *Pointer<Byte8>(cBuffer + 8 * x + 8));
John Bauman89401822014-05-06 15:04:28 -04001128 break;
1129 case FORMAT_X8G8R8B8Q:
1130 UNIMPLEMENTED();
Nicolas Capens64750b52015-09-22 10:11:00 -04001131 // pixel.z = UnpackLow(As<Byte8>(pixel.z), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1132 // pixel.x = UnpackHigh(As<Byte8>(pixel.x), *Pointer<Byte8>(cBuffer + 8 * x + 0));
1133 // pixel.y = UnpackLow(As<Byte8>(pixel.y), *Pointer<Byte8>(cBuffer + 8 * x + 8));
1134 // pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001135 break;
1136 case FORMAT_A16B16G16R16:
Maxime Grégoired9762742015-07-08 16:43:48 -04001137 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04001138 pixel.x = *Pointer<Short4>(buffer + 8 * x);
1139 pixel.y = *Pointer<Short4>(buffer + 8 * x + 8);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001140 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04001141 pixel.z = *Pointer<Short4>(buffer + 8 * x);
1142 pixel.w = *Pointer<Short4>(buffer + 8 * x + 8);
1143 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04001144 break;
1145 case FORMAT_G16R16:
1146 buffer = cBuffer;
Maxime Grégoired9762742015-07-08 16:43:48 -04001147 pixel.x = *Pointer<Short4>(buffer + 4 * x);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001148 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Maxime Grégoired9762742015-07-08 16:43:48 -04001149 pixel.y = *Pointer<Short4>(buffer + 4 * x);
John Bauman19bac1e2014-05-06 15:23:49 -04001150 pixel.z = pixel.x;
1151 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.y));
1152 pixel.z = As<Short4>(UnpackHigh(pixel.z, pixel.y));
1153 pixel.y = pixel.z;
1154 pixel.x = As<Short4>(UnpackLow(pixel.x, pixel.z));
1155 pixel.y = As<Short4>(UnpackHigh(pixel.y, pixel.z));
1156 pixel.z = Short4(0xFFFFu);
1157 pixel.w = Short4(0xFFFFu);
John Bauman89401822014-05-06 15:04:28 -04001158 break;
1159 default:
1160 ASSERT(false);
1161 }
1162
Alexis Hetu049a1872016-04-25 16:59:58 -04001163 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001164 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001165 sRGBtoLinear16_12_16(pixel);
John Bauman89401822014-05-06 15:04:28 -04001166 }
Maxime Grégoired9762742015-07-08 16:43:48 -04001167 }
1168
Nicolas Capens4f172c72016-01-13 08:34:30 -05001169 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001170 {
1171 if(!state.alphaBlendActive)
1172 {
1173 return;
1174 }
1175
1176 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001177 readPixel(index, cBuffer, x, pixel);
John Bauman89401822014-05-06 15:04:28 -04001178
1179 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
Alexis Hetu96517182015-04-15 10:30:23 -04001180 Vector4s sourceFactor;
1181 Vector4s destFactor;
John Bauman89401822014-05-06 15:04:28 -04001182
Nicolas Capens4f172c72016-01-13 08:34:30 -05001183 blendFactor(sourceFactor, current, pixel, state.sourceBlendFactor);
1184 blendFactor(destFactor, current, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04001185
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001186 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001187 {
John Bauman19bac1e2014-05-06 15:23:49 -04001188 current.x = MulHigh(As<UShort4>(current.x), As<UShort4>(sourceFactor.x));
1189 current.y = MulHigh(As<UShort4>(current.y), As<UShort4>(sourceFactor.y));
1190 current.z = MulHigh(As<UShort4>(current.z), As<UShort4>(sourceFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001191 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001192
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001193 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001194 {
John Bauman19bac1e2014-05-06 15:23:49 -04001195 pixel.x = MulHigh(As<UShort4>(pixel.x), As<UShort4>(destFactor.x));
1196 pixel.y = MulHigh(As<UShort4>(pixel.y), As<UShort4>(destFactor.y));
1197 pixel.z = MulHigh(As<UShort4>(pixel.z), As<UShort4>(destFactor.z));
John Bauman89401822014-05-06 15:04:28 -04001198 }
1199
1200 switch(state.blendOperation)
1201 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001202 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001203 current.x = AddSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1204 current.y = AddSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1205 current.z = AddSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001206 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001207 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001208 current.x = SubSat(As<UShort4>(current.x), As<UShort4>(pixel.x));
1209 current.y = SubSat(As<UShort4>(current.y), As<UShort4>(pixel.y));
1210 current.z = SubSat(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001211 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001212 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001213 current.x = SubSat(As<UShort4>(pixel.x), As<UShort4>(current.x));
1214 current.y = SubSat(As<UShort4>(pixel.y), As<UShort4>(current.y));
1215 current.z = SubSat(As<UShort4>(pixel.z), As<UShort4>(current.z));
John Bauman89401822014-05-06 15:04:28 -04001216 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001217 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001218 current.x = Min(As<UShort4>(current.x), As<UShort4>(pixel.x));
1219 current.y = Min(As<UShort4>(current.y), As<UShort4>(pixel.y));
1220 current.z = Min(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001221 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001222 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001223 current.x = Max(As<UShort4>(current.x), As<UShort4>(pixel.x));
1224 current.y = Max(As<UShort4>(current.y), As<UShort4>(pixel.y));
1225 current.z = Max(As<UShort4>(current.z), As<UShort4>(pixel.z));
John Bauman89401822014-05-06 15:04:28 -04001226 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001227 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001228 // No operation
1229 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001230 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001231 current.x = pixel.x;
1232 current.y = pixel.y;
1233 current.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001234 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001235 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001236 current.x = Short4(0x0000);
1237 current.y = Short4(0x0000);
1238 current.z = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001239 break;
1240 default:
1241 ASSERT(false);
1242 }
1243
Nicolas Capens4f172c72016-01-13 08:34:30 -05001244 blendFactorAlpha(sourceFactor, current, pixel, state.sourceBlendFactorAlpha);
1245 blendFactorAlpha(destFactor, current, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04001246
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001247 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001248 {
John Bauman19bac1e2014-05-06 15:23:49 -04001249 current.w = MulHigh(As<UShort4>(current.w), As<UShort4>(sourceFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001250 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05001251
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001252 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04001253 {
John Bauman19bac1e2014-05-06 15:23:49 -04001254 pixel.w = MulHigh(As<UShort4>(pixel.w), As<UShort4>(destFactor.w));
John Bauman89401822014-05-06 15:04:28 -04001255 }
1256
1257 switch(state.blendOperationAlpha)
1258 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001259 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04001260 current.w = AddSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001261 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001262 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001263 current.w = SubSat(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001264 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001265 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04001266 current.w = SubSat(As<UShort4>(pixel.w), As<UShort4>(current.w));
John Bauman89401822014-05-06 15:04:28 -04001267 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001268 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04001269 current.w = Min(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001270 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001271 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04001272 current.w = Max(As<UShort4>(current.w), As<UShort4>(pixel.w));
John Bauman89401822014-05-06 15:04:28 -04001273 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001274 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04001275 // No operation
1276 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001277 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001278 current.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001279 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001280 case BLENDOP_NULL:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001281 current.w = Short4(0x0000);
John Bauman89401822014-05-06 15:04:28 -04001282 break;
1283 default:
1284 ASSERT(false);
1285 }
1286 }
1287
Nicolas Capens4f172c72016-01-13 08:34:30 -05001288 void PixelRoutine::logicOperation(int index, Pointer<Byte> &cBuffer, Vector4s &current, Int &x)
Maxime Grégoired9762742015-07-08 16:43:48 -04001289 {
Nicolas Capens2afcc802015-08-04 10:34:43 -04001290 if(state.logicalOperation == LOGICALOP_COPY)
Maxime Grégoired9762742015-07-08 16:43:48 -04001291 {
1292 return;
1293 }
1294
1295 Vector4s pixel;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001296 readPixel(index, cBuffer, x, pixel);
Maxime Grégoired9762742015-07-08 16:43:48 -04001297
1298 switch(state.logicalOperation)
1299 {
1300 case LOGICALOP_CLEAR:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001301 current.x = UShort4(0);
1302 current.y = UShort4(0);
1303 current.z = UShort4(0);
Maxime Grégoired9762742015-07-08 16:43:48 -04001304 break;
1305 case LOGICALOP_SET:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001306 current.x = UShort4(0xFFFFu);
1307 current.y = UShort4(0xFFFFu);
1308 current.z = UShort4(0xFFFFu);
Maxime Grégoired9762742015-07-08 16:43:48 -04001309 break;
1310 case LOGICALOP_COPY:
1311 ASSERT(false); // Optimized out
1312 break;
1313 case LOGICALOP_COPY_INVERTED:
1314 current.x = ~current.x;
1315 current.y = ~current.y;
1316 current.z = ~current.z;
1317 break;
1318 case LOGICALOP_NOOP:
1319 current.x = pixel.x;
1320 current.y = pixel.y;
1321 current.z = pixel.z;
1322 break;
1323 case LOGICALOP_INVERT:
1324 current.x = ~pixel.x;
1325 current.y = ~pixel.y;
1326 current.z = ~pixel.z;
1327 break;
1328 case LOGICALOP_AND:
1329 current.x = pixel.x & current.x;
1330 current.y = pixel.y & current.y;
1331 current.z = pixel.z & current.z;
1332 break;
1333 case LOGICALOP_NAND:
1334 current.x = ~(pixel.x & current.x);
1335 current.y = ~(pixel.y & current.y);
1336 current.z = ~(pixel.z & current.z);
1337 break;
1338 case LOGICALOP_OR:
1339 current.x = pixel.x | current.x;
1340 current.y = pixel.y | current.y;
1341 current.z = pixel.z | current.z;
1342 break;
1343 case LOGICALOP_NOR:
1344 current.x = ~(pixel.x | current.x);
1345 current.y = ~(pixel.y | current.y);
1346 current.z = ~(pixel.z | current.z);
1347 break;
1348 case LOGICALOP_XOR:
1349 current.x = pixel.x ^ current.x;
1350 current.y = pixel.y ^ current.y;
1351 current.z = pixel.z ^ current.z;
1352 break;
1353 case LOGICALOP_EQUIV:
1354 current.x = ~(pixel.x ^ current.x);
1355 current.y = ~(pixel.y ^ current.y);
1356 current.z = ~(pixel.z ^ current.z);
1357 break;
1358 case LOGICALOP_AND_REVERSE:
1359 current.x = ~pixel.x & current.x;
1360 current.y = ~pixel.y & current.y;
1361 current.z = ~pixel.z & current.z;
1362 break;
1363 case LOGICALOP_AND_INVERTED:
1364 current.x = pixel.x & ~current.x;
1365 current.y = pixel.y & ~current.y;
1366 current.z = pixel.z & ~current.z;
1367 break;
1368 case LOGICALOP_OR_REVERSE:
1369 current.x = ~pixel.x | current.x;
1370 current.y = ~pixel.y | current.y;
1371 current.z = ~pixel.z | current.z;
1372 break;
1373 case LOGICALOP_OR_INVERTED:
1374 current.x = pixel.x | ~current.x;
1375 current.y = pixel.y | ~current.y;
1376 current.z = pixel.z | ~current.z;
1377 break;
1378 default:
1379 ASSERT(false);
1380 }
1381 }
1382
Nicolas Capens4f172c72016-01-13 08:34:30 -05001383 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4s &current, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04001384 {
Alexis Hetu049a1872016-04-25 16:59:58 -04001385 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04001386 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05001387 linearToSRGB16_12_16(current);
John Bauman89401822014-05-06 15:04:28 -04001388 }
1389
1390 if(exactColorRounding)
1391 {
1392 switch(state.targetFormat[index])
1393 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001394 case FORMAT_R5G6B5:
Nicolas Capens26f37222015-09-22 09:53:45 -04001395 current.x = AddSat(As<UShort4>(current.x), UShort4(0x0400));
1396 current.y = AddSat(As<UShort4>(current.y), UShort4(0x0200));
1397 current.z = AddSat(As<UShort4>(current.z), UShort4(0x0400));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001398 break;
John Bauman89401822014-05-06 15:04:28 -04001399 case FORMAT_X8G8R8B8Q:
1400 case FORMAT_A8G8R8B8Q:
1401 case FORMAT_X8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001402 case FORMAT_X8B8G8R8:
John Bauman89401822014-05-06 15:04:28 -04001403 case FORMAT_A8R8G8B8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001404 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001405 case FORMAT_SRGB8_X8:
1406 case FORMAT_SRGB8_A8:
Alexis Hetu143dfc72016-09-13 18:41:27 -04001407 case FORMAT_G8R8:
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001408 case FORMAT_R8:
Alexis Hetu90c7ad62016-06-27 11:50:40 -04001409 current.x = current.x - As<Short4>(As<UShort4>(current.x) >> 8) + Short4(0x0080);
1410 current.y = current.y - As<Short4>(As<UShort4>(current.y) >> 8) + Short4(0x0080);
1411 current.z = current.z - As<Short4>(As<UShort4>(current.z) >> 8) + Short4(0x0080);
1412 current.w = current.w - As<Short4>(As<UShort4>(current.w) >> 8) + Short4(0x0080);
John Bauman89401822014-05-06 15:04:28 -04001413 break;
Nicolas Capensb69aa272016-01-02 00:06:41 -05001414 default:
1415 break;
John Bauman89401822014-05-06 15:04:28 -04001416 }
1417 }
1418
1419 int rgbaWriteMask = state.colorWriteActive(index);
Nicolas Capens3b396462016-01-02 00:23:53 -05001420 int bgraWriteMask = (rgbaWriteMask & 0x0000000A) | (rgbaWriteMask & 0x00000001) << 2 | (rgbaWriteMask & 0x00000004) >> 2;
John Bauman89401822014-05-06 15:04:28 -04001421
1422 switch(state.targetFormat[index])
1423 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001424 case FORMAT_R5G6B5:
1425 {
1426 current.x = current.x & Short4(0xF800u);
1427 current.y = As<UShort4>(current.y & Short4(0xFC00u)) >> 5;
1428 current.z = As<UShort4>(current.z) >> 11;
1429
1430 current.x = current.x | current.y | current.z;
1431 }
1432 break;
John Bauman89401822014-05-06 15:04:28 -04001433 case FORMAT_X8G8R8B8Q:
1434 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001435 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1436 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1437 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001438
John Bauman19bac1e2014-05-06 15:23:49 -04001439 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1440 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.y)));
John Bauman89401822014-05-06 15:04:28 -04001441 break;
1442 case FORMAT_A8G8R8B8Q:
1443 UNIMPLEMENTED();
John Bauman19bac1e2014-05-06 15:23:49 -04001444 // current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1445 // current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1446 // current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1447 // current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001448
John Bauman19bac1e2014-05-06 15:23:49 -04001449 // current.z = As<Short4>(Pack(As<UShort4>(current.z), As<UShort4>(current.x)));
1450 // current.y = As<Short4>(Pack(As<UShort4>(current.y), As<UShort4>(current.w)));
John Bauman89401822014-05-06 15:04:28 -04001451 break;
1452 case FORMAT_X8R8G8B8:
1453 case FORMAT_A8R8G8B8:
1454 if(state.targetFormat[index] == FORMAT_X8R8G8B8 || rgbaWriteMask == 0x7)
1455 {
John Bauman19bac1e2014-05-06 15:23:49 -04001456 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1457 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1458 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001459
Nicolas Capens33438a62017-09-27 11:47:35 -04001460 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1461 current.y = As<Short4>(PackUnsigned(current.y, current.y));
John Bauman89401822014-05-06 15:04:28 -04001462
John Bauman19bac1e2014-05-06 15:23:49 -04001463 current.x = current.z;
1464 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1465 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1466 current.y = current.z;
1467 current.z = As<Short4>(UnpackLow(current.z, current.x));
1468 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001469 }
1470 else
1471 {
John Bauman19bac1e2014-05-06 15:23:49 -04001472 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1473 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1474 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1475 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
John Bauman89401822014-05-06 15:04:28 -04001476
Nicolas Capens33438a62017-09-27 11:47:35 -04001477 current.z = As<Short4>(PackUnsigned(current.z, current.x));
1478 current.y = As<Short4>(PackUnsigned(current.y, current.w));
John Bauman89401822014-05-06 15:04:28 -04001479
John Bauman19bac1e2014-05-06 15:23:49 -04001480 current.x = current.z;
1481 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1482 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1483 current.y = current.z;
1484 current.z = As<Short4>(UnpackLow(current.z, current.x));
1485 current.y = As<Short4>(UnpackHigh(current.y, current.x));
John Bauman89401822014-05-06 15:04:28 -04001486 }
1487 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001488 case FORMAT_X8B8G8R8:
1489 case FORMAT_A8B8G8R8:
Alexis Hetu049a1872016-04-25 16:59:58 -04001490 case FORMAT_SRGB8_X8:
1491 case FORMAT_SRGB8_A8:
1492 if(state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8 || rgbaWriteMask == 0x7)
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001493 {
1494 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1495 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1496 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1497
Nicolas Capens33438a62017-09-27 11:47:35 -04001498 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1499 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001500
1501 current.x = current.z;
1502 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1503 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1504 current.y = current.z;
1505 current.z = As<Short4>(UnpackLow(current.z, current.x));
1506 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1507 }
1508 else
1509 {
1510 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1511 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
1512 current.z = As<Short4>(As<UShort4>(current.z) >> 8);
1513 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
1514
Nicolas Capens33438a62017-09-27 11:47:35 -04001515 current.z = As<Short4>(PackUnsigned(current.x, current.z));
1516 current.y = As<Short4>(PackUnsigned(current.y, current.w));
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001517
1518 current.x = current.z;
1519 current.z = UnpackLow(As<Byte8>(current.z), As<Byte8>(current.y));
1520 current.x = UnpackHigh(As<Byte8>(current.x), As<Byte8>(current.y));
1521 current.y = current.z;
1522 current.z = As<Short4>(UnpackLow(current.z, current.x));
1523 current.y = As<Short4>(UnpackHigh(current.y, current.x));
1524 }
1525 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001526 case FORMAT_G8R8:
1527 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
1528 current.y = As<Short4>(As<UShort4>(current.y) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001529 current.x = As<Short4>(PackUnsigned(current.x, current.x));
1530 current.y = As<Short4>(PackUnsigned(current.y, current.y));
Alexis Hetu143dfc72016-09-13 18:41:27 -04001531 current.x = UnpackLow(As<Byte8>(current.x), As<Byte8>(current.y));
1532 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001533 case FORMAT_R8:
1534 current.x = As<Short4>(As<UShort4>(current.x) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001535 current.x = As<Short4>(PackUnsigned(current.x, current.x));
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001536 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001537 case FORMAT_A8:
1538 current.w = As<Short4>(As<UShort4>(current.w) >> 8);
Nicolas Capens33438a62017-09-27 11:47:35 -04001539 current.w = As<Short4>(PackUnsigned(current.w, current.w));
John Bauman66b8ab22014-05-06 15:57:45 -04001540 break;
John Bauman89401822014-05-06 15:04:28 -04001541 case FORMAT_G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001542 current.z = current.x;
1543 current.x = As<Short4>(UnpackLow(current.x, current.y));
1544 current.z = As<Short4>(UnpackHigh(current.z, current.y));
1545 current.y = current.z;
John Bauman89401822014-05-06 15:04:28 -04001546 break;
1547 case FORMAT_A16B16G16R16:
John Bauman19bac1e2014-05-06 15:23:49 -04001548 transpose4x4(current.x, current.y, current.z, current.w);
John Bauman89401822014-05-06 15:04:28 -04001549 break;
John Bauman89401822014-05-06 15:04:28 -04001550 default:
1551 ASSERT(false);
1552 }
1553
John Bauman19bac1e2014-05-06 15:23:49 -04001554 Short4 c01 = current.z;
1555 Short4 c23 = current.y;
John Bauman89401822014-05-06 15:04:28 -04001556
1557 Int xMask; // Combination of all masks
1558
1559 if(state.depthTestActive)
1560 {
1561 xMask = zMask;
1562 }
1563 else
1564 {
1565 xMask = cMask;
1566 }
1567
1568 if(state.stencilActive)
1569 {
1570 xMask &= sMask;
1571 }
1572
John Bauman89401822014-05-06 15:04:28 -04001573 switch(state.targetFormat[index])
1574 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001575 case FORMAT_R5G6B5:
1576 {
1577 Pointer<Byte> buffer = cBuffer + 2 * x;
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001578 Int value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001579
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001580 Int c01 = Extract(As<Int2>(current.x), 0);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001581
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001582 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001583 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001584 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001585 c01 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001586 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001587 c01 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001588 }
1589
Nicolas Capens4f172c72016-01-13 08:34:30 -05001590 c01 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][0]) + xMask * 8);
1591 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][0]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001592 c01 |= value;
1593 *Pointer<Int>(buffer) = c01;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001594
Nicolas Capens4f172c72016-01-13 08:34:30 -05001595 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001596 value = *Pointer<Int>(buffer);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001597
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001598 Int c23 = Extract(As<Int2>(current.x), 1);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001599
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001600 if((bgraWriteMask & 0x00000007) != 0x00000007)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001601 {
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001602 Int masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001603 c23 &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[bgraWriteMask & 0x7][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001604 masked &= *Pointer<Int>(constants + OFFSET(Constants,mask565Q[~bgraWriteMask & 0x7][0]));
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001605 c23 |= masked;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001606 }
1607
Nicolas Capens4f172c72016-01-13 08:34:30 -05001608 c23 &= *Pointer<Int>(constants + OFFSET(Constants,maskW4Q[0][2]) + xMask * 8);
1609 value &= *Pointer<Int>(constants + OFFSET(Constants,invMaskW4Q[0][2]) + xMask * 8);
Nicolas Capens9919b6c2015-05-26 01:11:26 -04001610 c23 |= value;
1611 *Pointer<Int>(buffer) = c23;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001612 }
1613 break;
John Bauman89401822014-05-06 15:04:28 -04001614 case FORMAT_A8G8R8B8Q:
1615 case FORMAT_X8G8R8B8Q: // FIXME: Don't touch alpha?
1616 UNIMPLEMENTED();
1617 // value = *Pointer<Short4>(cBuffer + 8 * x + 0);
1618
1619 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1620 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1621 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1622 // {
1623 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001624 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1625 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001626 // c01 |= masked;
1627 // }
1628
Nicolas Capens4f172c72016-01-13 08:34:30 -05001629 // c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1630 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001631 // c01 |= value;
1632 // *Pointer<Short4>(cBuffer + 8 * x + 0) = c01;
1633
1634 // value = *Pointer<Short4>(cBuffer + 8 * x + 8);
1635
1636 // if((state.targetFormat[index] == FORMAT_A8G8R8B8Q && bgraWriteMask != 0x0000000F) ||
1637 // ((state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x00000007) &&
1638 // (state.targetFormat[index] == FORMAT_X8G8R8B8Q && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1639 // {
1640 // Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001641 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1642 // masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
John Bauman89401822014-05-06 15:04:28 -04001643 // c23 |= masked;
1644 // }
1645
Nicolas Capens4f172c72016-01-13 08:34:30 -05001646 // c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1647 // value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman89401822014-05-06 15:04:28 -04001648 // c23 |= value;
1649 // *Pointer<Short4>(cBuffer + 8 * x + 8) = c23;
1650 break;
1651 case FORMAT_A8R8G8B8:
1652 case FORMAT_X8R8G8B8: // FIXME: Don't touch alpha?
John Bauman89401822014-05-06 15:04:28 -04001653 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001654 Pointer<Byte> buffer = cBuffer + x * 4;
1655 Short4 value = *Pointer<Short4>(buffer);
1656
1657 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1658 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1659 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1660 {
1661 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001662 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1663 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001664 c01 |= masked;
1665 }
1666
Nicolas Capens4f172c72016-01-13 08:34:30 -05001667 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1668 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001669 c01 |= value;
1670 *Pointer<Short4>(buffer) = c01;
1671
Nicolas Capens4f172c72016-01-13 08:34:30 -05001672 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001673 value = *Pointer<Short4>(buffer);
1674
1675 if((state.targetFormat[index] == FORMAT_A8R8G8B8 && bgraWriteMask != 0x0000000F) ||
1676 ((state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x00000007) &&
1677 (state.targetFormat[index] == FORMAT_X8R8G8B8 && bgraWriteMask != 0x0000000F))) // FIXME: Need for masking when XRGB && Fh?
1678 {
1679 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001680 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[bgraWriteMask][0]));
1681 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[bgraWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001682 c23 |= masked;
1683 }
1684
Nicolas Capens4f172c72016-01-13 08:34:30 -05001685 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1686 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001687 c23 |= value;
1688 *Pointer<Short4>(buffer) = c23;
John Bauman89401822014-05-06 15:04:28 -04001689 }
John Bauman89401822014-05-06 15:04:28 -04001690 break;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001691 case FORMAT_A8B8G8R8:
1692 case FORMAT_X8B8G8R8: // FIXME: Don't touch alpha?
Alexis Hetu049a1872016-04-25 16:59:58 -04001693 case FORMAT_SRGB8_X8:
1694 case FORMAT_SRGB8_A8:
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001695 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001696 Pointer<Byte> buffer = cBuffer + x * 4;
1697 Short4 value = *Pointer<Short4>(buffer);
1698
Alexis Hetu049a1872016-04-25 16:59:58 -04001699 bool masked = (((state.targetFormat[index] == FORMAT_A8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_A8) && rgbaWriteMask != 0x0000000F) ||
1700 (((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x00000007) &&
1701 ((state.targetFormat[index] == FORMAT_X8B8G8R8 || state.targetFormat[index] == FORMAT_SRGB8_X8) && rgbaWriteMask != 0x0000000F))); // FIXME: Need for masking when XBGR && Fh?
1702
1703 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001704 {
1705 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001706 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1707 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001708 c01 |= masked;
1709 }
1710
Nicolas Capens4f172c72016-01-13 08:34:30 -05001711 c01 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1712 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001713 c01 |= value;
1714 *Pointer<Short4>(buffer) = c01;
1715
Nicolas Capens4f172c72016-01-13 08:34:30 -05001716 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001717 value = *Pointer<Short4>(buffer);
1718
Alexis Hetu049a1872016-04-25 16:59:58 -04001719 if(masked)
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001720 {
1721 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001722 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q[rgbaWriteMask][0]));
1723 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001724 c23 |= masked;
1725 }
1726
Nicolas Capens4f172c72016-01-13 08:34:30 -05001727 c23 &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1728 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001729 c23 |= value;
1730 *Pointer<Short4>(buffer) = c23;
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001731 }
Nicolas Capens0c42ee12015-03-28 18:54:07 -04001732 break;
Alexis Hetu143dfc72016-09-13 18:41:27 -04001733 case FORMAT_G8R8:
1734 if((rgbaWriteMask & 0x00000003) != 0x0)
1735 {
1736 Pointer<Byte> buffer = cBuffer + 2 * x;
1737 Int2 value;
1738 value = Insert(value, *Pointer<Int>(buffer), 0);
1739 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
1740 value = Insert(value, *Pointer<Int>(buffer + pitch), 1);
1741
1742 Int2 packedCol = As<Int2>(current.x);
1743
1744 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
1745 if((rgbaWriteMask & 0x3) != 0x3)
1746 {
1747 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
1748 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
1749 mergedMask &= rgbaMask;
1750 }
1751
1752 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask));
1753
1754 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
1755 *Pointer<UInt>(buffer + pitch) = As<UInt>(Extract(packedCol, 1));
1756 }
1757 break;
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001758 case FORMAT_R8:
1759 if(rgbaWriteMask & 0x00000001)
1760 {
1761 Pointer<Byte> buffer = cBuffer + 1 * x;
1762 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001763 value = Insert(value, *Pointer<Short>(buffer), 0);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001764 Int pitch = *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001765 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
Alexis Hetu0cff3cb2016-05-04 16:23:50 -04001766
1767 current.x &= *Pointer<Short4>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask);
1768 value &= *Pointer<Short4>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask);
1769 current.x |= value;
1770
1771 *Pointer<Short>(buffer) = Extract(current.x, 0);
1772 *Pointer<Short>(buffer + pitch) = Extract(current.x, 1);
1773 }
1774 break;
John Bauman66b8ab22014-05-06 15:57:45 -04001775 case FORMAT_A8:
1776 if(rgbaWriteMask & 0x00000008)
1777 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001778 Pointer<Byte> buffer = cBuffer + 1 * x;
1779 Short4 value;
Alexis Hetub14ed802016-06-15 13:06:50 -04001780 value = Insert(value, *Pointer<Short>(buffer), 0);
Nicolas Capens4f172c72016-01-13 08:34:30 -05001781 Int pitch = *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Alexis Hetub14ed802016-06-15 13:06:50 -04001782 value = Insert(value, *Pointer<Short>(buffer + pitch), 1);
John Bauman66b8ab22014-05-06 15:57:45 -04001783
Nicolas Capens4f172c72016-01-13 08:34:30 -05001784 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskB4Q) + 8 * xMask);
1785 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskB4Q) + 8 * xMask);
John Bauman66b8ab22014-05-06 15:57:45 -04001786 current.w |= value;
1787
1788 *Pointer<Short>(buffer) = Extract(current.w, 0);
1789 *Pointer<Short>(buffer + pitch) = Extract(current.w, 1);
1790 }
1791 break;
John Bauman89401822014-05-06 15:04:28 -04001792 case FORMAT_G16R16:
John Bauman89401822014-05-06 15:04:28 -04001793 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001794 Pointer<Byte> buffer = cBuffer + 4 * x;
John Bauman89401822014-05-06 15:04:28 -04001795
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001796 Short4 value = *Pointer<Short4>(buffer);
John Bauman89401822014-05-06 15:04:28 -04001797
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001798 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001799 {
1800 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001801 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001802 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001803 current.x |= masked;
John Bauman89401822014-05-06 15:04:28 -04001804 }
1805
Nicolas Capens4f172c72016-01-13 08:34:30 -05001806 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskD01Q) + xMask * 8);
1807 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD01Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001808 current.x |= value;
1809 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001810
Nicolas Capens4f172c72016-01-13 08:34:30 -05001811 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04001812
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001813 value = *Pointer<Short4>(buffer);
1814
1815 if((rgbaWriteMask & 0x00000003) != 0x00000003)
John Bauman89401822014-05-06 15:04:28 -04001816 {
1817 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001818 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[rgbaWriteMask & 0x3][0]));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04001819 masked &= *Pointer<Short4>(constants + OFFSET(Constants,maskW01Q[~rgbaWriteMask & 0x3][0]));
John Bauman19bac1e2014-05-06 15:23:49 -04001820 current.y |= masked;
John Bauman89401822014-05-06 15:04:28 -04001821 }
1822
Nicolas Capens4f172c72016-01-13 08:34:30 -05001823 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskD23Q) + xMask * 8);
1824 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskD23Q) + xMask * 8);
John Bauman19bac1e2014-05-06 15:23:49 -04001825 current.y |= value;
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001826 *Pointer<Short4>(buffer) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001827 }
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001828 break;
1829 case FORMAT_A16B16G16R16:
John Bauman89401822014-05-06 15:04:28 -04001830 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001831 Pointer<Byte> buffer = cBuffer + 8 * x;
John Bauman89401822014-05-06 15:04:28 -04001832
John Bauman89401822014-05-06 15:04:28 -04001833 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001834 Short4 value = *Pointer<Short4>(buffer);
1835
1836 if(rgbaWriteMask != 0x0000000F)
1837 {
1838 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001839 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1840 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001841 current.x |= masked;
1842 }
1843
Nicolas Capens4f172c72016-01-13 08:34:30 -05001844 current.x &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ0Q) + xMask * 8);
1845 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ0Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001846 current.x |= value;
1847 *Pointer<Short4>(buffer) = current.x;
John Bauman89401822014-05-06 15:04:28 -04001848 }
1849
John Bauman89401822014-05-06 15:04:28 -04001850 {
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001851 Short4 value = *Pointer<Short4>(buffer + 8);
1852
1853 if(rgbaWriteMask != 0x0000000F)
1854 {
1855 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001856 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1857 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001858 current.y |= masked;
1859 }
1860
Nicolas Capens4f172c72016-01-13 08:34:30 -05001861 current.y &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ1Q) + xMask * 8);
1862 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ1Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001863 current.y |= value;
1864 *Pointer<Short4>(buffer + 8) = current.y;
John Bauman89401822014-05-06 15:04:28 -04001865 }
1866
Nicolas Capens4f172c72016-01-13 08:34:30 -05001867 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001868
1869 {
1870 Short4 value = *Pointer<Short4>(buffer);
1871
1872 if(rgbaWriteMask != 0x0000000F)
1873 {
1874 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001875 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1876 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001877 current.z |= masked;
1878 }
1879
Nicolas Capens4f172c72016-01-13 08:34:30 -05001880 current.z &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ2Q) + xMask * 8);
1881 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ2Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001882 current.z |= value;
1883 *Pointer<Short4>(buffer) = current.z;
1884 }
1885
1886 {
1887 Short4 value = *Pointer<Short4>(buffer + 8);
1888
1889 if(rgbaWriteMask != 0x0000000F)
1890 {
1891 Short4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05001892 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskW4Q[rgbaWriteMask][0]));
1893 masked &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskW4Q[rgbaWriteMask][0]));
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001894 current.w |= masked;
1895 }
1896
Nicolas Capens4f172c72016-01-13 08:34:30 -05001897 current.w &= *Pointer<Short4>(constants + OFFSET(Constants,maskQ3Q) + xMask * 8);
1898 value &= *Pointer<Short4>(constants + OFFSET(Constants,invMaskQ3Q) + xMask * 8);
Nicolas Capensd5f0a6c2015-05-26 00:18:01 -04001899 current.w |= value;
1900 *Pointer<Short4>(buffer + 8) = current.w;
1901 }
John Bauman89401822014-05-06 15:04:28 -04001902 }
1903 break;
1904 default:
1905 ASSERT(false);
1906 }
1907 }
1908
Nicolas Capens96d4e092016-11-18 14:22:38 -05001909 void PixelRoutine::blendFactor(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorActive)
John Bauman89401822014-05-06 15:04:28 -04001910 {
1911 switch(blendFactorActive)
1912 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001913 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001914 // Optimized
1915 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001916 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001917 // Optimized
1918 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001919 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001920 blendFactor.x = oC.x;
1921 blendFactor.y = oC.y;
1922 blendFactor.z = oC.z;
John Bauman89401822014-05-06 15:04:28 -04001923 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001924 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001925 blendFactor.x = Float4(1.0f) - oC.x;
1926 blendFactor.y = Float4(1.0f) - oC.y;
1927 blendFactor.z = Float4(1.0f) - oC.z;
John Bauman89401822014-05-06 15:04:28 -04001928 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001929 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001930 blendFactor.x = pixel.x;
1931 blendFactor.y = pixel.y;
1932 blendFactor.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001933 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001934 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001935 blendFactor.x = Float4(1.0f) - pixel.x;
1936 blendFactor.y = Float4(1.0f) - pixel.y;
1937 blendFactor.z = Float4(1.0f) - pixel.z;
John Bauman89401822014-05-06 15:04:28 -04001938 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001939 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001940 blendFactor.x = oC.w;
1941 blendFactor.y = oC.w;
1942 blendFactor.z = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001943 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001944 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001945 blendFactor.x = Float4(1.0f) - oC.w;
1946 blendFactor.y = Float4(1.0f) - oC.w;
1947 blendFactor.z = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001948 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001949 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001950 blendFactor.x = pixel.w;
1951 blendFactor.y = pixel.w;
1952 blendFactor.z = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001953 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001954 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04001955 blendFactor.x = Float4(1.0f) - pixel.w;
1956 blendFactor.y = Float4(1.0f) - pixel.w;
1957 blendFactor.z = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001958 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001959 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04001960 blendFactor.x = Float4(1.0f) - pixel.w;
1961 blendFactor.x = Min(blendFactor.x, oC.w);
1962 blendFactor.y = blendFactor.x;
1963 blendFactor.z = blendFactor.x;
John Bauman89401822014-05-06 15:04:28 -04001964 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001965 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001966 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[0]));
1967 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[1]));
1968 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001969 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001970 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05001971 blendFactor.x = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[0]));
1972 blendFactor.y = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[1]));
1973 blendFactor.z = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[2]));
John Bauman89401822014-05-06 15:04:28 -04001974 break;
1975 default:
1976 ASSERT(false);
1977 }
1978 }
1979
Nicolas Capens96d4e092016-11-18 14:22:38 -05001980 void PixelRoutine::blendFactorAlpha(Vector4f &blendFactor, const Vector4f &oC, const Vector4f &pixel, BlendFactor blendFactorAlphaActive)
John Bauman89401822014-05-06 15:04:28 -04001981 {
1982 switch(blendFactorAlphaActive)
1983 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001984 case BLEND_ZERO:
John Bauman89401822014-05-06 15:04:28 -04001985 // Optimized
1986 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001987 case BLEND_ONE:
John Bauman89401822014-05-06 15:04:28 -04001988 // Optimized
1989 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001990 case BLEND_SOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001991 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04001992 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001993 case BLEND_INVSOURCE:
John Bauman19bac1e2014-05-06 15:23:49 -04001994 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04001995 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001996 case BLEND_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04001997 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04001998 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04001999 case BLEND_INVDEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002000 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002001 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002002 case BLEND_SOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002003 blendFactor.w = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002004 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002005 case BLEND_INVSOURCEALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002006 blendFactor.w = Float4(1.0f) - oC.w;
John Bauman89401822014-05-06 15:04:28 -04002007 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002008 case BLEND_DESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002009 blendFactor.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002010 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002011 case BLEND_INVDESTALPHA:
John Bauman19bac1e2014-05-06 15:23:49 -04002012 blendFactor.w = Float4(1.0f) - pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002013 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002014 case BLEND_SRCALPHASAT:
John Bauman19bac1e2014-05-06 15:23:49 -04002015 blendFactor.w = Float4(1.0f);
John Bauman89401822014-05-06 15:04:28 -04002016 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002017 case BLEND_CONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002018 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.blendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002019 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002020 case BLEND_INVCONSTANT:
Nicolas Capens4f172c72016-01-13 08:34:30 -05002021 blendFactor.w = *Pointer<Float4>(data + OFFSET(DrawData,factor.invBlendConstant4F[3]));
John Bauman89401822014-05-06 15:04:28 -04002022 break;
2023 default:
2024 ASSERT(false);
2025 }
2026 }
2027
Nicolas Capens4f172c72016-01-13 08:34:30 -05002028 void PixelRoutine::alphaBlend(int index, Pointer<Byte> &cBuffer, Vector4f &oC, Int &x)
John Bauman89401822014-05-06 15:04:28 -04002029 {
2030 if(!state.alphaBlendActive)
2031 {
2032 return;
2033 }
2034
2035 Pointer<Byte> buffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002036 Vector4f pixel;
John Bauman89401822014-05-06 15:04:28 -04002037
Alexis Hetu96517182015-04-15 10:30:23 -04002038 Vector4s color;
John Bauman89401822014-05-06 15:04:28 -04002039 Short4 c01;
2040 Short4 c23;
2041
Alexis Hetu1abb6382016-02-08 11:21:16 -05002042 Float4 one;
Alexis Hetu7208e932016-06-02 11:19:24 -04002043 if(Surface::isFloatFormat(state.targetFormat[index]))
John Bauman89401822014-05-06 15:04:28 -04002044 {
Alexis Hetu1abb6382016-02-08 11:21:16 -05002045 one = Float4(1.0f);
Alexis Hetu7208e932016-06-02 11:19:24 -04002046 }
2047 else if(Surface::isNonNormalizedInteger(state.targetFormat[index]))
2048 {
2049 one = As<Float4>(Surface::isUnsignedComponent(state.targetFormat[index], 0) ? Int4(0xFFFFFFFF) : Int4(0x7FFFFFFF));
Alexis Hetu1abb6382016-02-08 11:21:16 -05002050 }
2051
2052 switch(state.targetFormat[index])
2053 {
2054 case FORMAT_R32I:
2055 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002056 case FORMAT_R32F:
2057 buffer = cBuffer;
2058 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002059 pixel.x.x = *Pointer<Float>(buffer + 4 * x + 0);
2060 pixel.x.y = *Pointer<Float>(buffer + 4 * x + 4);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002061 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002062 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002063 pixel.x.z = *Pointer<Float>(buffer + 4 * x + 0);
2064 pixel.x.w = *Pointer<Float>(buffer + 4 * x + 4);
Alexis Hetu1abb6382016-02-08 11:21:16 -05002065 pixel.y = pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002066 break;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002067 case FORMAT_G32R32I:
2068 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002069 case FORMAT_G32R32F:
2070 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002071 pixel.x = *Pointer<Float4>(buffer + 8 * x, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002072 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002073 pixel.y = *Pointer<Float4>(buffer + 8 * x, 16);
2074 pixel.z = pixel.x;
2075 pixel.x = ShuffleLowHigh(pixel.x, pixel.y, 0x88);
2076 pixel.z = ShuffleLowHigh(pixel.z, pixel.y, 0xDD);
2077 pixel.y = pixel.z;
Alexis Hetu1abb6382016-02-08 11:21:16 -05002078 pixel.z = pixel.w = one;
John Bauman89401822014-05-06 15:04:28 -04002079 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002080 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002081 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002082 case FORMAT_A32B32G32R32I:
2083 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002084 buffer = cBuffer;
John Bauman19bac1e2014-05-06 15:23:49 -04002085 pixel.x = *Pointer<Float4>(buffer + 16 * x, 16);
2086 pixel.y = *Pointer<Float4>(buffer + 16 * x + 16, 16);
Nicolas Capens4f172c72016-01-13 08:34:30 -05002087 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman19bac1e2014-05-06 15:23:49 -04002088 pixel.z = *Pointer<Float4>(buffer + 16 * x, 16);
2089 pixel.w = *Pointer<Float4>(buffer + 16 * x + 16, 16);
2090 transpose4x4(pixel.x, pixel.y, pixel.z, pixel.w);
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002091 if(state.targetFormat[index] == FORMAT_X32B32G32R32F)
2092 {
2093 pixel.w = Float4(1.0f);
2094 }
John Bauman89401822014-05-06 15:04:28 -04002095 break;
2096 default:
2097 ASSERT(false);
2098 }
2099
Alexis Hetu049a1872016-04-25 16:59:58 -04002100 if((postBlendSRGB && state.writeSRGB) || isSRGB(index))
John Bauman89401822014-05-06 15:04:28 -04002101 {
John Bauman19bac1e2014-05-06 15:23:49 -04002102 sRGBtoLinear(pixel.x);
2103 sRGBtoLinear(pixel.y);
2104 sRGBtoLinear(pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002105 }
2106
2107 // Final Color = ObjectColor * SourceBlendFactor + PixelColor * DestinationBlendFactor
John Bauman19bac1e2014-05-06 15:23:49 -04002108 Vector4f sourceFactor;
2109 Vector4f destFactor;
John Bauman89401822014-05-06 15:04:28 -04002110
Nicolas Capens4f172c72016-01-13 08:34:30 -05002111 blendFactor(sourceFactor, oC, pixel, state.sourceBlendFactor);
2112 blendFactor(destFactor, oC, pixel, state.destBlendFactor);
John Bauman89401822014-05-06 15:04:28 -04002113
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002114 if(state.sourceBlendFactor != BLEND_ONE && state.sourceBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002115 {
John Bauman19bac1e2014-05-06 15:23:49 -04002116 oC.x *= sourceFactor.x;
2117 oC.y *= sourceFactor.y;
2118 oC.z *= sourceFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002119 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002120
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002121 if(state.destBlendFactor != BLEND_ONE && state.destBlendFactor != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002122 {
John Bauman19bac1e2014-05-06 15:23:49 -04002123 pixel.x *= destFactor.x;
2124 pixel.y *= destFactor.y;
2125 pixel.z *= destFactor.z;
John Bauman89401822014-05-06 15:04:28 -04002126 }
2127
2128 switch(state.blendOperation)
2129 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002130 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002131 oC.x += pixel.x;
2132 oC.y += pixel.y;
2133 oC.z += pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002134 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002135 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002136 oC.x -= pixel.x;
2137 oC.y -= pixel.y;
2138 oC.z -= pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002139 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002140 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002141 oC.x = pixel.x - oC.x;
2142 oC.y = pixel.y - oC.y;
2143 oC.z = pixel.z - oC.z;
John Bauman89401822014-05-06 15:04:28 -04002144 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002145 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002146 oC.x = Min(oC.x, pixel.x);
2147 oC.y = Min(oC.y, pixel.y);
2148 oC.z = Min(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002149 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002150 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002151 oC.x = Max(oC.x, pixel.x);
2152 oC.y = Max(oC.y, pixel.y);
2153 oC.z = Max(oC.z, pixel.z);
John Bauman89401822014-05-06 15:04:28 -04002154 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002155 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002156 // No operation
2157 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002158 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002159 oC.x = pixel.x;
2160 oC.y = pixel.y;
2161 oC.z = pixel.z;
John Bauman89401822014-05-06 15:04:28 -04002162 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002163 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002164 oC.x = Float4(0.0f);
2165 oC.y = Float4(0.0f);
2166 oC.z = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002167 break;
2168 default:
2169 ASSERT(false);
2170 }
2171
Nicolas Capens4f172c72016-01-13 08:34:30 -05002172 blendFactorAlpha(sourceFactor, oC, pixel, state.sourceBlendFactorAlpha);
2173 blendFactorAlpha(destFactor, oC, pixel, state.destBlendFactorAlpha);
John Bauman89401822014-05-06 15:04:28 -04002174
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002175 if(state.sourceBlendFactorAlpha != BLEND_ONE && state.sourceBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002176 {
John Bauman19bac1e2014-05-06 15:23:49 -04002177 oC.w *= sourceFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002178 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002179
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002180 if(state.destBlendFactorAlpha != BLEND_ONE && state.destBlendFactorAlpha != BLEND_ZERO)
John Bauman89401822014-05-06 15:04:28 -04002181 {
John Bauman19bac1e2014-05-06 15:23:49 -04002182 pixel.w *= destFactor.w;
John Bauman89401822014-05-06 15:04:28 -04002183 }
2184
2185 switch(state.blendOperationAlpha)
2186 {
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002187 case BLENDOP_ADD:
John Bauman19bac1e2014-05-06 15:23:49 -04002188 oC.w += pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002189 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002190 case BLENDOP_SUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002191 oC.w -= pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002192 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002193 case BLENDOP_INVSUB:
John Bauman19bac1e2014-05-06 15:23:49 -04002194 pixel.w -= oC.w;
2195 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002196 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002197 case BLENDOP_MIN:
John Bauman19bac1e2014-05-06 15:23:49 -04002198 oC.w = Min(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002199 break;
Nicolas Capens05b3d662016-02-25 23:58:33 -05002200 case BLENDOP_MAX:
John Bauman19bac1e2014-05-06 15:23:49 -04002201 oC.w = Max(oC.w, pixel.w);
John Bauman89401822014-05-06 15:04:28 -04002202 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002203 case BLENDOP_SOURCE:
John Bauman89401822014-05-06 15:04:28 -04002204 // No operation
2205 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002206 case BLENDOP_DEST:
John Bauman19bac1e2014-05-06 15:23:49 -04002207 oC.w = pixel.w;
John Bauman89401822014-05-06 15:04:28 -04002208 break;
Nicolas Capensa0f4be82014-10-22 14:35:30 -04002209 case BLENDOP_NULL:
John Bauman19bac1e2014-05-06 15:23:49 -04002210 oC.w = Float4(0.0f);
John Bauman89401822014-05-06 15:04:28 -04002211 break;
2212 default:
2213 ASSERT(false);
2214 }
2215 }
2216
Nicolas Capens4f172c72016-01-13 08:34:30 -05002217 void PixelRoutine::writeColor(int index, Pointer<Byte> &cBuffer, Int &x, Vector4f &oC, Int &sMask, Int &zMask, Int &cMask)
John Bauman89401822014-05-06 15:04:28 -04002218 {
John Bauman89401822014-05-06 15:04:28 -04002219 switch(state.targetFormat[index])
2220 {
John Bauman89401822014-05-06 15:04:28 -04002221 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002222 case FORMAT_R32I:
2223 case FORMAT_R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002224 case FORMAT_R16I:
2225 case FORMAT_R16UI:
2226 case FORMAT_R8I:
2227 case FORMAT_R8UI:
John Bauman89401822014-05-06 15:04:28 -04002228 break;
2229 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002230 case FORMAT_G32R32I:
2231 case FORMAT_G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002232 case FORMAT_G16R16I:
2233 case FORMAT_G16R16UI:
2234 case FORMAT_G8R8I:
2235 case FORMAT_G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002236 oC.z = oC.x;
2237 oC.x = UnpackLow(oC.x, oC.y);
2238 oC.z = UnpackHigh(oC.z, oC.y);
2239 oC.y = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002240 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002241 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002242 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002243 case FORMAT_A32B32G32R32I:
2244 case FORMAT_A32B32G32R32UI:
Alexis Hetubd7117d2016-06-02 10:35:59 -04002245 case FORMAT_A16B16G16R16I:
2246 case FORMAT_A16B16G16R16UI:
2247 case FORMAT_A8B8G8R8I:
2248 case FORMAT_A8B8G8R8UI:
John Bauman19bac1e2014-05-06 15:23:49 -04002249 transpose4x4(oC.x, oC.y, oC.z, oC.w);
John Bauman89401822014-05-06 15:04:28 -04002250 break;
2251 default:
2252 ASSERT(false);
2253 }
2254
2255 int rgbaWriteMask = state.colorWriteActive(index);
2256
2257 Int xMask; // Combination of all masks
2258
2259 if(state.depthTestActive)
2260 {
2261 xMask = zMask;
2262 }
2263 else
2264 {
2265 xMask = cMask;
2266 }
2267
2268 if(state.stencilActive)
2269 {
2270 xMask &= sMask;
2271 }
2272
2273 Pointer<Byte> buffer;
2274 Float4 value;
2275
2276 switch(state.targetFormat[index])
2277 {
2278 case FORMAT_R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002279 case FORMAT_R32I:
2280 case FORMAT_R32UI:
John Bauman89401822014-05-06 15:04:28 -04002281 if(rgbaWriteMask & 0x00000001)
2282 {
2283 buffer = cBuffer + 4 * x;
2284
2285 // FIXME: movlps
2286 value.x = *Pointer<Float>(buffer + 0);
2287 value.y = *Pointer<Float>(buffer + 4);
2288
Nicolas Capens4f172c72016-01-13 08:34:30 -05002289 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002290
2291 // FIXME: movhps
2292 value.z = *Pointer<Float>(buffer + 0);
2293 value.w = *Pointer<Float>(buffer + 4);
2294
Nicolas Capens4f172c72016-01-13 08:34:30 -05002295 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X) + xMask * 16, 16));
2296 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002297 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
John Bauman89401822014-05-06 15:04:28 -04002298
2299 // FIXME: movhps
John Bauman19bac1e2014-05-06 15:23:49 -04002300 *Pointer<Float>(buffer + 0) = oC.x.z;
2301 *Pointer<Float>(buffer + 4) = oC.x.w;
John Bauman89401822014-05-06 15:04:28 -04002302
Nicolas Capens4f172c72016-01-13 08:34:30 -05002303 buffer -= *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002304
2305 // FIXME: movlps
John Bauman19bac1e2014-05-06 15:23:49 -04002306 *Pointer<Float>(buffer + 0) = oC.x.x;
2307 *Pointer<Float>(buffer + 4) = oC.x.y;
John Bauman89401822014-05-06 15:04:28 -04002308 }
2309 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002310 case FORMAT_R16I:
2311 case FORMAT_R16UI:
2312 if(rgbaWriteMask & 0x00000001)
2313 {
2314 buffer = cBuffer + 2 * x;
2315
2316 UShort4 xyzw;
2317 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 0));
2318
2319 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2320
2321 xyzw = As<UShort4>(Insert(As<Int2>(xyzw), *Pointer<Int>(buffer), 1));
2322 value = As<Float4>(Int4(xyzw));
2323
2324 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants, maskD4X) + xMask * 16, 16));
2325 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants, invMaskD4X) + xMask * 16, 16));
2326 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2327
2328 if(state.targetFormat[index] == FORMAT_R16I)
2329 {
2330 Float component = oC.x.z;
2331 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2332 component = oC.x.w;
2333 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2334
2335 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2336
2337 component = oC.x.x;
2338 *Pointer<Short>(buffer + 0) = Short(As<Int>(component));
2339 component = oC.x.y;
2340 *Pointer<Short>(buffer + 2) = Short(As<Int>(component));
2341 }
2342 else // FORMAT_R16UI
2343 {
2344 Float component = oC.x.z;
2345 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2346 component = oC.x.w;
2347 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2348
2349 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2350
2351 component = oC.x.x;
2352 *Pointer<UShort>(buffer + 0) = UShort(As<Int>(component));
2353 component = oC.x.y;
2354 *Pointer<UShort>(buffer + 2) = UShort(As<Int>(component));
2355 }
2356 }
2357 break;
2358 case FORMAT_R8I:
2359 case FORMAT_R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002360 if(rgbaWriteMask & 0x00000001)
2361 {
2362 buffer = cBuffer + x;
2363
2364 UInt xyzw, packedCol;
2365
Alexis Hetu827d07a2016-09-15 17:54:05 -04002366 xyzw = UInt(*Pointer<UShort>(buffer)) & 0xFFFF;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002367 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
Alexis Hetu827d07a2016-09-15 17:54:05 -04002368 xyzw |= UInt(*Pointer<UShort>(buffer)) << 16;
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002369
2370 Short4 tmpCol = Short4(As<Int4>(oC.x));
2371 if(state.targetFormat[index] == FORMAT_R8I)
2372 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002373 tmpCol = As<Short4>(PackSigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002374 }
2375 else
2376 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002377 tmpCol = As<Short4>(PackUnsigned(tmpCol, tmpCol));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002378 }
2379 packedCol = Extract(As<Int2>(tmpCol), 0);
2380
2381 packedCol = (packedCol & *Pointer<UInt>(constants + OFFSET(Constants, maskB4Q) + 8 * xMask)) |
2382 (xyzw & *Pointer<UInt>(constants + OFFSET(Constants, invMaskB4Q) + 8 * xMask));
2383
2384 *Pointer<UShort>(buffer) = UShort(packedCol >> 16);
2385 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2386 *Pointer<UShort>(buffer) = UShort(packedCol);
2387 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002388 break;
John Bauman89401822014-05-06 15:04:28 -04002389 case FORMAT_G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002390 case FORMAT_G32R32I:
2391 case FORMAT_G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002392 buffer = cBuffer + 8 * x;
2393
2394 value = *Pointer<Float4>(buffer);
2395
2396 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2397 {
2398 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002399 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002400 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002401 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002402 }
2403
Nicolas Capens4f172c72016-01-13 08:34:30 -05002404 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ01X) + xMask * 16, 16));
2405 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ01X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002406 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2407 *Pointer<Float4>(buffer) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002408
Nicolas Capens4f172c72016-01-13 08:34:30 -05002409 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002410
2411 value = *Pointer<Float4>(buffer);
2412
2413 if((rgbaWriteMask & 0x00000003) != 0x00000003)
2414 {
2415 Float4 masked;
2416
2417 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002418 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[rgbaWriteMask & 0x3][0])));
Nicolas Capens7d9bdcb2015-05-26 02:09:27 -04002419 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,maskD01X[~rgbaWriteMask & 0x3][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002420 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002421 }
2422
Nicolas Capens4f172c72016-01-13 08:34:30 -05002423 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskQ23X) + xMask * 16, 16));
2424 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskQ23X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002425 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2426 *Pointer<Float4>(buffer) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002427 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002428 case FORMAT_G16R16I:
2429 case FORMAT_G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002430 if((rgbaWriteMask & 0x00000003) != 0x0)
2431 {
2432 buffer = cBuffer + 4 * x;
2433
2434 UInt2 rgbaMask;
2435 UShort4 packedCol = UShort4(As<Int4>(oC.x));
2436 UShort4 value = *Pointer<UShort4>(buffer);
2437 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2438 if((rgbaWriteMask & 0x3) != 0x3)
2439 {
2440 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask & 0x3][0]));
2441 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2442 mergedMask &= rgbaMask;
2443 }
2444 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2445
2446 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2447
2448 packedCol = UShort4(As<Int4>(oC.y));
2449 value = *Pointer<UShort4>(buffer);
2450 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2451 if((rgbaWriteMask & 0x3) != 0x3)
2452 {
2453 mergedMask &= rgbaMask;
2454 }
2455 *Pointer<UInt2>(buffer) = (As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(value) & ~mergedMask);
2456 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002457 break;
2458 case FORMAT_G8R8I:
2459 case FORMAT_G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002460 if((rgbaWriteMask & 0x00000003) != 0x0)
2461 {
2462 buffer = cBuffer + 2 * x;
2463
2464 Int2 xyzw, packedCol;
2465
2466 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 0);
2467 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2468 xyzw = Insert(xyzw, *Pointer<Int>(buffer), 1);
2469
2470 if(state.targetFormat[index] == FORMAT_G8R8I)
2471 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002472 packedCol = As<Int2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002473 }
2474 else
2475 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002476 packedCol = As<Int2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002477 }
2478
2479 UInt2 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q) + xMask * 8);
2480 if((rgbaWriteMask & 0x3) != 0x3)
2481 {
2482 Int tmpMask = *Pointer<Int>(constants + OFFSET(Constants, maskB4Q[5 * (rgbaWriteMask & 0x3)][0]));
2483 UInt2 rgbaMask = As<UInt2>(Int2(tmpMask, tmpMask));
2484 mergedMask &= rgbaMask;
2485 }
2486
2487 packedCol = As<Int2>((As<UInt2>(packedCol) & mergedMask) | (As<UInt2>(xyzw) & ~mergedMask));
2488
2489 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 1));
2490 buffer -= *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2491 *Pointer<UInt>(buffer) = As<UInt>(Extract(packedCol, 0));
2492 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002493 break;
Alexis Hetudbd1a8e2016-04-13 11:40:30 -04002494 case FORMAT_X32B32G32R32F:
John Bauman89401822014-05-06 15:04:28 -04002495 case FORMAT_A32B32G32R32F:
Alexis Hetu1abb6382016-02-08 11:21:16 -05002496 case FORMAT_A32B32G32R32I:
2497 case FORMAT_A32B32G32R32UI:
John Bauman89401822014-05-06 15:04:28 -04002498 buffer = cBuffer + 16 * x;
2499
2500 {
2501 value = *Pointer<Float4>(buffer, 16);
2502
2503 if(rgbaWriteMask != 0x0000000F)
2504 {
2505 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002506 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2507 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002508 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002509 }
Nicolas Capens05b3d662016-02-25 23:58:33 -05002510
Nicolas Capens4f172c72016-01-13 08:34:30 -05002511 oC.x = As<Float4>(As<Int4>(oC.x) & *Pointer<Int4>(constants + OFFSET(Constants,maskX0X) + xMask * 16, 16));
2512 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX0X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002513 oC.x = As<Float4>(As<Int4>(oC.x) | As<Int4>(value));
2514 *Pointer<Float4>(buffer, 16) = oC.x;
John Bauman89401822014-05-06 15:04:28 -04002515 }
2516
2517 {
2518 value = *Pointer<Float4>(buffer + 16, 16);
2519
2520 if(rgbaWriteMask != 0x0000000F)
Nicolas Capens05b3d662016-02-25 23:58:33 -05002521 {
John Bauman89401822014-05-06 15:04:28 -04002522 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002523 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2524 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002525 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002526 }
2527
Nicolas Capens4f172c72016-01-13 08:34:30 -05002528 oC.y = As<Float4>(As<Int4>(oC.y) & *Pointer<Int4>(constants + OFFSET(Constants,maskX1X) + xMask * 16, 16));
2529 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX1X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002530 oC.y = As<Float4>(As<Int4>(oC.y) | As<Int4>(value));
2531 *Pointer<Float4>(buffer + 16, 16) = oC.y;
John Bauman89401822014-05-06 15:04:28 -04002532 }
2533
Nicolas Capens4f172c72016-01-13 08:34:30 -05002534 buffer += *Pointer<Int>(data + OFFSET(DrawData,colorPitchB[index]));
John Bauman89401822014-05-06 15:04:28 -04002535
2536 {
2537 value = *Pointer<Float4>(buffer, 16);
2538
2539 if(rgbaWriteMask != 0x0000000F)
2540 {
2541 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002542 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2543 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002544 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002545 }
2546
Nicolas Capens4f172c72016-01-13 08:34:30 -05002547 oC.z = As<Float4>(As<Int4>(oC.z) & *Pointer<Int4>(constants + OFFSET(Constants,maskX2X) + xMask * 16, 16));
2548 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX2X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002549 oC.z = As<Float4>(As<Int4>(oC.z) | As<Int4>(value));
2550 *Pointer<Float4>(buffer, 16) = oC.z;
John Bauman89401822014-05-06 15:04:28 -04002551 }
2552
2553 {
Nicolas Capens400667e2017-03-29 14:40:14 -04002554 value = *Pointer<Float4>(buffer + 16, 16);
John Bauman89401822014-05-06 15:04:28 -04002555
2556 if(rgbaWriteMask != 0x0000000F)
2557 {
2558 Float4 masked = value;
Nicolas Capens4f172c72016-01-13 08:34:30 -05002559 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskD4X[rgbaWriteMask][0])));
2560 masked = As<Float4>(As<Int4>(masked) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskD4X[rgbaWriteMask][0])));
John Bauman19bac1e2014-05-06 15:23:49 -04002561 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(masked));
John Bauman89401822014-05-06 15:04:28 -04002562 }
2563
Nicolas Capens4f172c72016-01-13 08:34:30 -05002564 oC.w = As<Float4>(As<Int4>(oC.w) & *Pointer<Int4>(constants + OFFSET(Constants,maskX3X) + xMask * 16, 16));
2565 value = As<Float4>(As<Int4>(value) & *Pointer<Int4>(constants + OFFSET(Constants,invMaskX3X) + xMask * 16, 16));
John Bauman19bac1e2014-05-06 15:23:49 -04002566 oC.w = As<Float4>(As<Int4>(oC.w) | As<Int4>(value));
2567 *Pointer<Float4>(buffer + 16, 16) = oC.w;
John Bauman89401822014-05-06 15:04:28 -04002568 }
2569 break;
Alexis Hetubd7117d2016-06-02 10:35:59 -04002570 case FORMAT_A16B16G16R16I:
2571 case FORMAT_A16B16G16R16UI:
Meng-Lin Wu1978ff72016-06-20 15:08:42 -04002572 if((rgbaWriteMask & 0x0000000F) != 0x0)
2573 {
2574 buffer = cBuffer + 8 * x;
2575
2576 UInt4 rgbaMask;
2577 UShort8 value = *Pointer<UShort8>(buffer);
2578 UShort8 packedCol = UShort8(UShort4(As<Int4>(oC.x)), UShort4(As<Int4>(oC.y)));
2579 UInt4 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ01X) + xMask * 16);
2580 if((rgbaWriteMask & 0xF) != 0xF)
2581 {
2582 UInt2 tmpMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskW4Q[rgbaWriteMask][0]));
2583 rgbaMask = UInt4(tmpMask, tmpMask);
2584 mergedMask &= rgbaMask;
2585 }
2586 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2587
2588 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2589
2590 value = *Pointer<UShort8>(buffer);
2591 packedCol = UShort8(UShort4(As<Int4>(oC.z)), UShort4(As<Int4>(oC.w)));
2592 mergedMask = *Pointer<UInt4>(constants + OFFSET(Constants, maskQ23X) + xMask * 16);
2593 if((rgbaWriteMask & 0xF) != 0xF)
2594 {
2595 mergedMask &= rgbaMask;
2596 }
2597 *Pointer<UInt4>(buffer) = (As<UInt4>(packedCol) & mergedMask) | (As<UInt4>(value) & ~mergedMask);
2598 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002599 break;
2600 case FORMAT_A8B8G8R8I:
2601 case FORMAT_A8B8G8R8UI:
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002602 if((rgbaWriteMask & 0x0000000F) != 0x0)
2603 {
2604 UInt2 value, packedCol, mergedMask;
2605
2606 buffer = cBuffer + 4 * x;
2607
2608 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2609 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002610 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002611 }
2612 else
2613 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002614 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.x)), Short4(As<Int4>(oC.y))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002615 }
2616 value = *Pointer<UInt2>(buffer, 16);
2617 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD01Q) + xMask * 8);
2618 if(rgbaWriteMask != 0xF)
2619 {
2620 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2621 }
2622 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2623
2624 buffer += *Pointer<Int>(data + OFFSET(DrawData, colorPitchB[index]));
2625
2626 if(state.targetFormat[index] == FORMAT_A8B8G8R8I)
2627 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002628 packedCol = As<UInt2>(PackSigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002629 }
2630 else
2631 {
Nicolas Capens33438a62017-09-27 11:47:35 -04002632 packedCol = As<UInt2>(PackUnsigned(Short4(As<Int4>(oC.z)), Short4(As<Int4>(oC.w))));
Meng-Lin Wu92a95e92016-06-16 16:56:15 -04002633 }
2634 value = *Pointer<UInt2>(buffer, 16);
2635 mergedMask = *Pointer<UInt2>(constants + OFFSET(Constants, maskD23Q) + xMask * 8);
2636 if(rgbaWriteMask != 0xF)
2637 {
2638 mergedMask &= *Pointer<UInt2>(constants + OFFSET(Constants, maskB4Q[rgbaWriteMask][0]));
2639 }
2640 *Pointer<UInt2>(buffer) = (packedCol & mergedMask) | (value & ~mergedMask);
2641 }
Alexis Hetubd7117d2016-06-02 10:35:59 -04002642 break;
John Bauman89401822014-05-06 15:04:28 -04002643 default:
2644 ASSERT(false);
2645 }
2646 }
2647
John Bauman89401822014-05-06 15:04:28 -04002648 UShort4 PixelRoutine::convertFixed16(Float4 &cf, bool saturate)
2649 {
John Bauman19bac1e2014-05-06 15:23:49 -04002650 return UShort4(cf * Float4(0xFFFF), saturate);
John Bauman89401822014-05-06 15:04:28 -04002651 }
2652
Nicolas Capens4f172c72016-01-13 08:34:30 -05002653 void PixelRoutine::sRGBtoLinear16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002654 {
John Bauman19bac1e2014-05-06 15:23:49 -04002655 c.x = As<UShort4>(c.x) >> 4;
2656 c.y = As<UShort4>(c.y) >> 4;
2657 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002658
Nicolas Capens4f172c72016-01-13 08:34:30 -05002659 sRGBtoLinear12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002660 }
2661
Nicolas Capens4f172c72016-01-13 08:34:30 -05002662 void PixelRoutine::sRGBtoLinear12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002663 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002664 Pointer<Byte> LUT = constants + OFFSET(Constants,sRGBtoLinear12_16);
John Bauman89401822014-05-06 15:04:28 -04002665
John Bauman19bac1e2014-05-06 15:23:49 -04002666 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2667 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2668 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2669 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002670
John Bauman19bac1e2014-05-06 15:23:49 -04002671 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2672 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2673 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2674 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002675
John Bauman19bac1e2014-05-06 15:23:49 -04002676 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2677 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2678 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2679 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002680 }
2681
Nicolas Capens4f172c72016-01-13 08:34:30 -05002682 void PixelRoutine::linearToSRGB16_12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002683 {
John Bauman19bac1e2014-05-06 15:23:49 -04002684 c.x = As<UShort4>(c.x) >> 4;
2685 c.y = As<UShort4>(c.y) >> 4;
2686 c.z = As<UShort4>(c.z) >> 4;
John Bauman89401822014-05-06 15:04:28 -04002687
Nicolas Capens4f172c72016-01-13 08:34:30 -05002688 linearToSRGB12_16(c);
John Bauman89401822014-05-06 15:04:28 -04002689 }
2690
Nicolas Capens4f172c72016-01-13 08:34:30 -05002691 void PixelRoutine::linearToSRGB12_16(Vector4s &c)
John Bauman89401822014-05-06 15:04:28 -04002692 {
Nicolas Capens4f172c72016-01-13 08:34:30 -05002693 Pointer<Byte> LUT = constants + OFFSET(Constants,linearToSRGB12_16);
John Bauman89401822014-05-06 15:04:28 -04002694
John Bauman19bac1e2014-05-06 15:23:49 -04002695 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 0))), 0);
2696 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 1))), 1);
2697 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 2))), 2);
2698 c.x = Insert(c.x, *Pointer<Short>(LUT + 2 * Int(Extract(c.x, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002699
John Bauman19bac1e2014-05-06 15:23:49 -04002700 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 0))), 0);
2701 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 1))), 1);
2702 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 2))), 2);
2703 c.y = Insert(c.y, *Pointer<Short>(LUT + 2 * Int(Extract(c.y, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002704
John Bauman19bac1e2014-05-06 15:23:49 -04002705 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 0))), 0);
2706 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 1))), 1);
2707 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 2))), 2);
2708 c.z = Insert(c.z, *Pointer<Short>(LUT + 2 * Int(Extract(c.z, 3))), 3);
John Bauman89401822014-05-06 15:04:28 -04002709 }
2710
John Bauman89401822014-05-06 15:04:28 -04002711 Float4 PixelRoutine::sRGBtoLinear(const Float4 &x) // Approximates x^2.2
2712 {
2713 Float4 linear = x * x;
2714 linear = linear * Float4(0.73f) + linear * x * Float4(0.27f);
2715
2716 return Min(Max(linear, Float4(0.0f)), Float4(1.0f));
2717 }
2718
John Bauman19bac1e2014-05-06 15:23:49 -04002719 bool PixelRoutine::colorUsed()
2720 {
2721 return state.colorWriteMask || state.alphaTestActive() || state.shaderContainsKill;
2722 }
John Bauman89401822014-05-06 15:04:28 -04002723}