blob: cf54f293100d78c90adde690f8e8755d4d244f5c [file] [log] [blame]
// Copyright 2022 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#ifndef rr_SIMD_hpp
16#define rr_SIMD_hpp
17
18#include "Reactor.hpp"
19
Nicolas Capensbe30aa62022-08-10 11:34:51 -040020#include <functional>
Nicolas Capens942c6392022-06-30 00:20:40 -040021#include <vector>
22
Nicolas Capens44f94692022-06-20 23:15:46 -040023namespace rr {
24
25namespace scalar {
26using Int = rr::Int;
27using UInt = rr::UInt;
28using Float = rr::Float;
Nicolas Capensd1116fa2022-06-29 10:39:18 -040029template<class T>
30using Pointer = rr::Pointer<T>;
Nicolas Capens44f94692022-06-20 23:15:46 -040031} // namespace scalar
32
Nicolas Capens0ed3fa62022-06-22 16:48:07 -040033namespace packed {
34using Int4 = rr::Int4;
35using UInt4 = rr::UInt4;
36using Float4 = rr::Float4;
37} // namespace packed
38
Nicolas Capens44f94692022-06-20 23:15:46 -040039namespace SIMD {
Nicolas Capens3b0ad202022-06-02 15:02:31 -040040
// Number of lanes of the SIMD types (the value returned by their
// element_count()). Only declared here; the definition lives elsewhere.
extern const int Width;

// Forward declarations: the classes below refer to each other in their
// constructors and member functions.
class Int;
class UInt;
class Float;
class Pointer;
Nicolas Capens44f94692022-06-20 23:15:46 -040047
Nicolas Capens7e960682022-06-30 15:53:27 -040048class Int : public LValue<SIMD::Int>,
49 public XYZW<SIMD::Int> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics.
Nicolas Capens3b0ad202022-06-02 15:02:31 -040050{
51public:
Nicolas Capens44f94692022-06-20 23:15:46 -040052 explicit Int(RValue<SIMD::Float> cast);
53
54 Int();
55 Int(int broadcast);
Nicolas Capensd1116fa2022-06-29 10:39:18 -040056 Int(int x, int y, int z, int w);
57 Int(std::vector<int> v);
Nicolas Capensbe30aa62022-08-10 11:34:51 -040058 Int(std::function<int(int)> LaneValueProducer);
Nicolas Capens44f94692022-06-20 23:15:46 -040059 Int(RValue<SIMD::Int> rhs);
60 Int(const Int &rhs);
61 Int(const Reference<SIMD::Int> &rhs);
62 Int(RValue<SIMD::UInt> rhs);
63 Int(const UInt &rhs);
64 Int(const Reference<SIMD::UInt> &rhs);
65 Int(RValue<scalar::Int> rhs);
66 Int(const scalar::Int &rhs);
67 Int(const Reference<scalar::Int> &rhs);
68
Nicolas Capensd1116fa2022-06-29 10:39:18 -040069 template<int T>
70 Int(const SwizzleMask1<packed::Int4, T> &rhs);
71
Nicolas Capens44f94692022-06-20 23:15:46 -040072 RValue<SIMD::Int> operator=(int broadcast);
73 RValue<SIMD::Int> operator=(RValue<SIMD::Int> rhs);
74 RValue<SIMD::Int> operator=(const Int &rhs);
75 RValue<SIMD::Int> operator=(const Reference<SIMD::Int> &rhs);
Nicolas Capens3b0ad202022-06-02 15:02:31 -040076
77 static Type *type();
Nicolas Capens442e25b2022-06-22 12:02:52 -040078 static int element_count() { return SIMD::Width; }
Nicolas Capens3b0ad202022-06-02 15:02:31 -040079};
80
Nicolas Capens7e960682022-06-30 15:53:27 -040081class UInt : public LValue<SIMD::UInt>,
82 public XYZW<SIMD::UInt> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics.
Nicolas Capens44f94692022-06-20 23:15:46 -040083{
84public:
85 explicit UInt(RValue<SIMD::Float> cast);
Nicolas Capens3b0ad202022-06-02 15:02:31 -040086
Nicolas Capens44f94692022-06-20 23:15:46 -040087 UInt();
88 UInt(int broadcast);
Nicolas Capensd1116fa2022-06-29 10:39:18 -040089 UInt(int x, int y, int z, int w);
90 UInt(std::vector<int> v);
Nicolas Capensbe30aa62022-08-10 11:34:51 -040091 UInt(std::function<int(int)> LaneValueProducer);
Nicolas Capens44f94692022-06-20 23:15:46 -040092 UInt(RValue<SIMD::UInt> rhs);
93 UInt(const UInt &rhs);
94 UInt(const Reference<SIMD::UInt> &rhs);
95 UInt(RValue<SIMD::Int> rhs);
96 UInt(const Int &rhs);
97 UInt(const Reference<SIMD::Int> &rhs);
98 UInt(RValue<scalar::UInt> rhs);
99 UInt(const scalar::UInt &rhs);
100 UInt(const Reference<scalar::UInt> &rhs);
101
102 RValue<SIMD::UInt> operator=(RValue<SIMD::UInt> rhs);
103 RValue<SIMD::UInt> operator=(const UInt &rhs);
104 RValue<SIMD::UInt> operator=(const Reference<SIMD::UInt> &rhs);
105
106 static Type *type();
107 static int element_count() { return SIMD::Width; }
108};
109
Nicolas Capens7e960682022-06-30 15:53:27 -0400110class Float : public LValue<SIMD::Float>,
111 public XYZW<SIMD::Float> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics.
Nicolas Capens44f94692022-06-20 23:15:46 -0400112{
113public:
114 explicit Float(RValue<SIMD::Int> cast);
115 explicit Float(RValue<SIMD::UInt> cast);
116
117 Float();
118 Float(float broadcast);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400119 Float(float x, float y, float z, float w);
120 Float(std::vector<float> v);
Nicolas Capensbe30aa62022-08-10 11:34:51 -0400121 Float(std::function<float(int)> LaneValueProducer);
Nicolas Capens44f94692022-06-20 23:15:46 -0400122 Float(RValue<SIMD::Float> rhs);
123 Float(const Float &rhs);
124 Float(const Reference<SIMD::Float> &rhs);
125 Float(RValue<scalar::Float> rhs);
126 Float(const scalar::Float &rhs);
127 Float(const Reference<scalar::Float> &rhs);
128
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400129 Float(RValue<packed::Float4> rhs);
130 RValue<SIMD::Float> operator=(RValue<packed::Float4> rhs);
131 template<int T>
132 Float(const SwizzleMask1<packed::Float4, T> &rhs);
133
Nicolas Capens44f94692022-06-20 23:15:46 -0400134 RValue<SIMD::Float> operator=(float broadcast);
135 RValue<SIMD::Float> operator=(RValue<SIMD::Float> rhs);
136 RValue<SIMD::Float> operator=(const Float &rhs);
137 RValue<SIMD::Float> operator=(const Reference<SIMD::Float> &rhs);
138 RValue<SIMD::Float> operator=(RValue<scalar::Float> rhs);
139 RValue<SIMD::Float> operator=(const scalar::Float &rhs);
140 RValue<SIMD::Float> operator=(const Reference<scalar::Float> &rhs);
141
142 static SIMD::Float infinity();
143
144 static Type *type();
145 static int element_count() { return SIMD::Width; }
146};
147
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400148class Pointer
Nicolas Capens01e41802022-06-29 23:12:57 -0400149{
Nicolas Capens942c6392022-06-30 00:20:40 -0400150public:
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400151 Pointer(scalar::Pointer<Byte> base, scalar::Int limit);
152 Pointer(scalar::Pointer<Byte> base, unsigned int limit);
153 Pointer(scalar::Pointer<Byte> base, scalar::Int limit, SIMD::Int offset);
154 Pointer(scalar::Pointer<Byte> base, unsigned int limit, SIMD::Int offset);
155 Pointer(std::vector<scalar::Pointer<Byte>> pointers);
156 explicit Pointer(SIMD::UInt cast); // Cast from 32-bit integers to 32-bit pointers
157 explicit Pointer(SIMD::UInt castLow, SIMD::UInt castHight); // Cast from pairs of 32-bit integers to 64-bit pointers
Nicolas Capens01e41802022-06-29 23:12:57 -0400158
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400159 Pointer &operator+=(SIMD::Int i);
160 Pointer operator+(SIMD::Int i);
161 Pointer &operator+=(int i);
162 Pointer operator+(int i);
Nicolas Capens01e41802022-06-29 23:12:57 -0400163
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400164 SIMD::Int offsets() const;
Nicolas Capens01e41802022-06-29 23:12:57 -0400165
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400166 SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const;
Nicolas Capens01e41802022-06-29 23:12:57 -0400167
168 bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const;
169
170 Int limit() const;
171
Nicolas Capens43183d82022-06-30 02:28:48 -0400172 // Returns true if all offsets are compile-time static and sequential
Nicolas Capens01e41802022-06-29 23:12:57 -0400173 // (N+0*step, N+1*step, N+2*step, N+3*step)
Nicolas Capens01e41802022-06-29 23:12:57 -0400174 bool hasStaticSequentialOffsets(unsigned int step) const;
175
Nicolas Capens43183d82022-06-30 02:28:48 -0400176 // Returns true if all offsets are compile-time static and equal
Nicolas Capens01e41802022-06-29 23:12:57 -0400177 // (N, N, N, N)
178 bool hasStaticEqualOffsets() const;
179
180 template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400181 inline T Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float));
Nicolas Capens01e41802022-06-29 23:12:57 -0400182
183 template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400184 inline void Store(T val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
Nicolas Capens01e41802022-06-29 23:12:57 -0400185
186 template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400187 inline void Store(RValue<T> val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed);
Nicolas Capens01e41802022-06-29 23:12:57 -0400188
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400189 scalar::Pointer<Byte> getUniformPointer() const;
190 scalar::Pointer<Byte> getPointerForLane(int lane) const;
191 static Pointer IfThenElse(SIMD::Int condition, const Pointer &lhs, const Pointer &rhs);
Nicolas Capens01e41802022-06-29 23:12:57 -0400192
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400193 void castTo(SIMD::UInt &bits) const; // Cast from 32-bit pointers to 32-bit integers
194 void castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const; // Cast from 64-bit pointers to pairs of 32-bit integers
Nicolas Capens01e41802022-06-29 23:12:57 -0400195
196#ifdef ENABLE_RR_PRINT
197 std::vector<rr::Value *> getPrintValues() const;
198#endif
199
200private:
201 // Base address for the pointer, common across all lanes.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400202 scalar::Pointer<Byte> base;
Nicolas Capens01e41802022-06-29 23:12:57 -0400203 // Per-lane address for dealing with non-uniform data
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400204 std::vector<scalar::Pointer<Byte>> pointers;
Nicolas Capens01e41802022-06-29 23:12:57 -0400205
206public:
207 // Upper (non-inclusive) limit for offsets from base.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400208 scalar::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero.
Nicolas Capens942c6392022-06-30 00:20:40 -0400209 unsigned int staticLimit = 0;
Nicolas Capens01e41802022-06-29 23:12:57 -0400210
211 // Per lane offsets from base.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400212 SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero.
Nicolas Capens942c6392022-06-30 00:20:40 -0400213 std::vector<int32_t> staticOffsets;
Nicolas Capens01e41802022-06-29 23:12:57 -0400214
Nicolas Capens942c6392022-06-30 00:20:40 -0400215 bool hasDynamicLimit = false; // True if dynamicLimit is non-zero.
216 bool hasDynamicOffsets = false; // True if any dynamicOffsets are non-zero.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400217 bool isBasePlusOffset = false; // True if this uses base+offset. False if this is a collection of Pointers
Nicolas Capens01e41802022-06-29 23:12:57 -0400218};
219
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400220} // namespace SIMD
221
Nicolas Capens44f94692022-06-20 23:15:46 -0400222RValue<SIMD::Int> operator+(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
223RValue<SIMD::Int> operator-(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
224RValue<SIMD::Int> operator*(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
225RValue<SIMD::Int> operator/(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
226RValue<SIMD::Int> operator%(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
227RValue<SIMD::Int> operator&(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
228RValue<SIMD::Int> operator|(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
229RValue<SIMD::Int> operator^(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
230RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, unsigned char rhs);
231RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, unsigned char rhs);
232RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
233RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
234RValue<SIMD::Int> operator+=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
235RValue<SIMD::Int> operator-=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
236RValue<SIMD::Int> operator*=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
237// RValue<SIMD::Int> operator/=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
238// RValue<SIMD::Int> operator%=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
239RValue<SIMD::Int> operator&=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
240RValue<SIMD::Int> operator|=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
241RValue<SIMD::Int> operator^=(SIMD::Int &lhs, RValue<SIMD::Int> rhs);
242RValue<SIMD::Int> operator<<=(SIMD::Int &lhs, unsigned char rhs);
243RValue<SIMD::Int> operator>>=(SIMD::Int &lhs, unsigned char rhs);
244RValue<SIMD::Int> operator+(RValue<SIMD::Int> val);
245RValue<SIMD::Int> operator-(RValue<SIMD::Int> val);
246RValue<SIMD::Int> operator~(RValue<SIMD::Int> val);
247// RValue<SIMD::Int> operator++(SIMD::Int &val, int); // Post-increment
248// const Int &operator++(SIMD::Int &val); // Pre-increment
249// RValue<SIMD::Int> operator--(SIMD::Int &val, int); // Post-decrement
250// const Int &operator--(SIMD::Int &val); // Pre-decrement
251// RValue<Bool> operator<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
252// RValue<Bool> operator<=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
253// RValue<Bool> operator>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
254// RValue<Bool> operator>=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
255// RValue<Bool> operator!=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
256// RValue<Bool> operator==(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs);
257
258RValue<SIMD::Int> CmpEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
259RValue<SIMD::Int> CmpLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
260RValue<SIMD::Int> CmpLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
261RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
262RValue<SIMD::Int> CmpNLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
263RValue<SIMD::Int> CmpNLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
264inline RValue<SIMD::Int> CmpGT(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
265{
266 return CmpNLE(x, y);
267}
268inline RValue<SIMD::Int> CmpGE(RValue<SIMD::Int> x, RValue<SIMD::Int> y)
269{
270 return CmpNLT(x, y);
271}
272RValue<SIMD::Int> Abs(RValue<SIMD::Int> x);
273RValue<SIMD::Int> Max(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
274RValue<SIMD::Int> Min(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
275// Convert to nearest integer. If a converted value is outside of the integer
276// range, the returned result is undefined.
277RValue<SIMD::Int> RoundInt(RValue<SIMD::Float> cast);
278// Rounds to the nearest integer, but clamps very large values to an
279// implementation-dependent range.
280// Specifically, on x86, values larger than 2147483583.0 are converted to
281// 2147483583 (0x7FFFFFBF) instead of producing 0x80000000.
282RValue<SIMD::Int> RoundIntClamped(RValue<SIMD::Float> cast);
283RValue<scalar::Int> Extract(RValue<SIMD::Int> val, int i);
284RValue<SIMD::Int> Insert(RValue<SIMD::Int> val, RValue<scalar::Int> element, int i);
Nicolas Capens0ed3fa62022-06-22 16:48:07 -0400285RValue<packed::Int4> Extract128(RValue<SIMD::Int> val, int i);
286RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<packed::Int4> element, int i);
Nicolas Capens44f94692022-06-20 23:15:46 -0400287
288RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
289RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
290RValue<SIMD::UInt> operator*(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
291RValue<SIMD::UInt> operator/(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
292RValue<SIMD::UInt> operator%(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
293RValue<SIMD::UInt> operator&(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
294RValue<SIMD::UInt> operator|(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
295RValue<SIMD::UInt> operator^(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
296RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, unsigned char rhs);
297RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, unsigned char rhs);
298RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
299RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
300RValue<SIMD::UInt> operator+=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
301RValue<SIMD::UInt> operator-=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
302RValue<SIMD::UInt> operator*=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
303// RValue<SIMD::UInt> operator/=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
304// RValue<SIMD::UInt> operator%=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
305RValue<SIMD::UInt> operator&=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
306RValue<SIMD::UInt> operator|=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
307RValue<SIMD::UInt> operator^=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs);
308RValue<SIMD::UInt> operator<<=(SIMD::UInt &lhs, unsigned char rhs);
309RValue<SIMD::UInt> operator>>=(SIMD::UInt &lhs, unsigned char rhs);
310RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> val);
311RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> val);
312RValue<SIMD::UInt> operator~(RValue<SIMD::UInt> val);
313// RValue<SIMD::UInt> operator++(SIMD::UInt &val, int); // Post-increment
314// const UInt &operator++(SIMD::UInt &val); // Pre-increment
315// RValue<SIMD::UInt> operator--(SIMD::UInt &val, int); // Post-decrement
316// const UInt &operator--(SIMD::UInt &val); // Pre-decrement
317// RValue<Bool> operator<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
318// RValue<Bool> operator<=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
319// RValue<Bool> operator>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
320// RValue<Bool> operator>=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
321// RValue<Bool> operator!=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
322// RValue<Bool> operator==(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs);
323
324RValue<SIMD::UInt> CmpEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
325RValue<SIMD::UInt> CmpLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
326RValue<SIMD::UInt> CmpLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
327RValue<SIMD::UInt> CmpNEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
328RValue<SIMD::UInt> CmpNLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
329RValue<SIMD::UInt> CmpNLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
330inline RValue<SIMD::UInt> CmpGT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
331{
332 return CmpNLE(x, y);
333}
334inline RValue<SIMD::UInt> CmpGE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y)
335{
336 return CmpNLT(x, y);
337}
338RValue<SIMD::UInt> Max(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
339RValue<SIMD::UInt> Min(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
340RValue<scalar::UInt> Extract(RValue<SIMD::UInt> val, int i);
341RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> val, RValue<scalar::UInt> element, int i);
Nicolas Capens0ed3fa62022-06-22 16:48:07 -0400342RValue<packed::UInt4> Extract128(RValue<SIMD::UInt> val, int i);
343RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<packed::UInt4> element, int i);
Nicolas Capens44f94692022-06-20 23:15:46 -0400344// RValue<SIMD::UInt> RoundInt(RValue<SIMD::Float> cast);
345
346RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
347RValue<SIMD::Float> operator-(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
348RValue<SIMD::Float> operator*(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
349RValue<SIMD::Float> operator/(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
350RValue<SIMD::Float> operator%(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs);
351RValue<SIMD::Float> operator+=(SIMD::Float &lhs, RValue<SIMD::Float> rhs);
352RValue<SIMD::Float> operator-=(SIMD::Float &lhs, RValue<SIMD::Float> rhs);
353RValue<SIMD::Float> operator*=(SIMD::Float &lhs, RValue<SIMD::Float> rhs);
354RValue<SIMD::Float> operator/=(SIMD::Float &lhs, RValue<SIMD::Float> rhs);
355RValue<SIMD::Float> operator%=(SIMD::Float &lhs, RValue<SIMD::Float> rhs);
356RValue<SIMD::Float> operator+(RValue<SIMD::Float> val);
357RValue<SIMD::Float> operator-(RValue<SIMD::Float> val);
358
359// Computes `x * y + z`, which may be fused into one operation to produce a higher-precision result.
360RValue<SIMD::Float> MulAdd(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z);
361// Computes a fused `x * y + z` operation. Caps::fmaIsFast indicates whether it emits an FMA instruction.
362RValue<SIMD::Float> FMA(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z);
363
364RValue<SIMD::Float> Abs(RValue<SIMD::Float> x);
365RValue<SIMD::Float> Max(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
366RValue<SIMD::Float> Min(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
367
368RValue<SIMD::Float> Rcp(RValue<SIMD::Float> x, bool relaxedPrecision, bool exactAtPow2 = false);
369RValue<SIMD::Float> RcpSqrt(RValue<SIMD::Float> x, bool relaxedPrecision);
370RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x);
371RValue<SIMD::Float> Insert(RValue<SIMD::Float> val, RValue<rr ::Float> element, int i);
372RValue<rr ::Float> Extract(RValue<SIMD::Float> x, int i);
Nicolas Capens0ed3fa62022-06-22 16:48:07 -0400373RValue<packed::Float4> Extract128(RValue<SIMD::Float> val, int i);
374RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<packed::Float4> element, int i);
Nicolas Capens44f94692022-06-20 23:15:46 -0400375
376// Ordered comparison functions
377RValue<SIMD::Int> CmpEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
378RValue<SIMD::Int> CmpLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
379RValue<SIMD::Int> CmpLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
380RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
381RValue<SIMD::Int> CmpNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
382RValue<SIMD::Int> CmpNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
383inline RValue<SIMD::Int> CmpGT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
384{
385 return CmpNLE(x, y);
386}
387inline RValue<SIMD::Int> CmpGE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
388{
389 return CmpNLT(x, y);
390}
391
392// Unordered comparison functions
393RValue<SIMD::Int> CmpUEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
394RValue<SIMD::Int> CmpULT(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
395RValue<SIMD::Int> CmpULE(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
396RValue<SIMD::Int> CmpUNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
397RValue<SIMD::Int> CmpUNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
398RValue<SIMD::Int> CmpUNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
399inline RValue<SIMD::Int> CmpUGT(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
400{
401 return CmpUNLE(x, y);
402}
403inline RValue<SIMD::Int> CmpUGE(RValue<SIMD::Float> x, RValue<SIMD::Float> y)
404{
405 return CmpUNLT(x, y);
406}
407
408RValue<SIMD::Int> IsInf(RValue<SIMD::Float> x);
409RValue<SIMD::Int> IsNan(RValue<SIMD::Float> x);
410RValue<SIMD::Float> Round(RValue<SIMD::Float> x);
411RValue<SIMD::Float> Trunc(RValue<SIMD::Float> x);
412RValue<SIMD::Float> Frac(RValue<SIMD::Float> x);
413RValue<SIMD::Float> Floor(RValue<SIMD::Float> x);
414RValue<SIMD::Float> Ceil(RValue<SIMD::Float> x);
415
416// Trigonometric functions
417RValue<SIMD::Float> Sin(RValue<SIMD::Float> x);
418RValue<SIMD::Float> Cos(RValue<SIMD::Float> x);
419RValue<SIMD::Float> Tan(RValue<SIMD::Float> x);
420RValue<SIMD::Float> Asin(RValue<SIMD::Float> x);
421RValue<SIMD::Float> Acos(RValue<SIMD::Float> x);
422RValue<SIMD::Float> Atan(RValue<SIMD::Float> x);
423RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x);
424RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x);
425RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x);
426RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x);
427RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x);
428RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x);
429RValue<SIMD::Float> Atan2(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
430
431// Exponential functions
432RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y);
433RValue<SIMD::Float> Exp(RValue<SIMD::Float> x);
434RValue<SIMD::Float> Log(RValue<SIMD::Float> x);
435RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x);
436RValue<SIMD::Float> Log2(RValue<SIMD::Float> x);
437
Nicolas Capens0e34c252022-06-30 14:02:00 -0400438RValue<Int> SignMask(RValue<SIMD::Int> x);
439RValue<SIMD::UInt> Ctlz(RValue<SIMD::UInt> x, bool isZeroUndef);
440RValue<SIMD::UInt> Cttz(RValue<SIMD::UInt> x, bool isZeroUndef);
441RValue<SIMD::Int> MulHigh(RValue<SIMD::Int> x, RValue<SIMD::Int> y);
442RValue<SIMD::UInt> MulHigh(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y);
443RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools);
444RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools);
445RValue<Bool> Divergent(const RValue<SIMD::Int> &ints);
446RValue<SIMD::Int> Swizzle(RValue<SIMD::Int> x, uint16_t select);
447RValue<SIMD::UInt> Swizzle(RValue<SIMD::UInt> x, uint16_t select);
448RValue<SIMD::Float> Swizzle(RValue<SIMD::Float> x, uint16_t select);
449RValue<SIMD::Int> Shuffle(RValue<SIMD::Int> x, RValue<SIMD::Int> y, uint16_t select);
450RValue<SIMD::UInt> Shuffle(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y, uint16_t select);
451RValue<SIMD::Float> Shuffle(RValue<SIMD::Float> x, RValue<SIMD::Float> y, uint16_t select);
452
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400453RValue<SIMD::Float> Gather(RValue<Pointer<Float>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes = false);
454RValue<SIMD::Int> Gather(RValue<Pointer<Int>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes = false);
455void Scatter(RValue<Pointer<Float>> base, RValue<SIMD::Float> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment);
456void Scatter(RValue<Pointer<Int>> base, RValue<SIMD::Int> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment);
Nicolas Capens01e41802022-06-29 23:12:57 -0400457
Nicolas Capens44f94692022-06-20 23:15:46 -0400458template<>
459inline RValue<SIMD::Int>::RValue(int i)
460 : val(broadcast(i, SIMD::Int::type()))
461{
462 RR_DEBUG_INFO_EMIT_VAR(val);
463}
464
465template<>
466inline RValue<SIMD::UInt>::RValue(unsigned int i)
467 : val(broadcast(int(i), SIMD::UInt::type()))
468{
469 RR_DEBUG_INFO_EMIT_VAR(val);
470}
471
472template<>
473inline RValue<SIMD::Float>::RValue(float f)
474 : val(broadcast(f, SIMD::Float::type()))
475{
476 RR_DEBUG_INFO_EMIT_VAR(val);
477}
478
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400479template<int T>
480SIMD::Int::Int(const SwizzleMask1<packed::Int4, T> &rhs)
481 : XYZW(this)
Nicolas Capens01e41802022-06-29 23:12:57 -0400482{
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400483 *this = rhs.operator RValue<scalar::Int>();
484}
485
486template<int T>
487SIMD::Float::Float(const SwizzleMask1<packed::Float4, T> &rhs)
488 : XYZW(this)
Nicolas Capens01e41802022-06-29 23:12:57 -0400489{
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400490 *this = rhs.operator RValue<scalar::Float>();
491}
Nicolas Capens01e41802022-06-29 23:12:57 -0400492
493template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400494inline T SIMD::Pointer::Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
Nicolas Capens01e41802022-06-29 23:12:57 -0400495{
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400496 using EL = typename Scalar<T>::Type;
Nicolas Capens01e41802022-06-29 23:12:57 -0400497
498 if(!isBasePlusOffset)
499 {
500 T out = T(0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400501 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400502 {
503 If(Extract(mask, i) != 0)
504 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400505 auto el = rr::Load(scalar::Pointer<EL>(pointers[i]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400506 out = Insert(out, el, i);
507 }
508 }
509 return out;
510 }
511
512 if(isStaticallyInBounds(sizeof(float), robustness))
513 {
514 // All elements are statically known to be in-bounds.
515 // We can avoid costly conditional on masks.
516
517 if(hasStaticSequentialOffsets(sizeof(float)))
518 {
519 // Offsets are sequential. Perform regular load.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400520 return rr::Load(scalar::Pointer<T>(base + staticOffsets[0]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400521 }
522
523 if(hasStaticEqualOffsets())
524 {
525 // Load one, replicate.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400526 return T(*scalar::Pointer<EL>(base + staticOffsets[0], alignment));
Nicolas Capens01e41802022-06-29 23:12:57 -0400527 }
528 }
529 else
530 {
531 switch(robustness)
532 {
533 case OutOfBoundsBehavior::Nullify:
534 case OutOfBoundsBehavior::RobustBufferAccess:
535 case OutOfBoundsBehavior::UndefinedValue:
536 mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads.
537 break;
538 case OutOfBoundsBehavior::UndefinedBehavior:
539 // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
540 break;
541 }
542 }
543
544 auto offs = offsets();
545
546 if(!atomic && order == std::memory_order_relaxed)
547 {
548 if(hasStaticEqualOffsets())
549 {
550 // Load one, replicate.
551 // Be careful of the case where the post-bounds-check mask
552 // is 0, in which case we must not load.
553 T out = T(0);
554 If(AnyTrue(mask))
555 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400556 EL el = *scalar::Pointer<EL>(base + staticOffsets[0], alignment);
Nicolas Capens01e41802022-06-29 23:12:57 -0400557 out = T(el);
558 }
559 return out;
560 }
561
562 bool zeroMaskedLanes = true;
563 switch(robustness)
564 {
565 case OutOfBoundsBehavior::Nullify:
566 case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero.
567 zeroMaskedLanes = true;
568 break;
569 case OutOfBoundsBehavior::UndefinedValue:
570 case OutOfBoundsBehavior::UndefinedBehavior:
571 zeroMaskedLanes = false;
572 break;
573 }
574
575 // TODO(b/195446858): Optimize static sequential offsets case by using masked load.
576
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400577 return Gather(scalar::Pointer<EL>(base), offs, mask, alignment, zeroMaskedLanes);
Nicolas Capens01e41802022-06-29 23:12:57 -0400578 }
579 else
580 {
581 T out;
582 auto anyLanesDisabled = AnyFalse(mask);
Nicolas Capens43183d82022-06-30 02:28:48 -0400583 If(hasStaticEqualOffsets() && !anyLanesDisabled)
Nicolas Capens01e41802022-06-29 23:12:57 -0400584 {
585 // Load one, replicate.
586 auto offset = Extract(offs, 0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400587 out = T(rr::Load(scalar::Pointer<EL>(&base[offset]), alignment, atomic, order));
Nicolas Capens01e41802022-06-29 23:12:57 -0400588 }
Nicolas Capens43183d82022-06-30 02:28:48 -0400589 Else If(hasStaticSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
Nicolas Capens01e41802022-06-29 23:12:57 -0400590 {
591 // Load all elements in a single SIMD instruction.
592 auto offset = Extract(offs, 0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400593 out = rr::Load(scalar::Pointer<T>(&base[offset]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400594 }
595 Else
596 {
597 // Divergent offsets or masked lanes.
598 out = T(0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400599 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400600 {
601 If(Extract(mask, i) != 0)
602 {
603 auto offset = Extract(offs, i);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400604 auto el = rr::Load(scalar::Pointer<EL>(&base[offset]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400605 out = Insert(out, el, i);
606 }
607 }
608 }
609 return out;
610 }
611}
612
613template<>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400614inline SIMD::Pointer SIMD::Pointer::Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */)
Nicolas Capens01e41802022-06-29 23:12:57 -0400615{
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400616 std::vector<scalar::Pointer<Byte>> pointers(SIMD::Width);
Nicolas Capens01e41802022-06-29 23:12:57 -0400617
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400618 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400619 {
620 If(Extract(mask, i) != 0)
621 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400622 pointers[i] = rr::Load(scalar::Pointer<scalar::Pointer<Byte>>(getPointerForLane(i)), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400623 }
624 }
625
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400626 return SIMD::Pointer(pointers);
Nicolas Capens01e41802022-06-29 23:12:57 -0400627}
628
629template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400630inline void SIMD::Pointer::Store(T val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
Nicolas Capens01e41802022-06-29 23:12:57 -0400631{
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400632 using EL = typename Scalar<T>::Type;
Nicolas Capens01e41802022-06-29 23:12:57 -0400633 constexpr size_t alignment = sizeof(float);
634
635 if(!isBasePlusOffset)
636 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400637 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400638 {
639 If(Extract(mask, i) != 0)
640 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400641 rr::Store(Extract(val, i), scalar::Pointer<EL>(pointers[i]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400642 }
643 }
644 return;
645 }
646
647 auto offs = offsets();
648 switch(robustness)
649 {
650 case OutOfBoundsBehavior::Nullify:
651 case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking.
652 case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access.
653 mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes.
654 break;
655 case OutOfBoundsBehavior::UndefinedBehavior:
656 // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses.
657 break;
658 }
659
660 if(!atomic && order == std::memory_order_relaxed)
661 {
662 if(hasStaticEqualOffsets())
663 {
664 If(AnyTrue(mask))
665 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400666 assert(SIMD::Width == 4);
667
Nicolas Capens01e41802022-06-29 23:12:57 -0400668 // All equal. One of these writes will win -- elect the winning lane.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400669 auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF);
Nicolas Capens01e41802022-06-29 23:12:57 -0400670 auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx));
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400671 auto maskedVal = As<SIMD::Int>(val) & elect;
Nicolas Capens01e41802022-06-29 23:12:57 -0400672 auto scalarVal = Extract(maskedVal, 0) |
673 Extract(maskedVal, 1) |
674 Extract(maskedVal, 2) |
675 Extract(maskedVal, 3);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400676 *scalar::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal);
Nicolas Capens01e41802022-06-29 23:12:57 -0400677 }
678 }
679 else if(hasStaticSequentialOffsets(sizeof(float)) &&
680 isStaticallyInBounds(sizeof(float), robustness))
681 {
682 // TODO(b/195446858): Optimize using masked store.
683 // Pointer has no elements OOB, and the store is not atomic.
684 // Perform a read-modify-write.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400685 auto p = scalar::Pointer<SIMD::Int>(base + staticOffsets[0], alignment);
Nicolas Capens01e41802022-06-29 23:12:57 -0400686 auto prev = *p;
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400687 *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask);
Nicolas Capens01e41802022-06-29 23:12:57 -0400688 }
689 else
690 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400691 Scatter(scalar::Pointer<EL>(base), val, offs, mask, alignment);
Nicolas Capens01e41802022-06-29 23:12:57 -0400692 }
693 }
694 else
695 {
696 auto anyLanesDisabled = AnyFalse(mask);
Nicolas Capens43183d82022-06-30 02:28:48 -0400697 If(hasStaticSequentialOffsets(sizeof(float)) && !anyLanesDisabled)
Nicolas Capens01e41802022-06-29 23:12:57 -0400698 {
699 // Store all elements in a single SIMD instruction.
700 auto offset = Extract(offs, 0);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400701 rr::Store(val, scalar::Pointer<T>(&base[offset]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400702 }
703 Else
704 {
705 // Divergent offsets or masked lanes.
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400706 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400707 {
708 If(Extract(mask, i) != 0)
709 {
710 auto offset = Extract(offs, i);
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400711 rr::Store(Extract(val, i), scalar::Pointer<EL>(&base[offset]), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400712 }
713 }
714 }
715 }
716}
717
718template<>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400719inline void SIMD::Pointer::Store(SIMD::Pointer val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
Nicolas Capens01e41802022-06-29 23:12:57 -0400720{
721 constexpr size_t alignment = sizeof(void *);
722
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400723 for(int i = 0; i < SIMD::Width; i++)
Nicolas Capens01e41802022-06-29 23:12:57 -0400724 {
725 If(Extract(mask, i) != 0)
726 {
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400727 rr::Store(val.getPointerForLane(i), scalar::Pointer<scalar::Pointer<Byte>>(getPointerForLane(i)), alignment, atomic, order);
Nicolas Capens01e41802022-06-29 23:12:57 -0400728 }
729 }
730}
731
732template<typename T>
Nicolas Capensd1116fa2022-06-29 10:39:18 -0400733inline void SIMD::Pointer::Store(RValue<T> val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */)
Nicolas Capens01e41802022-06-29 23:12:57 -0400734{
735 Store(T(val), robustness, mask, atomic, order);
736}
737
Nicolas Capens44f94692022-06-20 23:15:46 -0400738} // namespace rr
Nicolas Capens3b0ad202022-06-02 15:02:31 -0400739
740#endif // rr_SIMD_hpp