Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 1 | // Copyright 2022 The SwiftShader Authors. All Rights Reserved. |
| 2 | // |
| 3 | // Licensed under the Apache License, Version 2.0 (the "License"); |
| 4 | // you may not use this file except in compliance with the License. |
| 5 | // You may obtain a copy of the License at |
| 6 | // |
| 7 | // http://www.apache.org/licenses/LICENSE-2.0 |
| 8 | // |
| 9 | // Unless required by applicable law or agreed to in writing, software |
| 10 | // distributed under the License is distributed on an "AS IS" BASIS, |
| 11 | // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. |
| 12 | // See the License for the specific language governing permissions and |
| 13 | // limitations under the License. |
| 14 | |
| 15 | #ifndef rr_SIMD_hpp |
| 16 | #define rr_SIMD_hpp |
| 17 | |
| 18 | #include "Reactor.hpp" |
| 19 | |
Nicolas Capens | be30aa6 | 2022-08-10 11:34:51 -0400 | [diff] [blame] | 20 | #include <functional> |
Nicolas Capens | 942c639 | 2022-06-30 00:20:40 -0400 | [diff] [blame] | 21 | #include <vector> |
| 22 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 23 | namespace rr { |
| 24 | |
| 25 | namespace scalar { |
| 26 | using Int = rr::Int; |
| 27 | using UInt = rr::UInt; |
| 28 | using Float = rr::Float; |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 29 | template<class T> |
| 30 | using Pointer = rr::Pointer<T>; |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 31 | } // namespace scalar |
| 32 | |
Nicolas Capens | 0ed3fa6 | 2022-06-22 16:48:07 -0400 | [diff] [blame] | 33 | namespace packed { |
| 34 | using Int4 = rr::Int4; |
| 35 | using UInt4 = rr::UInt4; |
| 36 | using Float4 = rr::Float4; |
| 37 | } // namespace packed |
| 38 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 39 | namespace SIMD { |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 40 | |
// Element count shared by the SIMD types (returned by their element_count()).
// Defined in another translation unit.
extern const int Width;

// Forward declarations so the classes below can refer to one another.
class Int;
class UInt;
class Float;
class Pointer;
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 47 | |
Nicolas Capens | 7e96068 | 2022-06-30 15:53:27 -0400 | [diff] [blame] | 48 | class Int : public LValue<SIMD::Int>, |
| 49 | public XYZW<SIMD::Int> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics. |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 50 | { |
| 51 | public: |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 52 | explicit Int(RValue<SIMD::Float> cast); |
| 53 | |
| 54 | Int(); |
| 55 | Int(int broadcast); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 56 | Int(int x, int y, int z, int w); |
| 57 | Int(std::vector<int> v); |
Nicolas Capens | be30aa6 | 2022-08-10 11:34:51 -0400 | [diff] [blame] | 58 | Int(std::function<int(int)> LaneValueProducer); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 59 | Int(RValue<SIMD::Int> rhs); |
| 60 | Int(const Int &rhs); |
| 61 | Int(const Reference<SIMD::Int> &rhs); |
| 62 | Int(RValue<SIMD::UInt> rhs); |
| 63 | Int(const UInt &rhs); |
| 64 | Int(const Reference<SIMD::UInt> &rhs); |
| 65 | Int(RValue<scalar::Int> rhs); |
| 66 | Int(const scalar::Int &rhs); |
| 67 | Int(const Reference<scalar::Int> &rhs); |
| 68 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 69 | template<int T> |
| 70 | Int(const SwizzleMask1<packed::Int4, T> &rhs); |
| 71 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 72 | RValue<SIMD::Int> operator=(int broadcast); |
| 73 | RValue<SIMD::Int> operator=(RValue<SIMD::Int> rhs); |
| 74 | RValue<SIMD::Int> operator=(const Int &rhs); |
| 75 | RValue<SIMD::Int> operator=(const Reference<SIMD::Int> &rhs); |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 76 | |
| 77 | static Type *type(); |
Nicolas Capens | 442e25b | 2022-06-22 12:02:52 -0400 | [diff] [blame] | 78 | static int element_count() { return SIMD::Width; } |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 79 | }; |
| 80 | |
Nicolas Capens | 7e96068 | 2022-06-30 15:53:27 -0400 | [diff] [blame] | 81 | class UInt : public LValue<SIMD::UInt>, |
| 82 | public XYZW<SIMD::UInt> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics. |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 83 | { |
| 84 | public: |
| 85 | explicit UInt(RValue<SIMD::Float> cast); |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 86 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 87 | UInt(); |
| 88 | UInt(int broadcast); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 89 | UInt(int x, int y, int z, int w); |
| 90 | UInt(std::vector<int> v); |
Nicolas Capens | be30aa6 | 2022-08-10 11:34:51 -0400 | [diff] [blame] | 91 | UInt(std::function<int(int)> LaneValueProducer); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 92 | UInt(RValue<SIMD::UInt> rhs); |
| 93 | UInt(const UInt &rhs); |
| 94 | UInt(const Reference<SIMD::UInt> &rhs); |
| 95 | UInt(RValue<SIMD::Int> rhs); |
| 96 | UInt(const Int &rhs); |
| 97 | UInt(const Reference<SIMD::Int> &rhs); |
| 98 | UInt(RValue<scalar::UInt> rhs); |
| 99 | UInt(const scalar::UInt &rhs); |
| 100 | UInt(const Reference<scalar::UInt> &rhs); |
| 101 | |
| 102 | RValue<SIMD::UInt> operator=(RValue<SIMD::UInt> rhs); |
| 103 | RValue<SIMD::UInt> operator=(const UInt &rhs); |
| 104 | RValue<SIMD::UInt> operator=(const Reference<SIMD::UInt> &rhs); |
| 105 | |
| 106 | static Type *type(); |
| 107 | static int element_count() { return SIMD::Width; } |
| 108 | }; |
| 109 | |
Nicolas Capens | 7e96068 | 2022-06-30 15:53:27 -0400 | [diff] [blame] | 110 | class Float : public LValue<SIMD::Float>, |
| 111 | public XYZW<SIMD::Float> // TODO(b/214583550): Eliminate and replace with SwizzleQuad() and/or other intrinsics. |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 112 | { |
| 113 | public: |
| 114 | explicit Float(RValue<SIMD::Int> cast); |
| 115 | explicit Float(RValue<SIMD::UInt> cast); |
| 116 | |
| 117 | Float(); |
| 118 | Float(float broadcast); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 119 | Float(float x, float y, float z, float w); |
| 120 | Float(std::vector<float> v); |
Nicolas Capens | be30aa6 | 2022-08-10 11:34:51 -0400 | [diff] [blame] | 121 | Float(std::function<float(int)> LaneValueProducer); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 122 | Float(RValue<SIMD::Float> rhs); |
| 123 | Float(const Float &rhs); |
| 124 | Float(const Reference<SIMD::Float> &rhs); |
| 125 | Float(RValue<scalar::Float> rhs); |
| 126 | Float(const scalar::Float &rhs); |
| 127 | Float(const Reference<scalar::Float> &rhs); |
| 128 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 129 | Float(RValue<packed::Float4> rhs); |
| 130 | RValue<SIMD::Float> operator=(RValue<packed::Float4> rhs); |
| 131 | template<int T> |
| 132 | Float(const SwizzleMask1<packed::Float4, T> &rhs); |
| 133 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 134 | RValue<SIMD::Float> operator=(float broadcast); |
| 135 | RValue<SIMD::Float> operator=(RValue<SIMD::Float> rhs); |
| 136 | RValue<SIMD::Float> operator=(const Float &rhs); |
| 137 | RValue<SIMD::Float> operator=(const Reference<SIMD::Float> &rhs); |
| 138 | RValue<SIMD::Float> operator=(RValue<scalar::Float> rhs); |
| 139 | RValue<SIMD::Float> operator=(const scalar::Float &rhs); |
| 140 | RValue<SIMD::Float> operator=(const Reference<scalar::Float> &rhs); |
| 141 | |
| 142 | static SIMD::Float infinity(); |
| 143 | |
| 144 | static Type *type(); |
| 145 | static int element_count() { return SIMD::Width; } |
| 146 | }; |
| 147 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 148 | class Pointer |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 149 | { |
Nicolas Capens | 942c639 | 2022-06-30 00:20:40 -0400 | [diff] [blame] | 150 | public: |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 151 | Pointer(scalar::Pointer<Byte> base, scalar::Int limit); |
| 152 | Pointer(scalar::Pointer<Byte> base, unsigned int limit); |
| 153 | Pointer(scalar::Pointer<Byte> base, scalar::Int limit, SIMD::Int offset); |
| 154 | Pointer(scalar::Pointer<Byte> base, unsigned int limit, SIMD::Int offset); |
| 155 | Pointer(std::vector<scalar::Pointer<Byte>> pointers); |
| 156 | explicit Pointer(SIMD::UInt cast); // Cast from 32-bit integers to 32-bit pointers |
| 157 | explicit Pointer(SIMD::UInt castLow, SIMD::UInt castHight); // Cast from pairs of 32-bit integers to 64-bit pointers |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 158 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 159 | Pointer &operator+=(SIMD::Int i); |
| 160 | Pointer operator+(SIMD::Int i); |
| 161 | Pointer &operator+=(int i); |
| 162 | Pointer operator+(int i); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 163 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 164 | SIMD::Int offsets() const; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 165 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 166 | SIMD::Int isInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 167 | |
| 168 | bool isStaticallyInBounds(unsigned int accessSize, OutOfBoundsBehavior robustness) const; |
| 169 | |
| 170 | Int limit() const; |
| 171 | |
Nicolas Capens | 43183d8 | 2022-06-30 02:28:48 -0400 | [diff] [blame] | 172 | // Returns true if all offsets are compile-time static and sequential |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 173 | // (N+0*step, N+1*step, N+2*step, N+3*step) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 174 | bool hasStaticSequentialOffsets(unsigned int step) const; |
| 175 | |
Nicolas Capens | 43183d8 | 2022-06-30 02:28:48 -0400 | [diff] [blame] | 176 | // Returns true if all offsets are compile-time static and equal |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 177 | // (N, N, N, N) |
| 178 | bool hasStaticEqualOffsets() const; |
| 179 | |
| 180 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 181 | inline T Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed, int alignment = sizeof(float)); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 182 | |
| 183 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 184 | inline void Store(T val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 185 | |
| 186 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 187 | inline void Store(RValue<T> val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic = false, std::memory_order order = std::memory_order_relaxed); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 188 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 189 | scalar::Pointer<Byte> getUniformPointer() const; |
| 190 | scalar::Pointer<Byte> getPointerForLane(int lane) const; |
| 191 | static Pointer IfThenElse(SIMD::Int condition, const Pointer &lhs, const Pointer &rhs); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 192 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 193 | void castTo(SIMD::UInt &bits) const; // Cast from 32-bit pointers to 32-bit integers |
| 194 | void castTo(SIMD::UInt &lowerBits, SIMD::UInt &upperBits) const; // Cast from 64-bit pointers to pairs of 32-bit integers |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 195 | |
| 196 | #ifdef ENABLE_RR_PRINT |
| 197 | std::vector<rr::Value *> getPrintValues() const; |
| 198 | #endif |
| 199 | |
| 200 | private: |
| 201 | // Base address for the pointer, common across all lanes. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 202 | scalar::Pointer<Byte> base; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 203 | // Per-lane address for dealing with non-uniform data |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 204 | std::vector<scalar::Pointer<Byte>> pointers; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 205 | |
| 206 | public: |
| 207 | // Upper (non-inclusive) limit for offsets from base. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 208 | scalar::Int dynamicLimit; // If hasDynamicLimit is false, dynamicLimit is zero. |
Nicolas Capens | 942c639 | 2022-06-30 00:20:40 -0400 | [diff] [blame] | 209 | unsigned int staticLimit = 0; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 210 | |
| 211 | // Per lane offsets from base. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 212 | SIMD::Int dynamicOffsets; // If hasDynamicOffsets is false, all dynamicOffsets are zero. |
Nicolas Capens | 942c639 | 2022-06-30 00:20:40 -0400 | [diff] [blame] | 213 | std::vector<int32_t> staticOffsets; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 214 | |
Nicolas Capens | 942c639 | 2022-06-30 00:20:40 -0400 | [diff] [blame] | 215 | bool hasDynamicLimit = false; // True if dynamicLimit is non-zero. |
| 216 | bool hasDynamicOffsets = false; // True if any dynamicOffsets are non-zero. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 217 | bool isBasePlusOffset = false; // True if this uses base+offset. False if this is a collection of Pointers |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 218 | }; |
| 219 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 220 | } // namespace SIMD |
| 221 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 222 | RValue<SIMD::Int> operator+(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 223 | RValue<SIMD::Int> operator-(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 224 | RValue<SIMD::Int> operator*(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 225 | RValue<SIMD::Int> operator/(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 226 | RValue<SIMD::Int> operator%(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 227 | RValue<SIMD::Int> operator&(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 228 | RValue<SIMD::Int> operator|(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 229 | RValue<SIMD::Int> operator^(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 230 | RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, unsigned char rhs); |
| 231 | RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, unsigned char rhs); |
| 232 | RValue<SIMD::Int> operator<<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 233 | RValue<SIMD::Int> operator>>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 234 | RValue<SIMD::Int> operator+=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 235 | RValue<SIMD::Int> operator-=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 236 | RValue<SIMD::Int> operator*=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 237 | // RValue<SIMD::Int> operator/=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 238 | // RValue<SIMD::Int> operator%=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 239 | RValue<SIMD::Int> operator&=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 240 | RValue<SIMD::Int> operator|=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 241 | RValue<SIMD::Int> operator^=(SIMD::Int &lhs, RValue<SIMD::Int> rhs); |
| 242 | RValue<SIMD::Int> operator<<=(SIMD::Int &lhs, unsigned char rhs); |
| 243 | RValue<SIMD::Int> operator>>=(SIMD::Int &lhs, unsigned char rhs); |
| 244 | RValue<SIMD::Int> operator+(RValue<SIMD::Int> val); |
| 245 | RValue<SIMD::Int> operator-(RValue<SIMD::Int> val); |
| 246 | RValue<SIMD::Int> operator~(RValue<SIMD::Int> val); |
| 247 | // RValue<SIMD::Int> operator++(SIMD::Int &val, int); // Post-increment |
| 248 | // const Int &operator++(SIMD::Int &val); // Pre-increment |
| 249 | // RValue<SIMD::Int> operator--(SIMD::Int &val, int); // Post-decrement |
| 250 | // const Int &operator--(SIMD::Int &val); // Pre-decrement |
| 251 | // RValue<Bool> operator<(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 252 | // RValue<Bool> operator<=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 253 | // RValue<Bool> operator>(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 254 | // RValue<Bool> operator>=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 255 | // RValue<Bool> operator!=(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 256 | // RValue<Bool> operator==(RValue<SIMD::Int> lhs, RValue<SIMD::Int> rhs); |
| 257 | |
| 258 | RValue<SIMD::Int> CmpEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 259 | RValue<SIMD::Int> CmpLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 260 | RValue<SIMD::Int> CmpLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 261 | RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 262 | RValue<SIMD::Int> CmpNLT(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 263 | RValue<SIMD::Int> CmpNLE(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 264 | inline RValue<SIMD::Int> CmpGT(RValue<SIMD::Int> x, RValue<SIMD::Int> y) |
| 265 | { |
| 266 | return CmpNLE(x, y); |
| 267 | } |
| 268 | inline RValue<SIMD::Int> CmpGE(RValue<SIMD::Int> x, RValue<SIMD::Int> y) |
| 269 | { |
| 270 | return CmpNLT(x, y); |
| 271 | } |
| 272 | RValue<SIMD::Int> Abs(RValue<SIMD::Int> x); |
| 273 | RValue<SIMD::Int> Max(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 274 | RValue<SIMD::Int> Min(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 275 | // Convert to nearest integer. If a converted value is outside of the integer |
| 276 | // range, the returned result is undefined. |
| 277 | RValue<SIMD::Int> RoundInt(RValue<SIMD::Float> cast); |
| 278 | // Rounds to the nearest integer, but clamps very large values to an |
| 279 | // implementation-dependent range. |
| 280 | // Specifically, on x86, values larger than 2147483583.0 are converted to |
| 281 | // 2147483583 (0x7FFFFFBF) instead of producing 0x80000000. |
| 282 | RValue<SIMD::Int> RoundIntClamped(RValue<SIMD::Float> cast); |
| 283 | RValue<scalar::Int> Extract(RValue<SIMD::Int> val, int i); |
| 284 | RValue<SIMD::Int> Insert(RValue<SIMD::Int> val, RValue<scalar::Int> element, int i); |
Nicolas Capens | 0ed3fa6 | 2022-06-22 16:48:07 -0400 | [diff] [blame] | 285 | RValue<packed::Int4> Extract128(RValue<SIMD::Int> val, int i); |
| 286 | RValue<SIMD::Int> Insert128(RValue<SIMD::Int> val, RValue<packed::Int4> element, int i); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 287 | |
| 288 | RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 289 | RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 290 | RValue<SIMD::UInt> operator*(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 291 | RValue<SIMD::UInt> operator/(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 292 | RValue<SIMD::UInt> operator%(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 293 | RValue<SIMD::UInt> operator&(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 294 | RValue<SIMD::UInt> operator|(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 295 | RValue<SIMD::UInt> operator^(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 296 | RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, unsigned char rhs); |
| 297 | RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, unsigned char rhs); |
| 298 | RValue<SIMD::UInt> operator<<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 299 | RValue<SIMD::UInt> operator>>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 300 | RValue<SIMD::UInt> operator+=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 301 | RValue<SIMD::UInt> operator-=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 302 | RValue<SIMD::UInt> operator*=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 303 | // RValue<SIMD::UInt> operator/=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 304 | // RValue<SIMD::UInt> operator%=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 305 | RValue<SIMD::UInt> operator&=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 306 | RValue<SIMD::UInt> operator|=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 307 | RValue<SIMD::UInt> operator^=(SIMD::UInt &lhs, RValue<SIMD::UInt> rhs); |
| 308 | RValue<SIMD::UInt> operator<<=(SIMD::UInt &lhs, unsigned char rhs); |
| 309 | RValue<SIMD::UInt> operator>>=(SIMD::UInt &lhs, unsigned char rhs); |
| 310 | RValue<SIMD::UInt> operator+(RValue<SIMD::UInt> val); |
| 311 | RValue<SIMD::UInt> operator-(RValue<SIMD::UInt> val); |
| 312 | RValue<SIMD::UInt> operator~(RValue<SIMD::UInt> val); |
| 313 | // RValue<SIMD::UInt> operator++(SIMD::UInt &val, int); // Post-increment |
| 314 | // const UInt &operator++(SIMD::UInt &val); // Pre-increment |
| 315 | // RValue<SIMD::UInt> operator--(SIMD::UInt &val, int); // Post-decrement |
| 316 | // const UInt &operator--(SIMD::UInt &val); // Pre-decrement |
| 317 | // RValue<Bool> operator<(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 318 | // RValue<Bool> operator<=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 319 | // RValue<Bool> operator>(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 320 | // RValue<Bool> operator>=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 321 | // RValue<Bool> operator!=(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 322 | // RValue<Bool> operator==(RValue<SIMD::UInt> lhs, RValue<SIMD::UInt> rhs); |
| 323 | |
| 324 | RValue<SIMD::UInt> CmpEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 325 | RValue<SIMD::UInt> CmpLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 326 | RValue<SIMD::UInt> CmpLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 327 | RValue<SIMD::UInt> CmpNEQ(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 328 | RValue<SIMD::UInt> CmpNLT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 329 | RValue<SIMD::UInt> CmpNLE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 330 | inline RValue<SIMD::UInt> CmpGT(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y) |
| 331 | { |
| 332 | return CmpNLE(x, y); |
| 333 | } |
| 334 | inline RValue<SIMD::UInt> CmpGE(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y) |
| 335 | { |
| 336 | return CmpNLT(x, y); |
| 337 | } |
| 338 | RValue<SIMD::UInt> Max(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 339 | RValue<SIMD::UInt> Min(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 340 | RValue<scalar::UInt> Extract(RValue<SIMD::UInt> val, int i); |
| 341 | RValue<SIMD::UInt> Insert(RValue<SIMD::UInt> val, RValue<scalar::UInt> element, int i); |
Nicolas Capens | 0ed3fa6 | 2022-06-22 16:48:07 -0400 | [diff] [blame] | 342 | RValue<packed::UInt4> Extract128(RValue<SIMD::UInt> val, int i); |
| 343 | RValue<SIMD::UInt> Insert128(RValue<SIMD::UInt> val, RValue<packed::UInt4> element, int i); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 344 | // RValue<SIMD::UInt> RoundInt(RValue<SIMD::Float> cast); |
| 345 | |
| 346 | RValue<SIMD::Float> operator+(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs); |
| 347 | RValue<SIMD::Float> operator-(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs); |
| 348 | RValue<SIMD::Float> operator*(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs); |
| 349 | RValue<SIMD::Float> operator/(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs); |
| 350 | RValue<SIMD::Float> operator%(RValue<SIMD::Float> lhs, RValue<SIMD::Float> rhs); |
| 351 | RValue<SIMD::Float> operator+=(SIMD::Float &lhs, RValue<SIMD::Float> rhs); |
| 352 | RValue<SIMD::Float> operator-=(SIMD::Float &lhs, RValue<SIMD::Float> rhs); |
| 353 | RValue<SIMD::Float> operator*=(SIMD::Float &lhs, RValue<SIMD::Float> rhs); |
| 354 | RValue<SIMD::Float> operator/=(SIMD::Float &lhs, RValue<SIMD::Float> rhs); |
| 355 | RValue<SIMD::Float> operator%=(SIMD::Float &lhs, RValue<SIMD::Float> rhs); |
| 356 | RValue<SIMD::Float> operator+(RValue<SIMD::Float> val); |
| 357 | RValue<SIMD::Float> operator-(RValue<SIMD::Float> val); |
| 358 | |
| 359 | // Computes `x * y + z`, which may be fused into one operation to produce a higher-precision result. |
| 360 | RValue<SIMD::Float> MulAdd(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z); |
| 361 | // Computes a fused `x * y + z` operation. Caps::fmaIsFast indicates whether it emits an FMA instruction. |
| 362 | RValue<SIMD::Float> FMA(RValue<SIMD::Float> x, RValue<SIMD::Float> y, RValue<SIMD::Float> z); |
| 363 | |
| 364 | RValue<SIMD::Float> Abs(RValue<SIMD::Float> x); |
| 365 | RValue<SIMD::Float> Max(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 366 | RValue<SIMD::Float> Min(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 367 | |
| 368 | RValue<SIMD::Float> Rcp(RValue<SIMD::Float> x, bool relaxedPrecision, bool exactAtPow2 = false); |
| 369 | RValue<SIMD::Float> RcpSqrt(RValue<SIMD::Float> x, bool relaxedPrecision); |
| 370 | RValue<SIMD::Float> Sqrt(RValue<SIMD::Float> x); |
| 371 | RValue<SIMD::Float> Insert(RValue<SIMD::Float> val, RValue<rr ::Float> element, int i); |
| 372 | RValue<rr ::Float> Extract(RValue<SIMD::Float> x, int i); |
Nicolas Capens | 0ed3fa6 | 2022-06-22 16:48:07 -0400 | [diff] [blame] | 373 | RValue<packed::Float4> Extract128(RValue<SIMD::Float> val, int i); |
| 374 | RValue<SIMD::Float> Insert128(RValue<SIMD::Float> val, RValue<packed::Float4> element, int i); |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 375 | |
| 376 | // Ordered comparison functions |
| 377 | RValue<SIMD::Int> CmpEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 378 | RValue<SIMD::Int> CmpLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 379 | RValue<SIMD::Int> CmpLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 380 | RValue<SIMD::Int> CmpNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 381 | RValue<SIMD::Int> CmpNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 382 | RValue<SIMD::Int> CmpNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 383 | inline RValue<SIMD::Int> CmpGT(RValue<SIMD::Float> x, RValue<SIMD::Float> y) |
| 384 | { |
| 385 | return CmpNLE(x, y); |
| 386 | } |
| 387 | inline RValue<SIMD::Int> CmpGE(RValue<SIMD::Float> x, RValue<SIMD::Float> y) |
| 388 | { |
| 389 | return CmpNLT(x, y); |
| 390 | } |
| 391 | |
| 392 | // Unordered comparison functions |
| 393 | RValue<SIMD::Int> CmpUEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 394 | RValue<SIMD::Int> CmpULT(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 395 | RValue<SIMD::Int> CmpULE(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 396 | RValue<SIMD::Int> CmpUNEQ(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 397 | RValue<SIMD::Int> CmpUNLT(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 398 | RValue<SIMD::Int> CmpUNLE(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 399 | inline RValue<SIMD::Int> CmpUGT(RValue<SIMD::Float> x, RValue<SIMD::Float> y) |
| 400 | { |
| 401 | return CmpUNLE(x, y); |
| 402 | } |
| 403 | inline RValue<SIMD::Int> CmpUGE(RValue<SIMD::Float> x, RValue<SIMD::Float> y) |
| 404 | { |
| 405 | return CmpUNLT(x, y); |
| 406 | } |
| 407 | |
| 408 | RValue<SIMD::Int> IsInf(RValue<SIMD::Float> x); |
| 409 | RValue<SIMD::Int> IsNan(RValue<SIMD::Float> x); |
| 410 | RValue<SIMD::Float> Round(RValue<SIMD::Float> x); |
| 411 | RValue<SIMD::Float> Trunc(RValue<SIMD::Float> x); |
| 412 | RValue<SIMD::Float> Frac(RValue<SIMD::Float> x); |
| 413 | RValue<SIMD::Float> Floor(RValue<SIMD::Float> x); |
| 414 | RValue<SIMD::Float> Ceil(RValue<SIMD::Float> x); |
| 415 | |
| 416 | // Trigonometric functions |
| 417 | RValue<SIMD::Float> Sin(RValue<SIMD::Float> x); |
| 418 | RValue<SIMD::Float> Cos(RValue<SIMD::Float> x); |
| 419 | RValue<SIMD::Float> Tan(RValue<SIMD::Float> x); |
| 420 | RValue<SIMD::Float> Asin(RValue<SIMD::Float> x); |
| 421 | RValue<SIMD::Float> Acos(RValue<SIMD::Float> x); |
| 422 | RValue<SIMD::Float> Atan(RValue<SIMD::Float> x); |
| 423 | RValue<SIMD::Float> Sinh(RValue<SIMD::Float> x); |
| 424 | RValue<SIMD::Float> Cosh(RValue<SIMD::Float> x); |
| 425 | RValue<SIMD::Float> Tanh(RValue<SIMD::Float> x); |
| 426 | RValue<SIMD::Float> Asinh(RValue<SIMD::Float> x); |
| 427 | RValue<SIMD::Float> Acosh(RValue<SIMD::Float> x); |
| 428 | RValue<SIMD::Float> Atanh(RValue<SIMD::Float> x); |
| 429 | RValue<SIMD::Float> Atan2(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 430 | |
| 431 | // Exponential functions |
| 432 | RValue<SIMD::Float> Pow(RValue<SIMD::Float> x, RValue<SIMD::Float> y); |
| 433 | RValue<SIMD::Float> Exp(RValue<SIMD::Float> x); |
| 434 | RValue<SIMD::Float> Log(RValue<SIMD::Float> x); |
| 435 | RValue<SIMD::Float> Exp2(RValue<SIMD::Float> x); |
| 436 | RValue<SIMD::Float> Log2(RValue<SIMD::Float> x); |
| 437 | |
Nicolas Capens | 0e34c25 | 2022-06-30 14:02:00 -0400 | [diff] [blame] | 438 | RValue<Int> SignMask(RValue<SIMD::Int> x); |
| 439 | RValue<SIMD::UInt> Ctlz(RValue<SIMD::UInt> x, bool isZeroUndef); |
| 440 | RValue<SIMD::UInt> Cttz(RValue<SIMD::UInt> x, bool isZeroUndef); |
| 441 | RValue<SIMD::Int> MulHigh(RValue<SIMD::Int> x, RValue<SIMD::Int> y); |
| 442 | RValue<SIMD::UInt> MulHigh(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y); |
| 443 | RValue<Bool> AnyTrue(const RValue<SIMD::Int> &bools); |
| 444 | RValue<Bool> AnyFalse(const RValue<SIMD::Int> &bools); |
| 445 | RValue<Bool> Divergent(const RValue<SIMD::Int> &ints); |
| 446 | RValue<SIMD::Int> Swizzle(RValue<SIMD::Int> x, uint16_t select); |
| 447 | RValue<SIMD::UInt> Swizzle(RValue<SIMD::UInt> x, uint16_t select); |
| 448 | RValue<SIMD::Float> Swizzle(RValue<SIMD::Float> x, uint16_t select); |
| 449 | RValue<SIMD::Int> Shuffle(RValue<SIMD::Int> x, RValue<SIMD::Int> y, uint16_t select); |
| 450 | RValue<SIMD::UInt> Shuffle(RValue<SIMD::UInt> x, RValue<SIMD::UInt> y, uint16_t select); |
| 451 | RValue<SIMD::Float> Shuffle(RValue<SIMD::Float> x, RValue<SIMD::Float> y, uint16_t select); |
| 452 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 453 | RValue<SIMD::Float> Gather(RValue<Pointer<Float>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes = false); |
| 454 | RValue<SIMD::Int> Gather(RValue<Pointer<Int>> base, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment, bool zeroMaskedLanes = false); |
| 455 | void Scatter(RValue<Pointer<Float>> base, RValue<SIMD::Float> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment); |
| 456 | void Scatter(RValue<Pointer<Int>> base, RValue<SIMD::Int> val, RValue<SIMD::Int> offsets, RValue<SIMD::Int> mask, unsigned int alignment); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 457 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 458 | template<> |
| 459 | inline RValue<SIMD::Int>::RValue(int i) |
| 460 | : val(broadcast(i, SIMD::Int::type())) |
| 461 | { |
| 462 | RR_DEBUG_INFO_EMIT_VAR(val); |
| 463 | } |
| 464 | |
| 465 | template<> |
| 466 | inline RValue<SIMD::UInt>::RValue(unsigned int i) |
| 467 | : val(broadcast(int(i), SIMD::UInt::type())) |
| 468 | { |
| 469 | RR_DEBUG_INFO_EMIT_VAR(val); |
| 470 | } |
| 471 | |
| 472 | template<> |
| 473 | inline RValue<SIMD::Float>::RValue(float f) |
| 474 | : val(broadcast(f, SIMD::Float::type())) |
| 475 | { |
| 476 | RR_DEBUG_INFO_EMIT_VAR(val); |
| 477 | } |
| 478 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 479 | template<int T> |
| 480 | SIMD::Int::Int(const SwizzleMask1<packed::Int4, T> &rhs) |
| 481 | : XYZW(this) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 482 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 483 | *this = rhs.operator RValue<scalar::Int>(); |
| 484 | } |
| 485 | |
| 486 | template<int T> |
| 487 | SIMD::Float::Float(const SwizzleMask1<packed::Float4, T> &rhs) |
| 488 | : XYZW(this) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 489 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 490 | *this = rhs.operator RValue<scalar::Float>(); |
| 491 | } |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 492 | |
| 493 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 494 | inline T SIMD::Pointer::Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 495 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 496 | using EL = typename Scalar<T>::Type; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 497 | |
| 498 | if(!isBasePlusOffset) |
| 499 | { |
| 500 | T out = T(0); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 501 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 502 | { |
| 503 | If(Extract(mask, i) != 0) |
| 504 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 505 | auto el = rr::Load(scalar::Pointer<EL>(pointers[i]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 506 | out = Insert(out, el, i); |
| 507 | } |
| 508 | } |
| 509 | return out; |
| 510 | } |
| 511 | |
| 512 | if(isStaticallyInBounds(sizeof(float), robustness)) |
| 513 | { |
| 514 | // All elements are statically known to be in-bounds. |
| 515 | // We can avoid costly conditional on masks. |
| 516 | |
| 517 | if(hasStaticSequentialOffsets(sizeof(float))) |
| 518 | { |
| 519 | // Offsets are sequential. Perform regular load. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 520 | return rr::Load(scalar::Pointer<T>(base + staticOffsets[0]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 521 | } |
| 522 | |
| 523 | if(hasStaticEqualOffsets()) |
| 524 | { |
| 525 | // Load one, replicate. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 526 | return T(*scalar::Pointer<EL>(base + staticOffsets[0], alignment)); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 527 | } |
| 528 | } |
| 529 | else |
| 530 | { |
| 531 | switch(robustness) |
| 532 | { |
| 533 | case OutOfBoundsBehavior::Nullify: |
| 534 | case OutOfBoundsBehavior::RobustBufferAccess: |
| 535 | case OutOfBoundsBehavior::UndefinedValue: |
| 536 | mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds reads. |
| 537 | break; |
| 538 | case OutOfBoundsBehavior::UndefinedBehavior: |
| 539 | // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. |
| 540 | break; |
| 541 | } |
| 542 | } |
| 543 | |
| 544 | auto offs = offsets(); |
| 545 | |
| 546 | if(!atomic && order == std::memory_order_relaxed) |
| 547 | { |
| 548 | if(hasStaticEqualOffsets()) |
| 549 | { |
| 550 | // Load one, replicate. |
| 551 | // Be careful of the case where the post-bounds-check mask |
| 552 | // is 0, in which case we must not load. |
| 553 | T out = T(0); |
| 554 | If(AnyTrue(mask)) |
| 555 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 556 | EL el = *scalar::Pointer<EL>(base + staticOffsets[0], alignment); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 557 | out = T(el); |
| 558 | } |
| 559 | return out; |
| 560 | } |
| 561 | |
| 562 | bool zeroMaskedLanes = true; |
| 563 | switch(robustness) |
| 564 | { |
| 565 | case OutOfBoundsBehavior::Nullify: |
| 566 | case OutOfBoundsBehavior::RobustBufferAccess: // Must either return an in-bounds value, or zero. |
| 567 | zeroMaskedLanes = true; |
| 568 | break; |
| 569 | case OutOfBoundsBehavior::UndefinedValue: |
| 570 | case OutOfBoundsBehavior::UndefinedBehavior: |
| 571 | zeroMaskedLanes = false; |
| 572 | break; |
| 573 | } |
| 574 | |
| 575 | // TODO(b/195446858): Optimize static sequential offsets case by using masked load. |
| 576 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 577 | return Gather(scalar::Pointer<EL>(base), offs, mask, alignment, zeroMaskedLanes); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 578 | } |
| 579 | else |
| 580 | { |
| 581 | T out; |
| 582 | auto anyLanesDisabled = AnyFalse(mask); |
Nicolas Capens | 43183d8 | 2022-06-30 02:28:48 -0400 | [diff] [blame] | 583 | If(hasStaticEqualOffsets() && !anyLanesDisabled) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 584 | { |
| 585 | // Load one, replicate. |
| 586 | auto offset = Extract(offs, 0); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 587 | out = T(rr::Load(scalar::Pointer<EL>(&base[offset]), alignment, atomic, order)); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 588 | } |
Nicolas Capens | 43183d8 | 2022-06-30 02:28:48 -0400 | [diff] [blame] | 589 | Else If(hasStaticSequentialOffsets(sizeof(float)) && !anyLanesDisabled) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 590 | { |
| 591 | // Load all elements in a single SIMD instruction. |
| 592 | auto offset = Extract(offs, 0); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 593 | out = rr::Load(scalar::Pointer<T>(&base[offset]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 594 | } |
| 595 | Else |
| 596 | { |
| 597 | // Divergent offsets or masked lanes. |
| 598 | out = T(0); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 599 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 600 | { |
| 601 | If(Extract(mask, i) != 0) |
| 602 | { |
| 603 | auto offset = Extract(offs, i); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 604 | auto el = rr::Load(scalar::Pointer<EL>(&base[offset]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 605 | out = Insert(out, el, i); |
| 606 | } |
| 607 | } |
| 608 | } |
| 609 | return out; |
| 610 | } |
| 611 | } |
| 612 | |
| 613 | template<> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 614 | inline SIMD::Pointer SIMD::Pointer::Load(OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */, int alignment /* = sizeof(float) */) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 615 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 616 | std::vector<scalar::Pointer<Byte>> pointers(SIMD::Width); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 617 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 618 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 619 | { |
| 620 | If(Extract(mask, i) != 0) |
| 621 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 622 | pointers[i] = rr::Load(scalar::Pointer<scalar::Pointer<Byte>>(getPointerForLane(i)), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 623 | } |
| 624 | } |
| 625 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 626 | return SIMD::Pointer(pointers); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 627 | } |
| 628 | |
| 629 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 630 | inline void SIMD::Pointer::Store(T val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 631 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 632 | using EL = typename Scalar<T>::Type; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 633 | constexpr size_t alignment = sizeof(float); |
| 634 | |
| 635 | if(!isBasePlusOffset) |
| 636 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 637 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 638 | { |
| 639 | If(Extract(mask, i) != 0) |
| 640 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 641 | rr::Store(Extract(val, i), scalar::Pointer<EL>(pointers[i]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 642 | } |
| 643 | } |
| 644 | return; |
| 645 | } |
| 646 | |
| 647 | auto offs = offsets(); |
| 648 | switch(robustness) |
| 649 | { |
| 650 | case OutOfBoundsBehavior::Nullify: |
| 651 | case OutOfBoundsBehavior::RobustBufferAccess: // TODO: Allows writing anywhere within bounds. Could be faster than masking. |
| 652 | case OutOfBoundsBehavior::UndefinedValue: // Should not be used for store operations. Treat as robust buffer access. |
| 653 | mask &= isInBounds(sizeof(float), robustness); // Disable out-of-bounds writes. |
| 654 | break; |
| 655 | case OutOfBoundsBehavior::UndefinedBehavior: |
| 656 | // Nothing to do. Application/compiler must guarantee no out-of-bounds accesses. |
| 657 | break; |
| 658 | } |
| 659 | |
| 660 | if(!atomic && order == std::memory_order_relaxed) |
| 661 | { |
| 662 | if(hasStaticEqualOffsets()) |
| 663 | { |
| 664 | If(AnyTrue(mask)) |
| 665 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 666 | assert(SIMD::Width == 4); |
| 667 | |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 668 | // All equal. One of these writes will win -- elect the winning lane. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 669 | auto v0111 = SIMD::Int(0, 0xFFFFFFFF, 0xFFFFFFFF, 0xFFFFFFFF); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 670 | auto elect = mask & ~(v0111 & (mask.xxyz | mask.xxxy | mask.xxxx)); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 671 | auto maskedVal = As<SIMD::Int>(val) & elect; |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 672 | auto scalarVal = Extract(maskedVal, 0) | |
| 673 | Extract(maskedVal, 1) | |
| 674 | Extract(maskedVal, 2) | |
| 675 | Extract(maskedVal, 3); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 676 | *scalar::Pointer<EL>(base + staticOffsets[0], alignment) = As<EL>(scalarVal); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 677 | } |
| 678 | } |
| 679 | else if(hasStaticSequentialOffsets(sizeof(float)) && |
| 680 | isStaticallyInBounds(sizeof(float), robustness)) |
| 681 | { |
| 682 | // TODO(b/195446858): Optimize using masked store. |
| 683 | // Pointer has no elements OOB, and the store is not atomic. |
| 684 | // Perform a read-modify-write. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 685 | auto p = scalar::Pointer<SIMD::Int>(base + staticOffsets[0], alignment); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 686 | auto prev = *p; |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 687 | *p = (prev & ~mask) | (As<SIMD::Int>(val) & mask); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 688 | } |
| 689 | else |
| 690 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 691 | Scatter(scalar::Pointer<EL>(base), val, offs, mask, alignment); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 692 | } |
| 693 | } |
| 694 | else |
| 695 | { |
| 696 | auto anyLanesDisabled = AnyFalse(mask); |
Nicolas Capens | 43183d8 | 2022-06-30 02:28:48 -0400 | [diff] [blame] | 697 | If(hasStaticSequentialOffsets(sizeof(float)) && !anyLanesDisabled) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 698 | { |
| 699 | // Store all elements in a single SIMD instruction. |
| 700 | auto offset = Extract(offs, 0); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 701 | rr::Store(val, scalar::Pointer<T>(&base[offset]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 702 | } |
| 703 | Else |
| 704 | { |
| 705 | // Divergent offsets or masked lanes. |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 706 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 707 | { |
| 708 | If(Extract(mask, i) != 0) |
| 709 | { |
| 710 | auto offset = Extract(offs, i); |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 711 | rr::Store(Extract(val, i), scalar::Pointer<EL>(&base[offset]), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 712 | } |
| 713 | } |
| 714 | } |
| 715 | } |
| 716 | } |
| 717 | |
| 718 | template<> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 719 | inline void SIMD::Pointer::Store(SIMD::Pointer val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 720 | { |
| 721 | constexpr size_t alignment = sizeof(void *); |
| 722 | |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 723 | for(int i = 0; i < SIMD::Width; i++) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 724 | { |
| 725 | If(Extract(mask, i) != 0) |
| 726 | { |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 727 | rr::Store(val.getPointerForLane(i), scalar::Pointer<scalar::Pointer<Byte>>(getPointerForLane(i)), alignment, atomic, order); |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 728 | } |
| 729 | } |
| 730 | } |
| 731 | |
| 732 | template<typename T> |
Nicolas Capens | d1116fa | 2022-06-29 10:39:18 -0400 | [diff] [blame] | 733 | inline void SIMD::Pointer::Store(RValue<T> val, OutOfBoundsBehavior robustness, SIMD::Int mask, bool atomic /* = false */, std::memory_order order /* = std::memory_order_relaxed */) |
Nicolas Capens | 01e4180 | 2022-06-29 23:12:57 -0400 | [diff] [blame] | 734 | { |
| 735 | Store(T(val), robustness, mask, atomic, order); |
| 736 | } |
| 737 | |
Nicolas Capens | 44f9469 | 2022-06-20 23:15:46 -0400 | [diff] [blame] | 738 | } // namespace rr |
Nicolas Capens | 3b0ad20 | 2022-06-02 15:02:31 -0400 | [diff] [blame] | 739 | |
| 740 | #endif // rr_SIMD_hpp |