blob: ef09cbcd0cf2e087810b39278d9060a51f325bbb [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
David Neto22f144c2017-06-12 14:26:21 -040050uint32_t clz(uint32_t v) {
51 uint32_t r;
52 uint32_t shift;
53
54 r = (v > 0xFFFF) << 4;
55 v >>= r;
56 shift = (v > 0xFF) << 3;
57 v >>= shift;
58 r |= shift;
59 shift = (v > 0xF) << 2;
60 v >>= shift;
61 r |= shift;
62 shift = (v > 0x3) << 1;
63 v >>= shift;
64 r |= shift;
65 r |= (v >> 1);
66
67 return r;
68}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050081 spv::MemorySemanticsMask base_semantics,
82 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040083 enum AtomicMemoryOrder : uint32_t {
84 kMemoryOrderRelaxed = 0,
85 kMemoryOrderAcquire = 2,
86 kMemoryOrderRelease = 3,
87 kMemoryOrderAcqRel = 4,
88 kMemoryOrderSeqCst = 5
89 };
90
91 IRBuilder<> builder(InsertBefore);
92
93 // Constants for OpenCL C 2.0 memory_order.
94 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
95 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
96 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
97 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
98
99 // Constants for SPIR-V ordering memory semantics.
100 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
101 const auto AcquireSemantics =
102 builder.getInt32(spv::MemorySemanticsAcquireMask);
103 const auto ReleaseSemantics =
104 builder.getInt32(spv::MemorySemanticsReleaseMask);
105 const auto AcqRelSemantics =
106 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
107
108 // Constants for SPIR-V storage class semantics.
109 const auto UniformSemantics =
110 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
111 const auto WorkgroupSemantics =
112 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
113
114 // Instead of sequentially consistent, use acquire, release or acquire
115 // release semantics.
116 Value *base_order = nullptr;
117 switch (base_semantics) {
118 case spv::MemorySemanticsAcquireMask:
119 base_order = AcquireSemantics;
120 break;
121 case spv::MemorySemanticsReleaseMask:
122 base_order = ReleaseSemantics;
123 break;
124 default:
125 base_order = AcqRelSemantics;
126 break;
127 }
128
129 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500130 if (order == nullptr) {
131 if (include_storage)
132 return builder.CreateOr({storage, base_order});
133 else
134 return base_order;
135 }
alan-baker4986eff2020-10-29 13:38:00 -0400136
137 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
138 auto is_acquire = builder.CreateICmpEQ(order, acquire);
139 auto is_release = builder.CreateICmpEQ(order, release);
140 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
141 auto semantics =
142 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
143 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
144 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
145 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500146 if (include_storage)
147 return builder.CreateOr({storage, semantics});
148 else
149 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400150}
151
152Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
153 enum AtomicMemoryScope : uint32_t {
154 kMemoryScopeWorkItem = 0,
155 kMemoryScopeWorkGroup = 1,
156 kMemoryScopeDevice = 2,
157 kMemoryScopeAllSVMDevices = 3, // not supported
158 kMemoryScopeSubGroup = 4
159 };
160
161 IRBuilder<> builder(InsertBefore);
162
163 // Constants for OpenCL C 2.0 memory_scope.
164 const auto work_item =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
166 const auto work_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
168 const auto sub_group =
169 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
170 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
171
172 // Constants for SPIR-V memory scopes.
173 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
174 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
175 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
176 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
177
178 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
179 if (scope == nullptr)
180 return base_scope;
181
182 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
183 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
184 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
185 auto is_device = builder.CreateICmpEQ(scope, device);
186
187 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
188 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
189 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
190 scope = builder.CreateSelect(is_device, DeviceScope, scope);
191
192 return scope;
193}
194
SJW2c317da2020-03-23 07:39:13 -0500195bool replaceCallsWithValue(Function &F,
196 std::function<Value *(CallInst *)> Replacer) {
197
198 bool Changed = false;
199
200 SmallVector<Instruction *, 4> ToRemoves;
201
202 // Walk the users of the function.
203 for (auto &U : F.uses()) {
204 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
205
206 auto NewValue = Replacer(CI);
207
208 if (NewValue != nullptr) {
209 CI->replaceAllUsesWith(NewValue);
210
211 // Lastly, remember to remove the user.
212 ToRemoves.push_back(CI);
213 }
214 }
215 }
216
217 Changed = !ToRemoves.empty();
218
219 // And cleanup the calls we don't use anymore.
220 for (auto V : ToRemoves) {
221 V->eraseFromParent();
222 }
223
224 return Changed;
225}
226
David Neto22f144c2017-06-12 14:26:21 -0400227struct ReplaceOpenCLBuiltinPass final : public ModulePass {
228 static char ID;
229 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
230
231 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500232
233private:
SJW2c317da2020-03-23 07:39:13 -0500234 bool runOnFunction(Function &F);
235 bool replaceAbs(Function &F);
236 bool replaceAbsDiff(Function &F, bool is_signed);
237 bool replaceCopysign(Function &F);
238 bool replaceRecip(Function &F);
239 bool replaceDivide(Function &F);
240 bool replaceDot(Function &F);
241 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500242 bool replaceExp10(Function &F, const std::string &basename);
243 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100244 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400245 bool replaceBarrier(Function &F, bool subgroup = false);
alan-baker36309f92021-02-05 12:28:03 -0500246 bool replaceMemFence(Function &F, spv::MemorySemanticsMask semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100247 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500248 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500249 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
250 bool replaceIsFinite(Function &F);
251 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
252 bool replaceUpsample(Function &F);
253 bool replaceRotate(Function &F);
254 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
255 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
256 bool replaceSelect(Function &F);
257 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500258 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500259 bool replaceSignbit(Function &F, bool is_vec);
260 bool replaceMul(Function &F, bool is_float, bool is_mad);
261 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
262 bool replaceVloadHalf(Function &F);
263 bool replaceVloadHalf2(Function &F);
264 bool replaceVloadHalf4(Function &F);
265 bool replaceClspvVloadaHalf2(Function &F);
266 bool replaceClspvVloadaHalf4(Function &F);
267 bool replaceVstoreHalf(Function &F, int vec_size);
268 bool replaceVstoreHalf(Function &F);
269 bool replaceVstoreHalf2(Function &F);
270 bool replaceVstoreHalf4(Function &F);
271 bool replaceHalfReadImage(Function &F);
272 bool replaceHalfWriteImage(Function &F);
273 bool replaceSampledReadImageWithIntCoords(Function &F);
274 bool replaceAtomics(Function &F, spv::Op Op);
275 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400276 bool replaceAtomicLoad(Function &F);
277 bool replaceExplicitAtomics(Function &F, spv::Op Op,
278 spv::MemorySemanticsMask semantics =
279 spv::MemorySemanticsAcquireReleaseMask);
280 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500281 bool replaceCross(Function &F);
282 bool replaceFract(Function &F, int vec_size);
283 bool replaceVload(Function &F);
284 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500285 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000286 bool replaceHadd(Function &F, bool is_signed,
287 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500288 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500289 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500290 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500291 bool replaceIsNormal(Function &F);
alan-bakere0406e72020-11-10 12:32:04 -0500292 bool replaceFDim(Function &F);
alan-baker3e0de472020-12-08 15:57:17 -0500293 bool replaceRound(Function &F);
294 bool replaceTrigPi(Function &F, Builtins::BuiltinType type);
alan-baker8b968112020-12-15 15:53:29 -0500295 bool replaceSincos(Function &F);
296 bool replaceExpm1(Function &F);
297 bool replacePown(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500298
rjodinchr791203f2021-10-07 20:42:41 +0200299 bool replaceWaitGroupEvents(Function &F);
300 GlobalVariable *
301 getOrCreateGlobalVariable(Module &M, std::string VariableName,
302 AddressSpace::Type VariableAddressSpace);
303 Value *replaceAsyncWorkGroupCopies(Module &M, CallInst *CI, Value *Dst,
304 Value *Src, Value *NumGentypes,
305 Value *Stride, Value *Event);
306 bool replaceAsyncWorkGroupCopy(Function &F);
307 bool replaceAsyncWorkGroupStridedCopy(Function &F);
308
alan-baker6b9d1ee2020-11-03 23:11:32 -0500309 // Caches struct types for { |type|, |type| }. This prevents
310 // getOrInsertFunction from introducing a bitcasts between structs with
311 // identical contents.
312 Type *GetPairStruct(Type *type);
313
Romaric Jodinc507f312022-04-08 19:09:45 +0200314 Value *InsertOpMulExtended(Instruction *InsertPoint, Value *a, Value *b,
315 bool IsSigned, bool Int64 = false);
316
alan-baker6b9d1ee2020-11-03 23:11:32 -0500317 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400318};
SJW2c317da2020-03-23 07:39:13 -0500319
Kévin Petit91bc72e2019-04-08 15:17:46 +0100320} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400321
322char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400323INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
324 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400325
326namespace clspv {
327ModulePass *createReplaceOpenCLBuiltinPass() {
328 return new ReplaceOpenCLBuiltinPass();
329}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400330} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400331
332bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500333 std::list<Function *> func_list;
334 for (auto &F : M.getFunctionList()) {
335 // process only function declarations
336 if (F.isDeclaration() && runOnFunction(F)) {
337 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000338 }
339 }
SJW2c317da2020-03-23 07:39:13 -0500340 if (func_list.size() != 0) {
341 // recursively convert functions, but first remove dead
342 for (auto *F : func_list) {
343 if (F->use_empty()) {
344 F->eraseFromParent();
345 }
346 }
347 runOnModule(M);
348 return true;
349 }
350 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000351}
352
SJW2c317da2020-03-23 07:39:13 -0500353bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
354 auto &FI = Builtins::Lookup(&F);
355 switch (FI.getType()) {
356 case Builtins::kAbs:
357 if (!FI.getParameter(0).is_signed) {
358 return replaceAbs(F);
359 }
360 break;
361 case Builtins::kAbsDiff:
362 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400363
364 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500365 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400366
alan-bakercc2bafb2020-11-02 08:30:18 -0500367 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500368 return replaceCountZeroes(F, true);
369
370 case Builtins::kCtz:
371 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500372
alan-bakerb6da5132020-10-29 15:59:06 -0400373 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000374 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400375 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000376 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400377
SJW2c317da2020-03-23 07:39:13 -0500378 case Builtins::kCopysign:
379 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100380
SJW2c317da2020-03-23 07:39:13 -0500381 case Builtins::kHalfRecip:
382 case Builtins::kNativeRecip:
383 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100384
SJW2c317da2020-03-23 07:39:13 -0500385 case Builtins::kHalfDivide:
386 case Builtins::kNativeDivide:
387 return replaceDivide(F);
388
389 case Builtins::kDot:
390 return replaceDot(F);
391
392 case Builtins::kExp10:
393 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500394 case Builtins::kNativeExp10:
395 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500396
alan-baker8b968112020-12-15 15:53:29 -0500397 case Builtins::kExpm1:
398 return replaceExpm1(F);
399
SJW2c317da2020-03-23 07:39:13 -0500400 case Builtins::kLog10:
401 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500402 case Builtins::kNativeLog10:
403 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500404
gnl21636e7992020-09-09 16:08:16 +0100405 case Builtins::kLog1p:
406 return replaceLog1p(F);
407
alan-bakere0406e72020-11-10 12:32:04 -0500408 case Builtins::kFdim:
409 return replaceFDim(F);
410
SJW2c317da2020-03-23 07:39:13 -0500411 case Builtins::kFmod:
412 return replaceFmod(F);
413
alan-baker8b968112020-12-15 15:53:29 -0500414 case Builtins::kPown:
415 return replacePown(F);
416
alan-baker3e0de472020-12-08 15:57:17 -0500417 case Builtins::kRound:
418 return replaceRound(F);
419
420 case Builtins::kCospi:
421 case Builtins::kSinpi:
422 case Builtins::kTanpi:
423 return replaceTrigPi(F, FI.getType());
424
alan-baker8b968112020-12-15 15:53:29 -0500425 case Builtins::kSincos:
426 return replaceSincos(F);
427
SJW2c317da2020-03-23 07:39:13 -0500428 case Builtins::kBarrier:
429 case Builtins::kWorkGroupBarrier:
430 return replaceBarrier(F);
431
alan-baker12d2c182020-07-20 08:22:42 -0400432 case Builtins::kSubGroupBarrier:
433 return replaceBarrier(F, true);
434
alan-baker36309f92021-02-05 12:28:03 -0500435 case Builtins::kAtomicWorkItemFence:
436 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500437 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400438 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500439 case Builtins::kReadMemFence:
440 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
441 case Builtins::kWriteMemFence:
442 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
443
444 // Relational
445 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500446 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500447 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500448 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500449 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500450 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500451 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500452 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500453 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500454 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500455 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500456 return replaceRelational(F, CmpInst::FCMP_UNE);
457 case Builtins::kIslessgreater:
458 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500459
alan-baker15106572020-11-06 15:08:10 -0500460 case Builtins::kIsordered:
461 return replaceOrdered(F, true);
462
463 case Builtins::kIsunordered:
464 return replaceOrdered(F, false);
465
SJW2c317da2020-03-23 07:39:13 -0500466 case Builtins::kIsinf: {
467 bool is_vec = FI.getParameter(0).vector_size != 0;
468 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
469 }
470 case Builtins::kIsnan: {
471 bool is_vec = FI.getParameter(0).vector_size != 0;
472 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
473 }
474
475 case Builtins::kIsfinite:
476 return replaceIsFinite(F);
477
478 case Builtins::kAll: {
479 bool is_vec = FI.getParameter(0).vector_size != 0;
480 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
481 }
482 case Builtins::kAny: {
483 bool is_vec = FI.getParameter(0).vector_size != 0;
484 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
485 }
486
alan-baker497920b2020-11-09 16:41:36 -0500487 case Builtins::kIsnormal:
488 return replaceIsNormal(F);
489
SJW2c317da2020-03-23 07:39:13 -0500490 case Builtins::kUpsample:
491 return replaceUpsample(F);
492
493 case Builtins::kRotate:
494 return replaceRotate(F);
495
496 case Builtins::kConvert:
497 return replaceConvert(F, FI.getParameter(0).is_signed,
498 FI.getReturnType().is_signed);
499
alan-baker4986eff2020-10-29 13:38:00 -0400500 // OpenCL 2.0 explicit atomics have different default scopes and semantics
501 // than legacy atomic functions.
502 case Builtins::kAtomicLoad:
503 case Builtins::kAtomicLoadExplicit:
504 return replaceAtomicLoad(F);
505 case Builtins::kAtomicStore:
506 case Builtins::kAtomicStoreExplicit:
507 return replaceExplicitAtomics(F, spv::OpAtomicStore,
508 spv::MemorySemanticsReleaseMask);
509 case Builtins::kAtomicExchange:
510 case Builtins::kAtomicExchangeExplicit:
511 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
512 case Builtins::kAtomicFetchAdd:
513 case Builtins::kAtomicFetchAddExplicit:
514 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
515 case Builtins::kAtomicFetchSub:
516 case Builtins::kAtomicFetchSubExplicit:
517 return replaceExplicitAtomics(F, spv::OpAtomicISub);
518 case Builtins::kAtomicFetchOr:
519 case Builtins::kAtomicFetchOrExplicit:
520 return replaceExplicitAtomics(F, spv::OpAtomicOr);
521 case Builtins::kAtomicFetchXor:
522 case Builtins::kAtomicFetchXorExplicit:
523 return replaceExplicitAtomics(F, spv::OpAtomicXor);
524 case Builtins::kAtomicFetchAnd:
525 case Builtins::kAtomicFetchAndExplicit:
526 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
527 case Builtins::kAtomicFetchMin:
528 case Builtins::kAtomicFetchMinExplicit:
529 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
530 ? spv::OpAtomicSMin
531 : spv::OpAtomicUMin);
532 case Builtins::kAtomicFetchMax:
533 case Builtins::kAtomicFetchMaxExplicit:
534 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
535 ? spv::OpAtomicSMax
536 : spv::OpAtomicUMax);
537 // Weak compare exchange is generated as strong compare exchange.
538 case Builtins::kAtomicCompareExchangeWeak:
539 case Builtins::kAtomicCompareExchangeWeakExplicit:
540 case Builtins::kAtomicCompareExchangeStrong:
541 case Builtins::kAtomicCompareExchangeStrongExplicit:
542 return replaceAtomicCompareExchange(F);
543
544 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500545 case Builtins::kAtomicInc:
546 return replaceAtomics(F, spv::OpAtomicIIncrement);
547 case Builtins::kAtomicDec:
548 return replaceAtomics(F, spv::OpAtomicIDecrement);
549 case Builtins::kAtomicCmpxchg:
550 return replaceAtomics(F, spv::OpAtomicCompareExchange);
551 case Builtins::kAtomicAdd:
552 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
553 case Builtins::kAtomicSub:
554 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
555 case Builtins::kAtomicXchg:
556 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
557 case Builtins::kAtomicMin:
558 return replaceAtomics(F, FI.getParameter(0).is_signed
559 ? llvm::AtomicRMWInst::Min
560 : llvm::AtomicRMWInst::UMin);
561 case Builtins::kAtomicMax:
562 return replaceAtomics(F, FI.getParameter(0).is_signed
563 ? llvm::AtomicRMWInst::Max
564 : llvm::AtomicRMWInst::UMax);
565 case Builtins::kAtomicAnd:
566 return replaceAtomics(F, llvm::AtomicRMWInst::And);
567 case Builtins::kAtomicOr:
568 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
569 case Builtins::kAtomicXor:
570 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
571
572 case Builtins::kCross:
573 if (FI.getParameter(0).vector_size == 4) {
574 return replaceCross(F);
575 }
576 break;
577
578 case Builtins::kFract:
579 if (FI.getParameterCount()) {
580 return replaceFract(F, FI.getParameter(0).vector_size);
581 }
582 break;
583
584 case Builtins::kMadHi:
585 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
586 case Builtins::kMulHi:
587 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
588
alan-baker6b9d1ee2020-11-03 23:11:32 -0500589 case Builtins::kMadSat:
590 return replaceMadSat(F, FI.getParameter(0).is_signed);
591
SJW2c317da2020-03-23 07:39:13 -0500592 case Builtins::kMad:
593 case Builtins::kMad24:
594 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
595 true);
596 case Builtins::kMul24:
597 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
598 false);
599
600 case Builtins::kSelect:
601 return replaceSelect(F);
602
603 case Builtins::kBitselect:
604 return replaceBitSelect(F);
605
606 case Builtins::kVload:
607 return replaceVload(F);
608
609 case Builtins::kVloadaHalf:
610 case Builtins::kVloadHalf:
611 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
612
613 case Builtins::kVstore:
614 return replaceVstore(F);
615
616 case Builtins::kVstoreHalf:
617 case Builtins::kVstoreaHalf:
618 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
619
620 case Builtins::kSmoothstep: {
621 int vec_size = FI.getLastParameter().vector_size;
622 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500623 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500624 }
625 break;
626 }
627 case Builtins::kStep: {
628 int vec_size = FI.getLastParameter().vector_size;
629 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500630 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500631 }
632 break;
633 }
634
635 case Builtins::kSignbit:
636 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
637
alan-baker3f1bf492020-11-05 09:07:36 -0500638 case Builtins::kSubSat:
639 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
640
SJW2c317da2020-03-23 07:39:13 -0500641 case Builtins::kReadImageh:
642 return replaceHalfReadImage(F);
643 case Builtins::kReadImagef:
644 case Builtins::kReadImagei:
645 case Builtins::kReadImageui: {
646 if (FI.getParameter(1).isSampler() &&
647 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
648 return replaceSampledReadImageWithIntCoords(F);
649 }
650 break;
651 }
652
653 case Builtins::kWriteImageh:
654 return replaceHalfWriteImage(F);
655
Kévin Petit1cb45112020-04-27 18:55:48 +0100656 case Builtins::kPrefetch:
657 return replacePrefetch(F);
658
rjodinchr791203f2021-10-07 20:42:41 +0200659 // Asynchronous copies
660 case Builtins::kAsyncWorkGroupCopy:
661 return replaceAsyncWorkGroupCopy(F);
662 case Builtins::kAsyncWorkGroupStridedCopy:
663 return replaceAsyncWorkGroupStridedCopy(F);
664 case Builtins::kWaitGroupEvents:
665 return replaceWaitGroupEvents(F);
666
SJW2c317da2020-03-23 07:39:13 -0500667 default:
668 break;
669 }
670
671 return false;
672}
673
alan-baker6b9d1ee2020-11-03 23:11:32 -0500674Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
675 auto iter = PairStructMap.find(type);
676 if (iter != PairStructMap.end())
677 return iter->second;
678
679 auto new_struct = StructType::get(type->getContext(), {type, type});
680 PairStructMap[type] = new_struct;
681 return new_struct;
682}
683
Romaric Jodinc507f312022-04-08 19:09:45 +0200684Value *ReplaceOpenCLBuiltinPass::InsertOpMulExtended(Instruction *InsertPoint,
685 Value *a, Value *b,
686 bool IsSigned, bool Int64) {
687
688 Type *Ty = a->getType();
689 Type *RetTy = GetPairStruct(a->getType());
690 assert(Ty == b->getType());
691
692 if (!Option::HackMulExtended()) {
693 spv::Op opcode = IsSigned ? spv::OpSMulExtended : spv::OpUMulExtended;
694
695 return clspv::InsertSPIRVOp(InsertPoint, opcode, {Attribute::ReadNone},
696 RetTy, {a, b});
697 }
698
699 unsigned int ScalarSizeInBits = Ty->getScalarSizeInBits();
700 bool IsVector = Ty->isVectorTy();
701
702 IRBuilder<> Builder(InsertPoint);
703
704 if (ScalarSizeInBits < 32 || (ScalarSizeInBits == 32 && Int64)) {
705 /*
706 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
707 * S = SizeInBits(a)
708 * a_ext = ext2S(a, IsSigned)
709 * b_ext = ext2S(b, IsSigned)
710 * mul = a_ext * b_ext
711 * mul_lo = truncS(mul)
712 * mul_hi = truncS(mul >> S)
713 * return {mul_lo, mul_hi}
714 * }
715 */
716 Type *TyTimes2 =
717 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits * 2);
718 if (IsVector) {
719 TyTimes2 = VectorType::get(TyTimes2, dyn_cast<VectorType>(Ty));
720 }
721 Value *aExtended, *bExtended;
722 if (IsSigned) {
723 aExtended = Builder.CreateSExt(a, TyTimes2);
724 bExtended = Builder.CreateSExt(b, TyTimes2);
725 } else {
726 aExtended = Builder.CreateZExt(a, TyTimes2);
727 bExtended = Builder.CreateZExt(b, TyTimes2);
728 }
729 auto mul = Builder.CreateMul(aExtended, bExtended);
730 auto mul_lo = Builder.CreateTrunc(mul, Ty);
731 auto mul_hi =
732 Builder.CreateTrunc(Builder.CreateLShr(mul, ScalarSizeInBits), Ty);
733
734 return Builder.CreateInsertValue(
735 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
736 {1});
737 } else if (ScalarSizeInBits == 64 || (ScalarSizeInBits == 32 && !Int64)) {
738 /*
739 * {mul_lo, mul_hi} = OpMulExtended(a, b, IsSigned) {
740 * S = SizeInBits(a)
741 * hS = S / 2
742 * if (IsSigned) {
743 * res_neg = (a > 0) ^ (b > 0) = (a ^ b) < 0
744 * a = abs(a)
745 * b = abs(b)
746 * }
747 * a0 = trunchS(a)
748 * a1 = trunchS(a >> hS)
749 * b0 = trunchS(b)
750 * b1 = trunchS(b >> hS)
751 * {a0b0_0, a0b0_1} = zextS(OpUMulExtended(a0, b0))
752 * {a1b0_0, a1b0_1} = zextS(OpUMulExtended(a1, b0))
753 * {a0b1_0, a0b1_1} = zextS(OpUMulExtended(a0, b1))
754 * {a1b1_0, a1b1_1} = zextS(OpUMulExtended(a1, b1))
755 *
756 * mul_lo_hi = a0b0_1 + a1b0_0 + a0b1_0
757 * carry_mul_lo_hi = mul_lo_hi >> hS
758 * mul_hi_lo = a1b1_0 + a1b0_1 + a0b1_1 + carry_mul_lo_hi
759 * mul_lo = a0b0_0 + mul_lo_hi << hS
760 * mul_hi = mul_hi_lo + a1b1_1 << hS
761 *
762 * if (IsSigned) {
763 * mul_lo_xor = mul_lo ^ -1
764 * {mul_lo_inv, carry} = OpIAddCarry(mul_lo_xor, 1)
765 * mul_hi_inv = mul_hi ^ -1 + carry
766 * mul_lo = res_neg ? mul_lo_inv : mul_lo
767 * mul_hi = res_neg ? mul_hi_inv : mul_hi
768 * }
769 * return {mul_lo, mul_hi}
770 * }
771 */
772 Type *TyDiv2 =
773 Ty->getIntNTy(InsertPoint->getContext(), ScalarSizeInBits / 2);
774 if (IsVector) {
775 TyDiv2 = VectorType::get(TyDiv2, dyn_cast<VectorType>(Ty));
776 }
777
778 Value *res_neg;
779 if (IsSigned) {
780 // We want to work with unsigned value.
781 // Convert everything to unsigned and remember the signed of the end
782 // result.
783 auto a_b_xor = Builder.CreateXor(a, b);
784 res_neg = Builder.CreateICmpSLT(a_b_xor, ConstantInt::get(Ty, 0, true));
785
786 auto F = InsertPoint->getFunction();
787 auto abs = Intrinsic::getDeclaration(F->getParent(), Intrinsic::abs, Ty);
788 a = Builder.CreateCall(abs, {a, Builder.getInt1(false)});
789 b = Builder.CreateCall(abs, {b, Builder.getInt1(false)});
790 }
791
792 auto a0 = Builder.CreateTrunc(a, TyDiv2);
793 auto a1 = Builder.CreateTrunc(Builder.CreateLShr(a, ScalarSizeInBits / 2),
794 TyDiv2);
795 auto b0 = Builder.CreateTrunc(b, TyDiv2);
796 auto b1 = Builder.CreateTrunc(Builder.CreateLShr(b, ScalarSizeInBits / 2),
797 TyDiv2);
798
799 auto a0b0 = InsertOpMulExtended(InsertPoint, a0, b0, false, true);
800 auto a1b0 = InsertOpMulExtended(InsertPoint, a1, b0, false, true);
801 auto a0b1 = InsertOpMulExtended(InsertPoint, a0, b1, false, true);
802 auto a1b1 = InsertOpMulExtended(InsertPoint, a1, b1, false, true);
803 auto a0b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {0}), Ty);
804 auto a0b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b0, {1}), Ty);
805 auto a1b0_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {0}), Ty);
806 auto a1b0_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b0, {1}), Ty);
807 auto a0b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {0}), Ty);
808 auto a0b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a0b1, {1}), Ty);
809 auto a1b1_0 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {0}), Ty);
810 auto a1b1_1 = Builder.CreateZExt(Builder.CreateExtractValue(a1b1, {1}), Ty);
811
812 auto mul_lo_hi =
813 Builder.CreateAdd(Builder.CreateAdd(a0b0_1, a1b0_0), a0b1_0);
814 auto carry_mul_lo_hi = Builder.CreateLShr(mul_lo_hi, ScalarSizeInBits / 2);
815 auto mul_hi_lo = Builder.CreateAdd(
816 Builder.CreateAdd(Builder.CreateAdd(a1b1_0, a1b0_1), a0b1_1),
817 carry_mul_lo_hi);
818 auto mul_lo = Builder.CreateAdd(
819 a0b0_0, Builder.CreateShl(mul_lo_hi, ScalarSizeInBits / 2));
820 auto mul_hi = Builder.CreateAdd(
821 mul_hi_lo, Builder.CreateShl(a1b1_1, ScalarSizeInBits / 2));
822
823 if (IsSigned) {
824 // Apply the sign that we got from the previous if statement setting
825 // res_neg.
826 auto mul_lo_xor =
827 Builder.CreateXor(mul_lo, Constant::getAllOnesValue(Ty));
828 auto mul_lo_xor_add =
829 InsertSPIRVOp(InsertPoint, spv::OpIAddCarry, {Attribute::ReadNone},
830 RetTy, {mul_lo_xor, ConstantInt::get(Ty, 1)});
831 auto mul_lo_inv = Builder.CreateExtractValue(mul_lo_xor_add, {0});
832 auto carry = Builder.CreateExtractValue(mul_lo_xor_add, {1});
833 auto mul_hi_inv = Builder.CreateAdd(
834 carry, Builder.CreateXor(mul_hi, Constant::getAllOnesValue(Ty)));
835 mul_lo = Builder.CreateSelect(res_neg, mul_lo_inv, mul_lo);
836 mul_hi = Builder.CreateSelect(res_neg, mul_hi_inv, mul_hi);
837 }
838
839 return Builder.CreateInsertValue(
840 Builder.CreateInsertValue(UndefValue::get(RetTy), mul_lo, {0}), mul_hi,
841 {1});
842 } else {
843 llvm_unreachable("Unexpected type for InsertOpMulExtended");
844 }
845}
846
rjodinchr791203f2021-10-07 20:42:41 +0200847bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
848 /* Simple implementation for wait_group_events to avoid dealing with the event
849 * list:
850 *
851 * void wait_group_events(int num_events, event_t *event_list) {
852 * barrier(CLK_LOCAL_MEM_FENCE);
853 * }
854 *
855 */
856
857 enum {
858 CLK_LOCAL_MEM_FENCE = 0x01,
859 CLK_GLOBAL_MEM_FENCE = 0x02,
860 CLK_IMAGE_MEM_FENCE = 0x04
861 };
862
863 return replaceCallsWithValue(F, [](CallInst *CI) {
864 IRBuilder<> Builder(CI);
865
866 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
867 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
868 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
869 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
870 clz(CLK_LOCAL_MEM_FENCE)),
871 "", CI);
872 auto MemorySemantics = BinaryOperator::Create(
873 Instruction::Or, MemorySemanticsWorkgroup,
874 ConstantInt::get(Builder.getInt32Ty(),
875 spv::MemorySemanticsAcquireReleaseMask),
876 "", CI);
877
878 return clspv::InsertSPIRVOp(
879 CI, spv::OpControlBarrier,
880 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
881 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
882 });
883}
884
885GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
886 Module &M, std::string VariableName,
887 AddressSpace::Type VariableAddressSpace) {
888 GlobalVariable *GV = M.getGlobalVariable(VariableName);
889 if (GV == nullptr) {
890 IntegerType *IT = IntegerType::get(M.getContext(), 32);
891 VectorType *VT = FixedVectorType::get(IT, 3);
892
893 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
894 VariableName, nullptr,
895 GlobalValue::ThreadLocalMode::NotThreadLocal,
896 VariableAddressSpace);
897 GV->setInitializer(Constant::getNullValue(VT));
898 }
899 return GV;
900}
901
902Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
903 Module &M, CallInst *CI, Value *Dst, Value *Src, Value *NumGentypes,
904 Value *Stride, Value *Event) {
905 /*
906 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
907 * size_t stride, event_t event) {
908 * size_t start_id = ((get_local_id(2) * get_local_size(1))
909 * + get_local_id(1)) * get_local_size(0)
910 * + get_local_id(0);
911 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
912 * for (size_t it = start_id; it < num_gentypes; it += incr) {
913 * dst[it] = src[it * stride];
914 * }
915 * return event;
916 * }
917 */
918
919 /* BB:
920 * before
921 * async_work_group_strided_copy
922 * after
923 *
924 * ================================
925 *
926 * BB:
927 * before
928 * start_id = f(get_local_ids, get_local_sizes)
929 * incr = g(get_local_sizes)
930 * br CmpBB
931 *
932 * CmpBB:
933 * it = PHI(start_id, it)
934 * cmp = it < NumGentypes
935 * condBr cmp, LoopBB, ExitBB
936 *
937 * LoopBB:
938 * dstI = dst[it]
939 * srcI = src[it * stride]
940 * OpCopyMemory dstI, srcI
941 * it += incr
942 * br CmpBB
943 *
944 * ExitBB:
945 * after
946 */
947
948 IRBuilder<> Builder(CI);
949
950 auto Cst0 = Builder.getInt32(0);
951 auto Cst1 = Builder.getInt32(1);
952 auto Cst2 = Builder.getInt32(2);
953
954 // get_local_id({0, 1, 2});
955 GlobalVariable *GVId =
956 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
957 clspv::LocalInvocationIdAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100958 Type *GVIdElTy = GVId->getType()->getScalarType()->getPointerElementType();
959 Value *GEP0 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst0});
960 Value *LocalId0 =
961 Builder.CreateLoad(GEP0->getType()->getPointerElementType(), GEP0);
962 Value *GEP1 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst1});
963 Value *LocalId1 =
964 Builder.CreateLoad(GEP1->getType()->getPointerElementType(), GEP1);
965 Value *GEP2 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst2});
966 Value *LocalId2 =
967 Builder.CreateLoad(GEP2->getType()->getPointerElementType(), GEP2);
rjodinchr791203f2021-10-07 20:42:41 +0200968
969 // get_local_size({0, 1, 2});
970 GlobalVariable *GVSize =
971 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
972 clspv::WorkgroupSizeAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100973 auto LocalSize =
974 Builder.CreateLoad(GVSize->getType()->getPointerElementType(), GVSize);
rjodinchr791203f2021-10-07 20:42:41 +0200975 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
976 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
977 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
978
979 // size_t start_id = ((get_local_id(2) * get_local_size(1))
980 // + get_local_id(1)) * get_local_size(0)
981 // + get_local_id(0);
982 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
983 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
984 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
985 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
986
987 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
988 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
989 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
990
991 // Create BasicBlocks
992 auto BB = CI->getParent();
993 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
994 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
995 auto ExitBB = SplitBlock(BB, CI);
996
997 // BB
998 auto BrCmpBB = BranchInst::Create(CmpBB);
999 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
1000
1001 // CmpBB
1002 Builder.SetInsertPoint(CmpBB);
1003 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
1004 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
1005 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
1006
1007 // LoopBB
1008 Builder.SetInsertPoint(LoopBB);
1009
1010 // default values for non-strided copies
1011 Value *SrcIterator = PHIIterator;
1012 Value *DstIterator = PHIIterator;
1013 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
1014 clspv::AddressSpace::Global)) {
1015 // async_work_group_strided_copy local to global case
1016 DstIterator = Builder.CreateMul(PHIIterator, Stride);
1017 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
1018 clspv::AddressSpace::Local)) {
1019 // async_work_group_strided_copy global to local case
1020 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
1021 }
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01001022 auto DstI = Builder.CreateGEP(
1023 Dst->getType()->getScalarType()->getPointerElementType(), Dst,
1024 DstIterator);
1025 auto SrcI = Builder.CreateGEP(
1026 Src->getType()->getScalarType()->getPointerElementType(), Src,
1027 SrcIterator);
rjodinchr791203f2021-10-07 20:42:41 +02001028 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
1029 auto Br = Builder.CreateBr(CmpBB);
1030 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
1031 {DstI, SrcI});
1032
1033 // Set PHIIterator for CmpBB now that we have NewIterator
1034 PHIIterator->addIncoming(StartId, BB);
1035 PHIIterator->addIncoming(NewIterator, LoopBB);
1036
1037 return Event;
1038}
1039
1040bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F) {
1041 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
1042 Module &M = *F.getParent();
1043
1044 auto Dst = CI->getOperand(0);
1045 auto Src = CI->getOperand(1);
1046 auto NumGentypes = CI->getOperand(2);
1047 auto Event = CI->getOperand(3);
1048
1049 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, nullptr,
1050 Event);
1051 });
1052}
1053
1054bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F) {
1055 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
1056 Module &M = *F.getParent();
1057
1058 auto Dst = CI->getOperand(0);
1059 auto Src = CI->getOperand(1);
1060 auto NumGentypes = CI->getOperand(2);
1061 auto Stride = CI->getOperand(3);
1062 auto Event = CI->getOperand(4);
1063
1064 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, Stride,
1065 Event);
1066 });
1067}
1068
SJW2c317da2020-03-23 07:39:13 -05001069bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
1070 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001071 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +01001072}
1073
SJW2c317da2020-03-23 07:39:13 -05001074bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
1075 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001076 auto XValue = CI->getOperand(0);
1077 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +01001078
Kévin Petite8edce32019-04-10 14:23:32 +01001079 IRBuilder<> Builder(CI);
1080 auto XmY = Builder.CreateSub(XValue, YValue);
1081 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +01001082
SJW2c317da2020-03-23 07:39:13 -05001083 Value *Cmp = nullptr;
1084 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +01001085 Cmp = Builder.CreateICmpSGT(YValue, XValue);
1086 } else {
1087 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +01001088 }
Kévin Petit91bc72e2019-04-08 15:17:46 +01001089
Kévin Petite8edce32019-04-10 14:23:32 +01001090 return Builder.CreateSelect(Cmp, YmX, XmY);
1091 });
Kévin Petit91bc72e2019-04-08 15:17:46 +01001092}
1093
SJW2c317da2020-03-23 07:39:13 -05001094bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -05001095 return replaceCallsWithValue(F, [&F](CallInst *Call) {
1096 const auto x = Call->getArgOperand(0);
1097 const auto y = Call->getArgOperand(1);
1098 auto intrinsic = Intrinsic::getDeclaration(
1099 F.getParent(), Intrinsic::copysign, Call->getType());
1100 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
1101 Call);
Kévin Petite8edce32019-04-10 14:23:32 +01001102 });
Kévin Petit8c1be282019-04-02 19:34:25 +01001103}
1104
SJW2c317da2020-03-23 07:39:13 -05001105bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
1106 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001107 // Recip has one arg.
1108 auto Arg = CI->getOperand(0);
1109 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
1110 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
1111 });
David Neto22f144c2017-06-12 14:26:21 -04001112}
1113
SJW2c317da2020-03-23 07:39:13 -05001114bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
1115 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001116 auto Op0 = CI->getOperand(0);
1117 auto Op1 = CI->getOperand(1);
1118 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
1119 });
David Neto22f144c2017-06-12 14:26:21 -04001120}
1121
SJW2c317da2020-03-23 07:39:13 -05001122bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
1123 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +01001124 auto Op0 = CI->getOperand(0);
1125 auto Op1 = CI->getOperand(1);
1126
SJW2c317da2020-03-23 07:39:13 -05001127 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +01001128 if (Op0->getType()->isVectorTy()) {
1129 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
1130 CI->getType(), {Op0, Op1});
1131 } else {
1132 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
1133 }
1134
1135 return V;
1136 });
1137}
1138
SJW2c317da2020-03-23 07:39:13 -05001139bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -05001140 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001141 // convert to natural
1142 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001143 std::string NewFName = basename.substr(0, slen);
1144 NewFName =
1145 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001146
SJW2c317da2020-03-23 07:39:13 -05001147 Module &M = *F.getParent();
1148 return replaceCallsWithValue(F, [&](CallInst *CI) {
1149 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1150
1151 auto Arg = CI->getOperand(0);
1152
1153 // Constant of the natural log of 10 (ln(10)).
1154 const double Ln10 =
1155 2.302585092994045684017991454684364207601101488628772976033;
1156
1157 auto Mul = BinaryOperator::Create(
1158 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
1159
1160 return CallInst::Create(NewF, Mul, "", CI);
1161 });
David Neto22f144c2017-06-12 14:26:21 -04001162}
1163
SJW2c317da2020-03-23 07:39:13 -05001164bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001165 // OpenCL fmod(x,y) is x - y * trunc(x/y)
1166 // The sign for a non-zero result is taken from x.
1167 // (Try an example.)
1168 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -05001169 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001170 auto Op0 = CI->getOperand(0);
1171 auto Op1 = CI->getOperand(1);
1172 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
1173 });
1174}
1175
SJW2c317da2020-03-23 07:39:13 -05001176bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -05001177 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001178 // convert to natural
1179 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001180 std::string NewFName = basename.substr(0, slen);
1181 NewFName =
1182 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001183
SJW2c317da2020-03-23 07:39:13 -05001184 Module &M = *F.getParent();
1185 return replaceCallsWithValue(F, [&](CallInst *CI) {
1186 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1187
1188 auto Arg = CI->getOperand(0);
1189
1190 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1191 const double Ln10 =
1192 0.434294481903251827651128918916605082294397005803666566114;
1193
1194 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1195
1196 return BinaryOperator::Create(Instruction::FMul,
1197 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1198 "", CI);
1199 });
David Neto22f144c2017-06-12 14:26:21 -04001200}
1201
gnl21636e7992020-09-09 16:08:16 +01001202bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1203 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001204 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001205 auto Arg = CI->getOperand(0);
1206
1207 auto ArgP1 = BinaryOperator::Create(
1208 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1209
alan-baker8b968112020-12-15 15:53:29 -05001210 auto log =
1211 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1212 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001213 });
1214}
1215
alan-baker12d2c182020-07-20 08:22:42 -04001216bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001217
alan-bakerf6bc8252020-09-23 14:58:55 -04001218 enum {
1219 CLK_LOCAL_MEM_FENCE = 0x01,
1220 CLK_GLOBAL_MEM_FENCE = 0x02,
1221 CLK_IMAGE_MEM_FENCE = 0x04
1222 };
David Neto22f144c2017-06-12 14:26:21 -04001223
alan-baker12d2c182020-07-20 08:22:42 -04001224 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001225 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001226
Kévin Petitc4643922019-06-17 19:32:05 +01001227 // We need to map the OpenCL constants to the SPIR-V equivalents.
1228 const auto LocalMemFence =
1229 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1230 const auto GlobalMemFence =
1231 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001232 const auto ImageMemFence =
1233 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001234 const auto ConstantAcquireRelease = ConstantInt::get(
1235 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001236 const auto ConstantScopeDevice =
1237 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1238 const auto ConstantScopeWorkgroup =
1239 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001240 const auto ConstantScopeSubgroup =
1241 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001242
Kévin Petitc4643922019-06-17 19:32:05 +01001243 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1244 const auto LocalMemFenceMask =
1245 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1246 const auto WorkgroupShiftAmount =
1247 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1248 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1249 Instruction::Shl, LocalMemFenceMask,
1250 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001251
Kévin Petitc4643922019-06-17 19:32:05 +01001252 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1253 const auto GlobalMemFenceMask =
1254 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1255 const auto UniformShiftAmount =
1256 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1257 const auto MemorySemanticsUniform = BinaryOperator::Create(
1258 Instruction::Shl, GlobalMemFenceMask,
1259 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001260
alan-bakerf6bc8252020-09-23 14:58:55 -04001261 // OpenCL 2.0
1262 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1263 const auto ImageMemFenceMask =
1264 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1265 const auto ImageShiftAmount =
1266 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1267 const auto MemorySemanticsImage = BinaryOperator::Create(
1268 Instruction::Shl, ImageMemFenceMask,
1269 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1270
Kévin Petitc4643922019-06-17 19:32:05 +01001271 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001272 // MemorySemanticsSequentiallyConsistentMask.
1273 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001274 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001275 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001276 auto MemorySemantics2 = BinaryOperator::Create(
1277 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1278 auto MemorySemantics = BinaryOperator::Create(
1279 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001280
alan-baker12d2c182020-07-20 08:22:42 -04001281 // If the memory scope is not specified explicitly, it is either Subgroup
1282 // or Workgroup depending on the type of barrier.
1283 Value *MemoryScope =
1284 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1285 if (CI->data_operands_size() > 1) {
1286 enum {
1287 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1288 CL_MEMORY_SCOPE_DEVICE = 0x2,
1289 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1290 };
1291 // The call was given an explicit memory scope.
1292 const auto MemoryScopeSubgroup =
1293 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1294 const auto MemoryScopeDevice =
1295 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001296
alan-baker12d2c182020-07-20 08:22:42 -04001297 auto Cmp =
1298 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1299 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1300 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1301 ConstantScopeWorkgroup, "", CI);
1302 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1303 MemoryScopeDevice, CI->getOperand(1), "", CI);
1304 MemoryScope =
1305 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1306 }
1307
1308 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1309 // the type of barrier;
1310 const auto ExecutionScope =
1311 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001312
Kévin Petitc4643922019-06-17 19:32:05 +01001313 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001314 {Attribute::NoDuplicate, Attribute::Convergent},
1315 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001316 {ExecutionScope, MemoryScope, MemorySemantics});
1317 });
David Neto22f144c2017-06-12 14:26:21 -04001318}
1319
alan-baker36309f92021-02-05 12:28:03 -05001320bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1321 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001322
SJW2c317da2020-03-23 07:39:13 -05001323 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001324 enum {
1325 CLK_LOCAL_MEM_FENCE = 0x01,
1326 CLK_GLOBAL_MEM_FENCE = 0x02,
1327 CLK_IMAGE_MEM_FENCE = 0x04,
1328 };
David Neto22f144c2017-06-12 14:26:21 -04001329
SJW2c317da2020-03-23 07:39:13 -05001330 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001331
SJW2c317da2020-03-23 07:39:13 -05001332 // We need to map the OpenCL constants to the SPIR-V equivalents.
1333 const auto LocalMemFence =
1334 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1335 const auto GlobalMemFence =
1336 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001337 const auto ImageMemFence =
1338 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001339 const auto ConstantMemorySemantics =
1340 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001341 const auto ConstantScopeWorkgroup =
1342 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001343
SJW2c317da2020-03-23 07:39:13 -05001344 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1345 const auto LocalMemFenceMask =
1346 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1347 const auto WorkgroupShiftAmount =
1348 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1349 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1350 Instruction::Shl, LocalMemFenceMask,
1351 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001352
SJW2c317da2020-03-23 07:39:13 -05001353 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1354 const auto GlobalMemFenceMask =
1355 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1356 const auto UniformShiftAmount =
1357 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1358 const auto MemorySemanticsUniform = BinaryOperator::Create(
1359 Instruction::Shl, GlobalMemFenceMask,
1360 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001361
alan-bakerf6bc8252020-09-23 14:58:55 -04001362 // OpenCL 2.0
1363 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1364 const auto ImageMemFenceMask =
1365 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1366 const auto ImageShiftAmount =
1367 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1368 const auto MemorySemanticsImage = BinaryOperator::Create(
1369 Instruction::Shl, ImageMemFenceMask,
1370 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1371
alan-baker36309f92021-02-05 12:28:03 -05001372 Value *MemOrder = ConstantMemorySemantics;
1373 Value *MemScope = ConstantScopeWorkgroup;
1374 IRBuilder<> builder(CI);
alan-baker5641f5c2021-10-15 09:16:04 -04001375 if (CI->arg_size() > 1) {
alan-baker36309f92021-02-05 12:28:03 -05001376 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1377 semantics, false);
1378 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1379 }
1380 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001381 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001382 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1383 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1384 auto MemorySemantics =
1385 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001386
alan-baker3d905692020-10-28 14:02:37 -04001387 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1388 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001389 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001390 });
David Neto22f144c2017-06-12 14:26:21 -04001391}
1392
Kévin Petit1cb45112020-04-27 18:55:48 +01001393bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1394 bool Changed = false;
1395
1396 SmallVector<Instruction *, 4> ToRemoves;
1397
1398 // Find all calls to the function
1399 for (auto &U : F.uses()) {
1400 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1401 ToRemoves.push_back(CI);
1402 }
1403 }
1404
1405 Changed = !ToRemoves.empty();
1406
1407 // Delete them
1408 for (auto V : ToRemoves) {
1409 V->eraseFromParent();
1410 }
1411
1412 return Changed;
1413}
1414
SJW2c317da2020-03-23 07:39:13 -05001415bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001416 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001417 return replaceCallsWithValue(F, [&](CallInst *CI) {
1418 // The predicate to use in the CmpInst.
1419 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001420
SJW2c317da2020-03-23 07:39:13 -05001421 auto Arg1 = CI->getOperand(0);
1422 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001423
SJW2c317da2020-03-23 07:39:13 -05001424 const auto Cmp =
1425 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001426 if (isa<VectorType>(F.getReturnType()))
1427 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1428 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001429 });
David Neto22f144c2017-06-12 14:26:21 -04001430}
1431
SJW2c317da2020-03-23 07:39:13 -05001432bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1433 spv::Op SPIRVOp,
1434 int32_t C) {
1435 Module &M = *F.getParent();
1436 return replaceCallsWithValue(F, [&](CallInst *CI) {
1437 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001438
SJW2c317da2020-03-23 07:39:13 -05001439 // The value to return for true.
1440 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001441
SJW2c317da2020-03-23 07:39:13 -05001442 // The value to return for false.
1443 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001444
SJW2c317da2020-03-23 07:39:13 -05001445 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001446 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001447 CorrespondingBoolTy =
1448 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1449 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001450 }
David Neto22f144c2017-06-12 14:26:21 -04001451
SJW2c317da2020-03-23 07:39:13 -05001452 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1453 CorrespondingBoolTy, {CI->getOperand(0)});
1454
1455 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1456 });
David Neto22f144c2017-06-12 14:26:21 -04001457}
1458
SJW2c317da2020-03-23 07:39:13 -05001459bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1460 Module &M = *F.getParent();
1461 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001462 auto &C = M.getContext();
1463 auto Val = CI->getOperand(0);
1464 auto ValTy = Val->getType();
1465 auto RetTy = CI->getType();
1466
1467 // Get a suitable integer type to represent the number
1468 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1469
1470 // Create Mask
1471 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001472 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001473 switch (ScalarSize) {
1474 case 16:
1475 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1476 break;
1477 case 32:
1478 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1479 break;
1480 case 64:
1481 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1482 break;
1483 default:
1484 llvm_unreachable("Unsupported floating-point type");
1485 }
1486
1487 IRBuilder<> Builder(CI);
1488
1489 // Bitcast to int
1490 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1491
1492 // Mask and compare
1493 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1494 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1495
1496 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001497 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001498 if (ValTy->isVectorTy()) {
1499 RetTrue = ConstantInt::getSigned(RetTy, -1);
1500 } else {
1501 RetTrue = ConstantInt::get(RetTy, 1);
1502 }
1503 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1504 });
1505}
1506
SJW2c317da2020-03-23 07:39:13 -05001507bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1508 Module &M = *F.getParent();
1509 return replaceCallsWithValue(F, [&](CallInst *CI) {
1510 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001511
SJW2c317da2020-03-23 07:39:13 -05001512 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001513
SJW2c317da2020-03-23 07:39:13 -05001514 // If the argument is a 32-bit int, just use a shift
1515 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1516 V = BinaryOperator::Create(Instruction::LShr, Arg,
1517 ConstantInt::get(Arg->getType(), 31), "", CI);
1518 } else {
1519 // The value for zero to compare against.
1520 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001521
SJW2c317da2020-03-23 07:39:13 -05001522 // The value to return for true.
1523 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001524
SJW2c317da2020-03-23 07:39:13 -05001525 // The value to return for false.
1526 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001527
SJW2c317da2020-03-23 07:39:13 -05001528 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1529 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001530
SJW2c317da2020-03-23 07:39:13 -05001531 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001532
SJW2c317da2020-03-23 07:39:13 -05001533 // If we have a function to call, call it!
1534 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001535
SJW2c317da2020-03-23 07:39:13 -05001536 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001537
SJW2c317da2020-03-23 07:39:13 -05001538 const auto NewCI = clspv::InsertSPIRVOp(
1539 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1540 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001541
SJW2c317da2020-03-23 07:39:13 -05001542 } else {
1543 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001544 }
1545
SJW2c317da2020-03-23 07:39:13 -05001546 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001547 }
SJW2c317da2020-03-23 07:39:13 -05001548 return V;
1549 });
David Neto22f144c2017-06-12 14:26:21 -04001550}
1551
SJW2c317da2020-03-23 07:39:13 -05001552bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1553 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1554 // Get arguments
1555 auto HiValue = CI->getOperand(0);
1556 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001557
SJW2c317da2020-03-23 07:39:13 -05001558 // Don't touch overloads that aren't in OpenCL C
1559 auto HiType = HiValue->getType();
1560 auto LoType = LoValue->getType();
1561
1562 if (HiType != LoType) {
1563 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001564 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001565
SJW2c317da2020-03-23 07:39:13 -05001566 if (!HiType->isIntOrIntVectorTy()) {
1567 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001568 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001569
SJW2c317da2020-03-23 07:39:13 -05001570 if (HiType->getScalarSizeInBits() * 2 !=
1571 CI->getType()->getScalarSizeInBits()) {
1572 return nullptr;
1573 }
1574
1575 if ((HiType->getScalarSizeInBits() != 8) &&
1576 (HiType->getScalarSizeInBits() != 16) &&
1577 (HiType->getScalarSizeInBits() != 32)) {
1578 return nullptr;
1579 }
1580
James Pricecf53df42020-04-20 14:41:24 -04001581 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001582 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001583 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1584 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001585 return nullptr;
1586 }
1587 }
1588
1589 // Convert both operands to the result type
1590 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1591 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1592
1593 // Shift high operand
1594 auto ShiftAmount =
1595 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1596 auto HiShifted =
1597 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1598
1599 // OR both results
1600 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1601 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001602}
1603
SJW2c317da2020-03-23 07:39:13 -05001604bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1605 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1606 // Get arguments
1607 auto SrcValue = CI->getOperand(0);
1608 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001609
SJW2c317da2020-03-23 07:39:13 -05001610 // Don't touch overloads that aren't in OpenCL C
1611 auto SrcType = SrcValue->getType();
1612 auto RotType = RotAmount->getType();
1613
1614 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1615 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001616 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001617
SJW2c317da2020-03-23 07:39:13 -05001618 if (!SrcType->isIntOrIntVectorTy()) {
1619 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001620 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001621
SJW2c317da2020-03-23 07:39:13 -05001622 if ((SrcType->getScalarSizeInBits() != 8) &&
1623 (SrcType->getScalarSizeInBits() != 16) &&
1624 (SrcType->getScalarSizeInBits() != 32) &&
1625 (SrcType->getScalarSizeInBits() != 64)) {
1626 return nullptr;
1627 }
1628
James Pricecf53df42020-04-20 14:41:24 -04001629 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001630 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001631 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1632 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001633 return nullptr;
1634 }
1635 }
1636
alan-bakerfd22ae12020-10-29 15:59:22 -04001637 // Replace with LLVM's funnel shift left intrinsic because it is more
1638 // generic than rotate.
1639 Function *intrinsic =
1640 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1641 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1642 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001643 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001644}
1645
SJW2c317da2020-03-23 07:39:13 -05001646bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1647 bool DstIsSigned) {
1648 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1649 Value *V = nullptr;
1650 // Get arguments
1651 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001652
SJW2c317da2020-03-23 07:39:13 -05001653 // Don't touch overloads that aren't in OpenCL C
1654 auto SrcType = SrcValue->getType();
1655 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001656
SJW2c317da2020-03-23 07:39:13 -05001657 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1658 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1659 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001660 }
1661
James Pricecf53df42020-04-20 14:41:24 -04001662 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001663 unsigned SrcNumElements =
1664 SrcVecType->getElementCount().getKnownMinValue();
1665 unsigned DstNumElements =
1666 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001667 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001668 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001669 }
1670
James Pricecf53df42020-04-20 14:41:24 -04001671 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1672 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1673 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001674 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001675 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001676 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001677
SJW2c317da2020-03-23 07:39:13 -05001678 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1679 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1680
1681 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1682 bool DstIsInt = DstType->isIntOrIntVectorTy();
1683
1684 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1685 // Unnecessary cast operation.
1686 V = SrcValue;
1687 } else if (SrcIsFloat && DstIsFloat) {
1688 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1689 } else if (SrcIsFloat && DstIsInt) {
1690 if (DstIsSigned) {
1691 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1692 } else {
1693 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1694 }
1695 } else if (SrcIsInt && DstIsFloat) {
1696 if (SrcIsSigned) {
1697 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1698 } else {
1699 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1700 }
1701 } else if (SrcIsInt && DstIsInt) {
1702 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1703 } else {
1704 // Not something we're supposed to handle, just move on
1705 }
1706
1707 return V;
1708 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001709}
1710
SJW2c317da2020-03-23 07:39:13 -05001711bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1712 bool is_mad) {
1713 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1714 Value *V = nullptr;
1715 // Get arguments
1716 auto AValue = CI->getOperand(0);
1717 auto BValue = CI->getOperand(1);
1718 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001719
SJW2c317da2020-03-23 07:39:13 -05001720 // Don't touch overloads that aren't in OpenCL C
1721 auto AType = AValue->getType();
1722 auto BType = BValue->getType();
1723 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001724
SJW2c317da2020-03-23 07:39:13 -05001725 if ((AType != BType) || (CI->getType() != AType) ||
1726 (is_mad && (AType != CType))) {
1727 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001728 }
1729
SJW2c317da2020-03-23 07:39:13 -05001730 if (!AType->isIntOrIntVectorTy()) {
1731 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001732 }
Kévin Petit8a560882019-03-21 15:24:34 +00001733
SJW2c317da2020-03-23 07:39:13 -05001734 if ((AType->getScalarSizeInBits() != 8) &&
1735 (AType->getScalarSizeInBits() != 16) &&
1736 (AType->getScalarSizeInBits() != 32) &&
1737 (AType->getScalarSizeInBits() != 64)) {
1738 return V;
1739 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001740
James Pricecf53df42020-04-20 14:41:24 -04001741 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001742 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001743 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1744 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001745 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001746 }
1747 }
1748
Romaric Jodinc507f312022-04-08 19:09:45 +02001749 auto Call = InsertOpMulExtended(CI, AValue, BValue, is_signed);
SJW2c317da2020-03-23 07:39:13 -05001750
1751 // Get the high part of the result
1752 unsigned Idxs[] = {1};
1753 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1754
1755 // If we're handling a mad_hi, add the third argument to the result
1756 if (is_mad) {
1757 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001758 }
1759
SJW2c317da2020-03-23 07:39:13 -05001760 return V;
1761 });
Kévin Petit8a560882019-03-21 15:24:34 +00001762}
1763
SJW2c317da2020-03-23 07:39:13 -05001764bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1765 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1766 // Get arguments
1767 auto FalseValue = CI->getOperand(0);
1768 auto TrueValue = CI->getOperand(1);
1769 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001770
SJW2c317da2020-03-23 07:39:13 -05001771 // Don't touch overloads that aren't in OpenCL C
1772 auto FalseType = FalseValue->getType();
1773 auto TrueType = TrueValue->getType();
1774 auto PredicateType = PredicateValue->getType();
1775
1776 if (FalseType != TrueType) {
1777 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001778 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001779
SJW2c317da2020-03-23 07:39:13 -05001780 if (!PredicateType->isIntOrIntVectorTy()) {
1781 return nullptr;
1782 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001783
SJW2c317da2020-03-23 07:39:13 -05001784 if (!FalseType->isIntOrIntVectorTy() &&
1785 !FalseType->getScalarType()->isFloatingPointTy()) {
1786 return nullptr;
1787 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001788
SJW2c317da2020-03-23 07:39:13 -05001789 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1790 return nullptr;
1791 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001792
SJW2c317da2020-03-23 07:39:13 -05001793 if (FalseType->getScalarSizeInBits() !=
1794 PredicateType->getScalarSizeInBits()) {
1795 return nullptr;
1796 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001797
James Pricecf53df42020-04-20 14:41:24 -04001798 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001799 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1800 if (NumElements != cast<VectorType>(PredicateType)
1801 ->getElementCount()
1802 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001803 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001804 }
1805
James Pricecf53df42020-04-20 14:41:24 -04001806 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1807 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001808 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001809 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001810 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001811
SJW2c317da2020-03-23 07:39:13 -05001812 // Create constant
1813 const auto ZeroValue = Constant::getNullValue(PredicateType);
1814
1815 // Scalar and vector are to be treated differently
1816 CmpInst::Predicate Pred;
1817 if (PredicateType->isVectorTy()) {
1818 Pred = CmpInst::ICMP_SLT;
1819 } else {
1820 Pred = CmpInst::ICMP_NE;
1821 }
1822
1823 // Create comparison instruction
1824 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1825 ZeroValue, "", CI);
1826
1827 // Create select
1828 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1829 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001830}
1831
SJW2c317da2020-03-23 07:39:13 -05001832bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1833 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1834 Value *V = nullptr;
1835 if (CI->getNumOperands() != 4) {
1836 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001837 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001838
SJW2c317da2020-03-23 07:39:13 -05001839 // Get arguments
1840 auto FalseValue = CI->getOperand(0);
1841 auto TrueValue = CI->getOperand(1);
1842 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001843
SJW2c317da2020-03-23 07:39:13 -05001844 // Don't touch overloads that aren't in OpenCL C
1845 auto FalseType = FalseValue->getType();
1846 auto TrueType = TrueValue->getType();
1847 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001848
SJW2c317da2020-03-23 07:39:13 -05001849 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1850 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001851 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001852
James Pricecf53df42020-04-20 14:41:24 -04001853 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001854 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1855 !TrueType->getScalarType()->isIntegerTy()) {
1856 return V;
1857 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001858 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001859 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1860 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001861 return V;
1862 }
1863 }
1864
1865 // Remember the type of the operands
1866 auto OpType = TrueType;
1867
1868 // The actual bit selection will always be done on an integer type,
1869 // declare it here
1870 Type *BitType;
1871
1872 // If the operands are float, then bitcast them to int
1873 if (OpType->getScalarType()->isFloatingPointTy()) {
1874
1875 // First create the new type
1876 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1877
1878 // Then bitcast all operands
1879 PredicateValue =
1880 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1881 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1882 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1883
1884 } else {
1885 // The operands have an integer type, use it directly
1886 BitType = OpType;
1887 }
1888
1889 // All the operands are now always integers
1890 // implement as (c & b) | (~c & a)
1891
1892 // Create our negated predicate value
1893 auto AllOnes = Constant::getAllOnesValue(BitType);
1894 auto NotPredicateValue = BinaryOperator::Create(
1895 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1896
1897 // Then put everything together
1898 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1899 FalseValue, "", CI);
1900 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1901 TrueValue, "", CI);
1902
1903 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1904
1905 // If we were dealing with a floating point type, we must bitcast
1906 // the result back to that
1907 if (OpType->getScalarType()->isFloatingPointTy()) {
1908 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1909 }
1910
1911 return V;
1912 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001913}
1914
SJW61531372020-06-09 07:31:08 -05001915bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001916 // convert to vector versions
1917 Module &M = *F.getParent();
1918 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1919 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1920 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001921
SJW2c317da2020-03-23 07:39:13 -05001922 // First figure out which function we're dealing with
1923 if (is_smooth) {
1924 ArgsToSplat.push_back(CI->getOperand(1));
1925 VectorArg = CI->getOperand(2);
1926 } else {
1927 VectorArg = CI->getOperand(1);
1928 }
1929
1930 // Splat arguments that need to be
1931 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001932 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001933
1934 for (auto arg : ArgsToSplat) {
1935 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001936 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1937 i++) {
SJW2c317da2020-03-23 07:39:13 -05001938 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1939 NewVectorArg =
1940 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1941 }
1942 SplatArgs.push_back(NewVectorArg);
1943 }
1944
1945 // Replace the call with the vector/vector flavour
1946 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1947 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1948
SJW61531372020-06-09 07:31:08 -05001949 std::string NewFName = Builtins::GetMangledFunctionName(
1950 is_smooth ? "smoothstep" : "step", NewFType);
1951
SJW2c317da2020-03-23 07:39:13 -05001952 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1953
1954 SmallVector<Value *, 3> NewArgs;
1955 for (auto arg : SplatArgs) {
1956 NewArgs.push_back(arg);
1957 }
1958 NewArgs.push_back(VectorArg);
1959
1960 return CallInst::Create(NewF, NewArgs, "", CI);
1961 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001962}
1963
SJW2c317da2020-03-23 07:39:13 -05001964bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001965 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1966 auto Arg = CI->getOperand(0);
1967 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001968
SJW2c317da2020-03-23 07:39:13 -05001969 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001970
SJW2c317da2020-03-23 07:39:13 -05001971 return BinaryOperator::Create(Op, Bitcast,
1972 ConstantInt::get(CI->getType(), 31), "", CI);
1973 });
David Neto22f144c2017-06-12 14:26:21 -04001974}
1975
SJW2c317da2020-03-23 07:39:13 -05001976bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1977 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001978 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1979 // The multiply instruction to use.
1980 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001981
SJW2c317da2020-03-23 07:39:13 -05001982 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001983
SJW2c317da2020-03-23 07:39:13 -05001984 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1985 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001986
SJW2c317da2020-03-23 07:39:13 -05001987 if (is_mad) {
1988 // The add instruction to use.
1989 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001990
SJW2c317da2020-03-23 07:39:13 -05001991 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001992 }
David Neto22f144c2017-06-12 14:26:21 -04001993
SJW2c317da2020-03-23 07:39:13 -05001994 return V;
1995 });
David Neto22f144c2017-06-12 14:26:21 -04001996}
1997
SJW2c317da2020-03-23 07:39:13 -05001998bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001999 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
2000 Value *V = nullptr;
2001 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07002002
SJW2c317da2020-03-23 07:39:13 -05002003 auto data_type = data->getType();
2004 if (!data_type->isVectorTy())
2005 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07002006
James Pricecf53df42020-04-20 14:41:24 -04002007 auto vec_data_type = cast<VectorType>(data_type);
2008
alan-baker5a8c3be2020-09-09 13:44:26 -04002009 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05002010 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
2011 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07002012
SJW2c317da2020-03-23 07:39:13 -05002013 auto offset = CI->getOperand(1);
2014 auto ptr = CI->getOperand(2);
2015 auto ptr_type = ptr->getType();
2016 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04002017 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05002018 return V;
alan-bakerf795f392019-06-11 18:24:34 -04002019
SJW2c317da2020-03-23 07:39:13 -05002020 // Avoid pointer casts. Instead generate the correct number of stores
2021 // and rely on drivers to coalesce appropriately.
2022 IRBuilder<> builder(CI);
2023 auto elems_const = builder.getInt32(elems);
2024 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00002025 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05002026 auto idx = builder.getInt32(i);
2027 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01002028 auto gep = builder.CreateGEP(
2029 ptr->getType()->getScalarType()->getPointerElementType(), ptr, add);
SJW2c317da2020-03-23 07:39:13 -05002030 auto extract = builder.CreateExtractElement(data, i);
2031 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07002032 }
SJW2c317da2020-03-23 07:39:13 -05002033 return V;
2034 });
Derek Chowcfd368b2017-10-19 20:58:45 -07002035}
2036
SJW2c317da2020-03-23 07:39:13 -05002037bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05002038 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
2039 Value *V = nullptr;
2040 auto ret_type = F.getReturnType();
2041 if (!ret_type->isVectorTy())
2042 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07002043
James Pricecf53df42020-04-20 14:41:24 -04002044 auto vec_ret_type = cast<VectorType>(ret_type);
2045
alan-baker5a8c3be2020-09-09 13:44:26 -04002046 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05002047 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
2048 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07002049
SJW2c317da2020-03-23 07:39:13 -05002050 auto offset = CI->getOperand(0);
2051 auto ptr = CI->getOperand(1);
2052 auto ptr_type = ptr->getType();
2053 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04002054 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05002055 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07002056
SJW2c317da2020-03-23 07:39:13 -05002057 // Avoid pointer casts. Instead generate the correct number of loads
2058 // and rely on drivers to coalesce appropriately.
2059 IRBuilder<> builder(CI);
2060 auto elems_const = builder.getInt32(elems);
2061 V = UndefValue::get(ret_type);
2062 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00002063 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05002064 auto idx = builder.getInt32(i);
2065 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01002066 auto gep = builder.CreateGEP(
2067 ptr_type->getScalarType()->getPointerElementType(), ptr, add);
2068 auto load =
2069 builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
SJW2c317da2020-03-23 07:39:13 -05002070 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07002071 }
SJW2c317da2020-03-23 07:39:13 -05002072 return V;
2073 });
Derek Chowcfd368b2017-10-19 20:58:45 -07002074}
2075
SJW2c317da2020-03-23 07:39:13 -05002076bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
2077 const std::string &name,
2078 int vec_size) {
2079 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
2080 if (!vec_size) {
2081 // deduce vec_size from last character of name (e.g. vload_half4)
2082 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04002083 }
SJW2c317da2020-03-23 07:39:13 -05002084 switch (vec_size) {
2085 case 2:
2086 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
2087 case 4:
2088 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
2089 case 0:
2090 if (!is_clspv_version) {
2091 return replaceVloadHalf(F);
2092 }
Marco Antognini7e338402021-03-15 12:48:37 +00002093 // Fall-through
SJW2c317da2020-03-23 07:39:13 -05002094 default:
2095 llvm_unreachable("Unsupported vload_half vector size");
2096 break;
2097 }
2098 return false;
David Neto22f144c2017-06-12 14:26:21 -04002099}
2100
SJW2c317da2020-03-23 07:39:13 -05002101bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
2102 Module &M = *F.getParent();
2103 return replaceCallsWithValue(F, [&](CallInst *CI) {
2104 // The index argument from vload_half.
2105 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002106
SJW2c317da2020-03-23 07:39:13 -05002107 // The pointer argument from vload_half.
2108 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002109
SJW2c317da2020-03-23 07:39:13 -05002110 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002111 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05002112 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2113
2114 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002115 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05002116
2117 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2118
2119 Value *V = nullptr;
2120
alan-baker7efcaaa2020-05-06 19:33:27 -04002121 bool supports_16bit_storage = true;
2122 switch (Arg1->getType()->getPointerAddressSpace()) {
2123 case clspv::AddressSpace::Global:
2124 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2125 clspv::Option::StorageClass::kSSBO);
2126 break;
2127 case clspv::AddressSpace::Constant:
2128 if (clspv::Option::ConstantArgsInUniformBuffer())
2129 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2130 clspv::Option::StorageClass::kUBO);
2131 else
2132 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2133 clspv::Option::StorageClass::kSSBO);
2134 break;
2135 default:
2136 // Clspv will emit the Float16 capability if the half type is
2137 // encountered. That capability covers private and local addressspaces.
2138 break;
2139 }
2140
2141 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05002142 auto ShortTy = Type::getInt16Ty(M.getContext());
2143 auto ShortPointerTy =
2144 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
2145
2146 // Cast the half* pointer to short*.
2147 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
2148
2149 // Index into the correct address of the casted pointer.
2150 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2151
2152 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002153 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002154
2155 // ZExt the short -> int.
2156 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2157
2158 // Get our float2.
2159 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2160
2161 // Extract out the bottom element which is our float result.
2162 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
2163 } else {
2164 // Assume the pointer argument points to storage aligned to 32bits
2165 // or more.
2166 // TODO(dneto): Do more analysis to make sure this is true?
2167 //
2168 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2169 // with:
2170 //
2171 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2172 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2173 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2174 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2175 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2176 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2177 // x float> %converted, %index_is_odd32
2178
2179 auto IntPointerTy =
2180 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2181
2182 // Cast the base pointer to int*.
2183 // In a valid call (according to assumptions), this should get
2184 // optimized away in the simplify GEP pass.
2185 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2186
2187 auto One = ConstantInt::get(IntTy, 1);
2188 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2189 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2190
2191 // Index into the correct address of the casted pointer.
2192 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2193
2194 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002195 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002196
2197 // Get our float2.
2198 auto Call = CallInst::Create(NewF, Load, "", CI);
2199
2200 // Extract out the float result, where the element number is
2201 // determined by whether the original index was even or odd.
2202 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2203 }
2204 return V;
2205 });
2206}
2207
2208bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2209 Module &M = *F.getParent();
2210 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002211 // The index argument from vload_half.
2212 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002213
Kévin Petite8edce32019-04-10 14:23:32 +01002214 // The pointer argument from vload_half.
2215 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002216
Kévin Petite8edce32019-04-10 14:23:32 +01002217 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002218 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002219 auto NewPointerTy =
2220 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002221 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002222
Kévin Petite8edce32019-04-10 14:23:32 +01002223 // Cast the half* pointer to int*.
2224 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002225
Kévin Petite8edce32019-04-10 14:23:32 +01002226 // Index into the correct address of the casted pointer.
2227 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002228
Kévin Petite8edce32019-04-10 14:23:32 +01002229 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002230 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002231
Kévin Petite8edce32019-04-10 14:23:32 +01002232 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002233 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002234
Kévin Petite8edce32019-04-10 14:23:32 +01002235 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002236
Kévin Petite8edce32019-04-10 14:23:32 +01002237 // Get our float2.
2238 return CallInst::Create(NewF, Load, "", CI);
2239 });
David Neto22f144c2017-06-12 14:26:21 -04002240}
2241
SJW2c317da2020-03-23 07:39:13 -05002242bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2243 Module &M = *F.getParent();
2244 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002245 // The index argument from vload_half.
2246 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002247
Kévin Petite8edce32019-04-10 14:23:32 +01002248 // The pointer argument from vload_half.
2249 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002250
Kévin Petite8edce32019-04-10 14:23:32 +01002251 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002252 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2253 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002254 auto NewPointerTy =
2255 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002256 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002257
Kévin Petite8edce32019-04-10 14:23:32 +01002258 // Cast the half* pointer to int2*.
2259 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002260
Kévin Petite8edce32019-04-10 14:23:32 +01002261 // Index into the correct address of the casted pointer.
2262 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002263
Kévin Petite8edce32019-04-10 14:23:32 +01002264 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002265 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002266
Kévin Petite8edce32019-04-10 14:23:32 +01002267 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002268 auto X =
2269 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2270 auto Y =
2271 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002272
Kévin Petite8edce32019-04-10 14:23:32 +01002273 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002274 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002275
Kévin Petite8edce32019-04-10 14:23:32 +01002276 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002277
Kévin Petite8edce32019-04-10 14:23:32 +01002278 // Get the lower (x & y) components of our final float4.
2279 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002280
Kévin Petite8edce32019-04-10 14:23:32 +01002281 // Get the higher (z & w) components of our final float4.
2282 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002283
Kévin Petite8edce32019-04-10 14:23:32 +01002284 Constant *ShuffleMask[4] = {
2285 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2286 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002287
Kévin Petite8edce32019-04-10 14:23:32 +01002288 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002289 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2290 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002291 });
David Neto22f144c2017-06-12 14:26:21 -04002292}
2293
SJW2c317da2020-03-23 07:39:13 -05002294bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002295
2296 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2297 //
2298 // %u = load i32 %ptr
2299 // %fxy = call <2 x float> Unpack2xHalf(u)
2300 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002301 Module &M = *F.getParent();
2302 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto Index = CI->getOperand(0);
2304 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002305
Kévin Petite8edce32019-04-10 14:23:32 +01002306 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002307 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002308 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002309
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002310 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002311 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002312
Kévin Petite8edce32019-04-10 14:23:32 +01002313 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002314 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002315
Kévin Petite8edce32019-04-10 14:23:32 +01002316 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002317
Kévin Petite8edce32019-04-10 14:23:32 +01002318 // Get our final float2.
2319 return CallInst::Create(NewF, Load, "", CI);
2320 });
David Neto6ad93232018-06-07 15:42:58 -07002321}
2322
SJW2c317da2020-03-23 07:39:13 -05002323bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002324
2325 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2326 //
2327 // %u2 = load <2 x i32> %ptr
2328 // %u2xy = extractelement %u2, 0
2329 // %u2zw = extractelement %u2, 1
2330 // %fxy = call <2 x float> Unpack2xHalf(uint)
2331 // %fzw = call <2 x float> Unpack2xHalf(uint)
2332 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002333 Module &M = *F.getParent();
2334 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002335 auto Index = CI->getOperand(0);
2336 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002337
Kévin Petite8edce32019-04-10 14:23:32 +01002338 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002339 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2340 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002341 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002342
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002343 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002344 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002345
Kévin Petite8edce32019-04-10 14:23:32 +01002346 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002347 auto X =
2348 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2349 auto Y =
2350 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002351
Kévin Petite8edce32019-04-10 14:23:32 +01002352 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002353 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002354
Kévin Petite8edce32019-04-10 14:23:32 +01002355 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002356
Kévin Petite8edce32019-04-10 14:23:32 +01002357 // Get the lower (x & y) components of our final float4.
2358 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002359
Kévin Petite8edce32019-04-10 14:23:32 +01002360 // Get the higher (z & w) components of our final float4.
2361 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 Constant *ShuffleMask[4] = {
2364 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2365 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002366
Kévin Petite8edce32019-04-10 14:23:32 +01002367 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002368 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2369 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002370 });
David Neto6ad93232018-06-07 15:42:58 -07002371}
2372
SJW2c317da2020-03-23 07:39:13 -05002373bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
2374 switch (vec_size) {
2375 case 0:
2376 return replaceVstoreHalf(F);
2377 case 2:
2378 return replaceVstoreHalf2(F);
2379 case 4:
2380 return replaceVstoreHalf4(F);
2381 default:
2382 llvm_unreachable("Unsupported vstore_half vector size");
2383 break;
2384 }
2385 return false;
2386}
David Neto22f144c2017-06-12 14:26:21 -04002387
SJW2c317da2020-03-23 07:39:13 -05002388bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2389 Module &M = *F.getParent();
2390 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002391 // The value to store.
2392 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002393
Kévin Petite8edce32019-04-10 14:23:32 +01002394 // The index argument from vstore_half.
2395 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002396
Kévin Petite8edce32019-04-10 14:23:32 +01002397 // The pointer argument from vstore_half.
2398 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002399
Kévin Petite8edce32019-04-10 14:23:32 +01002400 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002401 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002402 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2403 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002404
Kévin Petite8edce32019-04-10 14:23:32 +01002405 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002406 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002407
Kévin Petite8edce32019-04-10 14:23:32 +01002408 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002409
Kévin Petite8edce32019-04-10 14:23:32 +01002410 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002411 auto TempVec = InsertElementInst::Create(
2412 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 // Pack the float2 -> half2 (in an int).
2415 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002416
alan-baker7efcaaa2020-05-06 19:33:27 -04002417 bool supports_16bit_storage = true;
2418 switch (Arg2->getType()->getPointerAddressSpace()) {
2419 case clspv::AddressSpace::Global:
2420 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2421 clspv::Option::StorageClass::kSSBO);
2422 break;
2423 case clspv::AddressSpace::Constant:
2424 if (clspv::Option::ConstantArgsInUniformBuffer())
2425 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2426 clspv::Option::StorageClass::kUBO);
2427 else
2428 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2429 clspv::Option::StorageClass::kSSBO);
2430 break;
2431 default:
2432 // Clspv will emit the Float16 capability if the half type is
2433 // encountered. That capability covers private and local addressspaces.
2434 break;
2435 }
2436
SJW2c317da2020-03-23 07:39:13 -05002437 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002438 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002439 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002440 auto ShortPointerTy =
2441 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002442
Kévin Petite8edce32019-04-10 14:23:32 +01002443 // Truncate our i32 to an i16.
2444 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002445
Kévin Petite8edce32019-04-10 14:23:32 +01002446 // Cast the half* pointer to short*.
2447 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 // Index into the correct address of the casted pointer.
2450 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002451
Kévin Petite8edce32019-04-10 14:23:32 +01002452 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002453 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002454 } else {
2455 // We can only write to 32-bit aligned words.
2456 //
2457 // Assuming base is aligned to 32-bits, replace the equivalent of
2458 // vstore_half(value, index, base)
2459 // with:
2460 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2461 // uint32_t write_to_upper_half = index & 1u;
2462 // uint32_t shift = write_to_upper_half << 4;
2463 //
2464 // // Pack the float value as a half number in bottom 16 bits
2465 // // of an i32.
2466 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2467 //
2468 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2469 // ^ ((packed & 0xffff) << shift)
2470 // // We only need relaxed consistency, but OpenCL 1.2 only has
2471 // // sequentially consistent atomics.
2472 // // TODO(dneto): Use relaxed consistency.
2473 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002474 auto IntPointerTy =
2475 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002476
Kévin Petite8edce32019-04-10 14:23:32 +01002477 auto Four = ConstantInt::get(IntTy, 4);
2478 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002479
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002480 auto IndexIsOdd =
2481 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002482 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002483 auto IndexIntoI32 =
2484 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2485 auto BaseI32Ptr =
2486 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2487 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2488 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002489 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002490 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002491 auto MaskBitsToWrite =
2492 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2493 auto MaskedCurrent = BinaryOperator::CreateAnd(
2494 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002495
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002496 auto XLowerBits =
2497 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2498 auto NewBitsToWrite =
2499 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2500 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2501 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002502
Kévin Petite8edce32019-04-10 14:23:32 +01002503 // Generate the call to atomi_xor.
2504 SmallVector<Type *, 5> ParamTypes;
2505 // The pointer type.
2506 ParamTypes.push_back(IntPointerTy);
2507 // The Types for memory scope, semantics, and value.
2508 ParamTypes.push_back(IntTy);
2509 ParamTypes.push_back(IntTy);
2510 ParamTypes.push_back(IntTy);
2511 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2512 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002513
Kévin Petite8edce32019-04-10 14:23:32 +01002514 const auto ConstantScopeDevice =
2515 ConstantInt::get(IntTy, spv::ScopeDevice);
2516 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2517 // (SPIR-V Workgroup).
2518 const auto AddrSpaceSemanticsBits =
2519 IntPointerTy->getPointerAddressSpace() == 1
2520 ? spv::MemorySemanticsUniformMemoryMask
2521 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002522
Kévin Petite8edce32019-04-10 14:23:32 +01002523 // We're using relaxed consistency here.
2524 const auto ConstantMemorySemantics =
2525 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2526 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002527
Kévin Petite8edce32019-04-10 14:23:32 +01002528 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2529 ConstantMemorySemantics, ValueToXor};
2530 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002531
2532 // Return a Nop so the old Call is removed
2533 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2534 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002535 }
David Neto22f144c2017-06-12 14:26:21 -04002536
SJW2c317da2020-03-23 07:39:13 -05002537 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002538 });
David Neto22f144c2017-06-12 14:26:21 -04002539}
2540
SJW2c317da2020-03-23 07:39:13 -05002541bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2542 Module &M = *F.getParent();
2543 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002544 // The value to store.
2545 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002546
Kévin Petite8edce32019-04-10 14:23:32 +01002547 // The index argument from vstore_half.
2548 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002549
Kévin Petite8edce32019-04-10 14:23:32 +01002550 // The pointer argument from vstore_half.
2551 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002552
Kévin Petite8edce32019-04-10 14:23:32 +01002553 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002554 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002555 auto NewPointerTy =
2556 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002557 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002558
Kévin Petite8edce32019-04-10 14:23:32 +01002559 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002560 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002561
Kévin Petite8edce32019-04-10 14:23:32 +01002562 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002563
Kévin Petite8edce32019-04-10 14:23:32 +01002564 // Turn the packed x & y into the final packing.
2565 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002566
Kévin Petite8edce32019-04-10 14:23:32 +01002567 // Cast the half* pointer to int*.
2568 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002569
Kévin Petite8edce32019-04-10 14:23:32 +01002570 // Index into the correct address of the casted pointer.
2571 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002572
Kévin Petite8edce32019-04-10 14:23:32 +01002573 // Store to the int* we casted to.
2574 return new StoreInst(X, Index, CI);
2575 });
David Neto22f144c2017-06-12 14:26:21 -04002576}
2577
SJW2c317da2020-03-23 07:39:13 -05002578bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2579 Module &M = *F.getParent();
2580 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002581 // The value to store.
2582 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002583
Kévin Petite8edce32019-04-10 14:23:32 +01002584 // The index argument from vstore_half.
2585 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002586
Kévin Petite8edce32019-04-10 14:23:32 +01002587 // The pointer argument from vstore_half.
2588 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002589
Kévin Petite8edce32019-04-10 14:23:32 +01002590 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002591 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2592 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002593 auto NewPointerTy =
2594 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002595 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002596
Kévin Petite8edce32019-04-10 14:23:32 +01002597 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2598 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002599
Kévin Petite8edce32019-04-10 14:23:32 +01002600 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002601 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2602 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002603
Kévin Petite8edce32019-04-10 14:23:32 +01002604 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2605 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002606
Kévin Petite8edce32019-04-10 14:23:32 +01002607 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002608 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2609 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002610
Kévin Petite8edce32019-04-10 14:23:32 +01002611 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002612 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002613
Kévin Petite8edce32019-04-10 14:23:32 +01002614 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002615
Kévin Petite8edce32019-04-10 14:23:32 +01002616 // Turn the packed x & y into the final component of our int2.
2617 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002618
Kévin Petite8edce32019-04-10 14:23:32 +01002619 // Turn the packed z & w into the final component of our int2.
2620 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002621
Kévin Petite8edce32019-04-10 14:23:32 +01002622 auto Combine = InsertElementInst::Create(
2623 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002624 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2625 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002626
Kévin Petite8edce32019-04-10 14:23:32 +01002627 // Cast the half* pointer to int2*.
2628 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002629
Kévin Petite8edce32019-04-10 14:23:32 +01002630 // Index into the correct address of the casted pointer.
2631 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002632
Kévin Petite8edce32019-04-10 14:23:32 +01002633 // Store to the int2* we casted to.
2634 return new StoreInst(Combine, Index, CI);
2635 });
David Neto22f144c2017-06-12 14:26:21 -04002636}
2637
SJW2c317da2020-03-23 07:39:13 -05002638bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2639 // convert half to float
2640 Module &M = *F.getParent();
2641 return replaceCallsWithValue(F, [&](CallInst *CI) {
2642 SmallVector<Type *, 3> types;
2643 SmallVector<Value *, 3> args;
alan-baker5641f5c2021-10-15 09:16:04 -04002644 for (size_t i = 0; i < CI->arg_size(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05002645 types.push_back(CI->getArgOperand(i)->getType());
2646 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002647 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002648
alan-baker5a8c3be2020-09-09 13:44:26 -04002649 auto NewFType =
2650 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2651 cast<VectorType>(CI->getType())
2652 ->getElementCount()
2653 .getKnownMinValue()),
2654 types, false);
SJW2c317da2020-03-23 07:39:13 -05002655
SJW61531372020-06-09 07:31:08 -05002656 std::string NewFName =
2657 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002658
2659 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2660
2661 auto NewCI = CallInst::Create(NewF, args, "", CI);
2662
2663 // Convert to the half type.
2664 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2665 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002666}
2667
SJW2c317da2020-03-23 07:39:13 -05002668bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2669 // convert half to float
2670 Module &M = *F.getParent();
2671 return replaceCallsWithValue(F, [&](CallInst *CI) {
2672 SmallVector<Type *, 3> types(3);
2673 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002674
SJW2c317da2020-03-23 07:39:13 -05002675 // Image
2676 types[0] = CI->getArgOperand(0)->getType();
2677 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002678
SJW2c317da2020-03-23 07:39:13 -05002679 // Coord
2680 types[1] = CI->getArgOperand(1)->getType();
2681 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002682
SJW2c317da2020-03-23 07:39:13 -05002683 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002684 types[2] =
2685 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2686 cast<VectorType>(CI->getArgOperand(2)->getType())
2687 ->getElementCount()
2688 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002689
SJW2c317da2020-03-23 07:39:13 -05002690 auto NewFType =
2691 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002692
SJW61531372020-06-09 07:31:08 -05002693 std::string NewFName =
2694 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002695
SJW2c317da2020-03-23 07:39:13 -05002696 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002697
SJW2c317da2020-03-23 07:39:13 -05002698 // Convert data to the float type.
2699 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2700 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002701
SJW2c317da2020-03-23 07:39:13 -05002702 return CallInst::Create(NewF, args, "", CI);
2703 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002704}
2705
SJW2c317da2020-03-23 07:39:13 -05002706bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2707 Function &F) {
2708 // convert read_image with int coords to float coords
2709 Module &M = *F.getParent();
2710 return replaceCallsWithValue(F, [&](CallInst *CI) {
2711 // The image.
2712 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002713
SJW2c317da2020-03-23 07:39:13 -05002714 // The sampler.
2715 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002716
SJW2c317da2020-03-23 07:39:13 -05002717 // The coordinate (integer type that we can't handle).
2718 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002719
Romaric Jodin9b353742022-01-25 17:21:02 +01002720 uint32_t dim = clspv::ImageNumDimensions(Arg0->getType());
SJW2c317da2020-03-23 07:39:13 -05002721 uint32_t components =
2722 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2723 Type *float_ty = nullptr;
2724 if (components == 1) {
2725 float_ty = Type::getFloatTy(M.getContext());
2726 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002727 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2728 cast<VectorType>(Arg2->getType())
2729 ->getElementCount()
2730 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002731 }
David Neto22f144c2017-06-12 14:26:21 -04002732
SJW2c317da2020-03-23 07:39:13 -05002733 auto NewFType = FunctionType::get(
2734 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2735
2736 std::string NewFName = F.getName().str();
2737 NewFName[NewFName.length() - 1] = 'f';
2738
2739 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2740
2741 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2742
2743 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2744 });
David Neto22f144c2017-06-12 14:26:21 -04002745}
2746
SJW2c317da2020-03-23 07:39:13 -05002747bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2748 return replaceCallsWithValue(F, [&](CallInst *CI) {
2749 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002750
SJW2c317da2020-03-23 07:39:13 -05002751 // We need to map the OpenCL constants to the SPIR-V equivalents.
2752 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2753 const auto ConstantMemorySemantics = ConstantInt::get(
2754 IntTy, spv::MemorySemanticsUniformMemoryMask |
2755 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002756
SJW2c317da2020-03-23 07:39:13 -05002757 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002758
SJW2c317da2020-03-23 07:39:13 -05002759 // The pointer.
2760 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002761
SJW2c317da2020-03-23 07:39:13 -05002762 // The memory scope.
2763 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002764
SJW2c317da2020-03-23 07:39:13 -05002765 // The memory semantics.
2766 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002767
alan-baker5641f5c2021-10-15 09:16:04 -04002768 if (2 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05002769 // The unequal memory semantics.
2770 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002771
SJW2c317da2020-03-23 07:39:13 -05002772 // The value.
2773 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002774
SJW2c317da2020-03-23 07:39:13 -05002775 // The comparator.
2776 Params.push_back(CI->getArgOperand(1));
alan-baker5641f5c2021-10-15 09:16:04 -04002777 } else if (1 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05002778 // The value.
2779 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002780 }
David Neto22f144c2017-06-12 14:26:21 -04002781
SJW2c317da2020-03-23 07:39:13 -05002782 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2783 });
David Neto22f144c2017-06-12 14:26:21 -04002784}
2785
SJW2c317da2020-03-23 07:39:13 -05002786bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2787 llvm::AtomicRMWInst::BinOp Op) {
2788 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002789 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2790 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002791 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002792 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002793 SyncScope::System, CI);
2794 });
2795}
David Neto22f144c2017-06-12 14:26:21 -04002796
SJW2c317da2020-03-23 07:39:13 -05002797bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2798 Module &M = *F.getParent();
2799 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002800 auto IntTy = Type::getInt32Ty(M.getContext());
2801 auto FloatTy = Type::getFloatTy(M.getContext());
2802
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002803 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2804 ConstantInt::get(IntTy, 1),
2805 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002806
2807 Constant *UpShuffleMask[4] = {
2808 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2809 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2810
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002811 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2812 UndefValue::get(FloatTy),
2813 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002814
Kévin Petite8edce32019-04-10 14:23:32 +01002815 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002816 auto Arg0 =
2817 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2818 ConstantVector::get(DownShuffleMask), "", CI);
2819 auto Arg1 =
2820 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2821 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002822 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002823
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002824 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002825 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002826
SJW61531372020-06-09 07:31:08 -05002827 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002828
Kévin Petite8edce32019-04-10 14:23:32 +01002829 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002830
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002831 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2832 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002833 });
David Neto22f144c2017-06-12 14:26:21 -04002834}
David Neto62653202017-10-16 19:05:18 -04002835
SJW2c317da2020-03-23 07:39:13 -05002836bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002837 // OpenCL's float result = fract(float x, float* ptr)
2838 //
2839 // In the LLVM domain:
2840 //
2841 // %floor_result = call spir_func float @floor(float %x)
2842 // store float %floor_result, float * %ptr
2843 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2844 // %result = call spir_func float
2845 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2846 //
2847 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2848 // and clspv.fract occur in the SPIR-V generator pass:
2849 //
2850 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2851 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2852 // ...
2853 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2854 // OpStore %ptr %floor_result
2855 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2856 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002857 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002858
David Neto62653202017-10-16 19:05:18 -04002859 using std::string;
2860
2861 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2862 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002863
SJW2c317da2020-03-23 07:39:13 -05002864 Module &M = *F.getParent();
2865 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002866
SJW2c317da2020-03-23 07:39:13 -05002867 // This is either float or a float vector. All the float-like
2868 // types are this type.
2869 auto result_ty = F.getReturnType();
2870
SJW61531372020-06-09 07:31:08 -05002871 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002872 Function *fmin_fn = M.getFunction(fmin_name);
2873 if (!fmin_fn) {
2874 // Make the fmin function.
2875 FunctionType *fn_ty =
2876 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2877 fmin_fn =
2878 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2879 fmin_fn->addFnAttr(Attribute::ReadNone);
2880 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2881 }
2882
SJW61531372020-06-09 07:31:08 -05002883 std::string floor_name =
2884 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002885 Function *floor_fn = M.getFunction(floor_name);
2886 if (!floor_fn) {
2887 // Make the floor function.
2888 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2889 floor_fn =
2890 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2891 floor_fn->addFnAttr(Attribute::ReadNone);
2892 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2893 }
2894
SJW61531372020-06-09 07:31:08 -05002895 std::string clspv_fract_name =
2896 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002897 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2898 if (!clspv_fract_fn) {
2899 // Make the clspv_fract function.
2900 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2901 clspv_fract_fn = cast<Function>(
2902 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2903 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2904 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2905 }
2906
2907 // Number of significant significand bits, whether represented or not.
2908 unsigned num_significand_bits;
2909 switch (result_ty->getScalarType()->getTypeID()) {
2910 case Type::HalfTyID:
2911 num_significand_bits = 11;
2912 break;
2913 case Type::FloatTyID:
2914 num_significand_bits = 24;
2915 break;
2916 case Type::DoubleTyID:
2917 num_significand_bits = 53;
2918 break;
2919 default:
2920 llvm_unreachable("Unhandled float type when processing fract builtin");
2921 break;
2922 }
2923 // Beware that the disassembler displays this value as
2924 // OpConstant %float 1
2925 // which is not quite right.
2926 const double kJustUnderOneScalar =
2927 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2928
2929 Constant *just_under_one =
2930 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2931 if (result_ty->isVectorTy()) {
2932 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002933 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002934 }
2935
2936 IRBuilder<> Builder(CI);
2937
2938 auto arg = CI->getArgOperand(0);
2939 auto ptr = CI->getArgOperand(1);
2940
2941 // Compute floor result and store it.
2942 auto floor = Builder.CreateCall(floor_fn, {arg});
2943 Builder.CreateStore(floor, ptr);
2944
2945 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2946 auto fract_result =
2947 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2948
2949 return fract_result;
2950 });
David Neto62653202017-10-16 19:05:18 -04002951}
alan-bakera52b7312020-10-26 08:58:51 -04002952
Kévin Petit8576f682020-11-02 14:51:32 +00002953bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002954 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002955 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002956 // a_shr = a >> 1
2957 // b_shr = b >> 1
2958 // add1 = a_shr + b_shr
2959 // join = a |join_opcode| b
2960 // and = join & 1
2961 // add = add1 + and
2962 const auto a = Call->getArgOperand(0);
2963 const auto b = Call->getArgOperand(1);
2964 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002965 Value *a_shift, *b_shift;
2966 if (is_signed) {
2967 a_shift = builder.CreateAShr(a, 1);
2968 b_shift = builder.CreateAShr(b, 1);
2969 } else {
2970 a_shift = builder.CreateLShr(a, 1);
2971 b_shift = builder.CreateLShr(b, 1);
2972 }
alan-bakerb6da5132020-10-29 15:59:06 -04002973 auto add = builder.CreateAdd(a_shift, b_shift);
2974 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2975 auto constant_one = ConstantInt::get(a->getType(), 1);
2976 auto and_bit = builder.CreateAnd(join, constant_one);
2977 return builder.CreateAdd(add, and_bit);
2978 });
2979}
2980
alan-baker3f1bf492020-11-05 09:07:36 -05002981bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2982 bool is_add) {
2983 return replaceCallsWithValue(F, [&F, this, is_signed,
2984 is_add](CallInst *Call) {
2985 auto ty = Call->getType();
2986 auto a = Call->getArgOperand(0);
2987 auto b = Call->getArgOperand(1);
2988 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002989 if (is_signed) {
2990 unsigned bitwidth = ty->getScalarSizeInBits();
2991 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002992 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01002993 if (clspv::Option::HackClampWidth() && extended_width < 32) {
2994 extended_width = 32;
2995 }
alan-baker3f1bf492020-11-05 09:07:36 -05002996 Type *extended_ty =
2997 IntegerType::get(Call->getContext(), extended_width);
2998 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002999 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05003000 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3001 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04003002 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05003003 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04003004 // Don't use the type in GetMangledFunctionName to ensure we get
3005 // signed parameters.
3006 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04003007 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05003008 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3009 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3010 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3011 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3012 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04003013 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04003014 } else {
3015 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3016 }
alan-baker3f1bf492020-11-05 09:07:36 -05003017 } else {
3018 if (extended_width == 32) {
3019 sclamp_name += "iii";
3020 } else {
3021 sclamp_name += "sss";
3022 }
alan-bakera52b7312020-10-26 08:58:51 -04003023 }
alan-baker3f1bf492020-11-05 09:07:36 -05003024
3025 auto sext_a = builder.CreateSExt(a, extended_ty);
3026 auto sext_b = builder.CreateSExt(b, extended_ty);
3027 Value *op = nullptr;
3028 // Extended operations won't wrap.
3029 if (is_add)
3030 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
3031 else
3032 op = builder.CreateSub(sext_a, sext_b, "", true, true);
3033 auto clamp_ty = FunctionType::get(
3034 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3035 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
3036 auto clamp = builder.CreateCall(callee, {op, min, max});
3037 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04003038 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05003039 // Add:
3040 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04003041 // if (b < 0)
3042 // c = c > a ? min : c;
3043 // else
alan-baker3f1bf492020-11-05 09:07:36 -05003044 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04003045 //
alan-baker3f1bf492020-11-05 09:07:36 -05003046 // Sub:
3047 // c = a - b;
3048 // if (b < 0)
3049 // c = c < a ? max : c;
3050 // else
3051 // c = c > a ? min : c;
3052 Constant *min = ConstantInt::get(Call->getContext(),
3053 APInt::getSignedMinValue(bitwidth));
3054 Constant *max = ConstantInt::get(Call->getContext(),
3055 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04003056 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3057 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3058 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3059 }
alan-baker3f1bf492020-11-05 09:07:36 -05003060 Value *op = nullptr;
3061 if (is_add) {
3062 op = builder.CreateAdd(a, b);
3063 } else {
3064 op = builder.CreateSub(a, b);
3065 }
3066 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
3067 auto op_gt_a = builder.CreateICmpSGT(op, a);
3068 auto op_lt_a = builder.CreateICmpSLT(op, a);
3069 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
3070 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
3071 auto neg_value = is_add ? min : max;
3072 auto pos_value = is_add ? max : min;
3073 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
3074 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
3075 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04003076 }
3077 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05003078 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
3079 // carr/borrow.
3080 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
3081 auto clamp_value =
3082 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
3083 auto struct_ty = GetPairStruct(ty);
3084 auto call =
3085 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
3086 auto add_sub = builder.CreateExtractValue(call, {0});
3087 auto carry_borrow = builder.CreateExtractValue(call, {1});
3088 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
3089 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04003090 }
alan-bakera52b7312020-10-26 08:58:51 -04003091 });
3092}
alan-baker4986eff2020-10-29 13:38:00 -04003093
3094bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
3095 return replaceCallsWithValue(F, [](CallInst *Call) {
3096 auto pointer = Call->getArgOperand(0);
3097 // Clang emits an address space cast to the generic address space. Skip the
3098 // cast and use the input directly.
3099 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3100 pointer = cast->getPointerOperand();
3101 }
alan-baker5641f5c2021-10-15 09:16:04 -04003102 Value *order_arg = Call->arg_size() > 1 ? Call->getArgOperand(1) : nullptr;
3103 Value *scope_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003104 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3105 clspv::AddressSpace::Global;
3106 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
3107 spv::MemorySemanticsAcquireMask);
3108 auto scope = MemoryScope(scope_arg, is_global, Call);
3109 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
3110 Call->getType(), {pointer, scope, order});
3111 });
3112}
3113
3114bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
3115 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
3116 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
3117 auto pointer = Call->getArgOperand(0);
3118 // Clang emits an address space cast to the generic address space. Skip the
3119 // cast and use the input directly.
3120 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3121 pointer = cast->getPointerOperand();
3122 }
3123 Value *value = Call->getArgOperand(1);
alan-baker5641f5c2021-10-15 09:16:04 -04003124 Value *order_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
3125 Value *scope_arg = Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003126 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3127 clspv::AddressSpace::Global;
3128 auto scope = MemoryScope(scope_arg, is_global, Call);
3129 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
3130 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
3131 {pointer, scope, order, value});
3132 });
3133}
3134
3135bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
3136 return replaceCallsWithValue(F, [](CallInst *Call) {
3137 auto pointer = Call->getArgOperand(0);
3138 // Clang emits an address space cast to the generic address space. Skip the
3139 // cast and use the input directly.
3140 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
3141 pointer = cast->getPointerOperand();
3142 }
3143 auto expected = Call->getArgOperand(1);
3144 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
3145 expected = cast->getPointerOperand();
3146 }
3147 auto value = Call->getArgOperand(2);
3148 bool is_global = pointer->getType()->getPointerAddressSpace() ==
3149 clspv::AddressSpace::Global;
3150 Value *success_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003151 Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003152 Value *failure_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04003153 Call->arg_size() > 4 ? Call->getArgOperand(4) : nullptr;
3154 Value *scope_arg = Call->arg_size() > 5 ? Call->getArgOperand(5) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04003155 auto scope = MemoryScope(scope_arg, is_global, Call);
3156 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
3157 spv::MemorySemanticsAcquireReleaseMask);
3158 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
3159 spv::MemorySemanticsAcquireMask);
3160
3161 // If the value pointed to by |expected| equals the value pointed to by
3162 // |pointer|, |value| is written into |pointer|, otherwise the value in
3163 // |pointer| is written into |expected|. In order to avoid extra stores,
3164 // the basic block with the original atomic is split and the store is
3165 // performed in the |then| block. The condition is the inversion of the
3166 // comparison result.
3167 IRBuilder<> builder(Call);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01003168 auto load = builder.CreateLoad(expected->getType()->getPointerElementType(),
3169 expected);
alan-baker4986eff2020-10-29 13:38:00 -04003170 auto cmp_xchg = InsertSPIRVOp(
3171 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3172 value->getType(), {pointer, scope, success, failure, value, load});
3173 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3174 auto not_cmp = builder.CreateNot(cmp);
3175 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3176 builder.SetInsertPoint(then_branch);
3177 builder.CreateStore(cmp_xchg, expected);
3178 return cmp;
3179 });
3180}
alan-bakercc2bafb2020-11-02 08:30:18 -05003181
alan-baker2cecaa72020-11-05 14:05:20 -05003182bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003183 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3184 return false;
3185
3186 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003187 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003188 return false;
3189
alan-baker5f2e88e2020-12-07 15:24:04 -05003190 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3191 Function *intrinsic = Intrinsic::getDeclaration(
3192 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3193 Call->getType());
3194 const auto c_false = ConstantInt::getFalse(Call->getContext());
3195 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3196 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003197 });
3198}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003199
3200bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3201 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3202 const auto ty = Call->getType();
3203 const auto a = Call->getArgOperand(0);
3204 const auto b = Call->getArgOperand(1);
3205 const auto c = Call->getArgOperand(2);
3206 IRBuilder<> builder(Call);
3207 if (is_signed) {
3208 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3209 if (bitwidth < 32) {
3210 // mul = sext(a) * sext(b)
3211 // add = mul + sext(c)
3212 // res = clamp(add, MIN, MAX)
3213 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003214 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3215 extended_width = 32;
3216 }
alan-baker6b9d1ee2020-11-03 23:11:32 -05003217 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3218 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3219 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3220 }
3221 auto a_sext = builder.CreateSExt(a, extended_ty);
3222 auto b_sext = builder.CreateSExt(b, extended_ty);
3223 auto c_sext = builder.CreateSExt(c, extended_ty);
3224 // Extended the size so no overflows occur.
3225 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3226 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3227 auto func_ty = FunctionType::get(
3228 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3229 // Don't use function type because we need signed parameters.
3230 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3231 // The clamp values are the signed min and max of the original bitwidth
3232 // sign extended to the extended bitwidth.
3233 Constant *min = ConstantInt::get(
3234 Call->getContext(),
3235 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3236 Constant *max = ConstantInt::get(
3237 Call->getContext(),
3238 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3239 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3240 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3241 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3242 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3243 if (extended_width == 32)
3244 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3245 else
3246 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3247 } else {
3248 if (extended_width == 32)
3249 clamp_name += "iii";
3250 else
3251 clamp_name += "sss";
3252 }
3253 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3254 auto clamp = builder.CreateCall(callee, {add, min, max});
3255 return builder.CreateTrunc(clamp, ty);
3256 } else {
alan-baker6b9d1ee2020-11-03 23:11:32 -05003257 // Compute
3258 // {hi, lo} = smul_extended(a, b)
3259 // add = lo + c
Romaric Jodinc507f312022-04-08 19:09:45 +02003260 auto mul_ext = InsertOpMulExtended(Call, a, b, true);
3261
alan-baker6b9d1ee2020-11-03 23:11:32 -05003262 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3263 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3264 auto add = builder.CreateAdd(mul_lo, c);
3265
3266 // Constants for use in the calculation.
3267 Constant *min = ConstantInt::get(Call->getContext(),
3268 APInt::getSignedMinValue(bitwidth));
3269 Constant *max = ConstantInt::get(Call->getContext(),
3270 APInt::getSignedMaxValue(bitwidth));
3271 Constant *max_plus_1 = ConstantInt::get(
3272 Call->getContext(),
3273 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3274 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3275 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3276 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3277 max_plus_1 =
3278 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3279 }
3280
3281 auto a_xor_b = builder.CreateXor(a, b);
3282 auto same_sign =
3283 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3284 auto different_sign = builder.CreateNot(same_sign);
3285 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3286 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3287 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3288 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3289 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3290 auto add_gt_max = builder.CreateICmpUGT(add, max);
3291 auto hi_eq_m1 =
3292 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3293 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3294 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3295 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3296 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3297
3298 // Equivalent to:
3299 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3300 // return MAX
3301 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3302 // return MAX
3303 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3304 // return MIN
3305 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3306 // return MIN
3307 // return add
3308 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3309 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3310 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3311 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3312 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3313 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3314 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3315 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3316 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3317 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3318 auto sel = builder.CreateSelect(min_clamp, min, add);
3319 return builder.CreateSelect(max_clamp, max, sel);
3320 }
3321 } else {
3322 // {lo, hi} = mul_extended(a, b)
3323 // {add, carry} = add_carry(lo, c)
3324 // cmp = (mul_hi | carry) == 0
3325 // mad_sat = cmp ? add : MAX
3326 auto struct_ty = GetPairStruct(ty);
Romaric Jodinc507f312022-04-08 19:09:45 +02003327 auto mul_ext = InsertOpMulExtended(Call, a, b, false);
alan-baker6b9d1ee2020-11-03 23:11:32 -05003328 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3329 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3330 auto add_carry =
3331 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3332 struct_ty, {mul_lo, c});
3333 auto add = builder.CreateExtractValue(add_carry, {0});
3334 auto carry = builder.CreateExtractValue(add_carry, {1});
3335 auto or_value = builder.CreateOr(mul_hi, carry);
3336 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3337 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3338 }
3339 });
3340}
alan-baker15106572020-11-06 15:08:10 -05003341
3342bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3343 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3344 return false;
3345
3346 if (F.getFunctionType()->getNumParams() != 2)
3347 return false;
3348
3349 if (F.getFunctionType()->getParamType(0) !=
3350 F.getFunctionType()->getParamType(1)) {
3351 return false;
3352 }
3353
3354 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3355 case Type::FloatTyID:
3356 case Type::HalfTyID:
3357 case Type::DoubleTyID:
3358 break;
3359 default:
3360 return false;
3361 }
3362
3363 // Scalar versions all return an int, while vector versions return a vector
3364 // of an equally sized integer types (e.g. short, int or long).
3365 if (isa<VectorType>(F.getReturnType())) {
3366 if (F.getReturnType()->getScalarSizeInBits() !=
3367 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3368 return false;
3369 }
3370 } else {
3371 if (F.getReturnType()->getScalarSizeInBits() != 32)
3372 return false;
3373 }
3374
3375 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3376 // Replace with a floating point [un]ordered comparison followed by an
3377 // extension.
3378 auto x = Call->getArgOperand(0);
3379 auto y = Call->getArgOperand(1);
3380 IRBuilder<> builder(Call);
3381 Value *tmp = nullptr;
3382 if (is_ordered) {
3383 // This leads to a slight inefficiency in the SPIR-V that is easy for
3384 // drivers to optimize where the SPIR-V for the comparison and the
3385 // extension could be fused to drop the inversion of the OpIsNan.
3386 tmp = builder.CreateFCmpORD(x, y);
3387 } else {
3388 tmp = builder.CreateFCmpUNO(x, y);
3389 }
3390 // OpenCL CTS requires that vector versions use sign extension, but scalar
3391 // versions use zero extension.
3392 if (isa<VectorType>(Call->getType()))
3393 return builder.CreateSExt(tmp, Call->getType());
3394 return builder.CreateZExt(tmp, Call->getType());
3395 });
3396}
alan-baker497920b2020-11-09 16:41:36 -05003397
3398bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
3399 return replaceCallsWithValue(F, [this](CallInst *Call) {
3400 auto ty = Call->getType();
3401 auto x = Call->getArgOperand(0);
3402 unsigned width = x->getType()->getScalarSizeInBits();
3403 Type *int_ty = IntegerType::get(Call->getContext(), width);
3404 uint64_t abs_mask = 0x7fffffff;
3405 uint64_t exp_mask = 0x7f800000;
3406 uint64_t min_mask = 0x00800000;
3407 if (width == 16) {
3408 abs_mask = 0x7fff;
3409 exp_mask = 0x7c00;
3410 min_mask = 0x0400;
3411 } else if (width == 64) {
3412 abs_mask = 0x7fffffffffffffff;
3413 exp_mask = 0x7ff0000000000000;
3414 min_mask = 0x0010000000000000;
3415 }
3416 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3417 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3418 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3419 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3420 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3421 abs_const =
3422 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3423 exp_const =
3424 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3425 min_const =
3426 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3427 }
3428 // Drop the sign bit and then check that the number is between
3429 // (exclusive) the min and max exponent values for the bit width.
3430 IRBuilder<> builder(Call);
3431 auto bitcast = builder.CreateBitCast(x, int_ty);
3432 auto abs = builder.CreateAnd(bitcast, abs_const);
3433 auto lt = builder.CreateICmpULT(abs, exp_const);
3434 auto ge = builder.CreateICmpUGE(abs, min_const);
3435 auto tmp = builder.CreateAnd(lt, ge);
3436 // OpenCL CTS requires that vector versions use sign extension, but scalar
3437 // versions use zero extension.
3438 if (isa<VectorType>(ty))
3439 return builder.CreateSExt(tmp, ty);
3440 return builder.CreateZExt(tmp, ty);
3441 });
3442}
alan-bakere0406e72020-11-10 12:32:04 -05003443
3444bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3445 return replaceCallsWithValue(F, [](CallInst *Call) {
3446 const auto x = Call->getArgOperand(0);
3447 const auto y = Call->getArgOperand(1);
3448 IRBuilder<> builder(Call);
3449 auto sub = builder.CreateFSub(x, y);
3450 auto cmp = builder.CreateFCmpUGT(x, y);
3451 return builder.CreateSelect(cmp, sub,
3452 Constant::getNullValue(Call->getType()));
3453 });
3454}
alan-baker3e0de472020-12-08 15:57:17 -05003455
3456bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3457 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3458 const auto x = Call->getArgOperand(0);
3459 const double c_halfway = 0.5;
3460 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3461
3462 const auto clspv_fract_name =
3463 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3464 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3465 if (!clspv_fract_fn) {
3466 // Make the clspv_fract function.
3467 clspv_fract_fn = cast<Function>(
3468 F.getParent()
3469 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3470 .getCallee());
3471 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3472 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3473 }
3474
3475 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3476 Call->getType());
3477 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3478 Call->getType());
3479 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3480 Call->getType());
3481 auto copysign = Intrinsic::getDeclaration(
3482 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3483
3484 IRBuilder<> builder(Call);
3485
3486 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3487 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3488 auto floor_call =
3489 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3490 auto fract_call =
3491 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3492 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3493 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3494 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3495 });
3496}
3497
3498bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3499 Builtins::BuiltinType type) {
3500 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3501 const auto x = Call->getArgOperand(0);
3502 const double k_pi = 0x1.921fb54442d18p+1;
3503 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3504
3505 IRBuilder<> builder(Call);
3506 auto mul = builder.CreateFMul(x, pi);
3507 switch (type) {
3508 case Builtins::kSinpi: {
3509 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3510 x->getType());
3511 return builder.CreateCall(func->getFunctionType(), func, {mul});
3512 }
3513 case Builtins::kCospi: {
3514 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3515 x->getType());
3516 return builder.CreateCall(func->getFunctionType(), func, {mul});
3517 }
3518 case Builtins::kTanpi: {
3519 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3520 x->getType());
3521 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
3522 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3523 x->getType());
3524 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
3525 return builder.CreateFDiv(sin_call, cos_call);
3526 }
3527 default:
3528 llvm_unreachable("unexpected builtin");
3529 break;
3530 }
3531 return nullptr;
3532 });
3533}
alan-baker8b968112020-12-15 15:53:29 -05003534
3535bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
3536 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3537 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3538 Call->getType());
3539 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3540 Call->getType());
3541
3542 IRBuilder<> builder(Call);
3543 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
3544 {Call->getArgOperand(0)});
3545 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
3546 {Call->getArgOperand(0)});
3547 builder.CreateStore(cos, Call->getArgOperand(1));
3548 return sin;
3549 });
3550}
3551
3552bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
3553 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3554 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
3555 Call->getType());
3556
3557 IRBuilder<> builder(Call);
3558 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
3559 {Call->getArgOperand(0)});
3560 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
3561 });
3562}
3563
3564bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
3565 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3566 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
3567 Call->getType());
3568
3569 IRBuilder<> builder(Call);
3570 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
3571 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
3572 {Call->getArgOperand(0), conv});
3573 });
3574}