blob: 5007ef6346a45ad337eeea8af719303853bffd4b [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
David Neto22f144c2017-06-12 14:26:21 -040050uint32_t clz(uint32_t v) {
51 uint32_t r;
52 uint32_t shift;
53
54 r = (v > 0xFFFF) << 4;
55 v >>= r;
56 shift = (v > 0xFF) << 3;
57 v >>= shift;
58 r |= shift;
59 shift = (v > 0xF) << 2;
60 v >>= shift;
61 r |= shift;
62 shift = (v > 0x3) << 1;
63 v >>= shift;
64 r |= shift;
65 r |= (v >> 1);
66
67 return r;
68}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050081 spv::MemorySemanticsMask base_semantics,
82 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040083 enum AtomicMemoryOrder : uint32_t {
84 kMemoryOrderRelaxed = 0,
85 kMemoryOrderAcquire = 2,
86 kMemoryOrderRelease = 3,
87 kMemoryOrderAcqRel = 4,
88 kMemoryOrderSeqCst = 5
89 };
90
91 IRBuilder<> builder(InsertBefore);
92
93 // Constants for OpenCL C 2.0 memory_order.
94 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
95 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
96 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
97 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
98
99 // Constants for SPIR-V ordering memory semantics.
100 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
101 const auto AcquireSemantics =
102 builder.getInt32(spv::MemorySemanticsAcquireMask);
103 const auto ReleaseSemantics =
104 builder.getInt32(spv::MemorySemanticsReleaseMask);
105 const auto AcqRelSemantics =
106 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
107
108 // Constants for SPIR-V storage class semantics.
109 const auto UniformSemantics =
110 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
111 const auto WorkgroupSemantics =
112 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
113
114 // Instead of sequentially consistent, use acquire, release or acquire
115 // release semantics.
116 Value *base_order = nullptr;
117 switch (base_semantics) {
118 case spv::MemorySemanticsAcquireMask:
119 base_order = AcquireSemantics;
120 break;
121 case spv::MemorySemanticsReleaseMask:
122 base_order = ReleaseSemantics;
123 break;
124 default:
125 base_order = AcqRelSemantics;
126 break;
127 }
128
129 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500130 if (order == nullptr) {
131 if (include_storage)
132 return builder.CreateOr({storage, base_order});
133 else
134 return base_order;
135 }
alan-baker4986eff2020-10-29 13:38:00 -0400136
137 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
138 auto is_acquire = builder.CreateICmpEQ(order, acquire);
139 auto is_release = builder.CreateICmpEQ(order, release);
140 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
141 auto semantics =
142 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
143 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
144 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
145 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500146 if (include_storage)
147 return builder.CreateOr({storage, semantics});
148 else
149 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400150}
151
152Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
153 enum AtomicMemoryScope : uint32_t {
154 kMemoryScopeWorkItem = 0,
155 kMemoryScopeWorkGroup = 1,
156 kMemoryScopeDevice = 2,
157 kMemoryScopeAllSVMDevices = 3, // not supported
158 kMemoryScopeSubGroup = 4
159 };
160
161 IRBuilder<> builder(InsertBefore);
162
163 // Constants for OpenCL C 2.0 memory_scope.
164 const auto work_item =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
166 const auto work_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
168 const auto sub_group =
169 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
170 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
171
172 // Constants for SPIR-V memory scopes.
173 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
174 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
175 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
176 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
177
178 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
179 if (scope == nullptr)
180 return base_scope;
181
182 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
183 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
184 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
185 auto is_device = builder.CreateICmpEQ(scope, device);
186
187 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
188 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
189 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
190 scope = builder.CreateSelect(is_device, DeviceScope, scope);
191
192 return scope;
193}
194
SJW2c317da2020-03-23 07:39:13 -0500195bool replaceCallsWithValue(Function &F,
196 std::function<Value *(CallInst *)> Replacer) {
197
198 bool Changed = false;
199
200 SmallVector<Instruction *, 4> ToRemoves;
201
202 // Walk the users of the function.
203 for (auto &U : F.uses()) {
204 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
205
206 auto NewValue = Replacer(CI);
207
208 if (NewValue != nullptr) {
209 CI->replaceAllUsesWith(NewValue);
210
211 // Lastly, remember to remove the user.
212 ToRemoves.push_back(CI);
213 }
214 }
215 }
216
217 Changed = !ToRemoves.empty();
218
219 // And cleanup the calls we don't use anymore.
220 for (auto V : ToRemoves) {
221 V->eraseFromParent();
222 }
223
224 return Changed;
225}
226
David Neto22f144c2017-06-12 14:26:21 -0400227struct ReplaceOpenCLBuiltinPass final : public ModulePass {
228 static char ID;
229 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
230
231 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500232
233private:
SJW2c317da2020-03-23 07:39:13 -0500234 bool runOnFunction(Function &F);
235 bool replaceAbs(Function &F);
236 bool replaceAbsDiff(Function &F, bool is_signed);
237 bool replaceCopysign(Function &F);
238 bool replaceRecip(Function &F);
239 bool replaceDivide(Function &F);
240 bool replaceDot(Function &F);
241 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500242 bool replaceExp10(Function &F, const std::string &basename);
243 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100244 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400245 bool replaceBarrier(Function &F, bool subgroup = false);
alan-baker36309f92021-02-05 12:28:03 -0500246 bool replaceMemFence(Function &F, spv::MemorySemanticsMask semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100247 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500248 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500249 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
250 bool replaceIsFinite(Function &F);
251 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
252 bool replaceUpsample(Function &F);
253 bool replaceRotate(Function &F);
254 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
255 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
256 bool replaceSelect(Function &F);
257 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500258 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500259 bool replaceSignbit(Function &F, bool is_vec);
260 bool replaceMul(Function &F, bool is_float, bool is_mad);
261 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
262 bool replaceVloadHalf(Function &F);
263 bool replaceVloadHalf2(Function &F);
264 bool replaceVloadHalf4(Function &F);
265 bool replaceClspvVloadaHalf2(Function &F);
266 bool replaceClspvVloadaHalf4(Function &F);
267 bool replaceVstoreHalf(Function &F, int vec_size);
268 bool replaceVstoreHalf(Function &F);
269 bool replaceVstoreHalf2(Function &F);
270 bool replaceVstoreHalf4(Function &F);
271 bool replaceHalfReadImage(Function &F);
272 bool replaceHalfWriteImage(Function &F);
273 bool replaceSampledReadImageWithIntCoords(Function &F);
274 bool replaceAtomics(Function &F, spv::Op Op);
275 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400276 bool replaceAtomicLoad(Function &F);
277 bool replaceExplicitAtomics(Function &F, spv::Op Op,
278 spv::MemorySemanticsMask semantics =
279 spv::MemorySemanticsAcquireReleaseMask);
280 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500281 bool replaceCross(Function &F);
282 bool replaceFract(Function &F, int vec_size);
283 bool replaceVload(Function &F);
284 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500285 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000286 bool replaceHadd(Function &F, bool is_signed,
287 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500288 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500289 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500290 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500291 bool replaceIsNormal(Function &F);
alan-bakere0406e72020-11-10 12:32:04 -0500292 bool replaceFDim(Function &F);
alan-baker3e0de472020-12-08 15:57:17 -0500293 bool replaceRound(Function &F);
294 bool replaceTrigPi(Function &F, Builtins::BuiltinType type);
alan-baker8b968112020-12-15 15:53:29 -0500295 bool replaceSincos(Function &F);
296 bool replaceExpm1(Function &F);
297 bool replacePown(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500298
rjodinchr791203f2021-10-07 20:42:41 +0200299 bool replaceWaitGroupEvents(Function &F);
300 GlobalVariable *
301 getOrCreateGlobalVariable(Module &M, std::string VariableName,
302 AddressSpace::Type VariableAddressSpace);
303 Value *replaceAsyncWorkGroupCopies(Module &M, CallInst *CI, Value *Dst,
304 Value *Src, Value *NumGentypes,
305 Value *Stride, Value *Event);
306 bool replaceAsyncWorkGroupCopy(Function &F);
307 bool replaceAsyncWorkGroupStridedCopy(Function &F);
308
alan-baker6b9d1ee2020-11-03 23:11:32 -0500309 // Caches struct types for { |type|, |type| }. This prevents
310 // getOrInsertFunction from introducing a bitcasts between structs with
311 // identical contents.
312 Type *GetPairStruct(Type *type);
313
314 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400315};
SJW2c317da2020-03-23 07:39:13 -0500316
Kévin Petit91bc72e2019-04-08 15:17:46 +0100317} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400318
319char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400320INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
321 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400322
323namespace clspv {
324ModulePass *createReplaceOpenCLBuiltinPass() {
325 return new ReplaceOpenCLBuiltinPass();
326}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400327} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400328
329bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500330 std::list<Function *> func_list;
331 for (auto &F : M.getFunctionList()) {
332 // process only function declarations
333 if (F.isDeclaration() && runOnFunction(F)) {
334 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000335 }
336 }
SJW2c317da2020-03-23 07:39:13 -0500337 if (func_list.size() != 0) {
338 // recursively convert functions, but first remove dead
339 for (auto *F : func_list) {
340 if (F->use_empty()) {
341 F->eraseFromParent();
342 }
343 }
344 runOnModule(M);
345 return true;
346 }
347 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000348}
349
SJW2c317da2020-03-23 07:39:13 -0500350bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
351 auto &FI = Builtins::Lookup(&F);
352 switch (FI.getType()) {
353 case Builtins::kAbs:
354 if (!FI.getParameter(0).is_signed) {
355 return replaceAbs(F);
356 }
357 break;
358 case Builtins::kAbsDiff:
359 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400360
361 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500362 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400363
alan-bakercc2bafb2020-11-02 08:30:18 -0500364 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500365 return replaceCountZeroes(F, true);
366
367 case Builtins::kCtz:
368 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500369
alan-bakerb6da5132020-10-29 15:59:06 -0400370 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000371 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400372 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000373 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400374
SJW2c317da2020-03-23 07:39:13 -0500375 case Builtins::kCopysign:
376 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100377
SJW2c317da2020-03-23 07:39:13 -0500378 case Builtins::kHalfRecip:
379 case Builtins::kNativeRecip:
380 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100381
SJW2c317da2020-03-23 07:39:13 -0500382 case Builtins::kHalfDivide:
383 case Builtins::kNativeDivide:
384 return replaceDivide(F);
385
386 case Builtins::kDot:
387 return replaceDot(F);
388
389 case Builtins::kExp10:
390 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500391 case Builtins::kNativeExp10:
392 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500393
alan-baker8b968112020-12-15 15:53:29 -0500394 case Builtins::kExpm1:
395 return replaceExpm1(F);
396
SJW2c317da2020-03-23 07:39:13 -0500397 case Builtins::kLog10:
398 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500399 case Builtins::kNativeLog10:
400 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500401
gnl21636e7992020-09-09 16:08:16 +0100402 case Builtins::kLog1p:
403 return replaceLog1p(F);
404
alan-bakere0406e72020-11-10 12:32:04 -0500405 case Builtins::kFdim:
406 return replaceFDim(F);
407
SJW2c317da2020-03-23 07:39:13 -0500408 case Builtins::kFmod:
409 return replaceFmod(F);
410
alan-baker8b968112020-12-15 15:53:29 -0500411 case Builtins::kPown:
412 return replacePown(F);
413
alan-baker3e0de472020-12-08 15:57:17 -0500414 case Builtins::kRound:
415 return replaceRound(F);
416
417 case Builtins::kCospi:
418 case Builtins::kSinpi:
419 case Builtins::kTanpi:
420 return replaceTrigPi(F, FI.getType());
421
alan-baker8b968112020-12-15 15:53:29 -0500422 case Builtins::kSincos:
423 return replaceSincos(F);
424
SJW2c317da2020-03-23 07:39:13 -0500425 case Builtins::kBarrier:
426 case Builtins::kWorkGroupBarrier:
427 return replaceBarrier(F);
428
alan-baker12d2c182020-07-20 08:22:42 -0400429 case Builtins::kSubGroupBarrier:
430 return replaceBarrier(F, true);
431
alan-baker36309f92021-02-05 12:28:03 -0500432 case Builtins::kAtomicWorkItemFence:
433 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500434 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400435 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500436 case Builtins::kReadMemFence:
437 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
438 case Builtins::kWriteMemFence:
439 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
440
441 // Relational
442 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500443 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500444 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500445 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500446 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500447 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500448 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500449 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500450 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500451 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500452 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500453 return replaceRelational(F, CmpInst::FCMP_UNE);
454 case Builtins::kIslessgreater:
455 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500456
alan-baker15106572020-11-06 15:08:10 -0500457 case Builtins::kIsordered:
458 return replaceOrdered(F, true);
459
460 case Builtins::kIsunordered:
461 return replaceOrdered(F, false);
462
SJW2c317da2020-03-23 07:39:13 -0500463 case Builtins::kIsinf: {
464 bool is_vec = FI.getParameter(0).vector_size != 0;
465 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
466 }
467 case Builtins::kIsnan: {
468 bool is_vec = FI.getParameter(0).vector_size != 0;
469 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
470 }
471
472 case Builtins::kIsfinite:
473 return replaceIsFinite(F);
474
475 case Builtins::kAll: {
476 bool is_vec = FI.getParameter(0).vector_size != 0;
477 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
478 }
479 case Builtins::kAny: {
480 bool is_vec = FI.getParameter(0).vector_size != 0;
481 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
482 }
483
alan-baker497920b2020-11-09 16:41:36 -0500484 case Builtins::kIsnormal:
485 return replaceIsNormal(F);
486
SJW2c317da2020-03-23 07:39:13 -0500487 case Builtins::kUpsample:
488 return replaceUpsample(F);
489
490 case Builtins::kRotate:
491 return replaceRotate(F);
492
493 case Builtins::kConvert:
494 return replaceConvert(F, FI.getParameter(0).is_signed,
495 FI.getReturnType().is_signed);
496
alan-baker4986eff2020-10-29 13:38:00 -0400497 // OpenCL 2.0 explicit atomics have different default scopes and semantics
498 // than legacy atomic functions.
499 case Builtins::kAtomicLoad:
500 case Builtins::kAtomicLoadExplicit:
501 return replaceAtomicLoad(F);
502 case Builtins::kAtomicStore:
503 case Builtins::kAtomicStoreExplicit:
504 return replaceExplicitAtomics(F, spv::OpAtomicStore,
505 spv::MemorySemanticsReleaseMask);
506 case Builtins::kAtomicExchange:
507 case Builtins::kAtomicExchangeExplicit:
508 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
509 case Builtins::kAtomicFetchAdd:
510 case Builtins::kAtomicFetchAddExplicit:
511 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
512 case Builtins::kAtomicFetchSub:
513 case Builtins::kAtomicFetchSubExplicit:
514 return replaceExplicitAtomics(F, spv::OpAtomicISub);
515 case Builtins::kAtomicFetchOr:
516 case Builtins::kAtomicFetchOrExplicit:
517 return replaceExplicitAtomics(F, spv::OpAtomicOr);
518 case Builtins::kAtomicFetchXor:
519 case Builtins::kAtomicFetchXorExplicit:
520 return replaceExplicitAtomics(F, spv::OpAtomicXor);
521 case Builtins::kAtomicFetchAnd:
522 case Builtins::kAtomicFetchAndExplicit:
523 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
524 case Builtins::kAtomicFetchMin:
525 case Builtins::kAtomicFetchMinExplicit:
526 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
527 ? spv::OpAtomicSMin
528 : spv::OpAtomicUMin);
529 case Builtins::kAtomicFetchMax:
530 case Builtins::kAtomicFetchMaxExplicit:
531 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
532 ? spv::OpAtomicSMax
533 : spv::OpAtomicUMax);
534 // Weak compare exchange is generated as strong compare exchange.
535 case Builtins::kAtomicCompareExchangeWeak:
536 case Builtins::kAtomicCompareExchangeWeakExplicit:
537 case Builtins::kAtomicCompareExchangeStrong:
538 case Builtins::kAtomicCompareExchangeStrongExplicit:
539 return replaceAtomicCompareExchange(F);
540
541 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500542 case Builtins::kAtomicInc:
543 return replaceAtomics(F, spv::OpAtomicIIncrement);
544 case Builtins::kAtomicDec:
545 return replaceAtomics(F, spv::OpAtomicIDecrement);
546 case Builtins::kAtomicCmpxchg:
547 return replaceAtomics(F, spv::OpAtomicCompareExchange);
548 case Builtins::kAtomicAdd:
549 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
550 case Builtins::kAtomicSub:
551 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
552 case Builtins::kAtomicXchg:
553 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
554 case Builtins::kAtomicMin:
555 return replaceAtomics(F, FI.getParameter(0).is_signed
556 ? llvm::AtomicRMWInst::Min
557 : llvm::AtomicRMWInst::UMin);
558 case Builtins::kAtomicMax:
559 return replaceAtomics(F, FI.getParameter(0).is_signed
560 ? llvm::AtomicRMWInst::Max
561 : llvm::AtomicRMWInst::UMax);
562 case Builtins::kAtomicAnd:
563 return replaceAtomics(F, llvm::AtomicRMWInst::And);
564 case Builtins::kAtomicOr:
565 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
566 case Builtins::kAtomicXor:
567 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
568
569 case Builtins::kCross:
570 if (FI.getParameter(0).vector_size == 4) {
571 return replaceCross(F);
572 }
573 break;
574
575 case Builtins::kFract:
576 if (FI.getParameterCount()) {
577 return replaceFract(F, FI.getParameter(0).vector_size);
578 }
579 break;
580
581 case Builtins::kMadHi:
582 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
583 case Builtins::kMulHi:
584 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
585
alan-baker6b9d1ee2020-11-03 23:11:32 -0500586 case Builtins::kMadSat:
587 return replaceMadSat(F, FI.getParameter(0).is_signed);
588
SJW2c317da2020-03-23 07:39:13 -0500589 case Builtins::kMad:
590 case Builtins::kMad24:
591 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
592 true);
593 case Builtins::kMul24:
594 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
595 false);
596
597 case Builtins::kSelect:
598 return replaceSelect(F);
599
600 case Builtins::kBitselect:
601 return replaceBitSelect(F);
602
603 case Builtins::kVload:
604 return replaceVload(F);
605
606 case Builtins::kVloadaHalf:
607 case Builtins::kVloadHalf:
608 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
609
610 case Builtins::kVstore:
611 return replaceVstore(F);
612
613 case Builtins::kVstoreHalf:
614 case Builtins::kVstoreaHalf:
615 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
616
617 case Builtins::kSmoothstep: {
618 int vec_size = FI.getLastParameter().vector_size;
619 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500620 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500621 }
622 break;
623 }
624 case Builtins::kStep: {
625 int vec_size = FI.getLastParameter().vector_size;
626 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500627 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500628 }
629 break;
630 }
631
632 case Builtins::kSignbit:
633 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
634
alan-baker3f1bf492020-11-05 09:07:36 -0500635 case Builtins::kSubSat:
636 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
637
SJW2c317da2020-03-23 07:39:13 -0500638 case Builtins::kReadImageh:
639 return replaceHalfReadImage(F);
640 case Builtins::kReadImagef:
641 case Builtins::kReadImagei:
642 case Builtins::kReadImageui: {
643 if (FI.getParameter(1).isSampler() &&
644 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
645 return replaceSampledReadImageWithIntCoords(F);
646 }
647 break;
648 }
649
650 case Builtins::kWriteImageh:
651 return replaceHalfWriteImage(F);
652
Kévin Petit1cb45112020-04-27 18:55:48 +0100653 case Builtins::kPrefetch:
654 return replacePrefetch(F);
655
rjodinchr791203f2021-10-07 20:42:41 +0200656 // Asynchronous copies
657 case Builtins::kAsyncWorkGroupCopy:
658 return replaceAsyncWorkGroupCopy(F);
659 case Builtins::kAsyncWorkGroupStridedCopy:
660 return replaceAsyncWorkGroupStridedCopy(F);
661 case Builtins::kWaitGroupEvents:
662 return replaceWaitGroupEvents(F);
663
SJW2c317da2020-03-23 07:39:13 -0500664 default:
665 break;
666 }
667
668 return false;
669}
670
alan-baker6b9d1ee2020-11-03 23:11:32 -0500671Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
672 auto iter = PairStructMap.find(type);
673 if (iter != PairStructMap.end())
674 return iter->second;
675
676 auto new_struct = StructType::get(type->getContext(), {type, type});
677 PairStructMap[type] = new_struct;
678 return new_struct;
679}
680
rjodinchr791203f2021-10-07 20:42:41 +0200681bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
682 /* Simple implementation for wait_group_events to avoid dealing with the event
683 * list:
684 *
685 * void wait_group_events(int num_events, event_t *event_list) {
686 * barrier(CLK_LOCAL_MEM_FENCE);
687 * }
688 *
689 */
690
691 enum {
692 CLK_LOCAL_MEM_FENCE = 0x01,
693 CLK_GLOBAL_MEM_FENCE = 0x02,
694 CLK_IMAGE_MEM_FENCE = 0x04
695 };
696
697 return replaceCallsWithValue(F, [](CallInst *CI) {
698 IRBuilder<> Builder(CI);
699
700 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
701 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
702 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
703 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
704 clz(CLK_LOCAL_MEM_FENCE)),
705 "", CI);
706 auto MemorySemantics = BinaryOperator::Create(
707 Instruction::Or, MemorySemanticsWorkgroup,
708 ConstantInt::get(Builder.getInt32Ty(),
709 spv::MemorySemanticsAcquireReleaseMask),
710 "", CI);
711
712 return clspv::InsertSPIRVOp(
713 CI, spv::OpControlBarrier,
714 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
715 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
716 });
717}
718
719GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
720 Module &M, std::string VariableName,
721 AddressSpace::Type VariableAddressSpace) {
722 GlobalVariable *GV = M.getGlobalVariable(VariableName);
723 if (GV == nullptr) {
724 IntegerType *IT = IntegerType::get(M.getContext(), 32);
725 VectorType *VT = FixedVectorType::get(IT, 3);
726
727 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
728 VariableName, nullptr,
729 GlobalValue::ThreadLocalMode::NotThreadLocal,
730 VariableAddressSpace);
731 GV->setInitializer(Constant::getNullValue(VT));
732 }
733 return GV;
734}
735
736Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
737 Module &M, CallInst *CI, Value *Dst, Value *Src, Value *NumGentypes,
738 Value *Stride, Value *Event) {
739 /*
740 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
741 * size_t stride, event_t event) {
742 * size_t start_id = ((get_local_id(2) * get_local_size(1))
743 * + get_local_id(1)) * get_local_size(0)
744 * + get_local_id(0);
745 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
746 * for (size_t it = start_id; it < num_gentypes; it += incr) {
747 * dst[it] = src[it * stride];
748 * }
749 * return event;
750 * }
751 */
752
753 /* BB:
754 * before
755 * async_work_group_strided_copy
756 * after
757 *
758 * ================================
759 *
760 * BB:
761 * before
762 * start_id = f(get_local_ids, get_local_sizes)
763 * incr = g(get_local_sizes)
764 * br CmpBB
765 *
766 * CmpBB:
767 * it = PHI(start_id, it)
768 * cmp = it < NumGentypes
769 * condBr cmp, LoopBB, ExitBB
770 *
771 * LoopBB:
772 * dstI = dst[it]
773 * srcI = src[it * stride]
774 * OpCopyMemory dstI, srcI
775 * it += incr
776 * br CmpBB
777 *
778 * ExitBB:
779 * after
780 */
781
782 IRBuilder<> Builder(CI);
783
784 auto Cst0 = Builder.getInt32(0);
785 auto Cst1 = Builder.getInt32(1);
786 auto Cst2 = Builder.getInt32(2);
787
788 // get_local_id({0, 1, 2});
789 GlobalVariable *GVId =
790 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
791 clspv::LocalInvocationIdAddressSpace());
792 auto LocalId0 = Builder.CreateLoad(Builder.CreateGEP(GVId, {Cst0, Cst0}));
793 auto LocalId1 = Builder.CreateLoad(Builder.CreateGEP(GVId, {Cst0, Cst1}));
794 auto LocalId2 = Builder.CreateLoad(Builder.CreateGEP(GVId, {Cst0, Cst2}));
795
796 // get_local_size({0, 1, 2});
797 GlobalVariable *GVSize =
798 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
799 clspv::WorkgroupSizeAddressSpace());
800 auto LocalSize = Builder.CreateLoad(GVSize);
801 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
802 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
803 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
804
805 // size_t start_id = ((get_local_id(2) * get_local_size(1))
806 // + get_local_id(1)) * get_local_size(0)
807 // + get_local_id(0);
808 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
809 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
810 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
811 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
812
813 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
814 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
815 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
816
817 // Create BasicBlocks
818 auto BB = CI->getParent();
819 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
820 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
821 auto ExitBB = SplitBlock(BB, CI);
822
823 // BB
824 auto BrCmpBB = BranchInst::Create(CmpBB);
825 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
826
827 // CmpBB
828 Builder.SetInsertPoint(CmpBB);
829 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
830 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
831 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
832
833 // LoopBB
834 Builder.SetInsertPoint(LoopBB);
835
836 // default values for non-strided copies
837 Value *SrcIterator = PHIIterator;
838 Value *DstIterator = PHIIterator;
839 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
840 clspv::AddressSpace::Global)) {
841 // async_work_group_strided_copy local to global case
842 DstIterator = Builder.CreateMul(PHIIterator, Stride);
843 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
844 clspv::AddressSpace::Local)) {
845 // async_work_group_strided_copy global to local case
846 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
847 }
848 auto DstI = Builder.CreateGEP(Dst, {DstIterator});
849 auto SrcI = Builder.CreateGEP(Src, {SrcIterator});
850 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
851 auto Br = Builder.CreateBr(CmpBB);
852 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
853 {DstI, SrcI});
854
855 // Set PHIIterator for CmpBB now that we have NewIterator
856 PHIIterator->addIncoming(StartId, BB);
857 PHIIterator->addIncoming(NewIterator, LoopBB);
858
859 return Event;
860}
861
862bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F) {
863 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
864 Module &M = *F.getParent();
865
866 auto Dst = CI->getOperand(0);
867 auto Src = CI->getOperand(1);
868 auto NumGentypes = CI->getOperand(2);
869 auto Event = CI->getOperand(3);
870
871 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, nullptr,
872 Event);
873 });
874}
875
876bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F) {
877 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
878 Module &M = *F.getParent();
879
880 auto Dst = CI->getOperand(0);
881 auto Src = CI->getOperand(1);
882 auto NumGentypes = CI->getOperand(2);
883 auto Stride = CI->getOperand(3);
884 auto Event = CI->getOperand(4);
885
886 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, Stride,
887 Event);
888 });
889}
890
SJW2c317da2020-03-23 07:39:13 -0500891bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
892 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400893 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100894}
895
SJW2c317da2020-03-23 07:39:13 -0500896bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
897 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100898 auto XValue = CI->getOperand(0);
899 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100900
Kévin Petite8edce32019-04-10 14:23:32 +0100901 IRBuilder<> Builder(CI);
902 auto XmY = Builder.CreateSub(XValue, YValue);
903 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100904
SJW2c317da2020-03-23 07:39:13 -0500905 Value *Cmp = nullptr;
906 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100907 Cmp = Builder.CreateICmpSGT(YValue, XValue);
908 } else {
909 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100910 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100911
Kévin Petite8edce32019-04-10 14:23:32 +0100912 return Builder.CreateSelect(Cmp, YmX, XmY);
913 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100914}
915
SJW2c317da2020-03-23 07:39:13 -0500916bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500917 return replaceCallsWithValue(F, [&F](CallInst *Call) {
918 const auto x = Call->getArgOperand(0);
919 const auto y = Call->getArgOperand(1);
920 auto intrinsic = Intrinsic::getDeclaration(
921 F.getParent(), Intrinsic::copysign, Call->getType());
922 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
923 Call);
Kévin Petite8edce32019-04-10 14:23:32 +0100924 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100925}
926
SJW2c317da2020-03-23 07:39:13 -0500927bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
928 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100929 // Recip has one arg.
930 auto Arg = CI->getOperand(0);
931 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
932 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
933 });
David Neto22f144c2017-06-12 14:26:21 -0400934}
935
SJW2c317da2020-03-23 07:39:13 -0500936bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
937 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100938 auto Op0 = CI->getOperand(0);
939 auto Op1 = CI->getOperand(1);
940 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
941 });
David Neto22f144c2017-06-12 14:26:21 -0400942}
943
SJW2c317da2020-03-23 07:39:13 -0500944bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
945 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100946 auto Op0 = CI->getOperand(0);
947 auto Op1 = CI->getOperand(1);
948
SJW2c317da2020-03-23 07:39:13 -0500949 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100950 if (Op0->getType()->isVectorTy()) {
951 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
952 CI->getType(), {Op0, Op1});
953 } else {
954 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
955 }
956
957 return V;
958 });
959}
960
SJW2c317da2020-03-23 07:39:13 -0500961bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500962 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500963 // convert to natural
964 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500965 std::string NewFName = basename.substr(0, slen);
966 NewFName =
967 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400968
SJW2c317da2020-03-23 07:39:13 -0500969 Module &M = *F.getParent();
970 return replaceCallsWithValue(F, [&](CallInst *CI) {
971 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
972
973 auto Arg = CI->getOperand(0);
974
975 // Constant of the natural log of 10 (ln(10)).
976 const double Ln10 =
977 2.302585092994045684017991454684364207601101488628772976033;
978
979 auto Mul = BinaryOperator::Create(
980 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
981
982 return CallInst::Create(NewF, Mul, "", CI);
983 });
David Neto22f144c2017-06-12 14:26:21 -0400984}
985
SJW2c317da2020-03-23 07:39:13 -0500986bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100987 // OpenCL fmod(x,y) is x - y * trunc(x/y)
988 // The sign for a non-zero result is taken from x.
989 // (Try an example.)
990 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500991 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100992 auto Op0 = CI->getOperand(0);
993 auto Op1 = CI->getOperand(1);
994 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
995 });
996}
997
SJW2c317da2020-03-23 07:39:13 -0500998bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500999 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001000 // convert to natural
1001 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001002 std::string NewFName = basename.substr(0, slen);
1003 NewFName =
1004 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001005
SJW2c317da2020-03-23 07:39:13 -05001006 Module &M = *F.getParent();
1007 return replaceCallsWithValue(F, [&](CallInst *CI) {
1008 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1009
1010 auto Arg = CI->getOperand(0);
1011
1012 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1013 const double Ln10 =
1014 0.434294481903251827651128918916605082294397005803666566114;
1015
1016 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1017
1018 return BinaryOperator::Create(Instruction::FMul,
1019 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1020 "", CI);
1021 });
David Neto22f144c2017-06-12 14:26:21 -04001022}
1023
gnl21636e7992020-09-09 16:08:16 +01001024bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1025 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001026 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001027 auto Arg = CI->getOperand(0);
1028
1029 auto ArgP1 = BinaryOperator::Create(
1030 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1031
alan-baker8b968112020-12-15 15:53:29 -05001032 auto log =
1033 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1034 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001035 });
1036}
1037
alan-baker12d2c182020-07-20 08:22:42 -04001038bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001039
alan-bakerf6bc8252020-09-23 14:58:55 -04001040 enum {
1041 CLK_LOCAL_MEM_FENCE = 0x01,
1042 CLK_GLOBAL_MEM_FENCE = 0x02,
1043 CLK_IMAGE_MEM_FENCE = 0x04
1044 };
David Neto22f144c2017-06-12 14:26:21 -04001045
alan-baker12d2c182020-07-20 08:22:42 -04001046 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001047 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001048
Kévin Petitc4643922019-06-17 19:32:05 +01001049 // We need to map the OpenCL constants to the SPIR-V equivalents.
1050 const auto LocalMemFence =
1051 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1052 const auto GlobalMemFence =
1053 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001054 const auto ImageMemFence =
1055 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001056 const auto ConstantAcquireRelease = ConstantInt::get(
1057 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001058 const auto ConstantScopeDevice =
1059 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1060 const auto ConstantScopeWorkgroup =
1061 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001062 const auto ConstantScopeSubgroup =
1063 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001064
Kévin Petitc4643922019-06-17 19:32:05 +01001065 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1066 const auto LocalMemFenceMask =
1067 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1068 const auto WorkgroupShiftAmount =
1069 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1070 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1071 Instruction::Shl, LocalMemFenceMask,
1072 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001073
Kévin Petitc4643922019-06-17 19:32:05 +01001074 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1075 const auto GlobalMemFenceMask =
1076 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1077 const auto UniformShiftAmount =
1078 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1079 const auto MemorySemanticsUniform = BinaryOperator::Create(
1080 Instruction::Shl, GlobalMemFenceMask,
1081 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001082
alan-bakerf6bc8252020-09-23 14:58:55 -04001083 // OpenCL 2.0
1084 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1085 const auto ImageMemFenceMask =
1086 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1087 const auto ImageShiftAmount =
1088 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1089 const auto MemorySemanticsImage = BinaryOperator::Create(
1090 Instruction::Shl, ImageMemFenceMask,
1091 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1092
Kévin Petitc4643922019-06-17 19:32:05 +01001093 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001094 // MemorySemanticsSequentiallyConsistentMask.
1095 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001096 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001097 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001098 auto MemorySemantics2 = BinaryOperator::Create(
1099 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1100 auto MemorySemantics = BinaryOperator::Create(
1101 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001102
alan-baker12d2c182020-07-20 08:22:42 -04001103 // If the memory scope is not specified explicitly, it is either Subgroup
1104 // or Workgroup depending on the type of barrier.
1105 Value *MemoryScope =
1106 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1107 if (CI->data_operands_size() > 1) {
1108 enum {
1109 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1110 CL_MEMORY_SCOPE_DEVICE = 0x2,
1111 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1112 };
1113 // The call was given an explicit memory scope.
1114 const auto MemoryScopeSubgroup =
1115 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1116 const auto MemoryScopeDevice =
1117 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001118
alan-baker12d2c182020-07-20 08:22:42 -04001119 auto Cmp =
1120 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1121 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1122 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1123 ConstantScopeWorkgroup, "", CI);
1124 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1125 MemoryScopeDevice, CI->getOperand(1), "", CI);
1126 MemoryScope =
1127 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1128 }
1129
1130 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1131 // the type of barrier;
1132 const auto ExecutionScope =
1133 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001134
Kévin Petitc4643922019-06-17 19:32:05 +01001135 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001136 {Attribute::NoDuplicate, Attribute::Convergent},
1137 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001138 {ExecutionScope, MemoryScope, MemorySemantics});
1139 });
David Neto22f144c2017-06-12 14:26:21 -04001140}
1141
alan-baker36309f92021-02-05 12:28:03 -05001142bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1143 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001144
SJW2c317da2020-03-23 07:39:13 -05001145 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001146 enum {
1147 CLK_LOCAL_MEM_FENCE = 0x01,
1148 CLK_GLOBAL_MEM_FENCE = 0x02,
1149 CLK_IMAGE_MEM_FENCE = 0x04,
1150 };
David Neto22f144c2017-06-12 14:26:21 -04001151
SJW2c317da2020-03-23 07:39:13 -05001152 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001153
SJW2c317da2020-03-23 07:39:13 -05001154 // We need to map the OpenCL constants to the SPIR-V equivalents.
1155 const auto LocalMemFence =
1156 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1157 const auto GlobalMemFence =
1158 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001159 const auto ImageMemFence =
1160 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001161 const auto ConstantMemorySemantics =
1162 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001163 const auto ConstantScopeWorkgroup =
1164 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001165
SJW2c317da2020-03-23 07:39:13 -05001166 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1167 const auto LocalMemFenceMask =
1168 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1169 const auto WorkgroupShiftAmount =
1170 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1171 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1172 Instruction::Shl, LocalMemFenceMask,
1173 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001174
SJW2c317da2020-03-23 07:39:13 -05001175 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1176 const auto GlobalMemFenceMask =
1177 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1178 const auto UniformShiftAmount =
1179 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1180 const auto MemorySemanticsUniform = BinaryOperator::Create(
1181 Instruction::Shl, GlobalMemFenceMask,
1182 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001183
alan-bakerf6bc8252020-09-23 14:58:55 -04001184 // OpenCL 2.0
1185 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1186 const auto ImageMemFenceMask =
1187 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1188 const auto ImageShiftAmount =
1189 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1190 const auto MemorySemanticsImage = BinaryOperator::Create(
1191 Instruction::Shl, ImageMemFenceMask,
1192 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1193
alan-baker36309f92021-02-05 12:28:03 -05001194 Value *MemOrder = ConstantMemorySemantics;
1195 Value *MemScope = ConstantScopeWorkgroup;
1196 IRBuilder<> builder(CI);
1197 if (CI->getNumArgOperands() > 1) {
1198 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1199 semantics, false);
1200 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1201 }
1202 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001203 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001204 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1205 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1206 auto MemorySemantics =
1207 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001208
alan-baker3d905692020-10-28 14:02:37 -04001209 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1210 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001211 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001212 });
David Neto22f144c2017-06-12 14:26:21 -04001213}
1214
Kévin Petit1cb45112020-04-27 18:55:48 +01001215bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1216 bool Changed = false;
1217
1218 SmallVector<Instruction *, 4> ToRemoves;
1219
1220 // Find all calls to the function
1221 for (auto &U : F.uses()) {
1222 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1223 ToRemoves.push_back(CI);
1224 }
1225 }
1226
1227 Changed = !ToRemoves.empty();
1228
1229 // Delete them
1230 for (auto V : ToRemoves) {
1231 V->eraseFromParent();
1232 }
1233
1234 return Changed;
1235}
1236
SJW2c317da2020-03-23 07:39:13 -05001237bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001238 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001239 return replaceCallsWithValue(F, [&](CallInst *CI) {
1240 // The predicate to use in the CmpInst.
1241 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001242
SJW2c317da2020-03-23 07:39:13 -05001243 auto Arg1 = CI->getOperand(0);
1244 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001245
SJW2c317da2020-03-23 07:39:13 -05001246 const auto Cmp =
1247 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001248 if (isa<VectorType>(F.getReturnType()))
1249 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1250 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001251 });
David Neto22f144c2017-06-12 14:26:21 -04001252}
1253
SJW2c317da2020-03-23 07:39:13 -05001254bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1255 spv::Op SPIRVOp,
1256 int32_t C) {
1257 Module &M = *F.getParent();
1258 return replaceCallsWithValue(F, [&](CallInst *CI) {
1259 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001260
SJW2c317da2020-03-23 07:39:13 -05001261 // The value to return for true.
1262 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001263
SJW2c317da2020-03-23 07:39:13 -05001264 // The value to return for false.
1265 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001266
SJW2c317da2020-03-23 07:39:13 -05001267 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001268 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001269 CorrespondingBoolTy =
1270 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1271 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001272 }
David Neto22f144c2017-06-12 14:26:21 -04001273
SJW2c317da2020-03-23 07:39:13 -05001274 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1275 CorrespondingBoolTy, {CI->getOperand(0)});
1276
1277 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1278 });
David Neto22f144c2017-06-12 14:26:21 -04001279}
1280
SJW2c317da2020-03-23 07:39:13 -05001281bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1282 Module &M = *F.getParent();
1283 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001284 auto &C = M.getContext();
1285 auto Val = CI->getOperand(0);
1286 auto ValTy = Val->getType();
1287 auto RetTy = CI->getType();
1288
1289 // Get a suitable integer type to represent the number
1290 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1291
1292 // Create Mask
1293 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001294 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001295 switch (ScalarSize) {
1296 case 16:
1297 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1298 break;
1299 case 32:
1300 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1301 break;
1302 case 64:
1303 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1304 break;
1305 default:
1306 llvm_unreachable("Unsupported floating-point type");
1307 }
1308
1309 IRBuilder<> Builder(CI);
1310
1311 // Bitcast to int
1312 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1313
1314 // Mask and compare
1315 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1316 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1317
1318 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001319 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001320 if (ValTy->isVectorTy()) {
1321 RetTrue = ConstantInt::getSigned(RetTy, -1);
1322 } else {
1323 RetTrue = ConstantInt::get(RetTy, 1);
1324 }
1325 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1326 });
1327}
1328
SJW2c317da2020-03-23 07:39:13 -05001329bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1330 Module &M = *F.getParent();
1331 return replaceCallsWithValue(F, [&](CallInst *CI) {
1332 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001333
SJW2c317da2020-03-23 07:39:13 -05001334 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001335
SJW2c317da2020-03-23 07:39:13 -05001336 // If the argument is a 32-bit int, just use a shift
1337 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1338 V = BinaryOperator::Create(Instruction::LShr, Arg,
1339 ConstantInt::get(Arg->getType(), 31), "", CI);
1340 } else {
1341 // The value for zero to compare against.
1342 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001343
SJW2c317da2020-03-23 07:39:13 -05001344 // The value to return for true.
1345 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001346
SJW2c317da2020-03-23 07:39:13 -05001347 // The value to return for false.
1348 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001349
SJW2c317da2020-03-23 07:39:13 -05001350 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1351 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001352
SJW2c317da2020-03-23 07:39:13 -05001353 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001354
SJW2c317da2020-03-23 07:39:13 -05001355 // If we have a function to call, call it!
1356 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001357
SJW2c317da2020-03-23 07:39:13 -05001358 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001359
SJW2c317da2020-03-23 07:39:13 -05001360 const auto NewCI = clspv::InsertSPIRVOp(
1361 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1362 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001363
SJW2c317da2020-03-23 07:39:13 -05001364 } else {
1365 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001366 }
1367
SJW2c317da2020-03-23 07:39:13 -05001368 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001369 }
SJW2c317da2020-03-23 07:39:13 -05001370 return V;
1371 });
David Neto22f144c2017-06-12 14:26:21 -04001372}
1373
SJW2c317da2020-03-23 07:39:13 -05001374bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1375 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1376 // Get arguments
1377 auto HiValue = CI->getOperand(0);
1378 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001379
SJW2c317da2020-03-23 07:39:13 -05001380 // Don't touch overloads that aren't in OpenCL C
1381 auto HiType = HiValue->getType();
1382 auto LoType = LoValue->getType();
1383
1384 if (HiType != LoType) {
1385 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001386 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001387
SJW2c317da2020-03-23 07:39:13 -05001388 if (!HiType->isIntOrIntVectorTy()) {
1389 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001390 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001391
SJW2c317da2020-03-23 07:39:13 -05001392 if (HiType->getScalarSizeInBits() * 2 !=
1393 CI->getType()->getScalarSizeInBits()) {
1394 return nullptr;
1395 }
1396
1397 if ((HiType->getScalarSizeInBits() != 8) &&
1398 (HiType->getScalarSizeInBits() != 16) &&
1399 (HiType->getScalarSizeInBits() != 32)) {
1400 return nullptr;
1401 }
1402
James Pricecf53df42020-04-20 14:41:24 -04001403 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001404 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001405 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1406 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001407 return nullptr;
1408 }
1409 }
1410
1411 // Convert both operands to the result type
1412 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1413 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1414
1415 // Shift high operand
1416 auto ShiftAmount =
1417 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1418 auto HiShifted =
1419 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1420
1421 // OR both results
1422 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1423 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001424}
1425
SJW2c317da2020-03-23 07:39:13 -05001426bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1427 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1428 // Get arguments
1429 auto SrcValue = CI->getOperand(0);
1430 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001431
SJW2c317da2020-03-23 07:39:13 -05001432 // Don't touch overloads that aren't in OpenCL C
1433 auto SrcType = SrcValue->getType();
1434 auto RotType = RotAmount->getType();
1435
1436 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1437 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001438 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001439
SJW2c317da2020-03-23 07:39:13 -05001440 if (!SrcType->isIntOrIntVectorTy()) {
1441 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001442 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001443
SJW2c317da2020-03-23 07:39:13 -05001444 if ((SrcType->getScalarSizeInBits() != 8) &&
1445 (SrcType->getScalarSizeInBits() != 16) &&
1446 (SrcType->getScalarSizeInBits() != 32) &&
1447 (SrcType->getScalarSizeInBits() != 64)) {
1448 return nullptr;
1449 }
1450
James Pricecf53df42020-04-20 14:41:24 -04001451 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001452 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001453 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1454 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001455 return nullptr;
1456 }
1457 }
1458
alan-bakerfd22ae12020-10-29 15:59:22 -04001459 // Replace with LLVM's funnel shift left intrinsic because it is more
1460 // generic than rotate.
1461 Function *intrinsic =
1462 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1463 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1464 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001465 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001466}
1467
SJW2c317da2020-03-23 07:39:13 -05001468bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1469 bool DstIsSigned) {
1470 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1471 Value *V = nullptr;
1472 // Get arguments
1473 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001474
SJW2c317da2020-03-23 07:39:13 -05001475 // Don't touch overloads that aren't in OpenCL C
1476 auto SrcType = SrcValue->getType();
1477 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001478
SJW2c317da2020-03-23 07:39:13 -05001479 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1480 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1481 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001482 }
1483
James Pricecf53df42020-04-20 14:41:24 -04001484 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001485 unsigned SrcNumElements =
1486 SrcVecType->getElementCount().getKnownMinValue();
1487 unsigned DstNumElements =
1488 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001489 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001490 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001491 }
1492
James Pricecf53df42020-04-20 14:41:24 -04001493 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1494 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1495 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001496 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001497 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001498 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001499
SJW2c317da2020-03-23 07:39:13 -05001500 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1501 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1502
1503 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1504 bool DstIsInt = DstType->isIntOrIntVectorTy();
1505
1506 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1507 // Unnecessary cast operation.
1508 V = SrcValue;
1509 } else if (SrcIsFloat && DstIsFloat) {
1510 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1511 } else if (SrcIsFloat && DstIsInt) {
1512 if (DstIsSigned) {
1513 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1514 } else {
1515 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1516 }
1517 } else if (SrcIsInt && DstIsFloat) {
1518 if (SrcIsSigned) {
1519 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1520 } else {
1521 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1522 }
1523 } else if (SrcIsInt && DstIsInt) {
1524 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1525 } else {
1526 // Not something we're supposed to handle, just move on
1527 }
1528
1529 return V;
1530 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001531}
1532
SJW2c317da2020-03-23 07:39:13 -05001533bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1534 bool is_mad) {
1535 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1536 Value *V = nullptr;
1537 // Get arguments
1538 auto AValue = CI->getOperand(0);
1539 auto BValue = CI->getOperand(1);
1540 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001541
SJW2c317da2020-03-23 07:39:13 -05001542 // Don't touch overloads that aren't in OpenCL C
1543 auto AType = AValue->getType();
1544 auto BType = BValue->getType();
1545 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001546
SJW2c317da2020-03-23 07:39:13 -05001547 if ((AType != BType) || (CI->getType() != AType) ||
1548 (is_mad && (AType != CType))) {
1549 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001550 }
1551
SJW2c317da2020-03-23 07:39:13 -05001552 if (!AType->isIntOrIntVectorTy()) {
1553 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001554 }
Kévin Petit8a560882019-03-21 15:24:34 +00001555
SJW2c317da2020-03-23 07:39:13 -05001556 if ((AType->getScalarSizeInBits() != 8) &&
1557 (AType->getScalarSizeInBits() != 16) &&
1558 (AType->getScalarSizeInBits() != 32) &&
1559 (AType->getScalarSizeInBits() != 64)) {
1560 return V;
1561 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001562
James Pricecf53df42020-04-20 14:41:24 -04001563 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001564 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001565 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1566 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001567 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001568 }
1569 }
1570
SJW2c317da2020-03-23 07:39:13 -05001571 // Our SPIR-V op returns a struct, create a type for it
alan-baker6b9d1ee2020-11-03 23:11:32 -05001572 auto ExMulRetType = GetPairStruct(AType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001573
SJW2c317da2020-03-23 07:39:13 -05001574 // Select the appropriate signed/unsigned SPIR-V op
1575 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1576
1577 // Call the SPIR-V op
1578 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1579 ExMulRetType, {AValue, BValue});
1580
1581 // Get the high part of the result
1582 unsigned Idxs[] = {1};
1583 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1584
1585 // If we're handling a mad_hi, add the third argument to the result
1586 if (is_mad) {
1587 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001588 }
1589
SJW2c317da2020-03-23 07:39:13 -05001590 return V;
1591 });
Kévin Petit8a560882019-03-21 15:24:34 +00001592}
1593
SJW2c317da2020-03-23 07:39:13 -05001594bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1595 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1596 // Get arguments
1597 auto FalseValue = CI->getOperand(0);
1598 auto TrueValue = CI->getOperand(1);
1599 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001600
SJW2c317da2020-03-23 07:39:13 -05001601 // Don't touch overloads that aren't in OpenCL C
1602 auto FalseType = FalseValue->getType();
1603 auto TrueType = TrueValue->getType();
1604 auto PredicateType = PredicateValue->getType();
1605
1606 if (FalseType != TrueType) {
1607 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001608 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001609
SJW2c317da2020-03-23 07:39:13 -05001610 if (!PredicateType->isIntOrIntVectorTy()) {
1611 return nullptr;
1612 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001613
SJW2c317da2020-03-23 07:39:13 -05001614 if (!FalseType->isIntOrIntVectorTy() &&
1615 !FalseType->getScalarType()->isFloatingPointTy()) {
1616 return nullptr;
1617 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001618
SJW2c317da2020-03-23 07:39:13 -05001619 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1620 return nullptr;
1621 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001622
SJW2c317da2020-03-23 07:39:13 -05001623 if (FalseType->getScalarSizeInBits() !=
1624 PredicateType->getScalarSizeInBits()) {
1625 return nullptr;
1626 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001627
James Pricecf53df42020-04-20 14:41:24 -04001628 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001629 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1630 if (NumElements != cast<VectorType>(PredicateType)
1631 ->getElementCount()
1632 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001633 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001634 }
1635
James Pricecf53df42020-04-20 14:41:24 -04001636 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1637 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001638 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001639 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001640 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001641
SJW2c317da2020-03-23 07:39:13 -05001642 // Create constant
1643 const auto ZeroValue = Constant::getNullValue(PredicateType);
1644
1645 // Scalar and vector are to be treated differently
1646 CmpInst::Predicate Pred;
1647 if (PredicateType->isVectorTy()) {
1648 Pred = CmpInst::ICMP_SLT;
1649 } else {
1650 Pred = CmpInst::ICMP_NE;
1651 }
1652
1653 // Create comparison instruction
1654 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1655 ZeroValue, "", CI);
1656
1657 // Create select
1658 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1659 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001660}
1661
SJW2c317da2020-03-23 07:39:13 -05001662bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1663 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1664 Value *V = nullptr;
1665 if (CI->getNumOperands() != 4) {
1666 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001667 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001668
SJW2c317da2020-03-23 07:39:13 -05001669 // Get arguments
1670 auto FalseValue = CI->getOperand(0);
1671 auto TrueValue = CI->getOperand(1);
1672 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001673
SJW2c317da2020-03-23 07:39:13 -05001674 // Don't touch overloads that aren't in OpenCL C
1675 auto FalseType = FalseValue->getType();
1676 auto TrueType = TrueValue->getType();
1677 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001678
SJW2c317da2020-03-23 07:39:13 -05001679 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1680 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001681 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001682
James Pricecf53df42020-04-20 14:41:24 -04001683 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001684 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1685 !TrueType->getScalarType()->isIntegerTy()) {
1686 return V;
1687 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001688 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001689 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1690 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001691 return V;
1692 }
1693 }
1694
1695 // Remember the type of the operands
1696 auto OpType = TrueType;
1697
1698 // The actual bit selection will always be done on an integer type,
1699 // declare it here
1700 Type *BitType;
1701
1702 // If the operands are float, then bitcast them to int
1703 if (OpType->getScalarType()->isFloatingPointTy()) {
1704
1705 // First create the new type
1706 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1707
1708 // Then bitcast all operands
1709 PredicateValue =
1710 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1711 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1712 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1713
1714 } else {
1715 // The operands have an integer type, use it directly
1716 BitType = OpType;
1717 }
1718
1719 // All the operands are now always integers
1720 // implement as (c & b) | (~c & a)
1721
1722 // Create our negated predicate value
1723 auto AllOnes = Constant::getAllOnesValue(BitType);
1724 auto NotPredicateValue = BinaryOperator::Create(
1725 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1726
1727 // Then put everything together
1728 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1729 FalseValue, "", CI);
1730 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1731 TrueValue, "", CI);
1732
1733 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1734
1735 // If we were dealing with a floating point type, we must bitcast
1736 // the result back to that
1737 if (OpType->getScalarType()->isFloatingPointTy()) {
1738 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1739 }
1740
1741 return V;
1742 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001743}
1744
SJW61531372020-06-09 07:31:08 -05001745bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001746 // convert to vector versions
1747 Module &M = *F.getParent();
1748 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1749 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1750 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001751
SJW2c317da2020-03-23 07:39:13 -05001752 // First figure out which function we're dealing with
1753 if (is_smooth) {
1754 ArgsToSplat.push_back(CI->getOperand(1));
1755 VectorArg = CI->getOperand(2);
1756 } else {
1757 VectorArg = CI->getOperand(1);
1758 }
1759
1760 // Splat arguments that need to be
1761 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001762 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001763
1764 for (auto arg : ArgsToSplat) {
1765 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001766 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1767 i++) {
SJW2c317da2020-03-23 07:39:13 -05001768 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1769 NewVectorArg =
1770 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1771 }
1772 SplatArgs.push_back(NewVectorArg);
1773 }
1774
1775 // Replace the call with the vector/vector flavour
1776 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1777 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1778
SJW61531372020-06-09 07:31:08 -05001779 std::string NewFName = Builtins::GetMangledFunctionName(
1780 is_smooth ? "smoothstep" : "step", NewFType);
1781
SJW2c317da2020-03-23 07:39:13 -05001782 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1783
1784 SmallVector<Value *, 3> NewArgs;
1785 for (auto arg : SplatArgs) {
1786 NewArgs.push_back(arg);
1787 }
1788 NewArgs.push_back(VectorArg);
1789
1790 return CallInst::Create(NewF, NewArgs, "", CI);
1791 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001792}
1793
SJW2c317da2020-03-23 07:39:13 -05001794bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001795 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1796 auto Arg = CI->getOperand(0);
1797 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001798
SJW2c317da2020-03-23 07:39:13 -05001799 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001800
SJW2c317da2020-03-23 07:39:13 -05001801 return BinaryOperator::Create(Op, Bitcast,
1802 ConstantInt::get(CI->getType(), 31), "", CI);
1803 });
David Neto22f144c2017-06-12 14:26:21 -04001804}
1805
SJW2c317da2020-03-23 07:39:13 -05001806bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1807 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001808 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1809 // The multiply instruction to use.
1810 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001811
SJW2c317da2020-03-23 07:39:13 -05001812 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001813
SJW2c317da2020-03-23 07:39:13 -05001814 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1815 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001816
SJW2c317da2020-03-23 07:39:13 -05001817 if (is_mad) {
1818 // The add instruction to use.
1819 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001820
SJW2c317da2020-03-23 07:39:13 -05001821 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001822 }
David Neto22f144c2017-06-12 14:26:21 -04001823
SJW2c317da2020-03-23 07:39:13 -05001824 return V;
1825 });
David Neto22f144c2017-06-12 14:26:21 -04001826}
1827
SJW2c317da2020-03-23 07:39:13 -05001828bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001829 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1830 Value *V = nullptr;
1831 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001832
SJW2c317da2020-03-23 07:39:13 -05001833 auto data_type = data->getType();
1834 if (!data_type->isVectorTy())
1835 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001836
James Pricecf53df42020-04-20 14:41:24 -04001837 auto vec_data_type = cast<VectorType>(data_type);
1838
alan-baker5a8c3be2020-09-09 13:44:26 -04001839 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001840 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1841 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001842
SJW2c317da2020-03-23 07:39:13 -05001843 auto offset = CI->getOperand(1);
1844 auto ptr = CI->getOperand(2);
1845 auto ptr_type = ptr->getType();
1846 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001847 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001848 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001849
SJW2c317da2020-03-23 07:39:13 -05001850 // Avoid pointer casts. Instead generate the correct number of stores
1851 // and rely on drivers to coalesce appropriately.
1852 IRBuilder<> builder(CI);
1853 auto elems_const = builder.getInt32(elems);
1854 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001855 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001856 auto idx = builder.getInt32(i);
1857 auto add = builder.CreateAdd(adjust, idx);
1858 auto gep = builder.CreateGEP(ptr, add);
1859 auto extract = builder.CreateExtractElement(data, i);
1860 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001861 }
SJW2c317da2020-03-23 07:39:13 -05001862 return V;
1863 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001864}
1865
SJW2c317da2020-03-23 07:39:13 -05001866bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001867 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1868 Value *V = nullptr;
1869 auto ret_type = F.getReturnType();
1870 if (!ret_type->isVectorTy())
1871 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001872
James Pricecf53df42020-04-20 14:41:24 -04001873 auto vec_ret_type = cast<VectorType>(ret_type);
1874
alan-baker5a8c3be2020-09-09 13:44:26 -04001875 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001876 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1877 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001878
SJW2c317da2020-03-23 07:39:13 -05001879 auto offset = CI->getOperand(0);
1880 auto ptr = CI->getOperand(1);
1881 auto ptr_type = ptr->getType();
1882 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001883 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001884 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001885
SJW2c317da2020-03-23 07:39:13 -05001886 // Avoid pointer casts. Instead generate the correct number of loads
1887 // and rely on drivers to coalesce appropriately.
1888 IRBuilder<> builder(CI);
1889 auto elems_const = builder.getInt32(elems);
1890 V = UndefValue::get(ret_type);
1891 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001892 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001893 auto idx = builder.getInt32(i);
1894 auto add = builder.CreateAdd(adjust, idx);
1895 auto gep = builder.CreateGEP(ptr, add);
1896 auto load = builder.CreateLoad(gep);
1897 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001898 }
SJW2c317da2020-03-23 07:39:13 -05001899 return V;
1900 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001901}
1902
SJW2c317da2020-03-23 07:39:13 -05001903bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1904 const std::string &name,
1905 int vec_size) {
1906 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1907 if (!vec_size) {
1908 // deduce vec_size from last character of name (e.g. vload_half4)
1909 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001910 }
SJW2c317da2020-03-23 07:39:13 -05001911 switch (vec_size) {
1912 case 2:
1913 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1914 case 4:
1915 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1916 case 0:
1917 if (!is_clspv_version) {
1918 return replaceVloadHalf(F);
1919 }
Marco Antognini7e338402021-03-15 12:48:37 +00001920 // Fall-through
SJW2c317da2020-03-23 07:39:13 -05001921 default:
1922 llvm_unreachable("Unsupported vload_half vector size");
1923 break;
1924 }
1925 return false;
David Neto22f144c2017-06-12 14:26:21 -04001926}
1927
SJW2c317da2020-03-23 07:39:13 -05001928bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1929 Module &M = *F.getParent();
1930 return replaceCallsWithValue(F, [&](CallInst *CI) {
1931 // The index argument from vload_half.
1932 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001933
SJW2c317da2020-03-23 07:39:13 -05001934 // The pointer argument from vload_half.
1935 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001936
SJW2c317da2020-03-23 07:39:13 -05001937 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001938 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001939 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1940
1941 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001942 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001943
1944 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1945
1946 Value *V = nullptr;
1947
alan-baker7efcaaa2020-05-06 19:33:27 -04001948 bool supports_16bit_storage = true;
1949 switch (Arg1->getType()->getPointerAddressSpace()) {
1950 case clspv::AddressSpace::Global:
1951 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1952 clspv::Option::StorageClass::kSSBO);
1953 break;
1954 case clspv::AddressSpace::Constant:
1955 if (clspv::Option::ConstantArgsInUniformBuffer())
1956 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1957 clspv::Option::StorageClass::kUBO);
1958 else
1959 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1960 clspv::Option::StorageClass::kSSBO);
1961 break;
1962 default:
1963 // Clspv will emit the Float16 capability if the half type is
1964 // encountered. That capability covers private and local addressspaces.
1965 break;
1966 }
1967
1968 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001969 auto ShortTy = Type::getInt16Ty(M.getContext());
1970 auto ShortPointerTy =
1971 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1972
1973 // Cast the half* pointer to short*.
1974 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1975
1976 // Index into the correct address of the casted pointer.
1977 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1978
1979 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001980 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001981
1982 // ZExt the short -> int.
1983 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1984
1985 // Get our float2.
1986 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1987
1988 // Extract out the bottom element which is our float result.
1989 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1990 } else {
1991 // Assume the pointer argument points to storage aligned to 32bits
1992 // or more.
1993 // TODO(dneto): Do more analysis to make sure this is true?
1994 //
1995 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1996 // with:
1997 //
1998 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1999 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2000 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2001 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2002 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2003 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2004 // x float> %converted, %index_is_odd32
2005
2006 auto IntPointerTy =
2007 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2008
2009 // Cast the base pointer to int*.
2010 // In a valid call (according to assumptions), this should get
2011 // optimized away in the simplify GEP pass.
2012 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2013
2014 auto One = ConstantInt::get(IntTy, 1);
2015 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2016 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2017
2018 // Index into the correct address of the casted pointer.
2019 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2020
2021 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002022 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002023
2024 // Get our float2.
2025 auto Call = CallInst::Create(NewF, Load, "", CI);
2026
2027 // Extract out the float result, where the element number is
2028 // determined by whether the original index was even or odd.
2029 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2030 }
2031 return V;
2032 });
2033}
2034
2035bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2036 Module &M = *F.getParent();
2037 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002038 // The index argument from vload_half.
2039 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002040
Kévin Petite8edce32019-04-10 14:23:32 +01002041 // The pointer argument from vload_half.
2042 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002043
Kévin Petite8edce32019-04-10 14:23:32 +01002044 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002045 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002046 auto NewPointerTy =
2047 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002048 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002049
Kévin Petite8edce32019-04-10 14:23:32 +01002050 // Cast the half* pointer to int*.
2051 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002052
Kévin Petite8edce32019-04-10 14:23:32 +01002053 // Index into the correct address of the casted pointer.
2054 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002055
Kévin Petite8edce32019-04-10 14:23:32 +01002056 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002057 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002058
Kévin Petite8edce32019-04-10 14:23:32 +01002059 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002060 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002061
Kévin Petite8edce32019-04-10 14:23:32 +01002062 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002063
Kévin Petite8edce32019-04-10 14:23:32 +01002064 // Get our float2.
2065 return CallInst::Create(NewF, Load, "", CI);
2066 });
David Neto22f144c2017-06-12 14:26:21 -04002067}
2068
SJW2c317da2020-03-23 07:39:13 -05002069bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2070 Module &M = *F.getParent();
2071 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002072 // The index argument from vload_half.
2073 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002074
Kévin Petite8edce32019-04-10 14:23:32 +01002075 // The pointer argument from vload_half.
2076 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002077
Kévin Petite8edce32019-04-10 14:23:32 +01002078 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002079 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2080 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002081 auto NewPointerTy =
2082 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002083 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002084
Kévin Petite8edce32019-04-10 14:23:32 +01002085 // Cast the half* pointer to int2*.
2086 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002087
Kévin Petite8edce32019-04-10 14:23:32 +01002088 // Index into the correct address of the casted pointer.
2089 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002090
Kévin Petite8edce32019-04-10 14:23:32 +01002091 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002092 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002093
Kévin Petite8edce32019-04-10 14:23:32 +01002094 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002095 auto X =
2096 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2097 auto Y =
2098 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002099
Kévin Petite8edce32019-04-10 14:23:32 +01002100 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002101 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002102
Kévin Petite8edce32019-04-10 14:23:32 +01002103 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002104
Kévin Petite8edce32019-04-10 14:23:32 +01002105 // Get the lower (x & y) components of our final float4.
2106 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002107
Kévin Petite8edce32019-04-10 14:23:32 +01002108 // Get the higher (z & w) components of our final float4.
2109 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002110
Kévin Petite8edce32019-04-10 14:23:32 +01002111 Constant *ShuffleMask[4] = {
2112 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2113 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002114
Kévin Petite8edce32019-04-10 14:23:32 +01002115 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002116 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2117 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002118 });
David Neto22f144c2017-06-12 14:26:21 -04002119}
2120
SJW2c317da2020-03-23 07:39:13 -05002121bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002122
2123 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2124 //
2125 // %u = load i32 %ptr
2126 // %fxy = call <2 x float> Unpack2xHalf(u)
2127 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002128 Module &M = *F.getParent();
2129 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002130 auto Index = CI->getOperand(0);
2131 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002132
Kévin Petite8edce32019-04-10 14:23:32 +01002133 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002134 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002135 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002136
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002137 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002138 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002139
Kévin Petite8edce32019-04-10 14:23:32 +01002140 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002141 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002142
Kévin Petite8edce32019-04-10 14:23:32 +01002143 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002144
Kévin Petite8edce32019-04-10 14:23:32 +01002145 // Get our final float2.
2146 return CallInst::Create(NewF, Load, "", CI);
2147 });
David Neto6ad93232018-06-07 15:42:58 -07002148}
2149
SJW2c317da2020-03-23 07:39:13 -05002150bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002151
2152 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2153 //
2154 // %u2 = load <2 x i32> %ptr
2155 // %u2xy = extractelement %u2, 0
2156 // %u2zw = extractelement %u2, 1
2157 // %fxy = call <2 x float> Unpack2xHalf(uint)
2158 // %fzw = call <2 x float> Unpack2xHalf(uint)
2159 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002160 Module &M = *F.getParent();
2161 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002162 auto Index = CI->getOperand(0);
2163 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002164
Kévin Petite8edce32019-04-10 14:23:32 +01002165 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002166 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2167 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002168 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002169
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002170 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002171 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002172
Kévin Petite8edce32019-04-10 14:23:32 +01002173 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002174 auto X =
2175 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2176 auto Y =
2177 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002178
Kévin Petite8edce32019-04-10 14:23:32 +01002179 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002180 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002181
Kévin Petite8edce32019-04-10 14:23:32 +01002182 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002183
Kévin Petite8edce32019-04-10 14:23:32 +01002184 // Get the lower (x & y) components of our final float4.
2185 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002186
Kévin Petite8edce32019-04-10 14:23:32 +01002187 // Get the higher (z & w) components of our final float4.
2188 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002189
Kévin Petite8edce32019-04-10 14:23:32 +01002190 Constant *ShuffleMask[4] = {
2191 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2192 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002193
Kévin Petite8edce32019-04-10 14:23:32 +01002194 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002195 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2196 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002197 });
David Neto6ad93232018-06-07 15:42:58 -07002198}
2199
SJW2c317da2020-03-23 07:39:13 -05002200bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
2201 switch (vec_size) {
2202 case 0:
2203 return replaceVstoreHalf(F);
2204 case 2:
2205 return replaceVstoreHalf2(F);
2206 case 4:
2207 return replaceVstoreHalf4(F);
2208 default:
2209 llvm_unreachable("Unsupported vstore_half vector size");
2210 break;
2211 }
2212 return false;
2213}
David Neto22f144c2017-06-12 14:26:21 -04002214
SJW2c317da2020-03-23 07:39:13 -05002215bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2216 Module &M = *F.getParent();
2217 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002218 // The value to store.
2219 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002220
Kévin Petite8edce32019-04-10 14:23:32 +01002221 // The index argument from vstore_half.
2222 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002223
Kévin Petite8edce32019-04-10 14:23:32 +01002224 // The pointer argument from vstore_half.
2225 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002226
Kévin Petite8edce32019-04-10 14:23:32 +01002227 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002228 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002229 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2230 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002231
Kévin Petite8edce32019-04-10 14:23:32 +01002232 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002233 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002234
Kévin Petite8edce32019-04-10 14:23:32 +01002235 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002236
Kévin Petite8edce32019-04-10 14:23:32 +01002237 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002238 auto TempVec = InsertElementInst::Create(
2239 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002240
Kévin Petite8edce32019-04-10 14:23:32 +01002241 // Pack the float2 -> half2 (in an int).
2242 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002243
alan-baker7efcaaa2020-05-06 19:33:27 -04002244 bool supports_16bit_storage = true;
2245 switch (Arg2->getType()->getPointerAddressSpace()) {
2246 case clspv::AddressSpace::Global:
2247 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2248 clspv::Option::StorageClass::kSSBO);
2249 break;
2250 case clspv::AddressSpace::Constant:
2251 if (clspv::Option::ConstantArgsInUniformBuffer())
2252 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2253 clspv::Option::StorageClass::kUBO);
2254 else
2255 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2256 clspv::Option::StorageClass::kSSBO);
2257 break;
2258 default:
2259 // Clspv will emit the Float16 capability if the half type is
2260 // encountered. That capability covers private and local addressspaces.
2261 break;
2262 }
2263
SJW2c317da2020-03-23 07:39:13 -05002264 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002265 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002266 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002267 auto ShortPointerTy =
2268 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002269
Kévin Petite8edce32019-04-10 14:23:32 +01002270 // Truncate our i32 to an i16.
2271 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002272
Kévin Petite8edce32019-04-10 14:23:32 +01002273 // Cast the half* pointer to short*.
2274 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002275
Kévin Petite8edce32019-04-10 14:23:32 +01002276 // Index into the correct address of the casted pointer.
2277 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002278
Kévin Petite8edce32019-04-10 14:23:32 +01002279 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002280 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002281 } else {
2282 // We can only write to 32-bit aligned words.
2283 //
2284 // Assuming base is aligned to 32-bits, replace the equivalent of
2285 // vstore_half(value, index, base)
2286 // with:
2287 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2288 // uint32_t write_to_upper_half = index & 1u;
2289 // uint32_t shift = write_to_upper_half << 4;
2290 //
2291 // // Pack the float value as a half number in bottom 16 bits
2292 // // of an i32.
2293 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2294 //
2295 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2296 // ^ ((packed & 0xffff) << shift)
2297 // // We only need relaxed consistency, but OpenCL 1.2 only has
2298 // // sequentially consistent atomics.
2299 // // TODO(dneto): Use relaxed consistency.
2300 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002301 auto IntPointerTy =
2302 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002303
Kévin Petite8edce32019-04-10 14:23:32 +01002304 auto Four = ConstantInt::get(IntTy, 4);
2305 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002306
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002307 auto IndexIsOdd =
2308 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002309 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002310 auto IndexIntoI32 =
2311 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2312 auto BaseI32Ptr =
2313 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2314 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2315 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002316 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002317 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002318 auto MaskBitsToWrite =
2319 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2320 auto MaskedCurrent = BinaryOperator::CreateAnd(
2321 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002322
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002323 auto XLowerBits =
2324 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2325 auto NewBitsToWrite =
2326 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2327 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2328 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002329
Kévin Petite8edce32019-04-10 14:23:32 +01002330 // Generate the call to atomi_xor.
2331 SmallVector<Type *, 5> ParamTypes;
2332 // The pointer type.
2333 ParamTypes.push_back(IntPointerTy);
2334 // The Types for memory scope, semantics, and value.
2335 ParamTypes.push_back(IntTy);
2336 ParamTypes.push_back(IntTy);
2337 ParamTypes.push_back(IntTy);
2338 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2339 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002340
Kévin Petite8edce32019-04-10 14:23:32 +01002341 const auto ConstantScopeDevice =
2342 ConstantInt::get(IntTy, spv::ScopeDevice);
2343 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2344 // (SPIR-V Workgroup).
2345 const auto AddrSpaceSemanticsBits =
2346 IntPointerTy->getPointerAddressSpace() == 1
2347 ? spv::MemorySemanticsUniformMemoryMask
2348 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002349
Kévin Petite8edce32019-04-10 14:23:32 +01002350 // We're using relaxed consistency here.
2351 const auto ConstantMemorySemantics =
2352 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2353 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002354
Kévin Petite8edce32019-04-10 14:23:32 +01002355 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2356 ConstantMemorySemantics, ValueToXor};
2357 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002358
2359 // Return a Nop so the old Call is removed
2360 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2361 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002362 }
David Neto22f144c2017-06-12 14:26:21 -04002363
SJW2c317da2020-03-23 07:39:13 -05002364 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002365 });
David Neto22f144c2017-06-12 14:26:21 -04002366}
2367
SJW2c317da2020-03-23 07:39:13 -05002368bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2369 Module &M = *F.getParent();
2370 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002371 // The value to store.
2372 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002373
Kévin Petite8edce32019-04-10 14:23:32 +01002374 // The index argument from vstore_half.
2375 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002376
Kévin Petite8edce32019-04-10 14:23:32 +01002377 // The pointer argument from vstore_half.
2378 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002379
Kévin Petite8edce32019-04-10 14:23:32 +01002380 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002381 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002382 auto NewPointerTy =
2383 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002384 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002385
Kévin Petite8edce32019-04-10 14:23:32 +01002386 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002387 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002388
Kévin Petite8edce32019-04-10 14:23:32 +01002389 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002390
Kévin Petite8edce32019-04-10 14:23:32 +01002391 // Turn the packed x & y into the final packing.
2392 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002393
Kévin Petite8edce32019-04-10 14:23:32 +01002394 // Cast the half* pointer to int*.
2395 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002396
Kévin Petite8edce32019-04-10 14:23:32 +01002397 // Index into the correct address of the casted pointer.
2398 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002399
Kévin Petite8edce32019-04-10 14:23:32 +01002400 // Store to the int* we casted to.
2401 return new StoreInst(X, Index, CI);
2402 });
David Neto22f144c2017-06-12 14:26:21 -04002403}
2404
SJW2c317da2020-03-23 07:39:13 -05002405bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2406 Module &M = *F.getParent();
2407 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002408 // The value to store.
2409 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002410
Kévin Petite8edce32019-04-10 14:23:32 +01002411 // The index argument from vstore_half.
2412 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 // The pointer argument from vstore_half.
2415 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002416
Kévin Petite8edce32019-04-10 14:23:32 +01002417 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002418 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2419 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002420 auto NewPointerTy =
2421 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002422 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002423
Kévin Petite8edce32019-04-10 14:23:32 +01002424 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2425 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002426
Kévin Petite8edce32019-04-10 14:23:32 +01002427 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002428 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2429 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002430
Kévin Petite8edce32019-04-10 14:23:32 +01002431 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2432 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002433
Kévin Petite8edce32019-04-10 14:23:32 +01002434 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002435 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2436 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002437
Kévin Petite8edce32019-04-10 14:23:32 +01002438 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002439 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002440
Kévin Petite8edce32019-04-10 14:23:32 +01002441 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002442
Kévin Petite8edce32019-04-10 14:23:32 +01002443 // Turn the packed x & y into the final component of our int2.
2444 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002445
Kévin Petite8edce32019-04-10 14:23:32 +01002446 // Turn the packed z & w into the final component of our int2.
2447 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 auto Combine = InsertElementInst::Create(
2450 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002451 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2452 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002453
Kévin Petite8edce32019-04-10 14:23:32 +01002454 // Cast the half* pointer to int2*.
2455 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002456
Kévin Petite8edce32019-04-10 14:23:32 +01002457 // Index into the correct address of the casted pointer.
2458 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 // Store to the int2* we casted to.
2461 return new StoreInst(Combine, Index, CI);
2462 });
David Neto22f144c2017-06-12 14:26:21 -04002463}
2464
SJW2c317da2020-03-23 07:39:13 -05002465bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2466 // convert half to float
2467 Module &M = *F.getParent();
2468 return replaceCallsWithValue(F, [&](CallInst *CI) {
2469 SmallVector<Type *, 3> types;
2470 SmallVector<Value *, 3> args;
Marco Antognini7e338402021-03-15 12:48:37 +00002471 for (size_t i = 0; i < CI->getNumArgOperands(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05002472 types.push_back(CI->getArgOperand(i)->getType());
2473 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002474 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002475
alan-baker5a8c3be2020-09-09 13:44:26 -04002476 auto NewFType =
2477 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2478 cast<VectorType>(CI->getType())
2479 ->getElementCount()
2480 .getKnownMinValue()),
2481 types, false);
SJW2c317da2020-03-23 07:39:13 -05002482
SJW61531372020-06-09 07:31:08 -05002483 std::string NewFName =
2484 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002485
2486 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2487
2488 auto NewCI = CallInst::Create(NewF, args, "", CI);
2489
2490 // Convert to the half type.
2491 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2492 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002493}
2494
SJW2c317da2020-03-23 07:39:13 -05002495bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2496 // convert half to float
2497 Module &M = *F.getParent();
2498 return replaceCallsWithValue(F, [&](CallInst *CI) {
2499 SmallVector<Type *, 3> types(3);
2500 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002501
SJW2c317da2020-03-23 07:39:13 -05002502 // Image
2503 types[0] = CI->getArgOperand(0)->getType();
2504 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002505
SJW2c317da2020-03-23 07:39:13 -05002506 // Coord
2507 types[1] = CI->getArgOperand(1)->getType();
2508 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002509
SJW2c317da2020-03-23 07:39:13 -05002510 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002511 types[2] =
2512 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2513 cast<VectorType>(CI->getArgOperand(2)->getType())
2514 ->getElementCount()
2515 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002516
SJW2c317da2020-03-23 07:39:13 -05002517 auto NewFType =
2518 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002519
SJW61531372020-06-09 07:31:08 -05002520 std::string NewFName =
2521 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002522
SJW2c317da2020-03-23 07:39:13 -05002523 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002524
SJW2c317da2020-03-23 07:39:13 -05002525 // Convert data to the float type.
2526 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2527 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002528
SJW2c317da2020-03-23 07:39:13 -05002529 return CallInst::Create(NewF, args, "", CI);
2530 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002531}
2532
SJW2c317da2020-03-23 07:39:13 -05002533bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2534 Function &F) {
2535 // convert read_image with int coords to float coords
2536 Module &M = *F.getParent();
2537 return replaceCallsWithValue(F, [&](CallInst *CI) {
2538 // The image.
2539 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002540
SJW2c317da2020-03-23 07:39:13 -05002541 // The sampler.
2542 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002543
SJW2c317da2020-03-23 07:39:13 -05002544 // The coordinate (integer type that we can't handle).
2545 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002546
SJW2c317da2020-03-23 07:39:13 -05002547 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2548 uint32_t components =
2549 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2550 Type *float_ty = nullptr;
2551 if (components == 1) {
2552 float_ty = Type::getFloatTy(M.getContext());
2553 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002554 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2555 cast<VectorType>(Arg2->getType())
2556 ->getElementCount()
2557 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002558 }
David Neto22f144c2017-06-12 14:26:21 -04002559
SJW2c317da2020-03-23 07:39:13 -05002560 auto NewFType = FunctionType::get(
2561 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2562
2563 std::string NewFName = F.getName().str();
2564 NewFName[NewFName.length() - 1] = 'f';
2565
2566 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2567
2568 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2569
2570 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2571 });
David Neto22f144c2017-06-12 14:26:21 -04002572}
2573
SJW2c317da2020-03-23 07:39:13 -05002574bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2575 return replaceCallsWithValue(F, [&](CallInst *CI) {
2576 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002577
SJW2c317da2020-03-23 07:39:13 -05002578 // We need to map the OpenCL constants to the SPIR-V equivalents.
2579 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2580 const auto ConstantMemorySemantics = ConstantInt::get(
2581 IntTy, spv::MemorySemanticsUniformMemoryMask |
2582 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002583
SJW2c317da2020-03-23 07:39:13 -05002584 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002585
SJW2c317da2020-03-23 07:39:13 -05002586 // The pointer.
2587 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002588
SJW2c317da2020-03-23 07:39:13 -05002589 // The memory scope.
2590 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002591
SJW2c317da2020-03-23 07:39:13 -05002592 // The memory semantics.
2593 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002594
SJW2c317da2020-03-23 07:39:13 -05002595 if (2 < CI->getNumArgOperands()) {
2596 // The unequal memory semantics.
2597 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002598
SJW2c317da2020-03-23 07:39:13 -05002599 // The value.
2600 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002601
SJW2c317da2020-03-23 07:39:13 -05002602 // The comparator.
2603 Params.push_back(CI->getArgOperand(1));
2604 } else if (1 < CI->getNumArgOperands()) {
2605 // The value.
2606 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002607 }
David Neto22f144c2017-06-12 14:26:21 -04002608
SJW2c317da2020-03-23 07:39:13 -05002609 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2610 });
David Neto22f144c2017-06-12 14:26:21 -04002611}
2612
SJW2c317da2020-03-23 07:39:13 -05002613bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2614 llvm::AtomicRMWInst::BinOp Op) {
2615 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002616 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2617 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002618 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002619 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002620 SyncScope::System, CI);
2621 });
2622}
David Neto22f144c2017-06-12 14:26:21 -04002623
SJW2c317da2020-03-23 07:39:13 -05002624bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2625 Module &M = *F.getParent();
2626 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002627 auto IntTy = Type::getInt32Ty(M.getContext());
2628 auto FloatTy = Type::getFloatTy(M.getContext());
2629
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002630 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2631 ConstantInt::get(IntTy, 1),
2632 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002633
2634 Constant *UpShuffleMask[4] = {
2635 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2636 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2637
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002638 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2639 UndefValue::get(FloatTy),
2640 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002641
Kévin Petite8edce32019-04-10 14:23:32 +01002642 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002643 auto Arg0 =
2644 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2645 ConstantVector::get(DownShuffleMask), "", CI);
2646 auto Arg1 =
2647 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2648 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002649 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002650
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002651 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002652 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002653
SJW61531372020-06-09 07:31:08 -05002654 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002655
Kévin Petite8edce32019-04-10 14:23:32 +01002656 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002657
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002658 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2659 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002660 });
David Neto22f144c2017-06-12 14:26:21 -04002661}
David Neto62653202017-10-16 19:05:18 -04002662
SJW2c317da2020-03-23 07:39:13 -05002663bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002664 // OpenCL's float result = fract(float x, float* ptr)
2665 //
2666 // In the LLVM domain:
2667 //
2668 // %floor_result = call spir_func float @floor(float %x)
2669 // store float %floor_result, float * %ptr
2670 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2671 // %result = call spir_func float
2672 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2673 //
2674 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2675 // and clspv.fract occur in the SPIR-V generator pass:
2676 //
2677 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2678 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2679 // ...
2680 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2681 // OpStore %ptr %floor_result
2682 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2683 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002684 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002685
David Neto62653202017-10-16 19:05:18 -04002686 using std::string;
2687
2688 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2689 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002690
SJW2c317da2020-03-23 07:39:13 -05002691 Module &M = *F.getParent();
2692 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002693
SJW2c317da2020-03-23 07:39:13 -05002694 // This is either float or a float vector. All the float-like
2695 // types are this type.
2696 auto result_ty = F.getReturnType();
2697
SJW61531372020-06-09 07:31:08 -05002698 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002699 Function *fmin_fn = M.getFunction(fmin_name);
2700 if (!fmin_fn) {
2701 // Make the fmin function.
2702 FunctionType *fn_ty =
2703 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2704 fmin_fn =
2705 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2706 fmin_fn->addFnAttr(Attribute::ReadNone);
2707 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2708 }
2709
SJW61531372020-06-09 07:31:08 -05002710 std::string floor_name =
2711 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002712 Function *floor_fn = M.getFunction(floor_name);
2713 if (!floor_fn) {
2714 // Make the floor function.
2715 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2716 floor_fn =
2717 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2718 floor_fn->addFnAttr(Attribute::ReadNone);
2719 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2720 }
2721
SJW61531372020-06-09 07:31:08 -05002722 std::string clspv_fract_name =
2723 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002724 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2725 if (!clspv_fract_fn) {
2726 // Make the clspv_fract function.
2727 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2728 clspv_fract_fn = cast<Function>(
2729 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2730 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2731 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2732 }
2733
2734 // Number of significant significand bits, whether represented or not.
2735 unsigned num_significand_bits;
2736 switch (result_ty->getScalarType()->getTypeID()) {
2737 case Type::HalfTyID:
2738 num_significand_bits = 11;
2739 break;
2740 case Type::FloatTyID:
2741 num_significand_bits = 24;
2742 break;
2743 case Type::DoubleTyID:
2744 num_significand_bits = 53;
2745 break;
2746 default:
2747 llvm_unreachable("Unhandled float type when processing fract builtin");
2748 break;
2749 }
2750 // Beware that the disassembler displays this value as
2751 // OpConstant %float 1
2752 // which is not quite right.
2753 const double kJustUnderOneScalar =
2754 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2755
2756 Constant *just_under_one =
2757 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2758 if (result_ty->isVectorTy()) {
2759 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002760 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002761 }
2762
2763 IRBuilder<> Builder(CI);
2764
2765 auto arg = CI->getArgOperand(0);
2766 auto ptr = CI->getArgOperand(1);
2767
2768 // Compute floor result and store it.
2769 auto floor = Builder.CreateCall(floor_fn, {arg});
2770 Builder.CreateStore(floor, ptr);
2771
2772 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2773 auto fract_result =
2774 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2775
2776 return fract_result;
2777 });
David Neto62653202017-10-16 19:05:18 -04002778}
alan-bakera52b7312020-10-26 08:58:51 -04002779
Kévin Petit8576f682020-11-02 14:51:32 +00002780bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002781 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002782 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002783 // a_shr = a >> 1
2784 // b_shr = b >> 1
2785 // add1 = a_shr + b_shr
2786 // join = a |join_opcode| b
2787 // and = join & 1
2788 // add = add1 + and
2789 const auto a = Call->getArgOperand(0);
2790 const auto b = Call->getArgOperand(1);
2791 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002792 Value *a_shift, *b_shift;
2793 if (is_signed) {
2794 a_shift = builder.CreateAShr(a, 1);
2795 b_shift = builder.CreateAShr(b, 1);
2796 } else {
2797 a_shift = builder.CreateLShr(a, 1);
2798 b_shift = builder.CreateLShr(b, 1);
2799 }
alan-bakerb6da5132020-10-29 15:59:06 -04002800 auto add = builder.CreateAdd(a_shift, b_shift);
2801 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2802 auto constant_one = ConstantInt::get(a->getType(), 1);
2803 auto and_bit = builder.CreateAnd(join, constant_one);
2804 return builder.CreateAdd(add, and_bit);
2805 });
2806}
2807
alan-baker3f1bf492020-11-05 09:07:36 -05002808bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2809 bool is_add) {
2810 return replaceCallsWithValue(F, [&F, this, is_signed,
2811 is_add](CallInst *Call) {
2812 auto ty = Call->getType();
2813 auto a = Call->getArgOperand(0);
2814 auto b = Call->getArgOperand(1);
2815 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002816 if (is_signed) {
2817 unsigned bitwidth = ty->getScalarSizeInBits();
2818 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002819 unsigned extended_width = bitwidth << 1;
2820 Type *extended_ty =
2821 IntegerType::get(Call->getContext(), extended_width);
2822 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002823 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002824 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2825 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002826 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002827 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04002828 // Don't use the type in GetMangledFunctionName to ensure we get
2829 // signed parameters.
2830 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04002831 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05002832 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2833 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2834 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2835 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2836 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04002837 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04002838 } else {
2839 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2840 }
alan-baker3f1bf492020-11-05 09:07:36 -05002841 } else {
2842 if (extended_width == 32) {
2843 sclamp_name += "iii";
2844 } else {
2845 sclamp_name += "sss";
2846 }
alan-bakera52b7312020-10-26 08:58:51 -04002847 }
alan-baker3f1bf492020-11-05 09:07:36 -05002848
2849 auto sext_a = builder.CreateSExt(a, extended_ty);
2850 auto sext_b = builder.CreateSExt(b, extended_ty);
2851 Value *op = nullptr;
2852 // Extended operations won't wrap.
2853 if (is_add)
2854 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
2855 else
2856 op = builder.CreateSub(sext_a, sext_b, "", true, true);
2857 auto clamp_ty = FunctionType::get(
2858 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2859 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
2860 auto clamp = builder.CreateCall(callee, {op, min, max});
2861 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04002862 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002863 // Add:
2864 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04002865 // if (b < 0)
2866 // c = c > a ? min : c;
2867 // else
alan-baker3f1bf492020-11-05 09:07:36 -05002868 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04002869 //
alan-baker3f1bf492020-11-05 09:07:36 -05002870 // Sub:
2871 // c = a - b;
2872 // if (b < 0)
2873 // c = c < a ? max : c;
2874 // else
2875 // c = c > a ? min : c;
2876 Constant *min = ConstantInt::get(Call->getContext(),
2877 APInt::getSignedMinValue(bitwidth));
2878 Constant *max = ConstantInt::get(Call->getContext(),
2879 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04002880 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2881 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2882 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2883 }
alan-baker3f1bf492020-11-05 09:07:36 -05002884 Value *op = nullptr;
2885 if (is_add) {
2886 op = builder.CreateAdd(a, b);
2887 } else {
2888 op = builder.CreateSub(a, b);
2889 }
2890 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
2891 auto op_gt_a = builder.CreateICmpSGT(op, a);
2892 auto op_lt_a = builder.CreateICmpSLT(op, a);
2893 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
2894 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
2895 auto neg_value = is_add ? min : max;
2896 auto pos_value = is_add ? max : min;
2897 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
2898 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
2899 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04002900 }
2901 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002902 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
2903 // carr/borrow.
2904 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
2905 auto clamp_value =
2906 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
2907 auto struct_ty = GetPairStruct(ty);
2908 auto call =
2909 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
2910 auto add_sub = builder.CreateExtractValue(call, {0});
2911 auto carry_borrow = builder.CreateExtractValue(call, {1});
2912 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
2913 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04002914 }
alan-bakera52b7312020-10-26 08:58:51 -04002915 });
2916}
alan-baker4986eff2020-10-29 13:38:00 -04002917
2918bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2919 return replaceCallsWithValue(F, [](CallInst *Call) {
2920 auto pointer = Call->getArgOperand(0);
2921 // Clang emits an address space cast to the generic address space. Skip the
2922 // cast and use the input directly.
2923 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2924 pointer = cast->getPointerOperand();
2925 }
2926 Value *order_arg =
2927 Call->getNumArgOperands() > 1 ? Call->getArgOperand(1) : nullptr;
2928 Value *scope_arg =
2929 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2930 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2931 clspv::AddressSpace::Global;
2932 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2933 spv::MemorySemanticsAcquireMask);
2934 auto scope = MemoryScope(scope_arg, is_global, Call);
2935 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2936 Call->getType(), {pointer, scope, order});
2937 });
2938}
2939
2940bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2941 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2942 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2943 auto pointer = Call->getArgOperand(0);
2944 // Clang emits an address space cast to the generic address space. Skip the
2945 // cast and use the input directly.
2946 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2947 pointer = cast->getPointerOperand();
2948 }
2949 Value *value = Call->getArgOperand(1);
2950 Value *order_arg =
2951 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2952 Value *scope_arg =
2953 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2954 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2955 clspv::AddressSpace::Global;
2956 auto scope = MemoryScope(scope_arg, is_global, Call);
2957 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2958 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2959 {pointer, scope, order, value});
2960 });
2961}
2962
2963bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2964 return replaceCallsWithValue(F, [](CallInst *Call) {
2965 auto pointer = Call->getArgOperand(0);
2966 // Clang emits an address space cast to the generic address space. Skip the
2967 // cast and use the input directly.
2968 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2969 pointer = cast->getPointerOperand();
2970 }
2971 auto expected = Call->getArgOperand(1);
2972 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2973 expected = cast->getPointerOperand();
2974 }
2975 auto value = Call->getArgOperand(2);
2976 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2977 clspv::AddressSpace::Global;
2978 Value *success_arg =
2979 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2980 Value *failure_arg =
2981 Call->getNumArgOperands() > 4 ? Call->getArgOperand(4) : nullptr;
2982 Value *scope_arg =
2983 Call->getNumArgOperands() > 5 ? Call->getArgOperand(5) : nullptr;
2984 auto scope = MemoryScope(scope_arg, is_global, Call);
2985 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2986 spv::MemorySemanticsAcquireReleaseMask);
2987 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
2988 spv::MemorySemanticsAcquireMask);
2989
2990 // If the value pointed to by |expected| equals the value pointed to by
2991 // |pointer|, |value| is written into |pointer|, otherwise the value in
2992 // |pointer| is written into |expected|. In order to avoid extra stores,
2993 // the basic block with the original atomic is split and the store is
2994 // performed in the |then| block. The condition is the inversion of the
2995 // comparison result.
2996 IRBuilder<> builder(Call);
2997 auto load = builder.CreateLoad(expected);
2998 auto cmp_xchg = InsertSPIRVOp(
2999 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3000 value->getType(), {pointer, scope, success, failure, value, load});
3001 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3002 auto not_cmp = builder.CreateNot(cmp);
3003 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3004 builder.SetInsertPoint(then_branch);
3005 builder.CreateStore(cmp_xchg, expected);
3006 return cmp;
3007 });
3008}
alan-bakercc2bafb2020-11-02 08:30:18 -05003009
alan-baker2cecaa72020-11-05 14:05:20 -05003010bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003011 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3012 return false;
3013
3014 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003015 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003016 return false;
3017
alan-baker5f2e88e2020-12-07 15:24:04 -05003018 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3019 Function *intrinsic = Intrinsic::getDeclaration(
3020 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3021 Call->getType());
3022 const auto c_false = ConstantInt::getFalse(Call->getContext());
3023 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3024 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003025 });
3026}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003027
3028bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3029 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3030 const auto ty = Call->getType();
3031 const auto a = Call->getArgOperand(0);
3032 const auto b = Call->getArgOperand(1);
3033 const auto c = Call->getArgOperand(2);
3034 IRBuilder<> builder(Call);
3035 if (is_signed) {
3036 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3037 if (bitwidth < 32) {
3038 // mul = sext(a) * sext(b)
3039 // add = mul + sext(c)
3040 // res = clamp(add, MIN, MAX)
3041 unsigned extended_width = bitwidth << 1;
3042 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3043 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3044 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3045 }
3046 auto a_sext = builder.CreateSExt(a, extended_ty);
3047 auto b_sext = builder.CreateSExt(b, extended_ty);
3048 auto c_sext = builder.CreateSExt(c, extended_ty);
3049 // Extended the size so no overflows occur.
3050 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3051 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3052 auto func_ty = FunctionType::get(
3053 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3054 // Don't use function type because we need signed parameters.
3055 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3056 // The clamp values are the signed min and max of the original bitwidth
3057 // sign extended to the extended bitwidth.
3058 Constant *min = ConstantInt::get(
3059 Call->getContext(),
3060 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3061 Constant *max = ConstantInt::get(
3062 Call->getContext(),
3063 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3064 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3065 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3066 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3067 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3068 if (extended_width == 32)
3069 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3070 else
3071 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3072 } else {
3073 if (extended_width == 32)
3074 clamp_name += "iii";
3075 else
3076 clamp_name += "sss";
3077 }
3078 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3079 auto clamp = builder.CreateCall(callee, {add, min, max});
3080 return builder.CreateTrunc(clamp, ty);
3081 } else {
3082 auto struct_ty = GetPairStruct(ty);
3083 // Compute
3084 // {hi, lo} = smul_extended(a, b)
3085 // add = lo + c
3086 auto mul_ext = InsertSPIRVOp(Call, spv::OpSMulExtended,
3087 {Attribute::ReadNone}, struct_ty, {a, b});
3088 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3089 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3090 auto add = builder.CreateAdd(mul_lo, c);
3091
3092 // Constants for use in the calculation.
3093 Constant *min = ConstantInt::get(Call->getContext(),
3094 APInt::getSignedMinValue(bitwidth));
3095 Constant *max = ConstantInt::get(Call->getContext(),
3096 APInt::getSignedMaxValue(bitwidth));
3097 Constant *max_plus_1 = ConstantInt::get(
3098 Call->getContext(),
3099 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3100 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3101 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3102 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3103 max_plus_1 =
3104 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3105 }
3106
3107 auto a_xor_b = builder.CreateXor(a, b);
3108 auto same_sign =
3109 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3110 auto different_sign = builder.CreateNot(same_sign);
3111 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3112 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3113 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3114 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3115 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3116 auto add_gt_max = builder.CreateICmpUGT(add, max);
3117 auto hi_eq_m1 =
3118 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3119 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3120 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3121 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3122 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3123
3124 // Equivalent to:
3125 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3126 // return MAX
3127 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3128 // return MAX
3129 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3130 // return MIN
3131 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3132 // return MIN
3133 // return add
3134 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3135 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3136 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3137 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3138 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3139 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3140 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3141 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3142 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3143 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3144 auto sel = builder.CreateSelect(min_clamp, min, add);
3145 return builder.CreateSelect(max_clamp, max, sel);
3146 }
3147 } else {
3148 // {lo, hi} = mul_extended(a, b)
3149 // {add, carry} = add_carry(lo, c)
3150 // cmp = (mul_hi | carry) == 0
3151 // mad_sat = cmp ? add : MAX
3152 auto struct_ty = GetPairStruct(ty);
3153 auto mul_ext = InsertSPIRVOp(Call, spv::OpUMulExtended,
3154 {Attribute::ReadNone}, struct_ty, {a, b});
3155 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3156 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3157 auto add_carry =
3158 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3159 struct_ty, {mul_lo, c});
3160 auto add = builder.CreateExtractValue(add_carry, {0});
3161 auto carry = builder.CreateExtractValue(add_carry, {1});
3162 auto or_value = builder.CreateOr(mul_hi, carry);
3163 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3164 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3165 }
3166 });
3167}
alan-baker15106572020-11-06 15:08:10 -05003168
3169bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3170 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3171 return false;
3172
3173 if (F.getFunctionType()->getNumParams() != 2)
3174 return false;
3175
3176 if (F.getFunctionType()->getParamType(0) !=
3177 F.getFunctionType()->getParamType(1)) {
3178 return false;
3179 }
3180
3181 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3182 case Type::FloatTyID:
3183 case Type::HalfTyID:
3184 case Type::DoubleTyID:
3185 break;
3186 default:
3187 return false;
3188 }
3189
3190 // Scalar versions all return an int, while vector versions return a vector
3191 // of an equally sized integer types (e.g. short, int or long).
3192 if (isa<VectorType>(F.getReturnType())) {
3193 if (F.getReturnType()->getScalarSizeInBits() !=
3194 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3195 return false;
3196 }
3197 } else {
3198 if (F.getReturnType()->getScalarSizeInBits() != 32)
3199 return false;
3200 }
3201
3202 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3203 // Replace with a floating point [un]ordered comparison followed by an
3204 // extension.
3205 auto x = Call->getArgOperand(0);
3206 auto y = Call->getArgOperand(1);
3207 IRBuilder<> builder(Call);
3208 Value *tmp = nullptr;
3209 if (is_ordered) {
3210 // This leads to a slight inefficiency in the SPIR-V that is easy for
3211 // drivers to optimize where the SPIR-V for the comparison and the
3212 // extension could be fused to drop the inversion of the OpIsNan.
3213 tmp = builder.CreateFCmpORD(x, y);
3214 } else {
3215 tmp = builder.CreateFCmpUNO(x, y);
3216 }
3217 // OpenCL CTS requires that vector versions use sign extension, but scalar
3218 // versions use zero extension.
3219 if (isa<VectorType>(Call->getType()))
3220 return builder.CreateSExt(tmp, Call->getType());
3221 return builder.CreateZExt(tmp, Call->getType());
3222 });
3223}
alan-baker497920b2020-11-09 16:41:36 -05003224
3225bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
3226 return replaceCallsWithValue(F, [this](CallInst *Call) {
3227 auto ty = Call->getType();
3228 auto x = Call->getArgOperand(0);
3229 unsigned width = x->getType()->getScalarSizeInBits();
3230 Type *int_ty = IntegerType::get(Call->getContext(), width);
3231 uint64_t abs_mask = 0x7fffffff;
3232 uint64_t exp_mask = 0x7f800000;
3233 uint64_t min_mask = 0x00800000;
3234 if (width == 16) {
3235 abs_mask = 0x7fff;
3236 exp_mask = 0x7c00;
3237 min_mask = 0x0400;
3238 } else if (width == 64) {
3239 abs_mask = 0x7fffffffffffffff;
3240 exp_mask = 0x7ff0000000000000;
3241 min_mask = 0x0010000000000000;
3242 }
3243 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3244 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3245 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3246 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3247 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3248 abs_const =
3249 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3250 exp_const =
3251 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3252 min_const =
3253 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3254 }
3255 // Drop the sign bit and then check that the number is between
3256 // (exclusive) the min and max exponent values for the bit width.
3257 IRBuilder<> builder(Call);
3258 auto bitcast = builder.CreateBitCast(x, int_ty);
3259 auto abs = builder.CreateAnd(bitcast, abs_const);
3260 auto lt = builder.CreateICmpULT(abs, exp_const);
3261 auto ge = builder.CreateICmpUGE(abs, min_const);
3262 auto tmp = builder.CreateAnd(lt, ge);
3263 // OpenCL CTS requires that vector versions use sign extension, but scalar
3264 // versions use zero extension.
3265 if (isa<VectorType>(ty))
3266 return builder.CreateSExt(tmp, ty);
3267 return builder.CreateZExt(tmp, ty);
3268 });
3269}
alan-bakere0406e72020-11-10 12:32:04 -05003270
3271bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3272 return replaceCallsWithValue(F, [](CallInst *Call) {
3273 const auto x = Call->getArgOperand(0);
3274 const auto y = Call->getArgOperand(1);
3275 IRBuilder<> builder(Call);
3276 auto sub = builder.CreateFSub(x, y);
3277 auto cmp = builder.CreateFCmpUGT(x, y);
3278 return builder.CreateSelect(cmp, sub,
3279 Constant::getNullValue(Call->getType()));
3280 });
3281}
alan-baker3e0de472020-12-08 15:57:17 -05003282
3283bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3284 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3285 const auto x = Call->getArgOperand(0);
3286 const double c_halfway = 0.5;
3287 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3288
3289 const auto clspv_fract_name =
3290 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3291 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3292 if (!clspv_fract_fn) {
3293 // Make the clspv_fract function.
3294 clspv_fract_fn = cast<Function>(
3295 F.getParent()
3296 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3297 .getCallee());
3298 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3299 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3300 }
3301
3302 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3303 Call->getType());
3304 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3305 Call->getType());
3306 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3307 Call->getType());
3308 auto copysign = Intrinsic::getDeclaration(
3309 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3310
3311 IRBuilder<> builder(Call);
3312
3313 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3314 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3315 auto floor_call =
3316 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3317 auto fract_call =
3318 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3319 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3320 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3321 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3322 });
3323}
3324
3325bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3326 Builtins::BuiltinType type) {
3327 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3328 const auto x = Call->getArgOperand(0);
3329 const double k_pi = 0x1.921fb54442d18p+1;
3330 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3331
3332 IRBuilder<> builder(Call);
3333 auto mul = builder.CreateFMul(x, pi);
3334 switch (type) {
3335 case Builtins::kSinpi: {
3336 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3337 x->getType());
3338 return builder.CreateCall(func->getFunctionType(), func, {mul});
3339 }
3340 case Builtins::kCospi: {
3341 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3342 x->getType());
3343 return builder.CreateCall(func->getFunctionType(), func, {mul});
3344 }
3345 case Builtins::kTanpi: {
3346 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3347 x->getType());
3348 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
3349 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3350 x->getType());
3351 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
3352 return builder.CreateFDiv(sin_call, cos_call);
3353 }
3354 default:
3355 llvm_unreachable("unexpected builtin");
3356 break;
3357 }
3358 return nullptr;
3359 });
3360}
alan-baker8b968112020-12-15 15:53:29 -05003361
3362bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
3363 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3364 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3365 Call->getType());
3366 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3367 Call->getType());
3368
3369 IRBuilder<> builder(Call);
3370 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
3371 {Call->getArgOperand(0)});
3372 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
3373 {Call->getArgOperand(0)});
3374 builder.CreateStore(cos, Call->getArgOperand(1));
3375 return sin;
3376 });
3377}
3378
3379bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
3380 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3381 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
3382 Call->getType());
3383
3384 IRBuilder<> builder(Call);
3385 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
3386 {Call->getArgOperand(0)});
3387 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
3388 });
3389}
3390
3391bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
3392 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3393 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
3394 Call->getType());
3395
3396 IRBuilder<> builder(Call);
3397 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
3398 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
3399 {Call->getArgOperand(0), conv});
3400 });
3401}