blob: 17b4cd9c8e95002583642baf86ea537c0ff07a1b [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
David Neto22f144c2017-06-12 14:26:21 -040050uint32_t clz(uint32_t v) {
51 uint32_t r;
52 uint32_t shift;
53
54 r = (v > 0xFFFF) << 4;
55 v >>= r;
56 shift = (v > 0xFF) << 3;
57 v >>= shift;
58 r |= shift;
59 shift = (v > 0xF) << 2;
60 v >>= shift;
61 r |= shift;
62 shift = (v > 0x3) << 1;
63 v >>= shift;
64 r |= shift;
65 r |= (v >> 1);
66
67 return r;
68}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050081 spv::MemorySemanticsMask base_semantics,
82 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040083 enum AtomicMemoryOrder : uint32_t {
84 kMemoryOrderRelaxed = 0,
85 kMemoryOrderAcquire = 2,
86 kMemoryOrderRelease = 3,
87 kMemoryOrderAcqRel = 4,
88 kMemoryOrderSeqCst = 5
89 };
90
91 IRBuilder<> builder(InsertBefore);
92
93 // Constants for OpenCL C 2.0 memory_order.
94 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
95 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
96 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
97 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
98
99 // Constants for SPIR-V ordering memory semantics.
100 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
101 const auto AcquireSemantics =
102 builder.getInt32(spv::MemorySemanticsAcquireMask);
103 const auto ReleaseSemantics =
104 builder.getInt32(spv::MemorySemanticsReleaseMask);
105 const auto AcqRelSemantics =
106 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
107
108 // Constants for SPIR-V storage class semantics.
109 const auto UniformSemantics =
110 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
111 const auto WorkgroupSemantics =
112 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
113
114 // Instead of sequentially consistent, use acquire, release or acquire
115 // release semantics.
116 Value *base_order = nullptr;
117 switch (base_semantics) {
118 case spv::MemorySemanticsAcquireMask:
119 base_order = AcquireSemantics;
120 break;
121 case spv::MemorySemanticsReleaseMask:
122 base_order = ReleaseSemantics;
123 break;
124 default:
125 base_order = AcqRelSemantics;
126 break;
127 }
128
129 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500130 if (order == nullptr) {
131 if (include_storage)
132 return builder.CreateOr({storage, base_order});
133 else
134 return base_order;
135 }
alan-baker4986eff2020-10-29 13:38:00 -0400136
137 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
138 auto is_acquire = builder.CreateICmpEQ(order, acquire);
139 auto is_release = builder.CreateICmpEQ(order, release);
140 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
141 auto semantics =
142 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
143 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
144 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
145 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500146 if (include_storage)
147 return builder.CreateOr({storage, semantics});
148 else
149 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400150}
151
152Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
153 enum AtomicMemoryScope : uint32_t {
154 kMemoryScopeWorkItem = 0,
155 kMemoryScopeWorkGroup = 1,
156 kMemoryScopeDevice = 2,
157 kMemoryScopeAllSVMDevices = 3, // not supported
158 kMemoryScopeSubGroup = 4
159 };
160
161 IRBuilder<> builder(InsertBefore);
162
163 // Constants for OpenCL C 2.0 memory_scope.
164 const auto work_item =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
166 const auto work_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
168 const auto sub_group =
169 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
170 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
171
172 // Constants for SPIR-V memory scopes.
173 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
174 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
175 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
176 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
177
178 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
179 if (scope == nullptr)
180 return base_scope;
181
182 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
183 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
184 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
185 auto is_device = builder.CreateICmpEQ(scope, device);
186
187 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
188 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
189 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
190 scope = builder.CreateSelect(is_device, DeviceScope, scope);
191
192 return scope;
193}
194
SJW2c317da2020-03-23 07:39:13 -0500195bool replaceCallsWithValue(Function &F,
196 std::function<Value *(CallInst *)> Replacer) {
197
198 bool Changed = false;
199
200 SmallVector<Instruction *, 4> ToRemoves;
201
202 // Walk the users of the function.
203 for (auto &U : F.uses()) {
204 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
205
206 auto NewValue = Replacer(CI);
207
208 if (NewValue != nullptr) {
209 CI->replaceAllUsesWith(NewValue);
210
211 // Lastly, remember to remove the user.
212 ToRemoves.push_back(CI);
213 }
214 }
215 }
216
217 Changed = !ToRemoves.empty();
218
219 // And cleanup the calls we don't use anymore.
220 for (auto V : ToRemoves) {
221 V->eraseFromParent();
222 }
223
224 return Changed;
225}
226
David Neto22f144c2017-06-12 14:26:21 -0400227struct ReplaceOpenCLBuiltinPass final : public ModulePass {
228 static char ID;
229 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
230
231 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500232
233private:
SJW2c317da2020-03-23 07:39:13 -0500234 bool runOnFunction(Function &F);
235 bool replaceAbs(Function &F);
236 bool replaceAbsDiff(Function &F, bool is_signed);
237 bool replaceCopysign(Function &F);
238 bool replaceRecip(Function &F);
239 bool replaceDivide(Function &F);
240 bool replaceDot(Function &F);
241 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500242 bool replaceExp10(Function &F, const std::string &basename);
243 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100244 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400245 bool replaceBarrier(Function &F, bool subgroup = false);
alan-baker36309f92021-02-05 12:28:03 -0500246 bool replaceMemFence(Function &F, spv::MemorySemanticsMask semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100247 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500248 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500249 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
250 bool replaceIsFinite(Function &F);
251 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
252 bool replaceUpsample(Function &F);
253 bool replaceRotate(Function &F);
254 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
255 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
256 bool replaceSelect(Function &F);
257 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500258 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500259 bool replaceSignbit(Function &F, bool is_vec);
260 bool replaceMul(Function &F, bool is_float, bool is_mad);
261 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
262 bool replaceVloadHalf(Function &F);
263 bool replaceVloadHalf2(Function &F);
264 bool replaceVloadHalf4(Function &F);
265 bool replaceClspvVloadaHalf2(Function &F);
266 bool replaceClspvVloadaHalf4(Function &F);
267 bool replaceVstoreHalf(Function &F, int vec_size);
268 bool replaceVstoreHalf(Function &F);
269 bool replaceVstoreHalf2(Function &F);
270 bool replaceVstoreHalf4(Function &F);
271 bool replaceHalfReadImage(Function &F);
272 bool replaceHalfWriteImage(Function &F);
273 bool replaceSampledReadImageWithIntCoords(Function &F);
274 bool replaceAtomics(Function &F, spv::Op Op);
275 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400276 bool replaceAtomicLoad(Function &F);
277 bool replaceExplicitAtomics(Function &F, spv::Op Op,
278 spv::MemorySemanticsMask semantics =
279 spv::MemorySemanticsAcquireReleaseMask);
280 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500281 bool replaceCross(Function &F);
282 bool replaceFract(Function &F, int vec_size);
283 bool replaceVload(Function &F);
284 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500285 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000286 bool replaceHadd(Function &F, bool is_signed,
287 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500288 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500289 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500290 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500291 bool replaceIsNormal(Function &F);
alan-bakere0406e72020-11-10 12:32:04 -0500292 bool replaceFDim(Function &F);
alan-baker3e0de472020-12-08 15:57:17 -0500293 bool replaceRound(Function &F);
294 bool replaceTrigPi(Function &F, Builtins::BuiltinType type);
alan-baker8b968112020-12-15 15:53:29 -0500295 bool replaceSincos(Function &F);
296 bool replaceExpm1(Function &F);
297 bool replacePown(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500298
rjodinchr791203f2021-10-07 20:42:41 +0200299 bool replaceWaitGroupEvents(Function &F);
300 GlobalVariable *
301 getOrCreateGlobalVariable(Module &M, std::string VariableName,
302 AddressSpace::Type VariableAddressSpace);
303 Value *replaceAsyncWorkGroupCopies(Module &M, CallInst *CI, Value *Dst,
304 Value *Src, Value *NumGentypes,
305 Value *Stride, Value *Event);
306 bool replaceAsyncWorkGroupCopy(Function &F);
307 bool replaceAsyncWorkGroupStridedCopy(Function &F);
308
alan-baker6b9d1ee2020-11-03 23:11:32 -0500309 // Caches struct types for { |type|, |type| }. This prevents
310 // getOrInsertFunction from introducing a bitcasts between structs with
311 // identical contents.
312 Type *GetPairStruct(Type *type);
313
314 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400315};
SJW2c317da2020-03-23 07:39:13 -0500316
Kévin Petit91bc72e2019-04-08 15:17:46 +0100317} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400318
319char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400320INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
321 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400322
323namespace clspv {
324ModulePass *createReplaceOpenCLBuiltinPass() {
325 return new ReplaceOpenCLBuiltinPass();
326}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400327} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400328
329bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500330 std::list<Function *> func_list;
331 for (auto &F : M.getFunctionList()) {
332 // process only function declarations
333 if (F.isDeclaration() && runOnFunction(F)) {
334 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000335 }
336 }
SJW2c317da2020-03-23 07:39:13 -0500337 if (func_list.size() != 0) {
338 // recursively convert functions, but first remove dead
339 for (auto *F : func_list) {
340 if (F->use_empty()) {
341 F->eraseFromParent();
342 }
343 }
344 runOnModule(M);
345 return true;
346 }
347 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000348}
349
SJW2c317da2020-03-23 07:39:13 -0500350bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
351 auto &FI = Builtins::Lookup(&F);
352 switch (FI.getType()) {
353 case Builtins::kAbs:
354 if (!FI.getParameter(0).is_signed) {
355 return replaceAbs(F);
356 }
357 break;
358 case Builtins::kAbsDiff:
359 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400360
361 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500362 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400363
alan-bakercc2bafb2020-11-02 08:30:18 -0500364 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500365 return replaceCountZeroes(F, true);
366
367 case Builtins::kCtz:
368 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500369
alan-bakerb6da5132020-10-29 15:59:06 -0400370 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000371 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400372 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000373 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400374
SJW2c317da2020-03-23 07:39:13 -0500375 case Builtins::kCopysign:
376 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100377
SJW2c317da2020-03-23 07:39:13 -0500378 case Builtins::kHalfRecip:
379 case Builtins::kNativeRecip:
380 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100381
SJW2c317da2020-03-23 07:39:13 -0500382 case Builtins::kHalfDivide:
383 case Builtins::kNativeDivide:
384 return replaceDivide(F);
385
386 case Builtins::kDot:
387 return replaceDot(F);
388
389 case Builtins::kExp10:
390 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500391 case Builtins::kNativeExp10:
392 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500393
alan-baker8b968112020-12-15 15:53:29 -0500394 case Builtins::kExpm1:
395 return replaceExpm1(F);
396
SJW2c317da2020-03-23 07:39:13 -0500397 case Builtins::kLog10:
398 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500399 case Builtins::kNativeLog10:
400 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500401
gnl21636e7992020-09-09 16:08:16 +0100402 case Builtins::kLog1p:
403 return replaceLog1p(F);
404
alan-bakere0406e72020-11-10 12:32:04 -0500405 case Builtins::kFdim:
406 return replaceFDim(F);
407
SJW2c317da2020-03-23 07:39:13 -0500408 case Builtins::kFmod:
409 return replaceFmod(F);
410
alan-baker8b968112020-12-15 15:53:29 -0500411 case Builtins::kPown:
412 return replacePown(F);
413
alan-baker3e0de472020-12-08 15:57:17 -0500414 case Builtins::kRound:
415 return replaceRound(F);
416
417 case Builtins::kCospi:
418 case Builtins::kSinpi:
419 case Builtins::kTanpi:
420 return replaceTrigPi(F, FI.getType());
421
alan-baker8b968112020-12-15 15:53:29 -0500422 case Builtins::kSincos:
423 return replaceSincos(F);
424
SJW2c317da2020-03-23 07:39:13 -0500425 case Builtins::kBarrier:
426 case Builtins::kWorkGroupBarrier:
427 return replaceBarrier(F);
428
alan-baker12d2c182020-07-20 08:22:42 -0400429 case Builtins::kSubGroupBarrier:
430 return replaceBarrier(F, true);
431
alan-baker36309f92021-02-05 12:28:03 -0500432 case Builtins::kAtomicWorkItemFence:
433 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500434 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400435 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500436 case Builtins::kReadMemFence:
437 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
438 case Builtins::kWriteMemFence:
439 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
440
441 // Relational
442 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500443 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500444 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500445 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500446 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500447 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500448 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500449 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500450 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500451 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500452 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500453 return replaceRelational(F, CmpInst::FCMP_UNE);
454 case Builtins::kIslessgreater:
455 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500456
alan-baker15106572020-11-06 15:08:10 -0500457 case Builtins::kIsordered:
458 return replaceOrdered(F, true);
459
460 case Builtins::kIsunordered:
461 return replaceOrdered(F, false);
462
SJW2c317da2020-03-23 07:39:13 -0500463 case Builtins::kIsinf: {
464 bool is_vec = FI.getParameter(0).vector_size != 0;
465 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
466 }
467 case Builtins::kIsnan: {
468 bool is_vec = FI.getParameter(0).vector_size != 0;
469 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
470 }
471
472 case Builtins::kIsfinite:
473 return replaceIsFinite(F);
474
475 case Builtins::kAll: {
476 bool is_vec = FI.getParameter(0).vector_size != 0;
477 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
478 }
479 case Builtins::kAny: {
480 bool is_vec = FI.getParameter(0).vector_size != 0;
481 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
482 }
483
alan-baker497920b2020-11-09 16:41:36 -0500484 case Builtins::kIsnormal:
485 return replaceIsNormal(F);
486
SJW2c317da2020-03-23 07:39:13 -0500487 case Builtins::kUpsample:
488 return replaceUpsample(F);
489
490 case Builtins::kRotate:
491 return replaceRotate(F);
492
493 case Builtins::kConvert:
494 return replaceConvert(F, FI.getParameter(0).is_signed,
495 FI.getReturnType().is_signed);
496
alan-baker4986eff2020-10-29 13:38:00 -0400497 // OpenCL 2.0 explicit atomics have different default scopes and semantics
498 // than legacy atomic functions.
499 case Builtins::kAtomicLoad:
500 case Builtins::kAtomicLoadExplicit:
501 return replaceAtomicLoad(F);
502 case Builtins::kAtomicStore:
503 case Builtins::kAtomicStoreExplicit:
504 return replaceExplicitAtomics(F, spv::OpAtomicStore,
505 spv::MemorySemanticsReleaseMask);
506 case Builtins::kAtomicExchange:
507 case Builtins::kAtomicExchangeExplicit:
508 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
509 case Builtins::kAtomicFetchAdd:
510 case Builtins::kAtomicFetchAddExplicit:
511 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
512 case Builtins::kAtomicFetchSub:
513 case Builtins::kAtomicFetchSubExplicit:
514 return replaceExplicitAtomics(F, spv::OpAtomicISub);
515 case Builtins::kAtomicFetchOr:
516 case Builtins::kAtomicFetchOrExplicit:
517 return replaceExplicitAtomics(F, spv::OpAtomicOr);
518 case Builtins::kAtomicFetchXor:
519 case Builtins::kAtomicFetchXorExplicit:
520 return replaceExplicitAtomics(F, spv::OpAtomicXor);
521 case Builtins::kAtomicFetchAnd:
522 case Builtins::kAtomicFetchAndExplicit:
523 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
524 case Builtins::kAtomicFetchMin:
525 case Builtins::kAtomicFetchMinExplicit:
526 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
527 ? spv::OpAtomicSMin
528 : spv::OpAtomicUMin);
529 case Builtins::kAtomicFetchMax:
530 case Builtins::kAtomicFetchMaxExplicit:
531 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
532 ? spv::OpAtomicSMax
533 : spv::OpAtomicUMax);
534 // Weak compare exchange is generated as strong compare exchange.
535 case Builtins::kAtomicCompareExchangeWeak:
536 case Builtins::kAtomicCompareExchangeWeakExplicit:
537 case Builtins::kAtomicCompareExchangeStrong:
538 case Builtins::kAtomicCompareExchangeStrongExplicit:
539 return replaceAtomicCompareExchange(F);
540
541 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500542 case Builtins::kAtomicInc:
543 return replaceAtomics(F, spv::OpAtomicIIncrement);
544 case Builtins::kAtomicDec:
545 return replaceAtomics(F, spv::OpAtomicIDecrement);
546 case Builtins::kAtomicCmpxchg:
547 return replaceAtomics(F, spv::OpAtomicCompareExchange);
548 case Builtins::kAtomicAdd:
549 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
550 case Builtins::kAtomicSub:
551 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
552 case Builtins::kAtomicXchg:
553 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
554 case Builtins::kAtomicMin:
555 return replaceAtomics(F, FI.getParameter(0).is_signed
556 ? llvm::AtomicRMWInst::Min
557 : llvm::AtomicRMWInst::UMin);
558 case Builtins::kAtomicMax:
559 return replaceAtomics(F, FI.getParameter(0).is_signed
560 ? llvm::AtomicRMWInst::Max
561 : llvm::AtomicRMWInst::UMax);
562 case Builtins::kAtomicAnd:
563 return replaceAtomics(F, llvm::AtomicRMWInst::And);
564 case Builtins::kAtomicOr:
565 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
566 case Builtins::kAtomicXor:
567 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
568
569 case Builtins::kCross:
570 if (FI.getParameter(0).vector_size == 4) {
571 return replaceCross(F);
572 }
573 break;
574
575 case Builtins::kFract:
576 if (FI.getParameterCount()) {
577 return replaceFract(F, FI.getParameter(0).vector_size);
578 }
579 break;
580
581 case Builtins::kMadHi:
582 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
583 case Builtins::kMulHi:
584 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
585
alan-baker6b9d1ee2020-11-03 23:11:32 -0500586 case Builtins::kMadSat:
587 return replaceMadSat(F, FI.getParameter(0).is_signed);
588
SJW2c317da2020-03-23 07:39:13 -0500589 case Builtins::kMad:
590 case Builtins::kMad24:
591 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
592 true);
593 case Builtins::kMul24:
594 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
595 false);
596
597 case Builtins::kSelect:
598 return replaceSelect(F);
599
600 case Builtins::kBitselect:
601 return replaceBitSelect(F);
602
603 case Builtins::kVload:
604 return replaceVload(F);
605
606 case Builtins::kVloadaHalf:
607 case Builtins::kVloadHalf:
608 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
609
610 case Builtins::kVstore:
611 return replaceVstore(F);
612
613 case Builtins::kVstoreHalf:
614 case Builtins::kVstoreaHalf:
615 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
616
617 case Builtins::kSmoothstep: {
618 int vec_size = FI.getLastParameter().vector_size;
619 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500620 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500621 }
622 break;
623 }
624 case Builtins::kStep: {
625 int vec_size = FI.getLastParameter().vector_size;
626 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500627 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500628 }
629 break;
630 }
631
632 case Builtins::kSignbit:
633 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
634
alan-baker3f1bf492020-11-05 09:07:36 -0500635 case Builtins::kSubSat:
636 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
637
SJW2c317da2020-03-23 07:39:13 -0500638 case Builtins::kReadImageh:
639 return replaceHalfReadImage(F);
640 case Builtins::kReadImagef:
641 case Builtins::kReadImagei:
642 case Builtins::kReadImageui: {
643 if (FI.getParameter(1).isSampler() &&
644 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
645 return replaceSampledReadImageWithIntCoords(F);
646 }
647 break;
648 }
649
650 case Builtins::kWriteImageh:
651 return replaceHalfWriteImage(F);
652
Kévin Petit1cb45112020-04-27 18:55:48 +0100653 case Builtins::kPrefetch:
654 return replacePrefetch(F);
655
rjodinchr791203f2021-10-07 20:42:41 +0200656 // Asynchronous copies
657 case Builtins::kAsyncWorkGroupCopy:
658 return replaceAsyncWorkGroupCopy(F);
659 case Builtins::kAsyncWorkGroupStridedCopy:
660 return replaceAsyncWorkGroupStridedCopy(F);
661 case Builtins::kWaitGroupEvents:
662 return replaceWaitGroupEvents(F);
663
SJW2c317da2020-03-23 07:39:13 -0500664 default:
665 break;
666 }
667
668 return false;
669}
670
alan-baker6b9d1ee2020-11-03 23:11:32 -0500671Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
672 auto iter = PairStructMap.find(type);
673 if (iter != PairStructMap.end())
674 return iter->second;
675
676 auto new_struct = StructType::get(type->getContext(), {type, type});
677 PairStructMap[type] = new_struct;
678 return new_struct;
679}
680
rjodinchr791203f2021-10-07 20:42:41 +0200681bool ReplaceOpenCLBuiltinPass::replaceWaitGroupEvents(Function &F) {
682 /* Simple implementation for wait_group_events to avoid dealing with the event
683 * list:
684 *
685 * void wait_group_events(int num_events, event_t *event_list) {
686 * barrier(CLK_LOCAL_MEM_FENCE);
687 * }
688 *
689 */
690
691 enum {
692 CLK_LOCAL_MEM_FENCE = 0x01,
693 CLK_GLOBAL_MEM_FENCE = 0x02,
694 CLK_IMAGE_MEM_FENCE = 0x04
695 };
696
697 return replaceCallsWithValue(F, [](CallInst *CI) {
698 IRBuilder<> Builder(CI);
699
700 const auto ConstantScopeWorkgroup = Builder.getInt32(spv::ScopeWorkgroup);
701 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
702 Instruction::Shl, Builder.getInt32(CLK_LOCAL_MEM_FENCE),
703 Builder.getInt32(clz(spv::MemorySemanticsWorkgroupMemoryMask) -
704 clz(CLK_LOCAL_MEM_FENCE)),
705 "", CI);
706 auto MemorySemantics = BinaryOperator::Create(
707 Instruction::Or, MemorySemanticsWorkgroup,
708 ConstantInt::get(Builder.getInt32Ty(),
709 spv::MemorySemanticsAcquireReleaseMask),
710 "", CI);
711
712 return clspv::InsertSPIRVOp(
713 CI, spv::OpControlBarrier,
714 {Attribute::NoDuplicate, Attribute::Convergent}, Builder.getVoidTy(),
715 {ConstantScopeWorkgroup, ConstantScopeWorkgroup, MemorySemantics});
716 });
717}
718
719GlobalVariable *ReplaceOpenCLBuiltinPass::getOrCreateGlobalVariable(
720 Module &M, std::string VariableName,
721 AddressSpace::Type VariableAddressSpace) {
722 GlobalVariable *GV = M.getGlobalVariable(VariableName);
723 if (GV == nullptr) {
724 IntegerType *IT = IntegerType::get(M.getContext(), 32);
725 VectorType *VT = FixedVectorType::get(IT, 3);
726
727 GV = new GlobalVariable(M, VT, false, GlobalValue::ExternalLinkage, nullptr,
728 VariableName, nullptr,
729 GlobalValue::ThreadLocalMode::NotThreadLocal,
730 VariableAddressSpace);
731 GV->setInitializer(Constant::getNullValue(VT));
732 }
733 return GV;
734}
735
736Value *ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopies(
737 Module &M, CallInst *CI, Value *Dst, Value *Src, Value *NumGentypes,
738 Value *Stride, Value *Event) {
739 /*
740 * event_t *async_work_group_strided_copy(T *dst, T *src, size_t num_gentypes,
741 * size_t stride, event_t event) {
742 * size_t start_id = ((get_local_id(2) * get_local_size(1))
743 * + get_local_id(1)) * get_local_size(0)
744 * + get_local_id(0);
745 * size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
746 * for (size_t it = start_id; it < num_gentypes; it += incr) {
747 * dst[it] = src[it * stride];
748 * }
749 * return event;
750 * }
751 */
752
753 /* BB:
754 * before
755 * async_work_group_strided_copy
756 * after
757 *
758 * ================================
759 *
760 * BB:
761 * before
762 * start_id = f(get_local_ids, get_local_sizes)
763 * incr = g(get_local_sizes)
764 * br CmpBB
765 *
766 * CmpBB:
767 * it = PHI(start_id, it)
768 * cmp = it < NumGentypes
769 * condBr cmp, LoopBB, ExitBB
770 *
771 * LoopBB:
772 * dstI = dst[it]
773 * srcI = src[it * stride]
774 * OpCopyMemory dstI, srcI
775 * it += incr
776 * br CmpBB
777 *
778 * ExitBB:
779 * after
780 */
781
782 IRBuilder<> Builder(CI);
783
784 auto Cst0 = Builder.getInt32(0);
785 auto Cst1 = Builder.getInt32(1);
786 auto Cst2 = Builder.getInt32(2);
787
788 // get_local_id({0, 1, 2});
789 GlobalVariable *GVId =
790 getOrCreateGlobalVariable(M, clspv::LocalInvocationIdVariableName(),
791 clspv::LocalInvocationIdAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100792 Type *GVIdElTy = GVId->getType()->getScalarType()->getPointerElementType();
793 Value *GEP0 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst0});
794 Value *LocalId0 =
795 Builder.CreateLoad(GEP0->getType()->getPointerElementType(), GEP0);
796 Value *GEP1 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst1});
797 Value *LocalId1 =
798 Builder.CreateLoad(GEP1->getType()->getPointerElementType(), GEP1);
799 Value *GEP2 = Builder.CreateGEP(GVIdElTy, GVId, {Cst0, Cst2});
800 Value *LocalId2 =
801 Builder.CreateLoad(GEP2->getType()->getPointerElementType(), GEP2);
rjodinchr791203f2021-10-07 20:42:41 +0200802
803 // get_local_size({0, 1, 2});
804 GlobalVariable *GVSize =
805 getOrCreateGlobalVariable(M, clspv::WorkgroupSizeVariableName(),
806 clspv::WorkgroupSizeAddressSpace());
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100807 auto LocalSize =
808 Builder.CreateLoad(GVSize->getType()->getPointerElementType(), GVSize);
rjodinchr791203f2021-10-07 20:42:41 +0200809 auto LocalSize0 = Builder.CreateExtractElement(LocalSize, Cst0);
810 auto LocalSize1 = Builder.CreateExtractElement(LocalSize, Cst1);
811 auto LocalSize2 = Builder.CreateExtractElement(LocalSize, Cst2);
812
813 // size_t start_id = ((get_local_id(2) * get_local_size(1))
814 // + get_local_id(1)) * get_local_size(0)
815 // + get_local_id(0);
816 auto tmp0 = Builder.CreateMul(LocalId2, LocalSize1);
817 auto tmp1 = Builder.CreateAdd(tmp0, LocalId1);
818 auto tmp2 = Builder.CreateMul(tmp1, LocalSize0);
819 auto StartId = Builder.CreateAdd(tmp2, LocalId0);
820
821 // size_t incr = get_local_size(0) * get_local_size(1) * get_local_size(2);
822 auto tmp3 = Builder.CreateMul(LocalSize0, LocalSize1);
823 auto Incr = Builder.CreateMul(tmp3, LocalSize2);
824
825 // Create BasicBlocks
826 auto BB = CI->getParent();
827 auto CmpBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
828 auto LoopBB = BasicBlock::Create(BB->getContext(), "", BB->getParent());
829 auto ExitBB = SplitBlock(BB, CI);
830
831 // BB
832 auto BrCmpBB = BranchInst::Create(CmpBB);
833 ReplaceInstWithInst(BB->getTerminator(), BrCmpBB);
834
835 // CmpBB
836 Builder.SetInsertPoint(CmpBB);
837 auto PHIIterator = Builder.CreatePHI(Builder.getInt32Ty(), 2);
838 auto Cmp = Builder.CreateCmp(CmpInst::ICMP_ULT, PHIIterator, NumGentypes);
839 Builder.CreateCondBr(Cmp, LoopBB, ExitBB);
840
841 // LoopBB
842 Builder.SetInsertPoint(LoopBB);
843
844 // default values for non-strided copies
845 Value *SrcIterator = PHIIterator;
846 Value *DstIterator = PHIIterator;
847 if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
848 clspv::AddressSpace::Global)) {
849 // async_work_group_strided_copy local to global case
850 DstIterator = Builder.CreateMul(PHIIterator, Stride);
851 } else if (Stride != nullptr && (Dst->getType()->getPointerAddressSpace() ==
852 clspv::AddressSpace::Local)) {
853 // async_work_group_strided_copy global to local case
854 SrcIterator = Builder.CreateMul(PHIIterator, Stride);
855 }
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +0100856 auto DstI = Builder.CreateGEP(
857 Dst->getType()->getScalarType()->getPointerElementType(), Dst,
858 DstIterator);
859 auto SrcI = Builder.CreateGEP(
860 Src->getType()->getScalarType()->getPointerElementType(), Src,
861 SrcIterator);
rjodinchr791203f2021-10-07 20:42:41 +0200862 auto NewIterator = Builder.CreateAdd(PHIIterator, Incr);
863 auto Br = Builder.CreateBr(CmpBB);
864 clspv::InsertSPIRVOp(Br, spv::OpCopyMemory, {}, Builder.getVoidTy(),
865 {DstI, SrcI});
866
867 // Set PHIIterator for CmpBB now that we have NewIterator
868 PHIIterator->addIncoming(StartId, BB);
869 PHIIterator->addIncoming(NewIterator, LoopBB);
870
871 return Event;
872}
873
874bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupCopy(Function &F) {
875 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
876 Module &M = *F.getParent();
877
878 auto Dst = CI->getOperand(0);
879 auto Src = CI->getOperand(1);
880 auto NumGentypes = CI->getOperand(2);
881 auto Event = CI->getOperand(3);
882
883 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, nullptr,
884 Event);
885 });
886}
887
888bool ReplaceOpenCLBuiltinPass::replaceAsyncWorkGroupStridedCopy(Function &F) {
889 return replaceCallsWithValue(F, [&F, this](CallInst *CI) {
890 Module &M = *F.getParent();
891
892 auto Dst = CI->getOperand(0);
893 auto Src = CI->getOperand(1);
894 auto NumGentypes = CI->getOperand(2);
895 auto Stride = CI->getOperand(3);
896 auto Event = CI->getOperand(4);
897
898 return replaceAsyncWorkGroupCopies(M, CI, Dst, Src, NumGentypes, Stride,
899 Event);
900 });
901}
902
SJW2c317da2020-03-23 07:39:13 -0500903bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
904 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400905 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100906}
907
SJW2c317da2020-03-23 07:39:13 -0500908bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
909 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100910 auto XValue = CI->getOperand(0);
911 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100912
Kévin Petite8edce32019-04-10 14:23:32 +0100913 IRBuilder<> Builder(CI);
914 auto XmY = Builder.CreateSub(XValue, YValue);
915 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100916
SJW2c317da2020-03-23 07:39:13 -0500917 Value *Cmp = nullptr;
918 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100919 Cmp = Builder.CreateICmpSGT(YValue, XValue);
920 } else {
921 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100922 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100923
Kévin Petite8edce32019-04-10 14:23:32 +0100924 return Builder.CreateSelect(Cmp, YmX, XmY);
925 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100926}
927
SJW2c317da2020-03-23 07:39:13 -0500928bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500929 return replaceCallsWithValue(F, [&F](CallInst *Call) {
930 const auto x = Call->getArgOperand(0);
931 const auto y = Call->getArgOperand(1);
932 auto intrinsic = Intrinsic::getDeclaration(
933 F.getParent(), Intrinsic::copysign, Call->getType());
934 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
935 Call);
Kévin Petite8edce32019-04-10 14:23:32 +0100936 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100937}
938
SJW2c317da2020-03-23 07:39:13 -0500939bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
940 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100941 // Recip has one arg.
942 auto Arg = CI->getOperand(0);
943 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
944 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
945 });
David Neto22f144c2017-06-12 14:26:21 -0400946}
947
SJW2c317da2020-03-23 07:39:13 -0500948bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
949 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100950 auto Op0 = CI->getOperand(0);
951 auto Op1 = CI->getOperand(1);
952 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
953 });
David Neto22f144c2017-06-12 14:26:21 -0400954}
955
SJW2c317da2020-03-23 07:39:13 -0500956bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
957 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100958 auto Op0 = CI->getOperand(0);
959 auto Op1 = CI->getOperand(1);
960
SJW2c317da2020-03-23 07:39:13 -0500961 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100962 if (Op0->getType()->isVectorTy()) {
963 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
964 CI->getType(), {Op0, Op1});
965 } else {
966 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
967 }
968
969 return V;
970 });
971}
972
SJW2c317da2020-03-23 07:39:13 -0500973bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500974 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500975 // convert to natural
976 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500977 std::string NewFName = basename.substr(0, slen);
978 NewFName =
979 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400980
SJW2c317da2020-03-23 07:39:13 -0500981 Module &M = *F.getParent();
982 return replaceCallsWithValue(F, [&](CallInst *CI) {
983 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
984
985 auto Arg = CI->getOperand(0);
986
987 // Constant of the natural log of 10 (ln(10)).
988 const double Ln10 =
989 2.302585092994045684017991454684364207601101488628772976033;
990
991 auto Mul = BinaryOperator::Create(
992 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
993
994 return CallInst::Create(NewF, Mul, "", CI);
995 });
David Neto22f144c2017-06-12 14:26:21 -0400996}
997
SJW2c317da2020-03-23 07:39:13 -0500998bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100999 // OpenCL fmod(x,y) is x - y * trunc(x/y)
1000 // The sign for a non-zero result is taken from x.
1001 // (Try an example.)
1002 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -05001003 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +01001004 auto Op0 = CI->getOperand(0);
1005 auto Op1 = CI->getOperand(1);
1006 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
1007 });
1008}
1009
SJW2c317da2020-03-23 07:39:13 -05001010bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -05001011 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -05001012 // convert to natural
1013 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -05001014 std::string NewFName = basename.substr(0, slen);
1015 NewFName =
1016 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -04001017
SJW2c317da2020-03-23 07:39:13 -05001018 Module &M = *F.getParent();
1019 return replaceCallsWithValue(F, [&](CallInst *CI) {
1020 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
1021
1022 auto Arg = CI->getOperand(0);
1023
1024 // Constant of the reciprocal of the natural log of 10 (ln(10)).
1025 const double Ln10 =
1026 0.434294481903251827651128918916605082294397005803666566114;
1027
1028 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1029
1030 return BinaryOperator::Create(Instruction::FMul,
1031 ConstantFP::get(Arg->getType(), Ln10), NewCI,
1032 "", CI);
1033 });
David Neto22f144c2017-06-12 14:26:21 -04001034}
1035
gnl21636e7992020-09-09 16:08:16 +01001036bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
1037 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -05001038 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +01001039 auto Arg = CI->getOperand(0);
1040
1041 auto ArgP1 = BinaryOperator::Create(
1042 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
1043
alan-baker8b968112020-12-15 15:53:29 -05001044 auto log =
1045 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
1046 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +01001047 });
1048}
1049
alan-baker12d2c182020-07-20 08:22:42 -04001050bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -04001051
alan-bakerf6bc8252020-09-23 14:58:55 -04001052 enum {
1053 CLK_LOCAL_MEM_FENCE = 0x01,
1054 CLK_GLOBAL_MEM_FENCE = 0x02,
1055 CLK_IMAGE_MEM_FENCE = 0x04
1056 };
David Neto22f144c2017-06-12 14:26:21 -04001057
alan-baker12d2c182020-07-20 08:22:42 -04001058 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +01001059 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001060
Kévin Petitc4643922019-06-17 19:32:05 +01001061 // We need to map the OpenCL constants to the SPIR-V equivalents.
1062 const auto LocalMemFence =
1063 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1064 const auto GlobalMemFence =
1065 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001066 const auto ImageMemFence =
1067 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -04001068 const auto ConstantAcquireRelease = ConstantInt::get(
1069 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +01001070 const auto ConstantScopeDevice =
1071 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
1072 const auto ConstantScopeWorkgroup =
1073 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -04001074 const auto ConstantScopeSubgroup =
1075 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -04001076
Kévin Petitc4643922019-06-17 19:32:05 +01001077 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1078 const auto LocalMemFenceMask =
1079 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1080 const auto WorkgroupShiftAmount =
1081 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1082 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1083 Instruction::Shl, LocalMemFenceMask,
1084 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001085
Kévin Petitc4643922019-06-17 19:32:05 +01001086 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1087 const auto GlobalMemFenceMask =
1088 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1089 const auto UniformShiftAmount =
1090 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1091 const auto MemorySemanticsUniform = BinaryOperator::Create(
1092 Instruction::Shl, GlobalMemFenceMask,
1093 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001094
alan-bakerf6bc8252020-09-23 14:58:55 -04001095 // OpenCL 2.0
1096 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1097 const auto ImageMemFenceMask =
1098 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1099 const auto ImageShiftAmount =
1100 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1101 const auto MemorySemanticsImage = BinaryOperator::Create(
1102 Instruction::Shl, ImageMemFenceMask,
1103 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1104
Kévin Petitc4643922019-06-17 19:32:05 +01001105 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -04001106 // MemorySemanticsSequentiallyConsistentMask.
1107 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +01001108 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -04001109 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -04001110 auto MemorySemantics2 = BinaryOperator::Create(
1111 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
1112 auto MemorySemantics = BinaryOperator::Create(
1113 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001114
alan-baker12d2c182020-07-20 08:22:42 -04001115 // If the memory scope is not specified explicitly, it is either Subgroup
1116 // or Workgroup depending on the type of barrier.
1117 Value *MemoryScope =
1118 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
1119 if (CI->data_operands_size() > 1) {
1120 enum {
1121 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
1122 CL_MEMORY_SCOPE_DEVICE = 0x2,
1123 CL_MEMORY_SCOPE_SUBGROUP = 0x4
1124 };
1125 // The call was given an explicit memory scope.
1126 const auto MemoryScopeSubgroup =
1127 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
1128 const auto MemoryScopeDevice =
1129 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -04001130
alan-baker12d2c182020-07-20 08:22:42 -04001131 auto Cmp =
1132 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1133 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
1134 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
1135 ConstantScopeWorkgroup, "", CI);
1136 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
1137 MemoryScopeDevice, CI->getOperand(1), "", CI);
1138 MemoryScope =
1139 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
1140 }
1141
1142 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
1143 // the type of barrier;
1144 const auto ExecutionScope =
1145 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -04001146
Kévin Petitc4643922019-06-17 19:32:05 +01001147 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -04001148 {Attribute::NoDuplicate, Attribute::Convergent},
1149 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +01001150 {ExecutionScope, MemoryScope, MemorySemantics});
1151 });
David Neto22f144c2017-06-12 14:26:21 -04001152}
1153
alan-baker36309f92021-02-05 12:28:03 -05001154bool ReplaceOpenCLBuiltinPass::replaceMemFence(
1155 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -04001156
SJW2c317da2020-03-23 07:39:13 -05001157 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -04001158 enum {
1159 CLK_LOCAL_MEM_FENCE = 0x01,
1160 CLK_GLOBAL_MEM_FENCE = 0x02,
1161 CLK_IMAGE_MEM_FENCE = 0x04,
1162 };
David Neto22f144c2017-06-12 14:26:21 -04001163
SJW2c317da2020-03-23 07:39:13 -05001164 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001165
SJW2c317da2020-03-23 07:39:13 -05001166 // We need to map the OpenCL constants to the SPIR-V equivalents.
1167 const auto LocalMemFence =
1168 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
1169 const auto GlobalMemFence =
1170 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -04001171 const auto ImageMemFence =
1172 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -05001173 const auto ConstantMemorySemantics =
1174 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -04001175 const auto ConstantScopeWorkgroup =
1176 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -04001177
SJW2c317da2020-03-23 07:39:13 -05001178 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
1179 const auto LocalMemFenceMask =
1180 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
1181 const auto WorkgroupShiftAmount =
1182 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
1183 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
1184 Instruction::Shl, LocalMemFenceMask,
1185 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001186
SJW2c317da2020-03-23 07:39:13 -05001187 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
1188 const auto GlobalMemFenceMask =
1189 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
1190 const auto UniformShiftAmount =
1191 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
1192 const auto MemorySemanticsUniform = BinaryOperator::Create(
1193 Instruction::Shl, GlobalMemFenceMask,
1194 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001195
alan-bakerf6bc8252020-09-23 14:58:55 -04001196 // OpenCL 2.0
1197 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
1198 const auto ImageMemFenceMask =
1199 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
1200 const auto ImageShiftAmount =
1201 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
1202 const auto MemorySemanticsImage = BinaryOperator::Create(
1203 Instruction::Shl, ImageMemFenceMask,
1204 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
1205
alan-baker36309f92021-02-05 12:28:03 -05001206 Value *MemOrder = ConstantMemorySemantics;
1207 Value *MemScope = ConstantScopeWorkgroup;
1208 IRBuilder<> builder(CI);
alan-baker5641f5c2021-10-15 09:16:04 -04001209 if (CI->arg_size() > 1) {
alan-baker36309f92021-02-05 12:28:03 -05001210 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
1211 semantics, false);
1212 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
1213 }
1214 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -04001215 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -05001216 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
1217 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
1218 auto MemorySemantics =
1219 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -04001220
alan-baker3d905692020-10-28 14:02:37 -04001221 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
1222 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -05001223 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -05001224 });
David Neto22f144c2017-06-12 14:26:21 -04001225}
1226
Kévin Petit1cb45112020-04-27 18:55:48 +01001227bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
1228 bool Changed = false;
1229
1230 SmallVector<Instruction *, 4> ToRemoves;
1231
1232 // Find all calls to the function
1233 for (auto &U : F.uses()) {
1234 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1235 ToRemoves.push_back(CI);
1236 }
1237 }
1238
1239 Changed = !ToRemoves.empty();
1240
1241 // Delete them
1242 for (auto V : ToRemoves) {
1243 V->eraseFromParent();
1244 }
1245
1246 return Changed;
1247}
1248
SJW2c317da2020-03-23 07:39:13 -05001249bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001250 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001251 return replaceCallsWithValue(F, [&](CallInst *CI) {
1252 // The predicate to use in the CmpInst.
1253 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001254
SJW2c317da2020-03-23 07:39:13 -05001255 auto Arg1 = CI->getOperand(0);
1256 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001257
SJW2c317da2020-03-23 07:39:13 -05001258 const auto Cmp =
1259 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001260 if (isa<VectorType>(F.getReturnType()))
1261 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1262 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001263 });
David Neto22f144c2017-06-12 14:26:21 -04001264}
1265
SJW2c317da2020-03-23 07:39:13 -05001266bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1267 spv::Op SPIRVOp,
1268 int32_t C) {
1269 Module &M = *F.getParent();
1270 return replaceCallsWithValue(F, [&](CallInst *CI) {
1271 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001272
SJW2c317da2020-03-23 07:39:13 -05001273 // The value to return for true.
1274 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001275
SJW2c317da2020-03-23 07:39:13 -05001276 // The value to return for false.
1277 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001278
SJW2c317da2020-03-23 07:39:13 -05001279 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001280 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001281 CorrespondingBoolTy =
1282 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1283 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001284 }
David Neto22f144c2017-06-12 14:26:21 -04001285
SJW2c317da2020-03-23 07:39:13 -05001286 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1287 CorrespondingBoolTy, {CI->getOperand(0)});
1288
1289 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1290 });
David Neto22f144c2017-06-12 14:26:21 -04001291}
1292
SJW2c317da2020-03-23 07:39:13 -05001293bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1294 Module &M = *F.getParent();
1295 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001296 auto &C = M.getContext();
1297 auto Val = CI->getOperand(0);
1298 auto ValTy = Val->getType();
1299 auto RetTy = CI->getType();
1300
1301 // Get a suitable integer type to represent the number
1302 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1303
1304 // Create Mask
1305 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001306 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001307 switch (ScalarSize) {
1308 case 16:
1309 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1310 break;
1311 case 32:
1312 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1313 break;
1314 case 64:
1315 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1316 break;
1317 default:
1318 llvm_unreachable("Unsupported floating-point type");
1319 }
1320
1321 IRBuilder<> Builder(CI);
1322
1323 // Bitcast to int
1324 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1325
1326 // Mask and compare
1327 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1328 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1329
1330 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001331 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001332 if (ValTy->isVectorTy()) {
1333 RetTrue = ConstantInt::getSigned(RetTy, -1);
1334 } else {
1335 RetTrue = ConstantInt::get(RetTy, 1);
1336 }
1337 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1338 });
1339}
1340
SJW2c317da2020-03-23 07:39:13 -05001341bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1342 Module &M = *F.getParent();
1343 return replaceCallsWithValue(F, [&](CallInst *CI) {
1344 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001345
SJW2c317da2020-03-23 07:39:13 -05001346 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001347
SJW2c317da2020-03-23 07:39:13 -05001348 // If the argument is a 32-bit int, just use a shift
1349 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1350 V = BinaryOperator::Create(Instruction::LShr, Arg,
1351 ConstantInt::get(Arg->getType(), 31), "", CI);
1352 } else {
1353 // The value for zero to compare against.
1354 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001355
SJW2c317da2020-03-23 07:39:13 -05001356 // The value to return for true.
1357 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001358
SJW2c317da2020-03-23 07:39:13 -05001359 // The value to return for false.
1360 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001361
SJW2c317da2020-03-23 07:39:13 -05001362 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1363 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001364
SJW2c317da2020-03-23 07:39:13 -05001365 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001366
SJW2c317da2020-03-23 07:39:13 -05001367 // If we have a function to call, call it!
1368 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001369
SJW2c317da2020-03-23 07:39:13 -05001370 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001371
SJW2c317da2020-03-23 07:39:13 -05001372 const auto NewCI = clspv::InsertSPIRVOp(
1373 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1374 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001375
SJW2c317da2020-03-23 07:39:13 -05001376 } else {
1377 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001378 }
1379
SJW2c317da2020-03-23 07:39:13 -05001380 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001381 }
SJW2c317da2020-03-23 07:39:13 -05001382 return V;
1383 });
David Neto22f144c2017-06-12 14:26:21 -04001384}
1385
SJW2c317da2020-03-23 07:39:13 -05001386bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1387 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1388 // Get arguments
1389 auto HiValue = CI->getOperand(0);
1390 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001391
SJW2c317da2020-03-23 07:39:13 -05001392 // Don't touch overloads that aren't in OpenCL C
1393 auto HiType = HiValue->getType();
1394 auto LoType = LoValue->getType();
1395
1396 if (HiType != LoType) {
1397 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001398 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001399
SJW2c317da2020-03-23 07:39:13 -05001400 if (!HiType->isIntOrIntVectorTy()) {
1401 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001402 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001403
SJW2c317da2020-03-23 07:39:13 -05001404 if (HiType->getScalarSizeInBits() * 2 !=
1405 CI->getType()->getScalarSizeInBits()) {
1406 return nullptr;
1407 }
1408
1409 if ((HiType->getScalarSizeInBits() != 8) &&
1410 (HiType->getScalarSizeInBits() != 16) &&
1411 (HiType->getScalarSizeInBits() != 32)) {
1412 return nullptr;
1413 }
1414
James Pricecf53df42020-04-20 14:41:24 -04001415 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001416 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001417 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1418 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001419 return nullptr;
1420 }
1421 }
1422
1423 // Convert both operands to the result type
1424 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1425 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1426
1427 // Shift high operand
1428 auto ShiftAmount =
1429 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1430 auto HiShifted =
1431 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1432
1433 // OR both results
1434 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1435 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001436}
1437
SJW2c317da2020-03-23 07:39:13 -05001438bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1439 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1440 // Get arguments
1441 auto SrcValue = CI->getOperand(0);
1442 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001443
SJW2c317da2020-03-23 07:39:13 -05001444 // Don't touch overloads that aren't in OpenCL C
1445 auto SrcType = SrcValue->getType();
1446 auto RotType = RotAmount->getType();
1447
1448 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1449 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001450 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001451
SJW2c317da2020-03-23 07:39:13 -05001452 if (!SrcType->isIntOrIntVectorTy()) {
1453 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001454 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001455
SJW2c317da2020-03-23 07:39:13 -05001456 if ((SrcType->getScalarSizeInBits() != 8) &&
1457 (SrcType->getScalarSizeInBits() != 16) &&
1458 (SrcType->getScalarSizeInBits() != 32) &&
1459 (SrcType->getScalarSizeInBits() != 64)) {
1460 return nullptr;
1461 }
1462
James Pricecf53df42020-04-20 14:41:24 -04001463 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001464 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001465 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1466 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001467 return nullptr;
1468 }
1469 }
1470
alan-bakerfd22ae12020-10-29 15:59:22 -04001471 // Replace with LLVM's funnel shift left intrinsic because it is more
1472 // generic than rotate.
1473 Function *intrinsic =
1474 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1475 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1476 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001477 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001478}
1479
SJW2c317da2020-03-23 07:39:13 -05001480bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1481 bool DstIsSigned) {
1482 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1483 Value *V = nullptr;
1484 // Get arguments
1485 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001486
SJW2c317da2020-03-23 07:39:13 -05001487 // Don't touch overloads that aren't in OpenCL C
1488 auto SrcType = SrcValue->getType();
1489 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001490
SJW2c317da2020-03-23 07:39:13 -05001491 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1492 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1493 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001494 }
1495
James Pricecf53df42020-04-20 14:41:24 -04001496 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001497 unsigned SrcNumElements =
1498 SrcVecType->getElementCount().getKnownMinValue();
1499 unsigned DstNumElements =
1500 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001501 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001502 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001503 }
1504
James Pricecf53df42020-04-20 14:41:24 -04001505 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1506 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1507 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001508 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001509 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001510 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001511
SJW2c317da2020-03-23 07:39:13 -05001512 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1513 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1514
1515 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1516 bool DstIsInt = DstType->isIntOrIntVectorTy();
1517
1518 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1519 // Unnecessary cast operation.
1520 V = SrcValue;
1521 } else if (SrcIsFloat && DstIsFloat) {
1522 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1523 } else if (SrcIsFloat && DstIsInt) {
1524 if (DstIsSigned) {
1525 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1526 } else {
1527 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1528 }
1529 } else if (SrcIsInt && DstIsFloat) {
1530 if (SrcIsSigned) {
1531 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1532 } else {
1533 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1534 }
1535 } else if (SrcIsInt && DstIsInt) {
1536 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1537 } else {
1538 // Not something we're supposed to handle, just move on
1539 }
1540
1541 return V;
1542 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001543}
1544
SJW2c317da2020-03-23 07:39:13 -05001545bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1546 bool is_mad) {
1547 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1548 Value *V = nullptr;
1549 // Get arguments
1550 auto AValue = CI->getOperand(0);
1551 auto BValue = CI->getOperand(1);
1552 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001553
SJW2c317da2020-03-23 07:39:13 -05001554 // Don't touch overloads that aren't in OpenCL C
1555 auto AType = AValue->getType();
1556 auto BType = BValue->getType();
1557 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001558
SJW2c317da2020-03-23 07:39:13 -05001559 if ((AType != BType) || (CI->getType() != AType) ||
1560 (is_mad && (AType != CType))) {
1561 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001562 }
1563
SJW2c317da2020-03-23 07:39:13 -05001564 if (!AType->isIntOrIntVectorTy()) {
1565 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001566 }
Kévin Petit8a560882019-03-21 15:24:34 +00001567
SJW2c317da2020-03-23 07:39:13 -05001568 if ((AType->getScalarSizeInBits() != 8) &&
1569 (AType->getScalarSizeInBits() != 16) &&
1570 (AType->getScalarSizeInBits() != 32) &&
1571 (AType->getScalarSizeInBits() != 64)) {
1572 return V;
1573 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001574
James Pricecf53df42020-04-20 14:41:24 -04001575 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001576 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001577 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1578 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001579 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001580 }
1581 }
1582
SJW2c317da2020-03-23 07:39:13 -05001583 // Our SPIR-V op returns a struct, create a type for it
alan-baker6b9d1ee2020-11-03 23:11:32 -05001584 auto ExMulRetType = GetPairStruct(AType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001585
SJW2c317da2020-03-23 07:39:13 -05001586 // Select the appropriate signed/unsigned SPIR-V op
1587 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1588
1589 // Call the SPIR-V op
1590 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1591 ExMulRetType, {AValue, BValue});
1592
1593 // Get the high part of the result
1594 unsigned Idxs[] = {1};
1595 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1596
1597 // If we're handling a mad_hi, add the third argument to the result
1598 if (is_mad) {
1599 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001600 }
1601
SJW2c317da2020-03-23 07:39:13 -05001602 return V;
1603 });
Kévin Petit8a560882019-03-21 15:24:34 +00001604}
1605
SJW2c317da2020-03-23 07:39:13 -05001606bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1607 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1608 // Get arguments
1609 auto FalseValue = CI->getOperand(0);
1610 auto TrueValue = CI->getOperand(1);
1611 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001612
SJW2c317da2020-03-23 07:39:13 -05001613 // Don't touch overloads that aren't in OpenCL C
1614 auto FalseType = FalseValue->getType();
1615 auto TrueType = TrueValue->getType();
1616 auto PredicateType = PredicateValue->getType();
1617
1618 if (FalseType != TrueType) {
1619 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001620 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001621
SJW2c317da2020-03-23 07:39:13 -05001622 if (!PredicateType->isIntOrIntVectorTy()) {
1623 return nullptr;
1624 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001625
SJW2c317da2020-03-23 07:39:13 -05001626 if (!FalseType->isIntOrIntVectorTy() &&
1627 !FalseType->getScalarType()->isFloatingPointTy()) {
1628 return nullptr;
1629 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001630
SJW2c317da2020-03-23 07:39:13 -05001631 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1632 return nullptr;
1633 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001634
SJW2c317da2020-03-23 07:39:13 -05001635 if (FalseType->getScalarSizeInBits() !=
1636 PredicateType->getScalarSizeInBits()) {
1637 return nullptr;
1638 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001639
James Pricecf53df42020-04-20 14:41:24 -04001640 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001641 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1642 if (NumElements != cast<VectorType>(PredicateType)
1643 ->getElementCount()
1644 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001645 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001646 }
1647
James Pricecf53df42020-04-20 14:41:24 -04001648 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1649 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001650 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001651 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001652 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001653
SJW2c317da2020-03-23 07:39:13 -05001654 // Create constant
1655 const auto ZeroValue = Constant::getNullValue(PredicateType);
1656
1657 // Scalar and vector are to be treated differently
1658 CmpInst::Predicate Pred;
1659 if (PredicateType->isVectorTy()) {
1660 Pred = CmpInst::ICMP_SLT;
1661 } else {
1662 Pred = CmpInst::ICMP_NE;
1663 }
1664
1665 // Create comparison instruction
1666 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1667 ZeroValue, "", CI);
1668
1669 // Create select
1670 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1671 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001672}
1673
SJW2c317da2020-03-23 07:39:13 -05001674bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1675 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1676 Value *V = nullptr;
1677 if (CI->getNumOperands() != 4) {
1678 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001679 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001680
SJW2c317da2020-03-23 07:39:13 -05001681 // Get arguments
1682 auto FalseValue = CI->getOperand(0);
1683 auto TrueValue = CI->getOperand(1);
1684 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001685
SJW2c317da2020-03-23 07:39:13 -05001686 // Don't touch overloads that aren't in OpenCL C
1687 auto FalseType = FalseValue->getType();
1688 auto TrueType = TrueValue->getType();
1689 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001690
SJW2c317da2020-03-23 07:39:13 -05001691 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1692 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001693 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001694
James Pricecf53df42020-04-20 14:41:24 -04001695 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001696 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1697 !TrueType->getScalarType()->isIntegerTy()) {
1698 return V;
1699 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001700 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001701 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1702 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001703 return V;
1704 }
1705 }
1706
1707 // Remember the type of the operands
1708 auto OpType = TrueType;
1709
1710 // The actual bit selection will always be done on an integer type,
1711 // declare it here
1712 Type *BitType;
1713
1714 // If the operands are float, then bitcast them to int
1715 if (OpType->getScalarType()->isFloatingPointTy()) {
1716
1717 // First create the new type
1718 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1719
1720 // Then bitcast all operands
1721 PredicateValue =
1722 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1723 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1724 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1725
1726 } else {
1727 // The operands have an integer type, use it directly
1728 BitType = OpType;
1729 }
1730
1731 // All the operands are now always integers
1732 // implement as (c & b) | (~c & a)
1733
1734 // Create our negated predicate value
1735 auto AllOnes = Constant::getAllOnesValue(BitType);
1736 auto NotPredicateValue = BinaryOperator::Create(
1737 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1738
1739 // Then put everything together
1740 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1741 FalseValue, "", CI);
1742 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1743 TrueValue, "", CI);
1744
1745 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1746
1747 // If we were dealing with a floating point type, we must bitcast
1748 // the result back to that
1749 if (OpType->getScalarType()->isFloatingPointTy()) {
1750 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1751 }
1752
1753 return V;
1754 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001755}
1756
SJW61531372020-06-09 07:31:08 -05001757bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001758 // convert to vector versions
1759 Module &M = *F.getParent();
1760 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1761 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1762 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001763
SJW2c317da2020-03-23 07:39:13 -05001764 // First figure out which function we're dealing with
1765 if (is_smooth) {
1766 ArgsToSplat.push_back(CI->getOperand(1));
1767 VectorArg = CI->getOperand(2);
1768 } else {
1769 VectorArg = CI->getOperand(1);
1770 }
1771
1772 // Splat arguments that need to be
1773 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001774 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001775
1776 for (auto arg : ArgsToSplat) {
1777 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001778 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1779 i++) {
SJW2c317da2020-03-23 07:39:13 -05001780 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1781 NewVectorArg =
1782 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1783 }
1784 SplatArgs.push_back(NewVectorArg);
1785 }
1786
1787 // Replace the call with the vector/vector flavour
1788 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1789 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1790
SJW61531372020-06-09 07:31:08 -05001791 std::string NewFName = Builtins::GetMangledFunctionName(
1792 is_smooth ? "smoothstep" : "step", NewFType);
1793
SJW2c317da2020-03-23 07:39:13 -05001794 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1795
1796 SmallVector<Value *, 3> NewArgs;
1797 for (auto arg : SplatArgs) {
1798 NewArgs.push_back(arg);
1799 }
1800 NewArgs.push_back(VectorArg);
1801
1802 return CallInst::Create(NewF, NewArgs, "", CI);
1803 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001804}
1805
SJW2c317da2020-03-23 07:39:13 -05001806bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001807 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1808 auto Arg = CI->getOperand(0);
1809 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001810
SJW2c317da2020-03-23 07:39:13 -05001811 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001812
SJW2c317da2020-03-23 07:39:13 -05001813 return BinaryOperator::Create(Op, Bitcast,
1814 ConstantInt::get(CI->getType(), 31), "", CI);
1815 });
David Neto22f144c2017-06-12 14:26:21 -04001816}
1817
SJW2c317da2020-03-23 07:39:13 -05001818bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1819 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001820 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1821 // The multiply instruction to use.
1822 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001823
SJW2c317da2020-03-23 07:39:13 -05001824 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001825
SJW2c317da2020-03-23 07:39:13 -05001826 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1827 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001828
SJW2c317da2020-03-23 07:39:13 -05001829 if (is_mad) {
1830 // The add instruction to use.
1831 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001832
SJW2c317da2020-03-23 07:39:13 -05001833 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001834 }
David Neto22f144c2017-06-12 14:26:21 -04001835
SJW2c317da2020-03-23 07:39:13 -05001836 return V;
1837 });
David Neto22f144c2017-06-12 14:26:21 -04001838}
1839
SJW2c317da2020-03-23 07:39:13 -05001840bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001841 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1842 Value *V = nullptr;
1843 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001844
SJW2c317da2020-03-23 07:39:13 -05001845 auto data_type = data->getType();
1846 if (!data_type->isVectorTy())
1847 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001848
James Pricecf53df42020-04-20 14:41:24 -04001849 auto vec_data_type = cast<VectorType>(data_type);
1850
alan-baker5a8c3be2020-09-09 13:44:26 -04001851 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001852 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1853 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001854
SJW2c317da2020-03-23 07:39:13 -05001855 auto offset = CI->getOperand(1);
1856 auto ptr = CI->getOperand(2);
1857 auto ptr_type = ptr->getType();
1858 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001859 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001860 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001861
SJW2c317da2020-03-23 07:39:13 -05001862 // Avoid pointer casts. Instead generate the correct number of stores
1863 // and rely on drivers to coalesce appropriately.
1864 IRBuilder<> builder(CI);
1865 auto elems_const = builder.getInt32(elems);
1866 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001867 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001868 auto idx = builder.getInt32(i);
1869 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01001870 auto gep = builder.CreateGEP(
1871 ptr->getType()->getScalarType()->getPointerElementType(), ptr, add);
SJW2c317da2020-03-23 07:39:13 -05001872 auto extract = builder.CreateExtractElement(data, i);
1873 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001874 }
SJW2c317da2020-03-23 07:39:13 -05001875 return V;
1876 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001877}
1878
SJW2c317da2020-03-23 07:39:13 -05001879bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001880 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1881 Value *V = nullptr;
1882 auto ret_type = F.getReturnType();
1883 if (!ret_type->isVectorTy())
1884 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001885
James Pricecf53df42020-04-20 14:41:24 -04001886 auto vec_ret_type = cast<VectorType>(ret_type);
1887
alan-baker5a8c3be2020-09-09 13:44:26 -04001888 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001889 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1890 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001891
SJW2c317da2020-03-23 07:39:13 -05001892 auto offset = CI->getOperand(0);
1893 auto ptr = CI->getOperand(1);
1894 auto ptr_type = ptr->getType();
1895 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001896 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001897 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001898
SJW2c317da2020-03-23 07:39:13 -05001899 // Avoid pointer casts. Instead generate the correct number of loads
1900 // and rely on drivers to coalesce appropriately.
1901 IRBuilder<> builder(CI);
1902 auto elems_const = builder.getInt32(elems);
1903 V = UndefValue::get(ret_type);
1904 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001905 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001906 auto idx = builder.getInt32(i);
1907 auto add = builder.CreateAdd(adjust, idx);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01001908 auto gep = builder.CreateGEP(
1909 ptr_type->getScalarType()->getPointerElementType(), ptr, add);
1910 auto load =
1911 builder.CreateLoad(gep->getType()->getPointerElementType(), gep);
SJW2c317da2020-03-23 07:39:13 -05001912 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001913 }
SJW2c317da2020-03-23 07:39:13 -05001914 return V;
1915 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001916}
1917
SJW2c317da2020-03-23 07:39:13 -05001918bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1919 const std::string &name,
1920 int vec_size) {
1921 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1922 if (!vec_size) {
1923 // deduce vec_size from last character of name (e.g. vload_half4)
1924 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001925 }
SJW2c317da2020-03-23 07:39:13 -05001926 switch (vec_size) {
1927 case 2:
1928 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1929 case 4:
1930 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1931 case 0:
1932 if (!is_clspv_version) {
1933 return replaceVloadHalf(F);
1934 }
Marco Antognini7e338402021-03-15 12:48:37 +00001935 // Fall-through
SJW2c317da2020-03-23 07:39:13 -05001936 default:
1937 llvm_unreachable("Unsupported vload_half vector size");
1938 break;
1939 }
1940 return false;
David Neto22f144c2017-06-12 14:26:21 -04001941}
1942
SJW2c317da2020-03-23 07:39:13 -05001943bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1944 Module &M = *F.getParent();
1945 return replaceCallsWithValue(F, [&](CallInst *CI) {
1946 // The index argument from vload_half.
1947 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001948
SJW2c317da2020-03-23 07:39:13 -05001949 // The pointer argument from vload_half.
1950 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001951
SJW2c317da2020-03-23 07:39:13 -05001952 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001953 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001954 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1955
1956 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001957 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001958
1959 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1960
1961 Value *V = nullptr;
1962
alan-baker7efcaaa2020-05-06 19:33:27 -04001963 bool supports_16bit_storage = true;
1964 switch (Arg1->getType()->getPointerAddressSpace()) {
1965 case clspv::AddressSpace::Global:
1966 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1967 clspv::Option::StorageClass::kSSBO);
1968 break;
1969 case clspv::AddressSpace::Constant:
1970 if (clspv::Option::ConstantArgsInUniformBuffer())
1971 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1972 clspv::Option::StorageClass::kUBO);
1973 else
1974 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1975 clspv::Option::StorageClass::kSSBO);
1976 break;
1977 default:
1978 // Clspv will emit the Float16 capability if the half type is
1979 // encountered. That capability covers private and local addressspaces.
1980 break;
1981 }
1982
1983 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001984 auto ShortTy = Type::getInt16Ty(M.getContext());
1985 auto ShortPointerTy =
1986 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1987
1988 // Cast the half* pointer to short*.
1989 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1990
1991 // Index into the correct address of the casted pointer.
1992 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1993
1994 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001995 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001996
1997 // ZExt the short -> int.
1998 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1999
2000 // Get our float2.
2001 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2002
2003 // Extract out the bottom element which is our float result.
2004 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
2005 } else {
2006 // Assume the pointer argument points to storage aligned to 32bits
2007 // or more.
2008 // TODO(dneto): Do more analysis to make sure this is true?
2009 //
2010 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2011 // with:
2012 //
2013 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2014 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2015 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2016 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2017 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2018 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2019 // x float> %converted, %index_is_odd32
2020
2021 auto IntPointerTy =
2022 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
2023
2024 // Cast the base pointer to int*.
2025 // In a valid call (according to assumptions), this should get
2026 // optimized away in the simplify GEP pass.
2027 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2028
2029 auto One = ConstantInt::get(IntTy, 1);
2030 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2031 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2032
2033 // Index into the correct address of the casted pointer.
2034 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2035
2036 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002037 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05002038
2039 // Get our float2.
2040 auto Call = CallInst::Create(NewF, Load, "", CI);
2041
2042 // Extract out the float result, where the element number is
2043 // determined by whether the original index was even or odd.
2044 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2045 }
2046 return V;
2047 });
2048}
2049
2050bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
2051 Module &M = *F.getParent();
2052 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002053 // The index argument from vload_half.
2054 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002055
Kévin Petite8edce32019-04-10 14:23:32 +01002056 // The pointer argument from vload_half.
2057 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002058
Kévin Petite8edce32019-04-10 14:23:32 +01002059 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002060 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002061 auto NewPointerTy =
2062 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002063 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002064
Kévin Petite8edce32019-04-10 14:23:32 +01002065 // Cast the half* pointer to int*.
2066 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002067
Kévin Petite8edce32019-04-10 14:23:32 +01002068 // Index into the correct address of the casted pointer.
2069 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002070
Kévin Petite8edce32019-04-10 14:23:32 +01002071 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002072 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002073
Kévin Petite8edce32019-04-10 14:23:32 +01002074 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002075 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002076
Kévin Petite8edce32019-04-10 14:23:32 +01002077 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002078
Kévin Petite8edce32019-04-10 14:23:32 +01002079 // Get our float2.
2080 return CallInst::Create(NewF, Load, "", CI);
2081 });
David Neto22f144c2017-06-12 14:26:21 -04002082}
2083
SJW2c317da2020-03-23 07:39:13 -05002084bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
2085 Module &M = *F.getParent();
2086 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002087 // The index argument from vload_half.
2088 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002089
Kévin Petite8edce32019-04-10 14:23:32 +01002090 // The pointer argument from vload_half.
2091 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002092
Kévin Petite8edce32019-04-10 14:23:32 +01002093 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002094 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2095 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002096 auto NewPointerTy =
2097 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002098 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002099
Kévin Petite8edce32019-04-10 14:23:32 +01002100 // Cast the half* pointer to int2*.
2101 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002102
Kévin Petite8edce32019-04-10 14:23:32 +01002103 // Index into the correct address of the casted pointer.
2104 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002105
Kévin Petite8edce32019-04-10 14:23:32 +01002106 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04002107 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002108
Kévin Petite8edce32019-04-10 14:23:32 +01002109 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002110 auto X =
2111 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2112 auto Y =
2113 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002114
Kévin Petite8edce32019-04-10 14:23:32 +01002115 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002116 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002117
Kévin Petite8edce32019-04-10 14:23:32 +01002118 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002119
Kévin Petite8edce32019-04-10 14:23:32 +01002120 // Get the lower (x & y) components of our final float4.
2121 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002122
Kévin Petite8edce32019-04-10 14:23:32 +01002123 // Get the higher (z & w) components of our final float4.
2124 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002125
Kévin Petite8edce32019-04-10 14:23:32 +01002126 Constant *ShuffleMask[4] = {
2127 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2128 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002129
Kévin Petite8edce32019-04-10 14:23:32 +01002130 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002131 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2132 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002133 });
David Neto22f144c2017-06-12 14:26:21 -04002134}
2135
SJW2c317da2020-03-23 07:39:13 -05002136bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002137
2138 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2139 //
2140 // %u = load i32 %ptr
2141 // %fxy = call <2 x float> Unpack2xHalf(u)
2142 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002143 Module &M = *F.getParent();
2144 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002145 auto Index = CI->getOperand(0);
2146 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002147
Kévin Petite8edce32019-04-10 14:23:32 +01002148 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002149 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002150 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002151
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002152 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002153 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002154
Kévin Petite8edce32019-04-10 14:23:32 +01002155 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002156 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002157
Kévin Petite8edce32019-04-10 14:23:32 +01002158 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002159
Kévin Petite8edce32019-04-10 14:23:32 +01002160 // Get our final float2.
2161 return CallInst::Create(NewF, Load, "", CI);
2162 });
David Neto6ad93232018-06-07 15:42:58 -07002163}
2164
SJW2c317da2020-03-23 07:39:13 -05002165bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07002166
2167 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2168 //
2169 // %u2 = load <2 x i32> %ptr
2170 // %u2xy = extractelement %u2, 0
2171 // %u2zw = extractelement %u2, 1
2172 // %fxy = call <2 x float> Unpack2xHalf(uint)
2173 // %fzw = call <2 x float> Unpack2xHalf(uint)
2174 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05002175 Module &M = *F.getParent();
2176 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002177 auto Index = CI->getOperand(0);
2178 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002179
Kévin Petite8edce32019-04-10 14:23:32 +01002180 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002181 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2182 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002183 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002184
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002185 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002186 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002187
Kévin Petite8edce32019-04-10 14:23:32 +01002188 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002189 auto X =
2190 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2191 auto Y =
2192 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002193
Kévin Petite8edce32019-04-10 14:23:32 +01002194 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05002195 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07002196
Kévin Petite8edce32019-04-10 14:23:32 +01002197 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002198
Kévin Petite8edce32019-04-10 14:23:32 +01002199 // Get the lower (x & y) components of our final float4.
2200 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002201
Kévin Petite8edce32019-04-10 14:23:32 +01002202 // Get the higher (z & w) components of our final float4.
2203 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002204
Kévin Petite8edce32019-04-10 14:23:32 +01002205 Constant *ShuffleMask[4] = {
2206 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2207 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002208
Kévin Petite8edce32019-04-10 14:23:32 +01002209 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002210 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2211 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002212 });
David Neto6ad93232018-06-07 15:42:58 -07002213}
2214
SJW2c317da2020-03-23 07:39:13 -05002215bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
2216 switch (vec_size) {
2217 case 0:
2218 return replaceVstoreHalf(F);
2219 case 2:
2220 return replaceVstoreHalf2(F);
2221 case 4:
2222 return replaceVstoreHalf4(F);
2223 default:
2224 llvm_unreachable("Unsupported vstore_half vector size");
2225 break;
2226 }
2227 return false;
2228}
David Neto22f144c2017-06-12 14:26:21 -04002229
SJW2c317da2020-03-23 07:39:13 -05002230bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
2231 Module &M = *F.getParent();
2232 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002233 // The value to store.
2234 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002235
Kévin Petite8edce32019-04-10 14:23:32 +01002236 // The index argument from vstore_half.
2237 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002238
Kévin Petite8edce32019-04-10 14:23:32 +01002239 // The pointer argument from vstore_half.
2240 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002241
Kévin Petite8edce32019-04-10 14:23:32 +01002242 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002243 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002244 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2245 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002246
Kévin Petite8edce32019-04-10 14:23:32 +01002247 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002248 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002249
Kévin Petite8edce32019-04-10 14:23:32 +01002250 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002251
Kévin Petite8edce32019-04-10 14:23:32 +01002252 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002253 auto TempVec = InsertElementInst::Create(
2254 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002255
Kévin Petite8edce32019-04-10 14:23:32 +01002256 // Pack the float2 -> half2 (in an int).
2257 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002258
alan-baker7efcaaa2020-05-06 19:33:27 -04002259 bool supports_16bit_storage = true;
2260 switch (Arg2->getType()->getPointerAddressSpace()) {
2261 case clspv::AddressSpace::Global:
2262 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2263 clspv::Option::StorageClass::kSSBO);
2264 break;
2265 case clspv::AddressSpace::Constant:
2266 if (clspv::Option::ConstantArgsInUniformBuffer())
2267 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2268 clspv::Option::StorageClass::kUBO);
2269 else
2270 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2271 clspv::Option::StorageClass::kSSBO);
2272 break;
2273 default:
2274 // Clspv will emit the Float16 capability if the half type is
2275 // encountered. That capability covers private and local addressspaces.
2276 break;
2277 }
2278
SJW2c317da2020-03-23 07:39:13 -05002279 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002280 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002281 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002282 auto ShortPointerTy =
2283 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002284
Kévin Petite8edce32019-04-10 14:23:32 +01002285 // Truncate our i32 to an i16.
2286 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002287
Kévin Petite8edce32019-04-10 14:23:32 +01002288 // Cast the half* pointer to short*.
2289 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002290
Kévin Petite8edce32019-04-10 14:23:32 +01002291 // Index into the correct address of the casted pointer.
2292 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002293
Kévin Petite8edce32019-04-10 14:23:32 +01002294 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002295 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002296 } else {
2297 // We can only write to 32-bit aligned words.
2298 //
2299 // Assuming base is aligned to 32-bits, replace the equivalent of
2300 // vstore_half(value, index, base)
2301 // with:
2302 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2303 // uint32_t write_to_upper_half = index & 1u;
2304 // uint32_t shift = write_to_upper_half << 4;
2305 //
2306 // // Pack the float value as a half number in bottom 16 bits
2307 // // of an i32.
2308 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2309 //
2310 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2311 // ^ ((packed & 0xffff) << shift)
2312 // // We only need relaxed consistency, but OpenCL 1.2 only has
2313 // // sequentially consistent atomics.
2314 // // TODO(dneto): Use relaxed consistency.
2315 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002316 auto IntPointerTy =
2317 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002318
Kévin Petite8edce32019-04-10 14:23:32 +01002319 auto Four = ConstantInt::get(IntTy, 4);
2320 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002321
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002322 auto IndexIsOdd =
2323 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002324 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002325 auto IndexIntoI32 =
2326 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2327 auto BaseI32Ptr =
2328 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2329 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2330 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002331 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002332 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002333 auto MaskBitsToWrite =
2334 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2335 auto MaskedCurrent = BinaryOperator::CreateAnd(
2336 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002337
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002338 auto XLowerBits =
2339 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2340 auto NewBitsToWrite =
2341 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2342 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2343 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002344
Kévin Petite8edce32019-04-10 14:23:32 +01002345 // Generate the call to atomi_xor.
2346 SmallVector<Type *, 5> ParamTypes;
2347 // The pointer type.
2348 ParamTypes.push_back(IntPointerTy);
2349 // The Types for memory scope, semantics, and value.
2350 ParamTypes.push_back(IntTy);
2351 ParamTypes.push_back(IntTy);
2352 ParamTypes.push_back(IntTy);
2353 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2354 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002355
Kévin Petite8edce32019-04-10 14:23:32 +01002356 const auto ConstantScopeDevice =
2357 ConstantInt::get(IntTy, spv::ScopeDevice);
2358 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2359 // (SPIR-V Workgroup).
2360 const auto AddrSpaceSemanticsBits =
2361 IntPointerTy->getPointerAddressSpace() == 1
2362 ? spv::MemorySemanticsUniformMemoryMask
2363 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002364
Kévin Petite8edce32019-04-10 14:23:32 +01002365 // We're using relaxed consistency here.
2366 const auto ConstantMemorySemantics =
2367 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2368 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002369
Kévin Petite8edce32019-04-10 14:23:32 +01002370 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2371 ConstantMemorySemantics, ValueToXor};
2372 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002373
2374 // Return a Nop so the old Call is removed
2375 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2376 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002377 }
David Neto22f144c2017-06-12 14:26:21 -04002378
SJW2c317da2020-03-23 07:39:13 -05002379 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002380 });
David Neto22f144c2017-06-12 14:26:21 -04002381}
2382
SJW2c317da2020-03-23 07:39:13 -05002383bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2384 Module &M = *F.getParent();
2385 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002386 // The value to store.
2387 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002388
Kévin Petite8edce32019-04-10 14:23:32 +01002389 // The index argument from vstore_half.
2390 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002391
Kévin Petite8edce32019-04-10 14:23:32 +01002392 // The pointer argument from vstore_half.
2393 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002394
Kévin Petite8edce32019-04-10 14:23:32 +01002395 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002396 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002397 auto NewPointerTy =
2398 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002399 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002400
Kévin Petite8edce32019-04-10 14:23:32 +01002401 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002402 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002403
Kévin Petite8edce32019-04-10 14:23:32 +01002404 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002405
Kévin Petite8edce32019-04-10 14:23:32 +01002406 // Turn the packed x & y into the final packing.
2407 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002408
Kévin Petite8edce32019-04-10 14:23:32 +01002409 // Cast the half* pointer to int*.
2410 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002411
Kévin Petite8edce32019-04-10 14:23:32 +01002412 // Index into the correct address of the casted pointer.
2413 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002414
Kévin Petite8edce32019-04-10 14:23:32 +01002415 // Store to the int* we casted to.
2416 return new StoreInst(X, Index, CI);
2417 });
David Neto22f144c2017-06-12 14:26:21 -04002418}
2419
SJW2c317da2020-03-23 07:39:13 -05002420bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2421 Module &M = *F.getParent();
2422 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002423 // The value to store.
2424 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002425
Kévin Petite8edce32019-04-10 14:23:32 +01002426 // The index argument from vstore_half.
2427 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002428
Kévin Petite8edce32019-04-10 14:23:32 +01002429 // The pointer argument from vstore_half.
2430 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002431
Kévin Petite8edce32019-04-10 14:23:32 +01002432 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002433 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2434 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002435 auto NewPointerTy =
2436 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002437 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002438
Kévin Petite8edce32019-04-10 14:23:32 +01002439 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2440 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002441
Kévin Petite8edce32019-04-10 14:23:32 +01002442 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002443 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2444 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002445
Kévin Petite8edce32019-04-10 14:23:32 +01002446 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2447 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002450 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2451 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002452
Kévin Petite8edce32019-04-10 14:23:32 +01002453 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002454 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002455
Kévin Petite8edce32019-04-10 14:23:32 +01002456 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002457
Kévin Petite8edce32019-04-10 14:23:32 +01002458 // Turn the packed x & y into the final component of our int2.
2459 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002460
Kévin Petite8edce32019-04-10 14:23:32 +01002461 // Turn the packed z & w into the final component of our int2.
2462 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002463
Kévin Petite8edce32019-04-10 14:23:32 +01002464 auto Combine = InsertElementInst::Create(
2465 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002466 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2467 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002468
Kévin Petite8edce32019-04-10 14:23:32 +01002469 // Cast the half* pointer to int2*.
2470 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002471
Kévin Petite8edce32019-04-10 14:23:32 +01002472 // Index into the correct address of the casted pointer.
2473 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002474
Kévin Petite8edce32019-04-10 14:23:32 +01002475 // Store to the int2* we casted to.
2476 return new StoreInst(Combine, Index, CI);
2477 });
David Neto22f144c2017-06-12 14:26:21 -04002478}
2479
SJW2c317da2020-03-23 07:39:13 -05002480bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2481 // convert half to float
2482 Module &M = *F.getParent();
2483 return replaceCallsWithValue(F, [&](CallInst *CI) {
2484 SmallVector<Type *, 3> types;
2485 SmallVector<Value *, 3> args;
alan-baker5641f5c2021-10-15 09:16:04 -04002486 for (size_t i = 0; i < CI->arg_size(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05002487 types.push_back(CI->getArgOperand(i)->getType());
2488 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002489 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002490
alan-baker5a8c3be2020-09-09 13:44:26 -04002491 auto NewFType =
2492 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2493 cast<VectorType>(CI->getType())
2494 ->getElementCount()
2495 .getKnownMinValue()),
2496 types, false);
SJW2c317da2020-03-23 07:39:13 -05002497
SJW61531372020-06-09 07:31:08 -05002498 std::string NewFName =
2499 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002500
2501 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2502
2503 auto NewCI = CallInst::Create(NewF, args, "", CI);
2504
2505 // Convert to the half type.
2506 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2507 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002508}
2509
SJW2c317da2020-03-23 07:39:13 -05002510bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2511 // convert half to float
2512 Module &M = *F.getParent();
2513 return replaceCallsWithValue(F, [&](CallInst *CI) {
2514 SmallVector<Type *, 3> types(3);
2515 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002516
SJW2c317da2020-03-23 07:39:13 -05002517 // Image
2518 types[0] = CI->getArgOperand(0)->getType();
2519 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002520
SJW2c317da2020-03-23 07:39:13 -05002521 // Coord
2522 types[1] = CI->getArgOperand(1)->getType();
2523 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002524
SJW2c317da2020-03-23 07:39:13 -05002525 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002526 types[2] =
2527 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2528 cast<VectorType>(CI->getArgOperand(2)->getType())
2529 ->getElementCount()
2530 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002531
SJW2c317da2020-03-23 07:39:13 -05002532 auto NewFType =
2533 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002534
SJW61531372020-06-09 07:31:08 -05002535 std::string NewFName =
2536 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002537
SJW2c317da2020-03-23 07:39:13 -05002538 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002539
SJW2c317da2020-03-23 07:39:13 -05002540 // Convert data to the float type.
2541 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2542 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002543
SJW2c317da2020-03-23 07:39:13 -05002544 return CallInst::Create(NewF, args, "", CI);
2545 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002546}
2547
SJW2c317da2020-03-23 07:39:13 -05002548bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2549 Function &F) {
2550 // convert read_image with int coords to float coords
2551 Module &M = *F.getParent();
2552 return replaceCallsWithValue(F, [&](CallInst *CI) {
2553 // The image.
2554 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002555
SJW2c317da2020-03-23 07:39:13 -05002556 // The sampler.
2557 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002558
SJW2c317da2020-03-23 07:39:13 -05002559 // The coordinate (integer type that we can't handle).
2560 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002561
Romaric Jodin9b353742022-01-25 17:21:02 +01002562 uint32_t dim = clspv::ImageNumDimensions(Arg0->getType());
SJW2c317da2020-03-23 07:39:13 -05002563 uint32_t components =
2564 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2565 Type *float_ty = nullptr;
2566 if (components == 1) {
2567 float_ty = Type::getFloatTy(M.getContext());
2568 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002569 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2570 cast<VectorType>(Arg2->getType())
2571 ->getElementCount()
2572 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002573 }
David Neto22f144c2017-06-12 14:26:21 -04002574
SJW2c317da2020-03-23 07:39:13 -05002575 auto NewFType = FunctionType::get(
2576 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2577
2578 std::string NewFName = F.getName().str();
2579 NewFName[NewFName.length() - 1] = 'f';
2580
2581 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2582
2583 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2584
2585 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2586 });
David Neto22f144c2017-06-12 14:26:21 -04002587}
2588
SJW2c317da2020-03-23 07:39:13 -05002589bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2590 return replaceCallsWithValue(F, [&](CallInst *CI) {
2591 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002592
SJW2c317da2020-03-23 07:39:13 -05002593 // We need to map the OpenCL constants to the SPIR-V equivalents.
2594 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2595 const auto ConstantMemorySemantics = ConstantInt::get(
2596 IntTy, spv::MemorySemanticsUniformMemoryMask |
2597 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002598
SJW2c317da2020-03-23 07:39:13 -05002599 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002600
SJW2c317da2020-03-23 07:39:13 -05002601 // The pointer.
2602 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002603
SJW2c317da2020-03-23 07:39:13 -05002604 // The memory scope.
2605 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002606
SJW2c317da2020-03-23 07:39:13 -05002607 // The memory semantics.
2608 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002609
alan-baker5641f5c2021-10-15 09:16:04 -04002610 if (2 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05002611 // The unequal memory semantics.
2612 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002613
SJW2c317da2020-03-23 07:39:13 -05002614 // The value.
2615 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002616
SJW2c317da2020-03-23 07:39:13 -05002617 // The comparator.
2618 Params.push_back(CI->getArgOperand(1));
alan-baker5641f5c2021-10-15 09:16:04 -04002619 } else if (1 < CI->arg_size()) {
SJW2c317da2020-03-23 07:39:13 -05002620 // The value.
2621 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002622 }
David Neto22f144c2017-06-12 14:26:21 -04002623
SJW2c317da2020-03-23 07:39:13 -05002624 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2625 });
David Neto22f144c2017-06-12 14:26:21 -04002626}
2627
SJW2c317da2020-03-23 07:39:13 -05002628bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2629 llvm::AtomicRMWInst::BinOp Op) {
2630 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002631 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2632 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002633 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002634 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002635 SyncScope::System, CI);
2636 });
2637}
David Neto22f144c2017-06-12 14:26:21 -04002638
SJW2c317da2020-03-23 07:39:13 -05002639bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2640 Module &M = *F.getParent();
2641 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002642 auto IntTy = Type::getInt32Ty(M.getContext());
2643 auto FloatTy = Type::getFloatTy(M.getContext());
2644
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002645 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2646 ConstantInt::get(IntTy, 1),
2647 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002648
2649 Constant *UpShuffleMask[4] = {
2650 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2651 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2652
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002653 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2654 UndefValue::get(FloatTy),
2655 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002656
Kévin Petite8edce32019-04-10 14:23:32 +01002657 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002658 auto Arg0 =
2659 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2660 ConstantVector::get(DownShuffleMask), "", CI);
2661 auto Arg1 =
2662 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2663 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002664 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002665
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002666 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002667 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002668
SJW61531372020-06-09 07:31:08 -05002669 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002670
Kévin Petite8edce32019-04-10 14:23:32 +01002671 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002672
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002673 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2674 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002675 });
David Neto22f144c2017-06-12 14:26:21 -04002676}
David Neto62653202017-10-16 19:05:18 -04002677
SJW2c317da2020-03-23 07:39:13 -05002678bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002679 // OpenCL's float result = fract(float x, float* ptr)
2680 //
2681 // In the LLVM domain:
2682 //
2683 // %floor_result = call spir_func float @floor(float %x)
2684 // store float %floor_result, float * %ptr
2685 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2686 // %result = call spir_func float
2687 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2688 //
2689 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2690 // and clspv.fract occur in the SPIR-V generator pass:
2691 //
2692 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2693 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2694 // ...
2695 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2696 // OpStore %ptr %floor_result
2697 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2698 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002699 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002700
David Neto62653202017-10-16 19:05:18 -04002701 using std::string;
2702
2703 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2704 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002705
SJW2c317da2020-03-23 07:39:13 -05002706 Module &M = *F.getParent();
2707 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002708
SJW2c317da2020-03-23 07:39:13 -05002709 // This is either float or a float vector. All the float-like
2710 // types are this type.
2711 auto result_ty = F.getReturnType();
2712
SJW61531372020-06-09 07:31:08 -05002713 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002714 Function *fmin_fn = M.getFunction(fmin_name);
2715 if (!fmin_fn) {
2716 // Make the fmin function.
2717 FunctionType *fn_ty =
2718 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2719 fmin_fn =
2720 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2721 fmin_fn->addFnAttr(Attribute::ReadNone);
2722 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2723 }
2724
SJW61531372020-06-09 07:31:08 -05002725 std::string floor_name =
2726 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002727 Function *floor_fn = M.getFunction(floor_name);
2728 if (!floor_fn) {
2729 // Make the floor function.
2730 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2731 floor_fn =
2732 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2733 floor_fn->addFnAttr(Attribute::ReadNone);
2734 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2735 }
2736
SJW61531372020-06-09 07:31:08 -05002737 std::string clspv_fract_name =
2738 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002739 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2740 if (!clspv_fract_fn) {
2741 // Make the clspv_fract function.
2742 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2743 clspv_fract_fn = cast<Function>(
2744 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2745 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2746 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2747 }
2748
2749 // Number of significant significand bits, whether represented or not.
2750 unsigned num_significand_bits;
2751 switch (result_ty->getScalarType()->getTypeID()) {
2752 case Type::HalfTyID:
2753 num_significand_bits = 11;
2754 break;
2755 case Type::FloatTyID:
2756 num_significand_bits = 24;
2757 break;
2758 case Type::DoubleTyID:
2759 num_significand_bits = 53;
2760 break;
2761 default:
2762 llvm_unreachable("Unhandled float type when processing fract builtin");
2763 break;
2764 }
2765 // Beware that the disassembler displays this value as
2766 // OpConstant %float 1
2767 // which is not quite right.
2768 const double kJustUnderOneScalar =
2769 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2770
2771 Constant *just_under_one =
2772 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2773 if (result_ty->isVectorTy()) {
2774 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002775 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002776 }
2777
2778 IRBuilder<> Builder(CI);
2779
2780 auto arg = CI->getArgOperand(0);
2781 auto ptr = CI->getArgOperand(1);
2782
2783 // Compute floor result and store it.
2784 auto floor = Builder.CreateCall(floor_fn, {arg});
2785 Builder.CreateStore(floor, ptr);
2786
2787 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2788 auto fract_result =
2789 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2790
2791 return fract_result;
2792 });
David Neto62653202017-10-16 19:05:18 -04002793}
alan-bakera52b7312020-10-26 08:58:51 -04002794
Kévin Petit8576f682020-11-02 14:51:32 +00002795bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002796 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002797 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002798 // a_shr = a >> 1
2799 // b_shr = b >> 1
2800 // add1 = a_shr + b_shr
2801 // join = a |join_opcode| b
2802 // and = join & 1
2803 // add = add1 + and
2804 const auto a = Call->getArgOperand(0);
2805 const auto b = Call->getArgOperand(1);
2806 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002807 Value *a_shift, *b_shift;
2808 if (is_signed) {
2809 a_shift = builder.CreateAShr(a, 1);
2810 b_shift = builder.CreateAShr(b, 1);
2811 } else {
2812 a_shift = builder.CreateLShr(a, 1);
2813 b_shift = builder.CreateLShr(b, 1);
2814 }
alan-bakerb6da5132020-10-29 15:59:06 -04002815 auto add = builder.CreateAdd(a_shift, b_shift);
2816 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2817 auto constant_one = ConstantInt::get(a->getType(), 1);
2818 auto and_bit = builder.CreateAnd(join, constant_one);
2819 return builder.CreateAdd(add, and_bit);
2820 });
2821}
2822
alan-baker3f1bf492020-11-05 09:07:36 -05002823bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2824 bool is_add) {
2825 return replaceCallsWithValue(F, [&F, this, is_signed,
2826 is_add](CallInst *Call) {
2827 auto ty = Call->getType();
2828 auto a = Call->getArgOperand(0);
2829 auto b = Call->getArgOperand(1);
2830 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002831 if (is_signed) {
2832 unsigned bitwidth = ty->getScalarSizeInBits();
2833 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002834 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01002835 if (clspv::Option::HackClampWidth() && extended_width < 32) {
2836 extended_width = 32;
2837 }
alan-baker3f1bf492020-11-05 09:07:36 -05002838 Type *extended_ty =
2839 IntegerType::get(Call->getContext(), extended_width);
2840 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002841 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002842 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2843 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002844 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002845 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04002846 // Don't use the type in GetMangledFunctionName to ensure we get
2847 // signed parameters.
2848 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04002849 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05002850 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2851 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2852 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2853 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2854 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04002855 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04002856 } else {
2857 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2858 }
alan-baker3f1bf492020-11-05 09:07:36 -05002859 } else {
2860 if (extended_width == 32) {
2861 sclamp_name += "iii";
2862 } else {
2863 sclamp_name += "sss";
2864 }
alan-bakera52b7312020-10-26 08:58:51 -04002865 }
alan-baker3f1bf492020-11-05 09:07:36 -05002866
2867 auto sext_a = builder.CreateSExt(a, extended_ty);
2868 auto sext_b = builder.CreateSExt(b, extended_ty);
2869 Value *op = nullptr;
2870 // Extended operations won't wrap.
2871 if (is_add)
2872 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
2873 else
2874 op = builder.CreateSub(sext_a, sext_b, "", true, true);
2875 auto clamp_ty = FunctionType::get(
2876 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2877 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
2878 auto clamp = builder.CreateCall(callee, {op, min, max});
2879 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04002880 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002881 // Add:
2882 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04002883 // if (b < 0)
2884 // c = c > a ? min : c;
2885 // else
alan-baker3f1bf492020-11-05 09:07:36 -05002886 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04002887 //
alan-baker3f1bf492020-11-05 09:07:36 -05002888 // Sub:
2889 // c = a - b;
2890 // if (b < 0)
2891 // c = c < a ? max : c;
2892 // else
2893 // c = c > a ? min : c;
2894 Constant *min = ConstantInt::get(Call->getContext(),
2895 APInt::getSignedMinValue(bitwidth));
2896 Constant *max = ConstantInt::get(Call->getContext(),
2897 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04002898 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2899 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2900 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2901 }
alan-baker3f1bf492020-11-05 09:07:36 -05002902 Value *op = nullptr;
2903 if (is_add) {
2904 op = builder.CreateAdd(a, b);
2905 } else {
2906 op = builder.CreateSub(a, b);
2907 }
2908 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
2909 auto op_gt_a = builder.CreateICmpSGT(op, a);
2910 auto op_lt_a = builder.CreateICmpSLT(op, a);
2911 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
2912 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
2913 auto neg_value = is_add ? min : max;
2914 auto pos_value = is_add ? max : min;
2915 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
2916 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
2917 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04002918 }
2919 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002920 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
2921 // carr/borrow.
2922 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
2923 auto clamp_value =
2924 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
2925 auto struct_ty = GetPairStruct(ty);
2926 auto call =
2927 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
2928 auto add_sub = builder.CreateExtractValue(call, {0});
2929 auto carry_borrow = builder.CreateExtractValue(call, {1});
2930 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
2931 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04002932 }
alan-bakera52b7312020-10-26 08:58:51 -04002933 });
2934}
alan-baker4986eff2020-10-29 13:38:00 -04002935
2936bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2937 return replaceCallsWithValue(F, [](CallInst *Call) {
2938 auto pointer = Call->getArgOperand(0);
2939 // Clang emits an address space cast to the generic address space. Skip the
2940 // cast and use the input directly.
2941 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2942 pointer = cast->getPointerOperand();
2943 }
alan-baker5641f5c2021-10-15 09:16:04 -04002944 Value *order_arg = Call->arg_size() > 1 ? Call->getArgOperand(1) : nullptr;
2945 Value *scope_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04002946 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2947 clspv::AddressSpace::Global;
2948 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2949 spv::MemorySemanticsAcquireMask);
2950 auto scope = MemoryScope(scope_arg, is_global, Call);
2951 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2952 Call->getType(), {pointer, scope, order});
2953 });
2954}
2955
2956bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2957 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2958 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2959 auto pointer = Call->getArgOperand(0);
2960 // Clang emits an address space cast to the generic address space. Skip the
2961 // cast and use the input directly.
2962 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2963 pointer = cast->getPointerOperand();
2964 }
2965 Value *value = Call->getArgOperand(1);
alan-baker5641f5c2021-10-15 09:16:04 -04002966 Value *order_arg = Call->arg_size() > 2 ? Call->getArgOperand(2) : nullptr;
2967 Value *scope_arg = Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04002968 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2969 clspv::AddressSpace::Global;
2970 auto scope = MemoryScope(scope_arg, is_global, Call);
2971 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2972 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2973 {pointer, scope, order, value});
2974 });
2975}
2976
2977bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2978 return replaceCallsWithValue(F, [](CallInst *Call) {
2979 auto pointer = Call->getArgOperand(0);
2980 // Clang emits an address space cast to the generic address space. Skip the
2981 // cast and use the input directly.
2982 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2983 pointer = cast->getPointerOperand();
2984 }
2985 auto expected = Call->getArgOperand(1);
2986 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2987 expected = cast->getPointerOperand();
2988 }
2989 auto value = Call->getArgOperand(2);
2990 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2991 clspv::AddressSpace::Global;
2992 Value *success_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04002993 Call->arg_size() > 3 ? Call->getArgOperand(3) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04002994 Value *failure_arg =
alan-baker5641f5c2021-10-15 09:16:04 -04002995 Call->arg_size() > 4 ? Call->getArgOperand(4) : nullptr;
2996 Value *scope_arg = Call->arg_size() > 5 ? Call->getArgOperand(5) : nullptr;
alan-baker4986eff2020-10-29 13:38:00 -04002997 auto scope = MemoryScope(scope_arg, is_global, Call);
2998 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2999 spv::MemorySemanticsAcquireReleaseMask);
3000 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
3001 spv::MemorySemanticsAcquireMask);
3002
3003 // If the value pointed to by |expected| equals the value pointed to by
3004 // |pointer|, |value| is written into |pointer|, otherwise the value in
3005 // |pointer| is written into |expected|. In order to avoid extra stores,
3006 // the basic block with the original atomic is split and the store is
3007 // performed in the |then| block. The condition is the inversion of the
3008 // comparison result.
3009 IRBuilder<> builder(Call);
Benjamin Kramer7a16f3b2021-12-06 18:46:23 +01003010 auto load = builder.CreateLoad(expected->getType()->getPointerElementType(),
3011 expected);
alan-baker4986eff2020-10-29 13:38:00 -04003012 auto cmp_xchg = InsertSPIRVOp(
3013 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
3014 value->getType(), {pointer, scope, success, failure, value, load});
3015 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
3016 auto not_cmp = builder.CreateNot(cmp);
3017 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
3018 builder.SetInsertPoint(then_branch);
3019 builder.CreateStore(cmp_xchg, expected);
3020 return cmp;
3021 });
3022}
alan-bakercc2bafb2020-11-02 08:30:18 -05003023
alan-baker2cecaa72020-11-05 14:05:20 -05003024bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05003025 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3026 return false;
3027
3028 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05003029 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05003030 return false;
3031
alan-baker5f2e88e2020-12-07 15:24:04 -05003032 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
3033 Function *intrinsic = Intrinsic::getDeclaration(
3034 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
3035 Call->getType());
3036 const auto c_false = ConstantInt::getFalse(Call->getContext());
3037 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
3038 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05003039 });
3040}
alan-baker6b9d1ee2020-11-03 23:11:32 -05003041
3042bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
3043 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
3044 const auto ty = Call->getType();
3045 const auto a = Call->getArgOperand(0);
3046 const auto b = Call->getArgOperand(1);
3047 const auto c = Call->getArgOperand(2);
3048 IRBuilder<> builder(Call);
3049 if (is_signed) {
3050 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
3051 if (bitwidth < 32) {
3052 // mul = sext(a) * sext(b)
3053 // add = mul + sext(c)
3054 // res = clamp(add, MIN, MAX)
3055 unsigned extended_width = bitwidth << 1;
Romaric Jodin73ef1be2022-01-25 17:21:22 +01003056 if (clspv::Option::HackClampWidth() && extended_width < 32) {
3057 extended_width = 32;
3058 }
alan-baker6b9d1ee2020-11-03 23:11:32 -05003059 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
3060 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3061 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
3062 }
3063 auto a_sext = builder.CreateSExt(a, extended_ty);
3064 auto b_sext = builder.CreateSExt(b, extended_ty);
3065 auto c_sext = builder.CreateSExt(c, extended_ty);
3066 // Extended the size so no overflows occur.
3067 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
3068 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
3069 auto func_ty = FunctionType::get(
3070 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
3071 // Don't use function type because we need signed parameters.
3072 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
3073 // The clamp values are the signed min and max of the original bitwidth
3074 // sign extended to the extended bitwidth.
3075 Constant *min = ConstantInt::get(
3076 Call->getContext(),
3077 APInt::getSignedMinValue(bitwidth).sext(extended_width));
3078 Constant *max = ConstantInt::get(
3079 Call->getContext(),
3080 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
3081 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3082 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3083 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3084 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
3085 if (extended_width == 32)
3086 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
3087 else
3088 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
3089 } else {
3090 if (extended_width == 32)
3091 clamp_name += "iii";
3092 else
3093 clamp_name += "sss";
3094 }
3095 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
3096 auto clamp = builder.CreateCall(callee, {add, min, max});
3097 return builder.CreateTrunc(clamp, ty);
3098 } else {
3099 auto struct_ty = GetPairStruct(ty);
3100 // Compute
3101 // {hi, lo} = smul_extended(a, b)
3102 // add = lo + c
3103 auto mul_ext = InsertSPIRVOp(Call, spv::OpSMulExtended,
3104 {Attribute::ReadNone}, struct_ty, {a, b});
3105 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3106 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3107 auto add = builder.CreateAdd(mul_lo, c);
3108
3109 // Constants for use in the calculation.
3110 Constant *min = ConstantInt::get(Call->getContext(),
3111 APInt::getSignedMinValue(bitwidth));
3112 Constant *max = ConstantInt::get(Call->getContext(),
3113 APInt::getSignedMaxValue(bitwidth));
3114 Constant *max_plus_1 = ConstantInt::get(
3115 Call->getContext(),
3116 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
3117 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3118 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
3119 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
3120 max_plus_1 =
3121 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
3122 }
3123
3124 auto a_xor_b = builder.CreateXor(a, b);
3125 auto same_sign =
3126 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
3127 auto different_sign = builder.CreateNot(same_sign);
3128 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
3129 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
3130 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
3131 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
3132 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
3133 auto add_gt_max = builder.CreateICmpUGT(add, max);
3134 auto hi_eq_m1 =
3135 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
3136 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
3137 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
3138 auto max_sub_lo = builder.CreateSub(max, mul_lo);
3139 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
3140
3141 // Equivalent to:
3142 // if (((x < 0) == (y < 0)) && mul_hi != 0)
3143 // return MAX
3144 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
3145 // return MAX
3146 // if (((x < 0) != (y < 0)) && mul_hi != -1)
3147 // return MIN
3148 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
3149 // return MIN
3150 // return add
3151 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
3152 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
3153 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
3154 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
3155 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
3156 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
3157 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
3158 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
3159 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
3160 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
3161 auto sel = builder.CreateSelect(min_clamp, min, add);
3162 return builder.CreateSelect(max_clamp, max, sel);
3163 }
3164 } else {
3165 // {lo, hi} = mul_extended(a, b)
3166 // {add, carry} = add_carry(lo, c)
3167 // cmp = (mul_hi | carry) == 0
3168 // mad_sat = cmp ? add : MAX
3169 auto struct_ty = GetPairStruct(ty);
3170 auto mul_ext = InsertSPIRVOp(Call, spv::OpUMulExtended,
3171 {Attribute::ReadNone}, struct_ty, {a, b});
3172 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
3173 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
3174 auto add_carry =
3175 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
3176 struct_ty, {mul_lo, c});
3177 auto add = builder.CreateExtractValue(add_carry, {0});
3178 auto carry = builder.CreateExtractValue(add_carry, {1});
3179 auto or_value = builder.CreateOr(mul_hi, carry);
3180 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
3181 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
3182 }
3183 });
3184}
alan-baker15106572020-11-06 15:08:10 -05003185
3186bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
3187 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
3188 return false;
3189
3190 if (F.getFunctionType()->getNumParams() != 2)
3191 return false;
3192
3193 if (F.getFunctionType()->getParamType(0) !=
3194 F.getFunctionType()->getParamType(1)) {
3195 return false;
3196 }
3197
3198 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3199 case Type::FloatTyID:
3200 case Type::HalfTyID:
3201 case Type::DoubleTyID:
3202 break;
3203 default:
3204 return false;
3205 }
3206
3207 // Scalar versions all return an int, while vector versions return a vector
3208 // of an equally sized integer types (e.g. short, int or long).
3209 if (isa<VectorType>(F.getReturnType())) {
3210 if (F.getReturnType()->getScalarSizeInBits() !=
3211 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3212 return false;
3213 }
3214 } else {
3215 if (F.getReturnType()->getScalarSizeInBits() != 32)
3216 return false;
3217 }
3218
3219 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3220 // Replace with a floating point [un]ordered comparison followed by an
3221 // extension.
3222 auto x = Call->getArgOperand(0);
3223 auto y = Call->getArgOperand(1);
3224 IRBuilder<> builder(Call);
3225 Value *tmp = nullptr;
3226 if (is_ordered) {
3227 // This leads to a slight inefficiency in the SPIR-V that is easy for
3228 // drivers to optimize where the SPIR-V for the comparison and the
3229 // extension could be fused to drop the inversion of the OpIsNan.
3230 tmp = builder.CreateFCmpORD(x, y);
3231 } else {
3232 tmp = builder.CreateFCmpUNO(x, y);
3233 }
3234 // OpenCL CTS requires that vector versions use sign extension, but scalar
3235 // versions use zero extension.
3236 if (isa<VectorType>(Call->getType()))
3237 return builder.CreateSExt(tmp, Call->getType());
3238 return builder.CreateZExt(tmp, Call->getType());
3239 });
3240}
alan-baker497920b2020-11-09 16:41:36 -05003241
3242bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
3243 return replaceCallsWithValue(F, [this](CallInst *Call) {
3244 auto ty = Call->getType();
3245 auto x = Call->getArgOperand(0);
3246 unsigned width = x->getType()->getScalarSizeInBits();
3247 Type *int_ty = IntegerType::get(Call->getContext(), width);
3248 uint64_t abs_mask = 0x7fffffff;
3249 uint64_t exp_mask = 0x7f800000;
3250 uint64_t min_mask = 0x00800000;
3251 if (width == 16) {
3252 abs_mask = 0x7fff;
3253 exp_mask = 0x7c00;
3254 min_mask = 0x0400;
3255 } else if (width == 64) {
3256 abs_mask = 0x7fffffffffffffff;
3257 exp_mask = 0x7ff0000000000000;
3258 min_mask = 0x0010000000000000;
3259 }
3260 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3261 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3262 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3263 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3264 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3265 abs_const =
3266 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3267 exp_const =
3268 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3269 min_const =
3270 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3271 }
3272 // Drop the sign bit and then check that the number is between
3273 // (exclusive) the min and max exponent values for the bit width.
3274 IRBuilder<> builder(Call);
3275 auto bitcast = builder.CreateBitCast(x, int_ty);
3276 auto abs = builder.CreateAnd(bitcast, abs_const);
3277 auto lt = builder.CreateICmpULT(abs, exp_const);
3278 auto ge = builder.CreateICmpUGE(abs, min_const);
3279 auto tmp = builder.CreateAnd(lt, ge);
3280 // OpenCL CTS requires that vector versions use sign extension, but scalar
3281 // versions use zero extension.
3282 if (isa<VectorType>(ty))
3283 return builder.CreateSExt(tmp, ty);
3284 return builder.CreateZExt(tmp, ty);
3285 });
3286}
alan-bakere0406e72020-11-10 12:32:04 -05003287
3288bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3289 return replaceCallsWithValue(F, [](CallInst *Call) {
3290 const auto x = Call->getArgOperand(0);
3291 const auto y = Call->getArgOperand(1);
3292 IRBuilder<> builder(Call);
3293 auto sub = builder.CreateFSub(x, y);
3294 auto cmp = builder.CreateFCmpUGT(x, y);
3295 return builder.CreateSelect(cmp, sub,
3296 Constant::getNullValue(Call->getType()));
3297 });
3298}
alan-baker3e0de472020-12-08 15:57:17 -05003299
3300bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3301 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3302 const auto x = Call->getArgOperand(0);
3303 const double c_halfway = 0.5;
3304 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3305
3306 const auto clspv_fract_name =
3307 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3308 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3309 if (!clspv_fract_fn) {
3310 // Make the clspv_fract function.
3311 clspv_fract_fn = cast<Function>(
3312 F.getParent()
3313 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3314 .getCallee());
3315 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3316 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3317 }
3318
3319 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3320 Call->getType());
3321 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3322 Call->getType());
3323 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3324 Call->getType());
3325 auto copysign = Intrinsic::getDeclaration(
3326 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3327
3328 IRBuilder<> builder(Call);
3329
3330 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3331 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3332 auto floor_call =
3333 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3334 auto fract_call =
3335 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3336 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3337 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3338 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3339 });
3340}
3341
3342bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3343 Builtins::BuiltinType type) {
3344 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3345 const auto x = Call->getArgOperand(0);
3346 const double k_pi = 0x1.921fb54442d18p+1;
3347 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3348
3349 IRBuilder<> builder(Call);
3350 auto mul = builder.CreateFMul(x, pi);
3351 switch (type) {
3352 case Builtins::kSinpi: {
3353 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3354 x->getType());
3355 return builder.CreateCall(func->getFunctionType(), func, {mul});
3356 }
3357 case Builtins::kCospi: {
3358 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3359 x->getType());
3360 return builder.CreateCall(func->getFunctionType(), func, {mul});
3361 }
3362 case Builtins::kTanpi: {
3363 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3364 x->getType());
3365 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
3366 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3367 x->getType());
3368 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
3369 return builder.CreateFDiv(sin_call, cos_call);
3370 }
3371 default:
3372 llvm_unreachable("unexpected builtin");
3373 break;
3374 }
3375 return nullptr;
3376 });
3377}
alan-baker8b968112020-12-15 15:53:29 -05003378
3379bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
3380 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3381 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3382 Call->getType());
3383 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3384 Call->getType());
3385
3386 IRBuilder<> builder(Call);
3387 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
3388 {Call->getArgOperand(0)});
3389 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
3390 {Call->getArgOperand(0)});
3391 builder.CreateStore(cos, Call->getArgOperand(1));
3392 return sin;
3393 });
3394}
3395
3396bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
3397 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3398 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
3399 Call->getType());
3400
3401 IRBuilder<> builder(Call);
3402 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
3403 {Call->getArgOperand(0)});
3404 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
3405 });
3406}
3407
3408bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
3409 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3410 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
3411 Call->getType());
3412
3413 IRBuilder<> builder(Call);
3414 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
3415 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
3416 {Call->getArgOperand(0), conv});
3417 });
3418}