blob: 8dbc5cde321fe2168a4d35a3735318090687ee52 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
// NOTE: despite its name, this does not count leading zeros. It returns the
// zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)). Both 0 and 1 produce 0.
uint32_t clz(uint32_t v) {
  uint32_t msb_index = 0;

  // Binary search over the bit width: whenever something is set at or above
  // |width| bits, drop the low |width| bits and record that shift.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      msb_index |= width;
    }
  }

  return msb_index;
}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
alan-baker36309f92021-02-05 12:28:03 -050081 spv::MemorySemanticsMask base_semantics,
82 bool include_storage = true) {
alan-baker4986eff2020-10-29 13:38:00 -040083 enum AtomicMemoryOrder : uint32_t {
84 kMemoryOrderRelaxed = 0,
85 kMemoryOrderAcquire = 2,
86 kMemoryOrderRelease = 3,
87 kMemoryOrderAcqRel = 4,
88 kMemoryOrderSeqCst = 5
89 };
90
91 IRBuilder<> builder(InsertBefore);
92
93 // Constants for OpenCL C 2.0 memory_order.
94 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
95 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
96 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
97 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
98
99 // Constants for SPIR-V ordering memory semantics.
100 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
101 const auto AcquireSemantics =
102 builder.getInt32(spv::MemorySemanticsAcquireMask);
103 const auto ReleaseSemantics =
104 builder.getInt32(spv::MemorySemanticsReleaseMask);
105 const auto AcqRelSemantics =
106 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
107
108 // Constants for SPIR-V storage class semantics.
109 const auto UniformSemantics =
110 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
111 const auto WorkgroupSemantics =
112 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
113
114 // Instead of sequentially consistent, use acquire, release or acquire
115 // release semantics.
116 Value *base_order = nullptr;
117 switch (base_semantics) {
118 case spv::MemorySemanticsAcquireMask:
119 base_order = AcquireSemantics;
120 break;
121 case spv::MemorySemanticsReleaseMask:
122 base_order = ReleaseSemantics;
123 break;
124 default:
125 base_order = AcqRelSemantics;
126 break;
127 }
128
129 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
alan-baker36309f92021-02-05 12:28:03 -0500130 if (order == nullptr) {
131 if (include_storage)
132 return builder.CreateOr({storage, base_order});
133 else
134 return base_order;
135 }
alan-baker4986eff2020-10-29 13:38:00 -0400136
137 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
138 auto is_acquire = builder.CreateICmpEQ(order, acquire);
139 auto is_release = builder.CreateICmpEQ(order, release);
140 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
141 auto semantics =
142 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
143 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
144 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
145 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
alan-baker36309f92021-02-05 12:28:03 -0500146 if (include_storage)
147 return builder.CreateOr({storage, semantics});
148 else
149 return semantics;
alan-baker4986eff2020-10-29 13:38:00 -0400150}
151
152Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
153 enum AtomicMemoryScope : uint32_t {
154 kMemoryScopeWorkItem = 0,
155 kMemoryScopeWorkGroup = 1,
156 kMemoryScopeDevice = 2,
157 kMemoryScopeAllSVMDevices = 3, // not supported
158 kMemoryScopeSubGroup = 4
159 };
160
161 IRBuilder<> builder(InsertBefore);
162
163 // Constants for OpenCL C 2.0 memory_scope.
164 const auto work_item =
165 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
166 const auto work_group =
167 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
168 const auto sub_group =
169 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
170 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
171
172 // Constants for SPIR-V memory scopes.
173 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
174 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
175 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
176 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
177
178 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
179 if (scope == nullptr)
180 return base_scope;
181
182 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
183 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
184 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
185 auto is_device = builder.CreateICmpEQ(scope, device);
186
187 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
188 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
189 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
190 scope = builder.CreateSelect(is_device, DeviceScope, scope);
191
192 return scope;
193}
194
SJW2c317da2020-03-23 07:39:13 -0500195bool replaceCallsWithValue(Function &F,
196 std::function<Value *(CallInst *)> Replacer) {
197
198 bool Changed = false;
199
200 SmallVector<Instruction *, 4> ToRemoves;
201
202 // Walk the users of the function.
203 for (auto &U : F.uses()) {
204 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
205
206 auto NewValue = Replacer(CI);
207
208 if (NewValue != nullptr) {
209 CI->replaceAllUsesWith(NewValue);
210
211 // Lastly, remember to remove the user.
212 ToRemoves.push_back(CI);
213 }
214 }
215 }
216
217 Changed = !ToRemoves.empty();
218
219 // And cleanup the calls we don't use anymore.
220 for (auto V : ToRemoves) {
221 V->eraseFromParent();
222 }
223
224 return Changed;
225}
226
David Neto22f144c2017-06-12 14:26:21 -0400227struct ReplaceOpenCLBuiltinPass final : public ModulePass {
228 static char ID;
229 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
230
231 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500232
233private:
SJW2c317da2020-03-23 07:39:13 -0500234 bool runOnFunction(Function &F);
235 bool replaceAbs(Function &F);
236 bool replaceAbsDiff(Function &F, bool is_signed);
237 bool replaceCopysign(Function &F);
238 bool replaceRecip(Function &F);
239 bool replaceDivide(Function &F);
240 bool replaceDot(Function &F);
241 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500242 bool replaceExp10(Function &F, const std::string &basename);
243 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100244 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400245 bool replaceBarrier(Function &F, bool subgroup = false);
alan-baker36309f92021-02-05 12:28:03 -0500246 bool replaceMemFence(Function &F, spv::MemorySemanticsMask semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100247 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500248 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500249 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
250 bool replaceIsFinite(Function &F);
251 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
252 bool replaceUpsample(Function &F);
253 bool replaceRotate(Function &F);
254 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
255 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
256 bool replaceSelect(Function &F);
257 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500258 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500259 bool replaceSignbit(Function &F, bool is_vec);
260 bool replaceMul(Function &F, bool is_float, bool is_mad);
261 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
262 bool replaceVloadHalf(Function &F);
263 bool replaceVloadHalf2(Function &F);
264 bool replaceVloadHalf4(Function &F);
265 bool replaceClspvVloadaHalf2(Function &F);
266 bool replaceClspvVloadaHalf4(Function &F);
267 bool replaceVstoreHalf(Function &F, int vec_size);
268 bool replaceVstoreHalf(Function &F);
269 bool replaceVstoreHalf2(Function &F);
270 bool replaceVstoreHalf4(Function &F);
271 bool replaceHalfReadImage(Function &F);
272 bool replaceHalfWriteImage(Function &F);
273 bool replaceSampledReadImageWithIntCoords(Function &F);
274 bool replaceAtomics(Function &F, spv::Op Op);
275 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400276 bool replaceAtomicLoad(Function &F);
277 bool replaceExplicitAtomics(Function &F, spv::Op Op,
278 spv::MemorySemanticsMask semantics =
279 spv::MemorySemanticsAcquireReleaseMask);
280 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500281 bool replaceCross(Function &F);
282 bool replaceFract(Function &F, int vec_size);
283 bool replaceVload(Function &F);
284 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500285 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000286 bool replaceHadd(Function &F, bool is_signed,
287 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500288 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500289 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500290 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500291 bool replaceIsNormal(Function &F);
alan-bakere0406e72020-11-10 12:32:04 -0500292 bool replaceFDim(Function &F);
alan-baker3e0de472020-12-08 15:57:17 -0500293 bool replaceRound(Function &F);
294 bool replaceTrigPi(Function &F, Builtins::BuiltinType type);
alan-baker8b968112020-12-15 15:53:29 -0500295 bool replaceSincos(Function &F);
296 bool replaceExpm1(Function &F);
297 bool replacePown(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500298
299 // Caches struct types for { |type|, |type| }. This prevents
300 // getOrInsertFunction from introducing a bitcasts between structs with
301 // identical contents.
302 Type *GetPairStruct(Type *type);
303
304 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400305};
SJW2c317da2020-03-23 07:39:13 -0500306
Kévin Petit91bc72e2019-04-08 15:17:46 +0100307} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400308
309char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400310INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
311 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400312
313namespace clspv {
314ModulePass *createReplaceOpenCLBuiltinPass() {
315 return new ReplaceOpenCLBuiltinPass();
316}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400317} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400318
319bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500320 std::list<Function *> func_list;
321 for (auto &F : M.getFunctionList()) {
322 // process only function declarations
323 if (F.isDeclaration() && runOnFunction(F)) {
324 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000325 }
326 }
SJW2c317da2020-03-23 07:39:13 -0500327 if (func_list.size() != 0) {
328 // recursively convert functions, but first remove dead
329 for (auto *F : func_list) {
330 if (F->use_empty()) {
331 F->eraseFromParent();
332 }
333 }
334 runOnModule(M);
335 return true;
336 }
337 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000338}
339
SJW2c317da2020-03-23 07:39:13 -0500340bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
341 auto &FI = Builtins::Lookup(&F);
342 switch (FI.getType()) {
343 case Builtins::kAbs:
344 if (!FI.getParameter(0).is_signed) {
345 return replaceAbs(F);
346 }
347 break;
348 case Builtins::kAbsDiff:
349 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400350
351 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500352 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400353
alan-bakercc2bafb2020-11-02 08:30:18 -0500354 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500355 return replaceCountZeroes(F, true);
356
357 case Builtins::kCtz:
358 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500359
alan-bakerb6da5132020-10-29 15:59:06 -0400360 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000361 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400362 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000363 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400364
SJW2c317da2020-03-23 07:39:13 -0500365 case Builtins::kCopysign:
366 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100367
SJW2c317da2020-03-23 07:39:13 -0500368 case Builtins::kHalfRecip:
369 case Builtins::kNativeRecip:
370 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100371
SJW2c317da2020-03-23 07:39:13 -0500372 case Builtins::kHalfDivide:
373 case Builtins::kNativeDivide:
374 return replaceDivide(F);
375
376 case Builtins::kDot:
377 return replaceDot(F);
378
379 case Builtins::kExp10:
380 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500381 case Builtins::kNativeExp10:
382 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500383
alan-baker8b968112020-12-15 15:53:29 -0500384 case Builtins::kExpm1:
385 return replaceExpm1(F);
386
SJW2c317da2020-03-23 07:39:13 -0500387 case Builtins::kLog10:
388 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500389 case Builtins::kNativeLog10:
390 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500391
gnl21636e7992020-09-09 16:08:16 +0100392 case Builtins::kLog1p:
393 return replaceLog1p(F);
394
alan-bakere0406e72020-11-10 12:32:04 -0500395 case Builtins::kFdim:
396 return replaceFDim(F);
397
SJW2c317da2020-03-23 07:39:13 -0500398 case Builtins::kFmod:
399 return replaceFmod(F);
400
alan-baker8b968112020-12-15 15:53:29 -0500401 case Builtins::kPown:
402 return replacePown(F);
403
alan-baker3e0de472020-12-08 15:57:17 -0500404 case Builtins::kRound:
405 return replaceRound(F);
406
407 case Builtins::kCospi:
408 case Builtins::kSinpi:
409 case Builtins::kTanpi:
410 return replaceTrigPi(F, FI.getType());
411
alan-baker8b968112020-12-15 15:53:29 -0500412 case Builtins::kSincos:
413 return replaceSincos(F);
414
SJW2c317da2020-03-23 07:39:13 -0500415 case Builtins::kBarrier:
416 case Builtins::kWorkGroupBarrier:
417 return replaceBarrier(F);
418
alan-baker12d2c182020-07-20 08:22:42 -0400419 case Builtins::kSubGroupBarrier:
420 return replaceBarrier(F, true);
421
alan-baker36309f92021-02-05 12:28:03 -0500422 case Builtins::kAtomicWorkItemFence:
423 return replaceMemFence(F, spv::MemorySemanticsMaskNone);
SJW2c317da2020-03-23 07:39:13 -0500424 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400425 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500426 case Builtins::kReadMemFence:
427 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
428 case Builtins::kWriteMemFence:
429 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
430
431 // Relational
432 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500433 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500434 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500435 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500436 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500437 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500438 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500439 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500440 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500441 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500442 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500443 return replaceRelational(F, CmpInst::FCMP_UNE);
444 case Builtins::kIslessgreater:
445 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500446
alan-baker15106572020-11-06 15:08:10 -0500447 case Builtins::kIsordered:
448 return replaceOrdered(F, true);
449
450 case Builtins::kIsunordered:
451 return replaceOrdered(F, false);
452
SJW2c317da2020-03-23 07:39:13 -0500453 case Builtins::kIsinf: {
454 bool is_vec = FI.getParameter(0).vector_size != 0;
455 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
456 }
457 case Builtins::kIsnan: {
458 bool is_vec = FI.getParameter(0).vector_size != 0;
459 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
460 }
461
462 case Builtins::kIsfinite:
463 return replaceIsFinite(F);
464
465 case Builtins::kAll: {
466 bool is_vec = FI.getParameter(0).vector_size != 0;
467 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
468 }
469 case Builtins::kAny: {
470 bool is_vec = FI.getParameter(0).vector_size != 0;
471 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
472 }
473
alan-baker497920b2020-11-09 16:41:36 -0500474 case Builtins::kIsnormal:
475 return replaceIsNormal(F);
476
SJW2c317da2020-03-23 07:39:13 -0500477 case Builtins::kUpsample:
478 return replaceUpsample(F);
479
480 case Builtins::kRotate:
481 return replaceRotate(F);
482
483 case Builtins::kConvert:
484 return replaceConvert(F, FI.getParameter(0).is_signed,
485 FI.getReturnType().is_signed);
486
alan-baker4986eff2020-10-29 13:38:00 -0400487 // OpenCL 2.0 explicit atomics have different default scopes and semantics
488 // than legacy atomic functions.
489 case Builtins::kAtomicLoad:
490 case Builtins::kAtomicLoadExplicit:
491 return replaceAtomicLoad(F);
492 case Builtins::kAtomicStore:
493 case Builtins::kAtomicStoreExplicit:
494 return replaceExplicitAtomics(F, spv::OpAtomicStore,
495 spv::MemorySemanticsReleaseMask);
496 case Builtins::kAtomicExchange:
497 case Builtins::kAtomicExchangeExplicit:
498 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
499 case Builtins::kAtomicFetchAdd:
500 case Builtins::kAtomicFetchAddExplicit:
501 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
502 case Builtins::kAtomicFetchSub:
503 case Builtins::kAtomicFetchSubExplicit:
504 return replaceExplicitAtomics(F, spv::OpAtomicISub);
505 case Builtins::kAtomicFetchOr:
506 case Builtins::kAtomicFetchOrExplicit:
507 return replaceExplicitAtomics(F, spv::OpAtomicOr);
508 case Builtins::kAtomicFetchXor:
509 case Builtins::kAtomicFetchXorExplicit:
510 return replaceExplicitAtomics(F, spv::OpAtomicXor);
511 case Builtins::kAtomicFetchAnd:
512 case Builtins::kAtomicFetchAndExplicit:
513 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
514 case Builtins::kAtomicFetchMin:
515 case Builtins::kAtomicFetchMinExplicit:
516 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
517 ? spv::OpAtomicSMin
518 : spv::OpAtomicUMin);
519 case Builtins::kAtomicFetchMax:
520 case Builtins::kAtomicFetchMaxExplicit:
521 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
522 ? spv::OpAtomicSMax
523 : spv::OpAtomicUMax);
524 // Weak compare exchange is generated as strong compare exchange.
525 case Builtins::kAtomicCompareExchangeWeak:
526 case Builtins::kAtomicCompareExchangeWeakExplicit:
527 case Builtins::kAtomicCompareExchangeStrong:
528 case Builtins::kAtomicCompareExchangeStrongExplicit:
529 return replaceAtomicCompareExchange(F);
530
531 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500532 case Builtins::kAtomicInc:
533 return replaceAtomics(F, spv::OpAtomicIIncrement);
534 case Builtins::kAtomicDec:
535 return replaceAtomics(F, spv::OpAtomicIDecrement);
536 case Builtins::kAtomicCmpxchg:
537 return replaceAtomics(F, spv::OpAtomicCompareExchange);
538 case Builtins::kAtomicAdd:
539 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
540 case Builtins::kAtomicSub:
541 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
542 case Builtins::kAtomicXchg:
543 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
544 case Builtins::kAtomicMin:
545 return replaceAtomics(F, FI.getParameter(0).is_signed
546 ? llvm::AtomicRMWInst::Min
547 : llvm::AtomicRMWInst::UMin);
548 case Builtins::kAtomicMax:
549 return replaceAtomics(F, FI.getParameter(0).is_signed
550 ? llvm::AtomicRMWInst::Max
551 : llvm::AtomicRMWInst::UMax);
552 case Builtins::kAtomicAnd:
553 return replaceAtomics(F, llvm::AtomicRMWInst::And);
554 case Builtins::kAtomicOr:
555 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
556 case Builtins::kAtomicXor:
557 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
558
559 case Builtins::kCross:
560 if (FI.getParameter(0).vector_size == 4) {
561 return replaceCross(F);
562 }
563 break;
564
565 case Builtins::kFract:
566 if (FI.getParameterCount()) {
567 return replaceFract(F, FI.getParameter(0).vector_size);
568 }
569 break;
570
571 case Builtins::kMadHi:
572 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
573 case Builtins::kMulHi:
574 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
575
alan-baker6b9d1ee2020-11-03 23:11:32 -0500576 case Builtins::kMadSat:
577 return replaceMadSat(F, FI.getParameter(0).is_signed);
578
SJW2c317da2020-03-23 07:39:13 -0500579 case Builtins::kMad:
580 case Builtins::kMad24:
581 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
582 true);
583 case Builtins::kMul24:
584 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
585 false);
586
587 case Builtins::kSelect:
588 return replaceSelect(F);
589
590 case Builtins::kBitselect:
591 return replaceBitSelect(F);
592
593 case Builtins::kVload:
594 return replaceVload(F);
595
596 case Builtins::kVloadaHalf:
597 case Builtins::kVloadHalf:
598 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
599
600 case Builtins::kVstore:
601 return replaceVstore(F);
602
603 case Builtins::kVstoreHalf:
604 case Builtins::kVstoreaHalf:
605 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
606
607 case Builtins::kSmoothstep: {
608 int vec_size = FI.getLastParameter().vector_size;
609 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500610 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500611 }
612 break;
613 }
614 case Builtins::kStep: {
615 int vec_size = FI.getLastParameter().vector_size;
616 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500617 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500618 }
619 break;
620 }
621
622 case Builtins::kSignbit:
623 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
624
alan-baker3f1bf492020-11-05 09:07:36 -0500625 case Builtins::kSubSat:
626 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
627
SJW2c317da2020-03-23 07:39:13 -0500628 case Builtins::kReadImageh:
629 return replaceHalfReadImage(F);
630 case Builtins::kReadImagef:
631 case Builtins::kReadImagei:
632 case Builtins::kReadImageui: {
633 if (FI.getParameter(1).isSampler() &&
634 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
635 return replaceSampledReadImageWithIntCoords(F);
636 }
637 break;
638 }
639
640 case Builtins::kWriteImageh:
641 return replaceHalfWriteImage(F);
642
Kévin Petit1cb45112020-04-27 18:55:48 +0100643 case Builtins::kPrefetch:
644 return replacePrefetch(F);
645
SJW2c317da2020-03-23 07:39:13 -0500646 default:
647 break;
648 }
649
650 return false;
651}
652
alan-baker6b9d1ee2020-11-03 23:11:32 -0500653Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
654 auto iter = PairStructMap.find(type);
655 if (iter != PairStructMap.end())
656 return iter->second;
657
658 auto new_struct = StructType::get(type->getContext(), {type, type});
659 PairStructMap[type] = new_struct;
660 return new_struct;
661}
662
SJW2c317da2020-03-23 07:39:13 -0500663bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
664 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400665 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100666}
667
SJW2c317da2020-03-23 07:39:13 -0500668bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
669 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100670 auto XValue = CI->getOperand(0);
671 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100672
Kévin Petite8edce32019-04-10 14:23:32 +0100673 IRBuilder<> Builder(CI);
674 auto XmY = Builder.CreateSub(XValue, YValue);
675 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100676
SJW2c317da2020-03-23 07:39:13 -0500677 Value *Cmp = nullptr;
678 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100679 Cmp = Builder.CreateICmpSGT(YValue, XValue);
680 } else {
681 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100682 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100683
Kévin Petite8edce32019-04-10 14:23:32 +0100684 return Builder.CreateSelect(Cmp, YmX, XmY);
685 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100686}
687
SJW2c317da2020-03-23 07:39:13 -0500688bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500689 return replaceCallsWithValue(F, [&F](CallInst *Call) {
690 const auto x = Call->getArgOperand(0);
691 const auto y = Call->getArgOperand(1);
692 auto intrinsic = Intrinsic::getDeclaration(
693 F.getParent(), Intrinsic::copysign, Call->getType());
694 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
695 Call);
Kévin Petite8edce32019-04-10 14:23:32 +0100696 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100697}
698
SJW2c317da2020-03-23 07:39:13 -0500699bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
700 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100701 // Recip has one arg.
702 auto Arg = CI->getOperand(0);
703 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
704 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
705 });
David Neto22f144c2017-06-12 14:26:21 -0400706}
707
SJW2c317da2020-03-23 07:39:13 -0500708bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
709 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100710 auto Op0 = CI->getOperand(0);
711 auto Op1 = CI->getOperand(1);
712 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
713 });
David Neto22f144c2017-06-12 14:26:21 -0400714}
715
SJW2c317da2020-03-23 07:39:13 -0500716bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
717 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100718 auto Op0 = CI->getOperand(0);
719 auto Op1 = CI->getOperand(1);
720
SJW2c317da2020-03-23 07:39:13 -0500721 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100722 if (Op0->getType()->isVectorTy()) {
723 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
724 CI->getType(), {Op0, Op1});
725 } else {
726 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
727 }
728
729 return V;
730 });
731}
732
SJW2c317da2020-03-23 07:39:13 -0500733bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500734 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500735 // convert to natural
736 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500737 std::string NewFName = basename.substr(0, slen);
738 NewFName =
739 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400740
SJW2c317da2020-03-23 07:39:13 -0500741 Module &M = *F.getParent();
742 return replaceCallsWithValue(F, [&](CallInst *CI) {
743 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
744
745 auto Arg = CI->getOperand(0);
746
747 // Constant of the natural log of 10 (ln(10)).
748 const double Ln10 =
749 2.302585092994045684017991454684364207601101488628772976033;
750
751 auto Mul = BinaryOperator::Create(
752 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
753
754 return CallInst::Create(NewF, Mul, "", CI);
755 });
David Neto22f144c2017-06-12 14:26:21 -0400756}
757
SJW2c317da2020-03-23 07:39:13 -0500758bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100759 // OpenCL fmod(x,y) is x - y * trunc(x/y)
760 // The sign for a non-zero result is taken from x.
761 // (Try an example.)
762 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500763 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100764 auto Op0 = CI->getOperand(0);
765 auto Op1 = CI->getOperand(1);
766 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
767 });
768}
769
SJW2c317da2020-03-23 07:39:13 -0500770bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500771 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500772 // convert to natural
773 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500774 std::string NewFName = basename.substr(0, slen);
775 NewFName =
776 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400777
SJW2c317da2020-03-23 07:39:13 -0500778 Module &M = *F.getParent();
779 return replaceCallsWithValue(F, [&](CallInst *CI) {
780 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
781
782 auto Arg = CI->getOperand(0);
783
784 // Constant of the reciprocal of the natural log of 10 (ln(10)).
785 const double Ln10 =
786 0.434294481903251827651128918916605082294397005803666566114;
787
788 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
789
790 return BinaryOperator::Create(Instruction::FMul,
791 ConstantFP::get(Arg->getType(), Ln10), NewCI,
792 "", CI);
793 });
David Neto22f144c2017-06-12 14:26:21 -0400794}
795
gnl21636e7992020-09-09 16:08:16 +0100796bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
797 // convert to natural
alan-baker8b968112020-12-15 15:53:29 -0500798 return replaceCallsWithValue(F, [&F](CallInst *CI) {
gnl21636e7992020-09-09 16:08:16 +0100799 auto Arg = CI->getOperand(0);
800
801 auto ArgP1 = BinaryOperator::Create(
802 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
803
alan-baker8b968112020-12-15 15:53:29 -0500804 auto log =
805 Intrinsic::getDeclaration(F.getParent(), Intrinsic::log, CI->getType());
806 return CallInst::Create(log, ArgP1, "", CI);
gnl21636e7992020-09-09 16:08:16 +0100807 });
808}
809
alan-baker12d2c182020-07-20 08:22:42 -0400810bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400811
alan-bakerf6bc8252020-09-23 14:58:55 -0400812 enum {
813 CLK_LOCAL_MEM_FENCE = 0x01,
814 CLK_GLOBAL_MEM_FENCE = 0x02,
815 CLK_IMAGE_MEM_FENCE = 0x04
816 };
David Neto22f144c2017-06-12 14:26:21 -0400817
alan-baker12d2c182020-07-20 08:22:42 -0400818 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100819 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400820
Kévin Petitc4643922019-06-17 19:32:05 +0100821 // We need to map the OpenCL constants to the SPIR-V equivalents.
822 const auto LocalMemFence =
823 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
824 const auto GlobalMemFence =
825 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400826 const auto ImageMemFence =
827 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400828 const auto ConstantAcquireRelease = ConstantInt::get(
829 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100830 const auto ConstantScopeDevice =
831 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
832 const auto ConstantScopeWorkgroup =
833 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400834 const auto ConstantScopeSubgroup =
835 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400836
Kévin Petitc4643922019-06-17 19:32:05 +0100837 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
838 const auto LocalMemFenceMask =
839 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
840 const auto WorkgroupShiftAmount =
841 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
842 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
843 Instruction::Shl, LocalMemFenceMask,
844 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400845
Kévin Petitc4643922019-06-17 19:32:05 +0100846 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
847 const auto GlobalMemFenceMask =
848 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
849 const auto UniformShiftAmount =
850 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
851 const auto MemorySemanticsUniform = BinaryOperator::Create(
852 Instruction::Shl, GlobalMemFenceMask,
853 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400854
alan-bakerf6bc8252020-09-23 14:58:55 -0400855 // OpenCL 2.0
856 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
857 const auto ImageMemFenceMask =
858 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
859 const auto ImageShiftAmount =
860 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
861 const auto MemorySemanticsImage = BinaryOperator::Create(
862 Instruction::Shl, ImageMemFenceMask,
863 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
864
Kévin Petitc4643922019-06-17 19:32:05 +0100865 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400866 // MemorySemanticsSequentiallyConsistentMask.
867 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100868 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400869 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400870 auto MemorySemantics2 = BinaryOperator::Create(
871 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
872 auto MemorySemantics = BinaryOperator::Create(
873 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400874
alan-baker12d2c182020-07-20 08:22:42 -0400875 // If the memory scope is not specified explicitly, it is either Subgroup
876 // or Workgroup depending on the type of barrier.
877 Value *MemoryScope =
878 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
879 if (CI->data_operands_size() > 1) {
880 enum {
881 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
882 CL_MEMORY_SCOPE_DEVICE = 0x2,
883 CL_MEMORY_SCOPE_SUBGROUP = 0x4
884 };
885 // The call was given an explicit memory scope.
886 const auto MemoryScopeSubgroup =
887 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
888 const auto MemoryScopeDevice =
889 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400890
alan-baker12d2c182020-07-20 08:22:42 -0400891 auto Cmp =
892 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
893 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
894 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
895 ConstantScopeWorkgroup, "", CI);
896 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
897 MemoryScopeDevice, CI->getOperand(1), "", CI);
898 MemoryScope =
899 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
900 }
901
902 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
903 // the type of barrier;
904 const auto ExecutionScope =
905 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400906
Kévin Petitc4643922019-06-17 19:32:05 +0100907 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -0400908 {Attribute::NoDuplicate, Attribute::Convergent},
909 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +0100910 {ExecutionScope, MemoryScope, MemorySemantics});
911 });
David Neto22f144c2017-06-12 14:26:21 -0400912}
913
alan-baker36309f92021-02-05 12:28:03 -0500914bool ReplaceOpenCLBuiltinPass::replaceMemFence(
915 Function &F, spv::MemorySemanticsMask semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400916
SJW2c317da2020-03-23 07:39:13 -0500917 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400918 enum {
919 CLK_LOCAL_MEM_FENCE = 0x01,
920 CLK_GLOBAL_MEM_FENCE = 0x02,
921 CLK_IMAGE_MEM_FENCE = 0x04,
922 };
David Neto22f144c2017-06-12 14:26:21 -0400923
SJW2c317da2020-03-23 07:39:13 -0500924 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400925
SJW2c317da2020-03-23 07:39:13 -0500926 // We need to map the OpenCL constants to the SPIR-V equivalents.
927 const auto LocalMemFence =
928 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
929 const auto GlobalMemFence =
930 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400931 const auto ImageMemFence =
932 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500933 const auto ConstantMemorySemantics =
934 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400935 const auto ConstantScopeWorkgroup =
936 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400937
SJW2c317da2020-03-23 07:39:13 -0500938 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
939 const auto LocalMemFenceMask =
940 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
941 const auto WorkgroupShiftAmount =
942 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
943 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
944 Instruction::Shl, LocalMemFenceMask,
945 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400946
SJW2c317da2020-03-23 07:39:13 -0500947 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
948 const auto GlobalMemFenceMask =
949 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
950 const auto UniformShiftAmount =
951 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
952 const auto MemorySemanticsUniform = BinaryOperator::Create(
953 Instruction::Shl, GlobalMemFenceMask,
954 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400955
alan-bakerf6bc8252020-09-23 14:58:55 -0400956 // OpenCL 2.0
957 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
958 const auto ImageMemFenceMask =
959 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
960 const auto ImageShiftAmount =
961 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
962 const auto MemorySemanticsImage = BinaryOperator::Create(
963 Instruction::Shl, ImageMemFenceMask,
964 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
965
alan-baker36309f92021-02-05 12:28:03 -0500966 Value *MemOrder = ConstantMemorySemantics;
967 Value *MemScope = ConstantScopeWorkgroup;
968 IRBuilder<> builder(CI);
969 if (CI->getNumArgOperands() > 1) {
970 MemOrder = MemoryOrderSemantics(CI->getArgOperand(1), false, CI,
971 semantics, false);
972 MemScope = MemoryScope(CI->getArgOperand(2), false, CI);
973 }
974 // Join the storage semantics and the order semantics.
alan-bakerf6bc8252020-09-23 14:58:55 -0400975 auto MemorySemantics1 =
alan-baker36309f92021-02-05 12:28:03 -0500976 builder.CreateOr({MemorySemanticsWorkgroup, MemorySemanticsUniform});
977 auto MemorySemantics2 = builder.CreateOr({MemorySemanticsImage, MemOrder});
978 auto MemorySemantics =
979 builder.CreateOr({MemorySemantics1, MemorySemantics2});
David Neto22f144c2017-06-12 14:26:21 -0400980
alan-baker3d905692020-10-28 14:02:37 -0400981 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
982 {Attribute::Convergent}, CI->getType(),
alan-baker36309f92021-02-05 12:28:03 -0500983 {MemScope, MemorySemantics});
SJW2c317da2020-03-23 07:39:13 -0500984 });
David Neto22f144c2017-06-12 14:26:21 -0400985}
986
Kévin Petit1cb45112020-04-27 18:55:48 +0100987bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
988 bool Changed = false;
989
990 SmallVector<Instruction *, 4> ToRemoves;
991
992 // Find all calls to the function
993 for (auto &U : F.uses()) {
994 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
995 ToRemoves.push_back(CI);
996 }
997 }
998
999 Changed = !ToRemoves.empty();
1000
1001 // Delete them
1002 for (auto V : ToRemoves) {
1003 V->eraseFromParent();
1004 }
1005
1006 return Changed;
1007}
1008
SJW2c317da2020-03-23 07:39:13 -05001009bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001010 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001011 return replaceCallsWithValue(F, [&](CallInst *CI) {
1012 // The predicate to use in the CmpInst.
1013 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001014
SJW2c317da2020-03-23 07:39:13 -05001015 auto Arg1 = CI->getOperand(0);
1016 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001017
SJW2c317da2020-03-23 07:39:13 -05001018 const auto Cmp =
1019 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001020 if (isa<VectorType>(F.getReturnType()))
1021 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1022 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001023 });
David Neto22f144c2017-06-12 14:26:21 -04001024}
1025
SJW2c317da2020-03-23 07:39:13 -05001026bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1027 spv::Op SPIRVOp,
1028 int32_t C) {
1029 Module &M = *F.getParent();
1030 return replaceCallsWithValue(F, [&](CallInst *CI) {
1031 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001032
SJW2c317da2020-03-23 07:39:13 -05001033 // The value to return for true.
1034 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001035
SJW2c317da2020-03-23 07:39:13 -05001036 // The value to return for false.
1037 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001038
SJW2c317da2020-03-23 07:39:13 -05001039 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001040 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001041 CorrespondingBoolTy =
1042 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1043 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001044 }
David Neto22f144c2017-06-12 14:26:21 -04001045
SJW2c317da2020-03-23 07:39:13 -05001046 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1047 CorrespondingBoolTy, {CI->getOperand(0)});
1048
1049 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1050 });
David Neto22f144c2017-06-12 14:26:21 -04001051}
1052
SJW2c317da2020-03-23 07:39:13 -05001053bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1054 Module &M = *F.getParent();
1055 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001056 auto &C = M.getContext();
1057 auto Val = CI->getOperand(0);
1058 auto ValTy = Val->getType();
1059 auto RetTy = CI->getType();
1060
1061 // Get a suitable integer type to represent the number
1062 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1063
1064 // Create Mask
1065 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001066 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001067 switch (ScalarSize) {
1068 case 16:
1069 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1070 break;
1071 case 32:
1072 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1073 break;
1074 case 64:
1075 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1076 break;
1077 default:
1078 llvm_unreachable("Unsupported floating-point type");
1079 }
1080
1081 IRBuilder<> Builder(CI);
1082
1083 // Bitcast to int
1084 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1085
1086 // Mask and compare
1087 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1088 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1089
1090 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001091 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001092 if (ValTy->isVectorTy()) {
1093 RetTrue = ConstantInt::getSigned(RetTy, -1);
1094 } else {
1095 RetTrue = ConstantInt::get(RetTy, 1);
1096 }
1097 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1098 });
1099}
1100
SJW2c317da2020-03-23 07:39:13 -05001101bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1102 Module &M = *F.getParent();
1103 return replaceCallsWithValue(F, [&](CallInst *CI) {
1104 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001105
SJW2c317da2020-03-23 07:39:13 -05001106 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001107
SJW2c317da2020-03-23 07:39:13 -05001108 // If the argument is a 32-bit int, just use a shift
1109 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1110 V = BinaryOperator::Create(Instruction::LShr, Arg,
1111 ConstantInt::get(Arg->getType(), 31), "", CI);
1112 } else {
1113 // The value for zero to compare against.
1114 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001115
SJW2c317da2020-03-23 07:39:13 -05001116 // The value to return for true.
1117 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001118
SJW2c317da2020-03-23 07:39:13 -05001119 // The value to return for false.
1120 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001121
SJW2c317da2020-03-23 07:39:13 -05001122 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1123 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001124
SJW2c317da2020-03-23 07:39:13 -05001125 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001126
SJW2c317da2020-03-23 07:39:13 -05001127 // If we have a function to call, call it!
1128 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001129
SJW2c317da2020-03-23 07:39:13 -05001130 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001131
SJW2c317da2020-03-23 07:39:13 -05001132 const auto NewCI = clspv::InsertSPIRVOp(
1133 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1134 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001135
SJW2c317da2020-03-23 07:39:13 -05001136 } else {
1137 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001138 }
1139
SJW2c317da2020-03-23 07:39:13 -05001140 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001141 }
SJW2c317da2020-03-23 07:39:13 -05001142 return V;
1143 });
David Neto22f144c2017-06-12 14:26:21 -04001144}
1145
SJW2c317da2020-03-23 07:39:13 -05001146bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1147 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1148 // Get arguments
1149 auto HiValue = CI->getOperand(0);
1150 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001151
SJW2c317da2020-03-23 07:39:13 -05001152 // Don't touch overloads that aren't in OpenCL C
1153 auto HiType = HiValue->getType();
1154 auto LoType = LoValue->getType();
1155
1156 if (HiType != LoType) {
1157 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001158 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001159
SJW2c317da2020-03-23 07:39:13 -05001160 if (!HiType->isIntOrIntVectorTy()) {
1161 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001162 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001163
SJW2c317da2020-03-23 07:39:13 -05001164 if (HiType->getScalarSizeInBits() * 2 !=
1165 CI->getType()->getScalarSizeInBits()) {
1166 return nullptr;
1167 }
1168
1169 if ((HiType->getScalarSizeInBits() != 8) &&
1170 (HiType->getScalarSizeInBits() != 16) &&
1171 (HiType->getScalarSizeInBits() != 32)) {
1172 return nullptr;
1173 }
1174
James Pricecf53df42020-04-20 14:41:24 -04001175 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001176 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001177 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1178 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001179 return nullptr;
1180 }
1181 }
1182
1183 // Convert both operands to the result type
1184 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1185 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1186
1187 // Shift high operand
1188 auto ShiftAmount =
1189 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1190 auto HiShifted =
1191 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1192
1193 // OR both results
1194 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1195 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001196}
1197
SJW2c317da2020-03-23 07:39:13 -05001198bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1199 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1200 // Get arguments
1201 auto SrcValue = CI->getOperand(0);
1202 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001203
SJW2c317da2020-03-23 07:39:13 -05001204 // Don't touch overloads that aren't in OpenCL C
1205 auto SrcType = SrcValue->getType();
1206 auto RotType = RotAmount->getType();
1207
1208 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1209 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001210 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001211
SJW2c317da2020-03-23 07:39:13 -05001212 if (!SrcType->isIntOrIntVectorTy()) {
1213 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001214 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001215
SJW2c317da2020-03-23 07:39:13 -05001216 if ((SrcType->getScalarSizeInBits() != 8) &&
1217 (SrcType->getScalarSizeInBits() != 16) &&
1218 (SrcType->getScalarSizeInBits() != 32) &&
1219 (SrcType->getScalarSizeInBits() != 64)) {
1220 return nullptr;
1221 }
1222
James Pricecf53df42020-04-20 14:41:24 -04001223 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001224 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001225 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1226 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001227 return nullptr;
1228 }
1229 }
1230
alan-bakerfd22ae12020-10-29 15:59:22 -04001231 // Replace with LLVM's funnel shift left intrinsic because it is more
1232 // generic than rotate.
1233 Function *intrinsic =
1234 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1235 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1236 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001237 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001238}
1239
SJW2c317da2020-03-23 07:39:13 -05001240bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1241 bool DstIsSigned) {
1242 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1243 Value *V = nullptr;
1244 // Get arguments
1245 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001246
SJW2c317da2020-03-23 07:39:13 -05001247 // Don't touch overloads that aren't in OpenCL C
1248 auto SrcType = SrcValue->getType();
1249 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001250
SJW2c317da2020-03-23 07:39:13 -05001251 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1252 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1253 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001254 }
1255
James Pricecf53df42020-04-20 14:41:24 -04001256 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001257 unsigned SrcNumElements =
1258 SrcVecType->getElementCount().getKnownMinValue();
1259 unsigned DstNumElements =
1260 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001261 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001262 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001263 }
1264
James Pricecf53df42020-04-20 14:41:24 -04001265 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1266 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1267 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001268 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001269 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001270 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001271
SJW2c317da2020-03-23 07:39:13 -05001272 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1273 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1274
1275 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1276 bool DstIsInt = DstType->isIntOrIntVectorTy();
1277
1278 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1279 // Unnecessary cast operation.
1280 V = SrcValue;
1281 } else if (SrcIsFloat && DstIsFloat) {
1282 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1283 } else if (SrcIsFloat && DstIsInt) {
1284 if (DstIsSigned) {
1285 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1286 } else {
1287 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1288 }
1289 } else if (SrcIsInt && DstIsFloat) {
1290 if (SrcIsSigned) {
1291 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1292 } else {
1293 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1294 }
1295 } else if (SrcIsInt && DstIsInt) {
1296 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1297 } else {
1298 // Not something we're supposed to handle, just move on
1299 }
1300
1301 return V;
1302 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001303}
1304
SJW2c317da2020-03-23 07:39:13 -05001305bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1306 bool is_mad) {
1307 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1308 Value *V = nullptr;
1309 // Get arguments
1310 auto AValue = CI->getOperand(0);
1311 auto BValue = CI->getOperand(1);
1312 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001313
SJW2c317da2020-03-23 07:39:13 -05001314 // Don't touch overloads that aren't in OpenCL C
1315 auto AType = AValue->getType();
1316 auto BType = BValue->getType();
1317 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001318
SJW2c317da2020-03-23 07:39:13 -05001319 if ((AType != BType) || (CI->getType() != AType) ||
1320 (is_mad && (AType != CType))) {
1321 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001322 }
1323
SJW2c317da2020-03-23 07:39:13 -05001324 if (!AType->isIntOrIntVectorTy()) {
1325 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001326 }
Kévin Petit8a560882019-03-21 15:24:34 +00001327
SJW2c317da2020-03-23 07:39:13 -05001328 if ((AType->getScalarSizeInBits() != 8) &&
1329 (AType->getScalarSizeInBits() != 16) &&
1330 (AType->getScalarSizeInBits() != 32) &&
1331 (AType->getScalarSizeInBits() != 64)) {
1332 return V;
1333 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001334
James Pricecf53df42020-04-20 14:41:24 -04001335 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001336 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001337 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1338 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001339 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001340 }
1341 }
1342
SJW2c317da2020-03-23 07:39:13 -05001343 // Our SPIR-V op returns a struct, create a type for it
alan-baker6b9d1ee2020-11-03 23:11:32 -05001344 auto ExMulRetType = GetPairStruct(AType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001345
SJW2c317da2020-03-23 07:39:13 -05001346 // Select the appropriate signed/unsigned SPIR-V op
1347 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1348
1349 // Call the SPIR-V op
1350 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1351 ExMulRetType, {AValue, BValue});
1352
1353 // Get the high part of the result
1354 unsigned Idxs[] = {1};
1355 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1356
1357 // If we're handling a mad_hi, add the third argument to the result
1358 if (is_mad) {
1359 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001360 }
1361
SJW2c317da2020-03-23 07:39:13 -05001362 return V;
1363 });
Kévin Petit8a560882019-03-21 15:24:34 +00001364}
1365
SJW2c317da2020-03-23 07:39:13 -05001366bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1367 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1368 // Get arguments
1369 auto FalseValue = CI->getOperand(0);
1370 auto TrueValue = CI->getOperand(1);
1371 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001372
SJW2c317da2020-03-23 07:39:13 -05001373 // Don't touch overloads that aren't in OpenCL C
1374 auto FalseType = FalseValue->getType();
1375 auto TrueType = TrueValue->getType();
1376 auto PredicateType = PredicateValue->getType();
1377
1378 if (FalseType != TrueType) {
1379 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001380 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001381
SJW2c317da2020-03-23 07:39:13 -05001382 if (!PredicateType->isIntOrIntVectorTy()) {
1383 return nullptr;
1384 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001385
SJW2c317da2020-03-23 07:39:13 -05001386 if (!FalseType->isIntOrIntVectorTy() &&
1387 !FalseType->getScalarType()->isFloatingPointTy()) {
1388 return nullptr;
1389 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001390
SJW2c317da2020-03-23 07:39:13 -05001391 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1392 return nullptr;
1393 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001394
SJW2c317da2020-03-23 07:39:13 -05001395 if (FalseType->getScalarSizeInBits() !=
1396 PredicateType->getScalarSizeInBits()) {
1397 return nullptr;
1398 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001399
James Pricecf53df42020-04-20 14:41:24 -04001400 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001401 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1402 if (NumElements != cast<VectorType>(PredicateType)
1403 ->getElementCount()
1404 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001405 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001406 }
1407
James Pricecf53df42020-04-20 14:41:24 -04001408 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1409 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001410 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001411 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001412 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001413
SJW2c317da2020-03-23 07:39:13 -05001414 // Create constant
1415 const auto ZeroValue = Constant::getNullValue(PredicateType);
1416
1417 // Scalar and vector are to be treated differently
1418 CmpInst::Predicate Pred;
1419 if (PredicateType->isVectorTy()) {
1420 Pred = CmpInst::ICMP_SLT;
1421 } else {
1422 Pred = CmpInst::ICMP_NE;
1423 }
1424
1425 // Create comparison instruction
1426 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1427 ZeroValue, "", CI);
1428
1429 // Create select
1430 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1431 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001432}
1433
SJW2c317da2020-03-23 07:39:13 -05001434bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1435 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1436 Value *V = nullptr;
1437 if (CI->getNumOperands() != 4) {
1438 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001439 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001440
SJW2c317da2020-03-23 07:39:13 -05001441 // Get arguments
1442 auto FalseValue = CI->getOperand(0);
1443 auto TrueValue = CI->getOperand(1);
1444 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001445
SJW2c317da2020-03-23 07:39:13 -05001446 // Don't touch overloads that aren't in OpenCL C
1447 auto FalseType = FalseValue->getType();
1448 auto TrueType = TrueValue->getType();
1449 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001450
SJW2c317da2020-03-23 07:39:13 -05001451 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1452 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001453 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001454
James Pricecf53df42020-04-20 14:41:24 -04001455 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001456 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1457 !TrueType->getScalarType()->isIntegerTy()) {
1458 return V;
1459 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001460 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001461 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1462 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001463 return V;
1464 }
1465 }
1466
1467 // Remember the type of the operands
1468 auto OpType = TrueType;
1469
1470 // The actual bit selection will always be done on an integer type,
1471 // declare it here
1472 Type *BitType;
1473
1474 // If the operands are float, then bitcast them to int
1475 if (OpType->getScalarType()->isFloatingPointTy()) {
1476
1477 // First create the new type
1478 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1479
1480 // Then bitcast all operands
1481 PredicateValue =
1482 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1483 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1484 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1485
1486 } else {
1487 // The operands have an integer type, use it directly
1488 BitType = OpType;
1489 }
1490
1491 // All the operands are now always integers
1492 // implement as (c & b) | (~c & a)
1493
1494 // Create our negated predicate value
1495 auto AllOnes = Constant::getAllOnesValue(BitType);
1496 auto NotPredicateValue = BinaryOperator::Create(
1497 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1498
1499 // Then put everything together
1500 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1501 FalseValue, "", CI);
1502 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1503 TrueValue, "", CI);
1504
1505 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1506
1507 // If we were dealing with a floating point type, we must bitcast
1508 // the result back to that
1509 if (OpType->getScalarType()->isFloatingPointTy()) {
1510 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1511 }
1512
1513 return V;
1514 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001515}
1516
SJW61531372020-06-09 07:31:08 -05001517bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001518 // convert to vector versions
1519 Module &M = *F.getParent();
1520 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1521 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1522 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001523
SJW2c317da2020-03-23 07:39:13 -05001524 // First figure out which function we're dealing with
1525 if (is_smooth) {
1526 ArgsToSplat.push_back(CI->getOperand(1));
1527 VectorArg = CI->getOperand(2);
1528 } else {
1529 VectorArg = CI->getOperand(1);
1530 }
1531
1532 // Splat arguments that need to be
1533 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001534 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001535
1536 for (auto arg : ArgsToSplat) {
1537 Value *NewVectorArg = UndefValue::get(VecType);
Marco Antognini7e338402021-03-15 12:48:37 +00001538 for (size_t i = 0; i < VecType->getElementCount().getKnownMinValue();
1539 i++) {
SJW2c317da2020-03-23 07:39:13 -05001540 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1541 NewVectorArg =
1542 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1543 }
1544 SplatArgs.push_back(NewVectorArg);
1545 }
1546
1547 // Replace the call with the vector/vector flavour
1548 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1549 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1550
SJW61531372020-06-09 07:31:08 -05001551 std::string NewFName = Builtins::GetMangledFunctionName(
1552 is_smooth ? "smoothstep" : "step", NewFType);
1553
SJW2c317da2020-03-23 07:39:13 -05001554 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1555
1556 SmallVector<Value *, 3> NewArgs;
1557 for (auto arg : SplatArgs) {
1558 NewArgs.push_back(arg);
1559 }
1560 NewArgs.push_back(VectorArg);
1561
1562 return CallInst::Create(NewF, NewArgs, "", CI);
1563 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001564}
1565
SJW2c317da2020-03-23 07:39:13 -05001566bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001567 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1568 auto Arg = CI->getOperand(0);
1569 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001570
SJW2c317da2020-03-23 07:39:13 -05001571 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001572
SJW2c317da2020-03-23 07:39:13 -05001573 return BinaryOperator::Create(Op, Bitcast,
1574 ConstantInt::get(CI->getType(), 31), "", CI);
1575 });
David Neto22f144c2017-06-12 14:26:21 -04001576}
1577
SJW2c317da2020-03-23 07:39:13 -05001578bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1579 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001580 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1581 // The multiply instruction to use.
1582 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001583
SJW2c317da2020-03-23 07:39:13 -05001584 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001585
SJW2c317da2020-03-23 07:39:13 -05001586 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1587 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001588
SJW2c317da2020-03-23 07:39:13 -05001589 if (is_mad) {
1590 // The add instruction to use.
1591 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001592
SJW2c317da2020-03-23 07:39:13 -05001593 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001594 }
David Neto22f144c2017-06-12 14:26:21 -04001595
SJW2c317da2020-03-23 07:39:13 -05001596 return V;
1597 });
David Neto22f144c2017-06-12 14:26:21 -04001598}
1599
SJW2c317da2020-03-23 07:39:13 -05001600bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001601 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1602 Value *V = nullptr;
1603 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001604
SJW2c317da2020-03-23 07:39:13 -05001605 auto data_type = data->getType();
1606 if (!data_type->isVectorTy())
1607 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001608
James Pricecf53df42020-04-20 14:41:24 -04001609 auto vec_data_type = cast<VectorType>(data_type);
1610
alan-baker5a8c3be2020-09-09 13:44:26 -04001611 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001612 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1613 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001614
SJW2c317da2020-03-23 07:39:13 -05001615 auto offset = CI->getOperand(1);
1616 auto ptr = CI->getOperand(2);
1617 auto ptr_type = ptr->getType();
1618 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001619 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001620 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001621
SJW2c317da2020-03-23 07:39:13 -05001622 // Avoid pointer casts. Instead generate the correct number of stores
1623 // and rely on drivers to coalesce appropriately.
1624 IRBuilder<> builder(CI);
1625 auto elems_const = builder.getInt32(elems);
1626 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001627 for (size_t i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001628 auto idx = builder.getInt32(i);
1629 auto add = builder.CreateAdd(adjust, idx);
1630 auto gep = builder.CreateGEP(ptr, add);
1631 auto extract = builder.CreateExtractElement(data, i);
1632 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001633 }
SJW2c317da2020-03-23 07:39:13 -05001634 return V;
1635 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001636}
1637
SJW2c317da2020-03-23 07:39:13 -05001638bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001639 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1640 Value *V = nullptr;
1641 auto ret_type = F.getReturnType();
1642 if (!ret_type->isVectorTy())
1643 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001644
James Pricecf53df42020-04-20 14:41:24 -04001645 auto vec_ret_type = cast<VectorType>(ret_type);
1646
alan-baker5a8c3be2020-09-09 13:44:26 -04001647 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001648 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1649 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001650
SJW2c317da2020-03-23 07:39:13 -05001651 auto offset = CI->getOperand(0);
1652 auto ptr = CI->getOperand(1);
1653 auto ptr_type = ptr->getType();
1654 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001655 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001656 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001657
SJW2c317da2020-03-23 07:39:13 -05001658 // Avoid pointer casts. Instead generate the correct number of loads
1659 // and rely on drivers to coalesce appropriately.
1660 IRBuilder<> builder(CI);
1661 auto elems_const = builder.getInt32(elems);
1662 V = UndefValue::get(ret_type);
1663 auto adjust = builder.CreateMul(offset, elems_const);
Marco Antognini7e338402021-03-15 12:48:37 +00001664 for (unsigned i = 0; i < elems; ++i) {
SJW2c317da2020-03-23 07:39:13 -05001665 auto idx = builder.getInt32(i);
1666 auto add = builder.CreateAdd(adjust, idx);
1667 auto gep = builder.CreateGEP(ptr, add);
1668 auto load = builder.CreateLoad(gep);
1669 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001670 }
SJW2c317da2020-03-23 07:39:13 -05001671 return V;
1672 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001673}
1674
SJW2c317da2020-03-23 07:39:13 -05001675bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1676 const std::string &name,
1677 int vec_size) {
1678 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1679 if (!vec_size) {
1680 // deduce vec_size from last character of name (e.g. vload_half4)
1681 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001682 }
SJW2c317da2020-03-23 07:39:13 -05001683 switch (vec_size) {
1684 case 2:
1685 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1686 case 4:
1687 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1688 case 0:
1689 if (!is_clspv_version) {
1690 return replaceVloadHalf(F);
1691 }
Marco Antognini7e338402021-03-15 12:48:37 +00001692 // Fall-through
SJW2c317da2020-03-23 07:39:13 -05001693 default:
1694 llvm_unreachable("Unsupported vload_half vector size");
1695 break;
1696 }
1697 return false;
David Neto22f144c2017-06-12 14:26:21 -04001698}
1699
SJW2c317da2020-03-23 07:39:13 -05001700bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1701 Module &M = *F.getParent();
1702 return replaceCallsWithValue(F, [&](CallInst *CI) {
1703 // The index argument from vload_half.
1704 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001705
SJW2c317da2020-03-23 07:39:13 -05001706 // The pointer argument from vload_half.
1707 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001708
SJW2c317da2020-03-23 07:39:13 -05001709 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001710 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001711 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1712
1713 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001714 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001715
1716 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1717
1718 Value *V = nullptr;
1719
alan-baker7efcaaa2020-05-06 19:33:27 -04001720 bool supports_16bit_storage = true;
1721 switch (Arg1->getType()->getPointerAddressSpace()) {
1722 case clspv::AddressSpace::Global:
1723 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1724 clspv::Option::StorageClass::kSSBO);
1725 break;
1726 case clspv::AddressSpace::Constant:
1727 if (clspv::Option::ConstantArgsInUniformBuffer())
1728 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1729 clspv::Option::StorageClass::kUBO);
1730 else
1731 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1732 clspv::Option::StorageClass::kSSBO);
1733 break;
1734 default:
1735 // Clspv will emit the Float16 capability if the half type is
1736 // encountered. That capability covers private and local addressspaces.
1737 break;
1738 }
1739
1740 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001741 auto ShortTy = Type::getInt16Ty(M.getContext());
1742 auto ShortPointerTy =
1743 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1744
1745 // Cast the half* pointer to short*.
1746 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1747
1748 // Index into the correct address of the casted pointer.
1749 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1750
1751 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001752 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001753
1754 // ZExt the short -> int.
1755 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1756
1757 // Get our float2.
1758 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1759
1760 // Extract out the bottom element which is our float result.
1761 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1762 } else {
1763 // Assume the pointer argument points to storage aligned to 32bits
1764 // or more.
1765 // TODO(dneto): Do more analysis to make sure this is true?
1766 //
1767 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1768 // with:
1769 //
1770 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1771 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1772 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1773 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1774 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1775 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1776 // x float> %converted, %index_is_odd32
1777
1778 auto IntPointerTy =
1779 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1780
1781 // Cast the base pointer to int*.
1782 // In a valid call (according to assumptions), this should get
1783 // optimized away in the simplify GEP pass.
1784 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1785
1786 auto One = ConstantInt::get(IntTy, 1);
1787 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1788 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1789
1790 // Index into the correct address of the casted pointer.
1791 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1792
1793 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001794 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001795
1796 // Get our float2.
1797 auto Call = CallInst::Create(NewF, Load, "", CI);
1798
1799 // Extract out the float result, where the element number is
1800 // determined by whether the original index was even or odd.
1801 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1802 }
1803 return V;
1804 });
1805}
1806
1807bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1808 Module &M = *F.getParent();
1809 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001810 // The index argument from vload_half.
1811 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001812
Kévin Petite8edce32019-04-10 14:23:32 +01001813 // The pointer argument from vload_half.
1814 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001815
Kévin Petite8edce32019-04-10 14:23:32 +01001816 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001817 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001818 auto NewPointerTy =
1819 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001820 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001821
Kévin Petite8edce32019-04-10 14:23:32 +01001822 // Cast the half* pointer to int*.
1823 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001824
Kévin Petite8edce32019-04-10 14:23:32 +01001825 // Index into the correct address of the casted pointer.
1826 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001827
Kévin Petite8edce32019-04-10 14:23:32 +01001828 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001829 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001830
Kévin Petite8edce32019-04-10 14:23:32 +01001831 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001832 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001833
Kévin Petite8edce32019-04-10 14:23:32 +01001834 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001835
Kévin Petite8edce32019-04-10 14:23:32 +01001836 // Get our float2.
1837 return CallInst::Create(NewF, Load, "", CI);
1838 });
David Neto22f144c2017-06-12 14:26:21 -04001839}
1840
SJW2c317da2020-03-23 07:39:13 -05001841bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1842 Module &M = *F.getParent();
1843 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001844 // The index argument from vload_half.
1845 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001846
Kévin Petite8edce32019-04-10 14:23:32 +01001847 // The pointer argument from vload_half.
1848 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001849
Kévin Petite8edce32019-04-10 14:23:32 +01001850 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001851 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1852 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001853 auto NewPointerTy =
1854 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001855 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001856
Kévin Petite8edce32019-04-10 14:23:32 +01001857 // Cast the half* pointer to int2*.
1858 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001859
Kévin Petite8edce32019-04-10 14:23:32 +01001860 // Index into the correct address of the casted pointer.
1861 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001862
Kévin Petite8edce32019-04-10 14:23:32 +01001863 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001864 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001865
Kévin Petite8edce32019-04-10 14:23:32 +01001866 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001867 auto X =
1868 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1869 auto Y =
1870 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001871
Kévin Petite8edce32019-04-10 14:23:32 +01001872 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001873 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001874
Kévin Petite8edce32019-04-10 14:23:32 +01001875 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001876
Kévin Petite8edce32019-04-10 14:23:32 +01001877 // Get the lower (x & y) components of our final float4.
1878 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001879
Kévin Petite8edce32019-04-10 14:23:32 +01001880 // Get the higher (z & w) components of our final float4.
1881 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001882
Kévin Petite8edce32019-04-10 14:23:32 +01001883 Constant *ShuffleMask[4] = {
1884 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1885 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001886
Kévin Petite8edce32019-04-10 14:23:32 +01001887 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001888 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1889 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001890 });
David Neto22f144c2017-06-12 14:26:21 -04001891}
1892
SJW2c317da2020-03-23 07:39:13 -05001893bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001894
1895 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1896 //
1897 // %u = load i32 %ptr
1898 // %fxy = call <2 x float> Unpack2xHalf(u)
1899 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001900 Module &M = *F.getParent();
1901 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001902 auto Index = CI->getOperand(0);
1903 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001904
Kévin Petite8edce32019-04-10 14:23:32 +01001905 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001906 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001907 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001908
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001909 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001910 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001911
Kévin Petite8edce32019-04-10 14:23:32 +01001912 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001913 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001914
Kévin Petite8edce32019-04-10 14:23:32 +01001915 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001916
Kévin Petite8edce32019-04-10 14:23:32 +01001917 // Get our final float2.
1918 return CallInst::Create(NewF, Load, "", CI);
1919 });
David Neto6ad93232018-06-07 15:42:58 -07001920}
1921
SJW2c317da2020-03-23 07:39:13 -05001922bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001923
1924 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1925 //
1926 // %u2 = load <2 x i32> %ptr
1927 // %u2xy = extractelement %u2, 0
1928 // %u2zw = extractelement %u2, 1
1929 // %fxy = call <2 x float> Unpack2xHalf(uint)
1930 // %fzw = call <2 x float> Unpack2xHalf(uint)
1931 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001932 Module &M = *F.getParent();
1933 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001934 auto Index = CI->getOperand(0);
1935 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001936
Kévin Petite8edce32019-04-10 14:23:32 +01001937 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001938 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1939 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001940 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001941
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001942 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001943 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001944
Kévin Petite8edce32019-04-10 14:23:32 +01001945 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001946 auto X =
1947 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1948 auto Y =
1949 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001950
Kévin Petite8edce32019-04-10 14:23:32 +01001951 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001952 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001953
Kévin Petite8edce32019-04-10 14:23:32 +01001954 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001955
Kévin Petite8edce32019-04-10 14:23:32 +01001956 // Get the lower (x & y) components of our final float4.
1957 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001958
Kévin Petite8edce32019-04-10 14:23:32 +01001959 // Get the higher (z & w) components of our final float4.
1960 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001961
Kévin Petite8edce32019-04-10 14:23:32 +01001962 Constant *ShuffleMask[4] = {
1963 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1964 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001965
Kévin Petite8edce32019-04-10 14:23:32 +01001966 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001967 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1968 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001969 });
David Neto6ad93232018-06-07 15:42:58 -07001970}
1971
SJW2c317da2020-03-23 07:39:13 -05001972bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1973 switch (vec_size) {
1974 case 0:
1975 return replaceVstoreHalf(F);
1976 case 2:
1977 return replaceVstoreHalf2(F);
1978 case 4:
1979 return replaceVstoreHalf4(F);
1980 default:
1981 llvm_unreachable("Unsupported vstore_half vector size");
1982 break;
1983 }
1984 return false;
1985}
David Neto22f144c2017-06-12 14:26:21 -04001986
SJW2c317da2020-03-23 07:39:13 -05001987bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1988 Module &M = *F.getParent();
1989 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001990 // The value to store.
1991 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001992
Kévin Petite8edce32019-04-10 14:23:32 +01001993 // The index argument from vstore_half.
1994 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001995
Kévin Petite8edce32019-04-10 14:23:32 +01001996 // The pointer argument from vstore_half.
1997 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001998
Kévin Petite8edce32019-04-10 14:23:32 +01001999 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002000 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01002001 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2002 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002003
Kévin Petite8edce32019-04-10 14:23:32 +01002004 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002005 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002006
Kévin Petite8edce32019-04-10 14:23:32 +01002007 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002008
Kévin Petite8edce32019-04-10 14:23:32 +01002009 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002010 auto TempVec = InsertElementInst::Create(
2011 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002012
Kévin Petite8edce32019-04-10 14:23:32 +01002013 // Pack the float2 -> half2 (in an int).
2014 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002015
alan-baker7efcaaa2020-05-06 19:33:27 -04002016 bool supports_16bit_storage = true;
2017 switch (Arg2->getType()->getPointerAddressSpace()) {
2018 case clspv::AddressSpace::Global:
2019 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2020 clspv::Option::StorageClass::kSSBO);
2021 break;
2022 case clspv::AddressSpace::Constant:
2023 if (clspv::Option::ConstantArgsInUniformBuffer())
2024 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2025 clspv::Option::StorageClass::kUBO);
2026 else
2027 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2028 clspv::Option::StorageClass::kSSBO);
2029 break;
2030 default:
2031 // Clspv will emit the Float16 capability if the half type is
2032 // encountered. That capability covers private and local addressspaces.
2033 break;
2034 }
2035
SJW2c317da2020-03-23 07:39:13 -05002036 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002037 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002038 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002039 auto ShortPointerTy =
2040 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002041
Kévin Petite8edce32019-04-10 14:23:32 +01002042 // Truncate our i32 to an i16.
2043 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002044
Kévin Petite8edce32019-04-10 14:23:32 +01002045 // Cast the half* pointer to short*.
2046 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002047
Kévin Petite8edce32019-04-10 14:23:32 +01002048 // Index into the correct address of the casted pointer.
2049 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002050
Kévin Petite8edce32019-04-10 14:23:32 +01002051 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002052 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002053 } else {
2054 // We can only write to 32-bit aligned words.
2055 //
2056 // Assuming base is aligned to 32-bits, replace the equivalent of
2057 // vstore_half(value, index, base)
2058 // with:
2059 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2060 // uint32_t write_to_upper_half = index & 1u;
2061 // uint32_t shift = write_to_upper_half << 4;
2062 //
2063 // // Pack the float value as a half number in bottom 16 bits
2064 // // of an i32.
2065 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2066 //
2067 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2068 // ^ ((packed & 0xffff) << shift)
2069 // // We only need relaxed consistency, but OpenCL 1.2 only has
2070 // // sequentially consistent atomics.
2071 // // TODO(dneto): Use relaxed consistency.
2072 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002073 auto IntPointerTy =
2074 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002075
Kévin Petite8edce32019-04-10 14:23:32 +01002076 auto Four = ConstantInt::get(IntTy, 4);
2077 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002078
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002079 auto IndexIsOdd =
2080 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002081 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002082 auto IndexIntoI32 =
2083 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2084 auto BaseI32Ptr =
2085 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2086 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2087 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002088 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002089 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002090 auto MaskBitsToWrite =
2091 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2092 auto MaskedCurrent = BinaryOperator::CreateAnd(
2093 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002094
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002095 auto XLowerBits =
2096 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2097 auto NewBitsToWrite =
2098 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2099 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2100 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002101
Kévin Petite8edce32019-04-10 14:23:32 +01002102 // Generate the call to atomi_xor.
2103 SmallVector<Type *, 5> ParamTypes;
2104 // The pointer type.
2105 ParamTypes.push_back(IntPointerTy);
2106 // The Types for memory scope, semantics, and value.
2107 ParamTypes.push_back(IntTy);
2108 ParamTypes.push_back(IntTy);
2109 ParamTypes.push_back(IntTy);
2110 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2111 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002112
Kévin Petite8edce32019-04-10 14:23:32 +01002113 const auto ConstantScopeDevice =
2114 ConstantInt::get(IntTy, spv::ScopeDevice);
2115 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2116 // (SPIR-V Workgroup).
2117 const auto AddrSpaceSemanticsBits =
2118 IntPointerTy->getPointerAddressSpace() == 1
2119 ? spv::MemorySemanticsUniformMemoryMask
2120 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002121
Kévin Petite8edce32019-04-10 14:23:32 +01002122 // We're using relaxed consistency here.
2123 const auto ConstantMemorySemantics =
2124 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2125 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002126
Kévin Petite8edce32019-04-10 14:23:32 +01002127 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2128 ConstantMemorySemantics, ValueToXor};
2129 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002130
2131 // Return a Nop so the old Call is removed
2132 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2133 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002134 }
David Neto22f144c2017-06-12 14:26:21 -04002135
SJW2c317da2020-03-23 07:39:13 -05002136 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002137 });
David Neto22f144c2017-06-12 14:26:21 -04002138}
2139
SJW2c317da2020-03-23 07:39:13 -05002140bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2141 Module &M = *F.getParent();
2142 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002143 // The value to store.
2144 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002145
Kévin Petite8edce32019-04-10 14:23:32 +01002146 // The index argument from vstore_half.
2147 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002148
Kévin Petite8edce32019-04-10 14:23:32 +01002149 // The pointer argument from vstore_half.
2150 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002151
Kévin Petite8edce32019-04-10 14:23:32 +01002152 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002153 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002154 auto NewPointerTy =
2155 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002156 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002157
Kévin Petite8edce32019-04-10 14:23:32 +01002158 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002159 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002160
Kévin Petite8edce32019-04-10 14:23:32 +01002161 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002162
Kévin Petite8edce32019-04-10 14:23:32 +01002163 // Turn the packed x & y into the final packing.
2164 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002165
Kévin Petite8edce32019-04-10 14:23:32 +01002166 // Cast the half* pointer to int*.
2167 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002168
Kévin Petite8edce32019-04-10 14:23:32 +01002169 // Index into the correct address of the casted pointer.
2170 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002171
Kévin Petite8edce32019-04-10 14:23:32 +01002172 // Store to the int* we casted to.
2173 return new StoreInst(X, Index, CI);
2174 });
David Neto22f144c2017-06-12 14:26:21 -04002175}
2176
SJW2c317da2020-03-23 07:39:13 -05002177bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2178 Module &M = *F.getParent();
2179 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002180 // The value to store.
2181 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002182
Kévin Petite8edce32019-04-10 14:23:32 +01002183 // The index argument from vstore_half.
2184 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002185
Kévin Petite8edce32019-04-10 14:23:32 +01002186 // The pointer argument from vstore_half.
2187 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002188
Kévin Petite8edce32019-04-10 14:23:32 +01002189 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002190 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2191 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002192 auto NewPointerTy =
2193 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002194 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002195
Kévin Petite8edce32019-04-10 14:23:32 +01002196 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2197 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002198
Kévin Petite8edce32019-04-10 14:23:32 +01002199 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002200 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2201 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002202
Kévin Petite8edce32019-04-10 14:23:32 +01002203 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2204 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002205
Kévin Petite8edce32019-04-10 14:23:32 +01002206 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002207 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2208 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002209
Kévin Petite8edce32019-04-10 14:23:32 +01002210 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002211 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002212
Kévin Petite8edce32019-04-10 14:23:32 +01002213 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002214
Kévin Petite8edce32019-04-10 14:23:32 +01002215 // Turn the packed x & y into the final component of our int2.
2216 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002217
Kévin Petite8edce32019-04-10 14:23:32 +01002218 // Turn the packed z & w into the final component of our int2.
2219 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002220
Kévin Petite8edce32019-04-10 14:23:32 +01002221 auto Combine = InsertElementInst::Create(
2222 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002223 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2224 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002225
Kévin Petite8edce32019-04-10 14:23:32 +01002226 // Cast the half* pointer to int2*.
2227 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002228
Kévin Petite8edce32019-04-10 14:23:32 +01002229 // Index into the correct address of the casted pointer.
2230 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002231
Kévin Petite8edce32019-04-10 14:23:32 +01002232 // Store to the int2* we casted to.
2233 return new StoreInst(Combine, Index, CI);
2234 });
David Neto22f144c2017-06-12 14:26:21 -04002235}
2236
SJW2c317da2020-03-23 07:39:13 -05002237bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2238 // convert half to float
2239 Module &M = *F.getParent();
2240 return replaceCallsWithValue(F, [&](CallInst *CI) {
2241 SmallVector<Type *, 3> types;
2242 SmallVector<Value *, 3> args;
Marco Antognini7e338402021-03-15 12:48:37 +00002243 for (size_t i = 0; i < CI->getNumArgOperands(); ++i) {
SJW2c317da2020-03-23 07:39:13 -05002244 types.push_back(CI->getArgOperand(i)->getType());
2245 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002246 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002247
alan-baker5a8c3be2020-09-09 13:44:26 -04002248 auto NewFType =
2249 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2250 cast<VectorType>(CI->getType())
2251 ->getElementCount()
2252 .getKnownMinValue()),
2253 types, false);
SJW2c317da2020-03-23 07:39:13 -05002254
SJW61531372020-06-09 07:31:08 -05002255 std::string NewFName =
2256 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002257
2258 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2259
2260 auto NewCI = CallInst::Create(NewF, args, "", CI);
2261
2262 // Convert to the half type.
2263 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2264 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002265}
2266
SJW2c317da2020-03-23 07:39:13 -05002267bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2268 // convert half to float
2269 Module &M = *F.getParent();
2270 return replaceCallsWithValue(F, [&](CallInst *CI) {
2271 SmallVector<Type *, 3> types(3);
2272 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002273
SJW2c317da2020-03-23 07:39:13 -05002274 // Image
2275 types[0] = CI->getArgOperand(0)->getType();
2276 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002277
SJW2c317da2020-03-23 07:39:13 -05002278 // Coord
2279 types[1] = CI->getArgOperand(1)->getType();
2280 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002281
SJW2c317da2020-03-23 07:39:13 -05002282 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002283 types[2] =
2284 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2285 cast<VectorType>(CI->getArgOperand(2)->getType())
2286 ->getElementCount()
2287 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002288
SJW2c317da2020-03-23 07:39:13 -05002289 auto NewFType =
2290 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002291
SJW61531372020-06-09 07:31:08 -05002292 std::string NewFName =
2293 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002294
SJW2c317da2020-03-23 07:39:13 -05002295 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002296
SJW2c317da2020-03-23 07:39:13 -05002297 // Convert data to the float type.
2298 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2299 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002300
SJW2c317da2020-03-23 07:39:13 -05002301 return CallInst::Create(NewF, args, "", CI);
2302 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002303}
2304
SJW2c317da2020-03-23 07:39:13 -05002305bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2306 Function &F) {
2307 // convert read_image with int coords to float coords
2308 Module &M = *F.getParent();
2309 return replaceCallsWithValue(F, [&](CallInst *CI) {
2310 // The image.
2311 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002312
SJW2c317da2020-03-23 07:39:13 -05002313 // The sampler.
2314 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002315
SJW2c317da2020-03-23 07:39:13 -05002316 // The coordinate (integer type that we can't handle).
2317 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002318
SJW2c317da2020-03-23 07:39:13 -05002319 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2320 uint32_t components =
2321 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2322 Type *float_ty = nullptr;
2323 if (components == 1) {
2324 float_ty = Type::getFloatTy(M.getContext());
2325 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002326 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2327 cast<VectorType>(Arg2->getType())
2328 ->getElementCount()
2329 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002330 }
David Neto22f144c2017-06-12 14:26:21 -04002331
SJW2c317da2020-03-23 07:39:13 -05002332 auto NewFType = FunctionType::get(
2333 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2334
2335 std::string NewFName = F.getName().str();
2336 NewFName[NewFName.length() - 1] = 'f';
2337
2338 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2339
2340 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2341
2342 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2343 });
David Neto22f144c2017-06-12 14:26:21 -04002344}
2345
SJW2c317da2020-03-23 07:39:13 -05002346bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2347 return replaceCallsWithValue(F, [&](CallInst *CI) {
2348 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002349
SJW2c317da2020-03-23 07:39:13 -05002350 // We need to map the OpenCL constants to the SPIR-V equivalents.
2351 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2352 const auto ConstantMemorySemantics = ConstantInt::get(
2353 IntTy, spv::MemorySemanticsUniformMemoryMask |
2354 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002355
SJW2c317da2020-03-23 07:39:13 -05002356 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002357
SJW2c317da2020-03-23 07:39:13 -05002358 // The pointer.
2359 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002360
SJW2c317da2020-03-23 07:39:13 -05002361 // The memory scope.
2362 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002363
SJW2c317da2020-03-23 07:39:13 -05002364 // The memory semantics.
2365 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002366
SJW2c317da2020-03-23 07:39:13 -05002367 if (2 < CI->getNumArgOperands()) {
2368 // The unequal memory semantics.
2369 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002370
SJW2c317da2020-03-23 07:39:13 -05002371 // The value.
2372 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002373
SJW2c317da2020-03-23 07:39:13 -05002374 // The comparator.
2375 Params.push_back(CI->getArgOperand(1));
2376 } else if (1 < CI->getNumArgOperands()) {
2377 // The value.
2378 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002379 }
David Neto22f144c2017-06-12 14:26:21 -04002380
SJW2c317da2020-03-23 07:39:13 -05002381 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2382 });
David Neto22f144c2017-06-12 14:26:21 -04002383}
2384
SJW2c317da2020-03-23 07:39:13 -05002385bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2386 llvm::AtomicRMWInst::BinOp Op) {
2387 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002388 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2389 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002390 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002391 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002392 SyncScope::System, CI);
2393 });
2394}
David Neto22f144c2017-06-12 14:26:21 -04002395
SJW2c317da2020-03-23 07:39:13 -05002396bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2397 Module &M = *F.getParent();
2398 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002399 auto IntTy = Type::getInt32Ty(M.getContext());
2400 auto FloatTy = Type::getFloatTy(M.getContext());
2401
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002402 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2403 ConstantInt::get(IntTy, 1),
2404 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002405
2406 Constant *UpShuffleMask[4] = {
2407 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2408 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2409
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002410 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2411 UndefValue::get(FloatTy),
2412 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002415 auto Arg0 =
2416 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2417 ConstantVector::get(DownShuffleMask), "", CI);
2418 auto Arg1 =
2419 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2420 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002421 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002422
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002423 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002424 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002425
SJW61531372020-06-09 07:31:08 -05002426 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002427
Kévin Petite8edce32019-04-10 14:23:32 +01002428 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002429
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002430 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2431 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002432 });
David Neto22f144c2017-06-12 14:26:21 -04002433}
David Neto62653202017-10-16 19:05:18 -04002434
SJW2c317da2020-03-23 07:39:13 -05002435bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002436 // OpenCL's float result = fract(float x, float* ptr)
2437 //
2438 // In the LLVM domain:
2439 //
2440 // %floor_result = call spir_func float @floor(float %x)
2441 // store float %floor_result, float * %ptr
2442 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2443 // %result = call spir_func float
2444 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2445 //
2446 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2447 // and clspv.fract occur in the SPIR-V generator pass:
2448 //
2449 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2450 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2451 // ...
2452 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2453 // OpStore %ptr %floor_result
2454 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2455 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002456 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002457
David Neto62653202017-10-16 19:05:18 -04002458 using std::string;
2459
2460 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2461 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002462
SJW2c317da2020-03-23 07:39:13 -05002463 Module &M = *F.getParent();
2464 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002465
SJW2c317da2020-03-23 07:39:13 -05002466 // This is either float or a float vector. All the float-like
2467 // types are this type.
2468 auto result_ty = F.getReturnType();
2469
SJW61531372020-06-09 07:31:08 -05002470 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002471 Function *fmin_fn = M.getFunction(fmin_name);
2472 if (!fmin_fn) {
2473 // Make the fmin function.
2474 FunctionType *fn_ty =
2475 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2476 fmin_fn =
2477 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2478 fmin_fn->addFnAttr(Attribute::ReadNone);
2479 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2480 }
2481
SJW61531372020-06-09 07:31:08 -05002482 std::string floor_name =
2483 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002484 Function *floor_fn = M.getFunction(floor_name);
2485 if (!floor_fn) {
2486 // Make the floor function.
2487 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2488 floor_fn =
2489 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2490 floor_fn->addFnAttr(Attribute::ReadNone);
2491 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2492 }
2493
SJW61531372020-06-09 07:31:08 -05002494 std::string clspv_fract_name =
2495 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002496 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2497 if (!clspv_fract_fn) {
2498 // Make the clspv_fract function.
2499 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2500 clspv_fract_fn = cast<Function>(
2501 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2502 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2503 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2504 }
2505
2506 // Number of significant significand bits, whether represented or not.
2507 unsigned num_significand_bits;
2508 switch (result_ty->getScalarType()->getTypeID()) {
2509 case Type::HalfTyID:
2510 num_significand_bits = 11;
2511 break;
2512 case Type::FloatTyID:
2513 num_significand_bits = 24;
2514 break;
2515 case Type::DoubleTyID:
2516 num_significand_bits = 53;
2517 break;
2518 default:
2519 llvm_unreachable("Unhandled float type when processing fract builtin");
2520 break;
2521 }
2522 // Beware that the disassembler displays this value as
2523 // OpConstant %float 1
2524 // which is not quite right.
2525 const double kJustUnderOneScalar =
2526 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2527
2528 Constant *just_under_one =
2529 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2530 if (result_ty->isVectorTy()) {
2531 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002532 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002533 }
2534
2535 IRBuilder<> Builder(CI);
2536
2537 auto arg = CI->getArgOperand(0);
2538 auto ptr = CI->getArgOperand(1);
2539
2540 // Compute floor result and store it.
2541 auto floor = Builder.CreateCall(floor_fn, {arg});
2542 Builder.CreateStore(floor, ptr);
2543
2544 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2545 auto fract_result =
2546 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2547
2548 return fract_result;
2549 });
David Neto62653202017-10-16 19:05:18 -04002550}
alan-bakera52b7312020-10-26 08:58:51 -04002551
Kévin Petit8576f682020-11-02 14:51:32 +00002552bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002553 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002554 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002555 // a_shr = a >> 1
2556 // b_shr = b >> 1
2557 // add1 = a_shr + b_shr
2558 // join = a |join_opcode| b
2559 // and = join & 1
2560 // add = add1 + and
2561 const auto a = Call->getArgOperand(0);
2562 const auto b = Call->getArgOperand(1);
2563 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002564 Value *a_shift, *b_shift;
2565 if (is_signed) {
2566 a_shift = builder.CreateAShr(a, 1);
2567 b_shift = builder.CreateAShr(b, 1);
2568 } else {
2569 a_shift = builder.CreateLShr(a, 1);
2570 b_shift = builder.CreateLShr(b, 1);
2571 }
alan-bakerb6da5132020-10-29 15:59:06 -04002572 auto add = builder.CreateAdd(a_shift, b_shift);
2573 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2574 auto constant_one = ConstantInt::get(a->getType(), 1);
2575 auto and_bit = builder.CreateAnd(join, constant_one);
2576 return builder.CreateAdd(add, and_bit);
2577 });
2578}
2579
alan-baker3f1bf492020-11-05 09:07:36 -05002580bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2581 bool is_add) {
2582 return replaceCallsWithValue(F, [&F, this, is_signed,
2583 is_add](CallInst *Call) {
2584 auto ty = Call->getType();
2585 auto a = Call->getArgOperand(0);
2586 auto b = Call->getArgOperand(1);
2587 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002588 if (is_signed) {
2589 unsigned bitwidth = ty->getScalarSizeInBits();
2590 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002591 unsigned extended_width = bitwidth << 1;
2592 Type *extended_ty =
2593 IntegerType::get(Call->getContext(), extended_width);
2594 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002595 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002596 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2597 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002598 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002599 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04002600 // Don't use the type in GetMangledFunctionName to ensure we get
2601 // signed parameters.
2602 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04002603 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05002604 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2605 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2606 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2607 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2608 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04002609 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04002610 } else {
2611 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2612 }
alan-baker3f1bf492020-11-05 09:07:36 -05002613 } else {
2614 if (extended_width == 32) {
2615 sclamp_name += "iii";
2616 } else {
2617 sclamp_name += "sss";
2618 }
alan-bakera52b7312020-10-26 08:58:51 -04002619 }
alan-baker3f1bf492020-11-05 09:07:36 -05002620
2621 auto sext_a = builder.CreateSExt(a, extended_ty);
2622 auto sext_b = builder.CreateSExt(b, extended_ty);
2623 Value *op = nullptr;
2624 // Extended operations won't wrap.
2625 if (is_add)
2626 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
2627 else
2628 op = builder.CreateSub(sext_a, sext_b, "", true, true);
2629 auto clamp_ty = FunctionType::get(
2630 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2631 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
2632 auto clamp = builder.CreateCall(callee, {op, min, max});
2633 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04002634 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002635 // Add:
2636 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04002637 // if (b < 0)
2638 // c = c > a ? min : c;
2639 // else
alan-baker3f1bf492020-11-05 09:07:36 -05002640 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04002641 //
alan-baker3f1bf492020-11-05 09:07:36 -05002642 // Sub:
2643 // c = a - b;
2644 // if (b < 0)
2645 // c = c < a ? max : c;
2646 // else
2647 // c = c > a ? min : c;
2648 Constant *min = ConstantInt::get(Call->getContext(),
2649 APInt::getSignedMinValue(bitwidth));
2650 Constant *max = ConstantInt::get(Call->getContext(),
2651 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04002652 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2653 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2654 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2655 }
alan-baker3f1bf492020-11-05 09:07:36 -05002656 Value *op = nullptr;
2657 if (is_add) {
2658 op = builder.CreateAdd(a, b);
2659 } else {
2660 op = builder.CreateSub(a, b);
2661 }
2662 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
2663 auto op_gt_a = builder.CreateICmpSGT(op, a);
2664 auto op_lt_a = builder.CreateICmpSLT(op, a);
2665 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
2666 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
2667 auto neg_value = is_add ? min : max;
2668 auto pos_value = is_add ? max : min;
2669 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
2670 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
2671 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04002672 }
2673 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002674 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
2675 // carry/borrow.
2676 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
2677 auto clamp_value =
2678 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
2679 auto struct_ty = GetPairStruct(ty);
2680 auto call =
2681 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
2682 auto add_sub = builder.CreateExtractValue(call, {0});
2683 auto carry_borrow = builder.CreateExtractValue(call, {1});
2684 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
2685 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04002686 }
alan-bakera52b7312020-10-26 08:58:51 -04002687 });
2688}
alan-baker4986eff2020-10-29 13:38:00 -04002689
2690bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2691 return replaceCallsWithValue(F, [](CallInst *Call) {
2692 auto pointer = Call->getArgOperand(0);
2693 // Clang emits an address space cast to the generic address space. Skip the
2694 // cast and use the input directly.
2695 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2696 pointer = cast->getPointerOperand();
2697 }
2698 Value *order_arg =
2699 Call->getNumArgOperands() > 1 ? Call->getArgOperand(1) : nullptr;
2700 Value *scope_arg =
2701 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2702 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2703 clspv::AddressSpace::Global;
2704 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2705 spv::MemorySemanticsAcquireMask);
2706 auto scope = MemoryScope(scope_arg, is_global, Call);
2707 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2708 Call->getType(), {pointer, scope, order});
2709 });
2710}
2711
2712bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2713 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2714 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2715 auto pointer = Call->getArgOperand(0);
2716 // Clang emits an address space cast to the generic address space. Skip the
2717 // cast and use the input directly.
2718 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2719 pointer = cast->getPointerOperand();
2720 }
2721 Value *value = Call->getArgOperand(1);
2722 Value *order_arg =
2723 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2724 Value *scope_arg =
2725 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2726 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2727 clspv::AddressSpace::Global;
2728 auto scope = MemoryScope(scope_arg, is_global, Call);
2729 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2730 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2731 {pointer, scope, order, value});
2732 });
2733}
2734
2735bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2736 return replaceCallsWithValue(F, [](CallInst *Call) {
2737 auto pointer = Call->getArgOperand(0);
2738 // Clang emits an address space cast to the generic address space. Skip the
2739 // cast and use the input directly.
2740 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2741 pointer = cast->getPointerOperand();
2742 }
2743 auto expected = Call->getArgOperand(1);
2744 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2745 expected = cast->getPointerOperand();
2746 }
2747 auto value = Call->getArgOperand(2);
2748 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2749 clspv::AddressSpace::Global;
2750 Value *success_arg =
2751 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2752 Value *failure_arg =
2753 Call->getNumArgOperands() > 4 ? Call->getArgOperand(4) : nullptr;
2754 Value *scope_arg =
2755 Call->getNumArgOperands() > 5 ? Call->getArgOperand(5) : nullptr;
2756 auto scope = MemoryScope(scope_arg, is_global, Call);
2757 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2758 spv::MemorySemanticsAcquireReleaseMask);
2759 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
2760 spv::MemorySemanticsAcquireMask);
2761
2762 // If the value pointed to by |expected| equals the value pointed to by
2763 // |pointer|, |value| is written into |pointer|, otherwise the value in
2764 // |pointer| is written into |expected|. In order to avoid extra stores,
2765 // the basic block with the original atomic is split and the store is
2766 // performed in the |then| block. The condition is the inversion of the
2767 // comparison result.
2768 IRBuilder<> builder(Call);
2769 auto load = builder.CreateLoad(expected);
2770 auto cmp_xchg = InsertSPIRVOp(
2771 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
2772 value->getType(), {pointer, scope, success, failure, value, load});
2773 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
2774 auto not_cmp = builder.CreateNot(cmp);
2775 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
2776 builder.SetInsertPoint(then_branch);
2777 builder.CreateStore(cmp_xchg, expected);
2778 return cmp;
2779 });
2780}
alan-bakercc2bafb2020-11-02 08:30:18 -05002781
alan-baker2cecaa72020-11-05 14:05:20 -05002782bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05002783 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2784 return false;
2785
2786 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05002787 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05002788 return false;
2789
alan-baker5f2e88e2020-12-07 15:24:04 -05002790 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
2791 Function *intrinsic = Intrinsic::getDeclaration(
2792 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
2793 Call->getType());
2794 const auto c_false = ConstantInt::getFalse(Call->getContext());
2795 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
2796 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05002797 });
2798}
alan-baker6b9d1ee2020-11-03 23:11:32 -05002799
2800bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
2801 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
2802 const auto ty = Call->getType();
2803 const auto a = Call->getArgOperand(0);
2804 const auto b = Call->getArgOperand(1);
2805 const auto c = Call->getArgOperand(2);
2806 IRBuilder<> builder(Call);
2807 if (is_signed) {
2808 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
2809 if (bitwidth < 32) {
2810 // mul = sext(a) * sext(b)
2811 // add = mul + sext(c)
2812 // res = clamp(add, MIN, MAX)
2813 unsigned extended_width = bitwidth << 1;
2814 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
2815 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2816 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2817 }
2818 auto a_sext = builder.CreateSExt(a, extended_ty);
2819 auto b_sext = builder.CreateSExt(b, extended_ty);
2820 auto c_sext = builder.CreateSExt(c, extended_ty);
2821 // Extended the size so no overflows occur.
2822 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
2823 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
2824 auto func_ty = FunctionType::get(
2825 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2826 // Don't use function type because we need signed parameters.
2827 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
2828 // The clamp values are the signed min and max of the original bitwidth
2829 // sign extended to the extended bitwidth.
2830 Constant *min = ConstantInt::get(
2831 Call->getContext(),
2832 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2833 Constant *max = ConstantInt::get(
2834 Call->getContext(),
2835 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
2836 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2837 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2838 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2839 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2840 if (extended_width == 32)
2841 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
2842 else
2843 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2844 } else {
2845 if (extended_width == 32)
2846 clamp_name += "iii";
2847 else
2848 clamp_name += "sss";
2849 }
2850 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
2851 auto clamp = builder.CreateCall(callee, {add, min, max});
2852 return builder.CreateTrunc(clamp, ty);
2853 } else {
2854 auto struct_ty = GetPairStruct(ty);
2855 // Compute
2856 // {hi, lo} = smul_extended(a, b)
2857 // add = lo + c
2858 auto mul_ext = InsertSPIRVOp(Call, spv::OpSMulExtended,
2859 {Attribute::ReadNone}, struct_ty, {a, b});
2860 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2861 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2862 auto add = builder.CreateAdd(mul_lo, c);
2863
2864 // Constants for use in the calculation.
2865 Constant *min = ConstantInt::get(Call->getContext(),
2866 APInt::getSignedMinValue(bitwidth));
2867 Constant *max = ConstantInt::get(Call->getContext(),
2868 APInt::getSignedMaxValue(bitwidth));
2869 Constant *max_plus_1 = ConstantInt::get(
2870 Call->getContext(),
2871 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
2872 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2873 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2874 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2875 max_plus_1 =
2876 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
2877 }
2878
2879 auto a_xor_b = builder.CreateXor(a, b);
2880 auto same_sign =
2881 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
2882 auto different_sign = builder.CreateNot(same_sign);
2883 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
2884 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
2885 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
2886 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
2887 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
2888 auto add_gt_max = builder.CreateICmpUGT(add, max);
2889 auto hi_eq_m1 =
2890 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
2891 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
2892 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
2893 auto max_sub_lo = builder.CreateSub(max, mul_lo);
2894 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
2895
2896 // Equivalent to:
2897 // if (((x < 0) == (y < 0)) && mul_hi != 0)
2898 // return MAX
2899 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
2900 // return MAX
2901 // if (((x < 0) != (y < 0)) && mul_hi != -1)
2902 // return MIN
2903 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
2904 // return MIN
2905 // return add
2906 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
2907 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
2908 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
2909 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
2910 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
2911 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
2912 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
2913 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
2914 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
2915 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
2916 auto sel = builder.CreateSelect(min_clamp, min, add);
2917 return builder.CreateSelect(max_clamp, max, sel);
2918 }
2919 } else {
2920 // {lo, hi} = mul_extended(a, b)
2921 // {add, carry} = add_carry(lo, c)
2922 // cmp = (mul_hi | carry) == 0
2923 // mad_sat = cmp ? add : MAX
2924 auto struct_ty = GetPairStruct(ty);
2925 auto mul_ext = InsertSPIRVOp(Call, spv::OpUMulExtended,
2926 {Attribute::ReadNone}, struct_ty, {a, b});
2927 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2928 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2929 auto add_carry =
2930 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
2931 struct_ty, {mul_lo, c});
2932 auto add = builder.CreateExtractValue(add_carry, {0});
2933 auto carry = builder.CreateExtractValue(add_carry, {1});
2934 auto or_value = builder.CreateOr(mul_hi, carry);
2935 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
2936 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
2937 }
2938 });
2939}
alan-baker15106572020-11-06 15:08:10 -05002940
2941bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
2942 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2943 return false;
2944
2945 if (F.getFunctionType()->getNumParams() != 2)
2946 return false;
2947
2948 if (F.getFunctionType()->getParamType(0) !=
2949 F.getFunctionType()->getParamType(1)) {
2950 return false;
2951 }
2952
2953 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
2954 case Type::FloatTyID:
2955 case Type::HalfTyID:
2956 case Type::DoubleTyID:
2957 break;
2958 default:
2959 return false;
2960 }
2961
2962 // Scalar versions all return an int, while vector versions return a vector
2963 // of an equally sized integer types (e.g. short, int or long).
2964 if (isa<VectorType>(F.getReturnType())) {
2965 if (F.getReturnType()->getScalarSizeInBits() !=
2966 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
2967 return false;
2968 }
2969 } else {
2970 if (F.getReturnType()->getScalarSizeInBits() != 32)
2971 return false;
2972 }
2973
2974 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
2975 // Replace with a floating point [un]ordered comparison followed by an
2976 // extension.
2977 auto x = Call->getArgOperand(0);
2978 auto y = Call->getArgOperand(1);
2979 IRBuilder<> builder(Call);
2980 Value *tmp = nullptr;
2981 if (is_ordered) {
2982 // This leads to a slight inefficiency in the SPIR-V that is easy for
2983 // drivers to optimize where the SPIR-V for the comparison and the
2984 // extension could be fused to drop the inversion of the OpIsNan.
2985 tmp = builder.CreateFCmpORD(x, y);
2986 } else {
2987 tmp = builder.CreateFCmpUNO(x, y);
2988 }
2989 // OpenCL CTS requires that vector versions use sign extension, but scalar
2990 // versions use zero extension.
2991 if (isa<VectorType>(Call->getType()))
2992 return builder.CreateSExt(tmp, Call->getType());
2993 return builder.CreateZExt(tmp, Call->getType());
2994 });
2995}
alan-baker497920b2020-11-09 16:41:36 -05002996
2997bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
2998 return replaceCallsWithValue(F, [this](CallInst *Call) {
2999 auto ty = Call->getType();
3000 auto x = Call->getArgOperand(0);
3001 unsigned width = x->getType()->getScalarSizeInBits();
3002 Type *int_ty = IntegerType::get(Call->getContext(), width);
3003 uint64_t abs_mask = 0x7fffffff;
3004 uint64_t exp_mask = 0x7f800000;
3005 uint64_t min_mask = 0x00800000;
3006 if (width == 16) {
3007 abs_mask = 0x7fff;
3008 exp_mask = 0x7c00;
3009 min_mask = 0x0400;
3010 } else if (width == 64) {
3011 abs_mask = 0x7fffffffffffffff;
3012 exp_mask = 0x7ff0000000000000;
3013 min_mask = 0x0010000000000000;
3014 }
3015 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3016 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3017 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3018 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3019 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3020 abs_const =
3021 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3022 exp_const =
3023 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3024 min_const =
3025 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3026 }
3027 // Drop the sign bit and then check that the number is between
3028 // (exclusive) the min and max exponent values for the bit width.
3029 IRBuilder<> builder(Call);
3030 auto bitcast = builder.CreateBitCast(x, int_ty);
3031 auto abs = builder.CreateAnd(bitcast, abs_const);
3032 auto lt = builder.CreateICmpULT(abs, exp_const);
3033 auto ge = builder.CreateICmpUGE(abs, min_const);
3034 auto tmp = builder.CreateAnd(lt, ge);
3035 // OpenCL CTS requires that vector versions use sign extension, but scalar
3036 // versions use zero extension.
3037 if (isa<VectorType>(ty))
3038 return builder.CreateSExt(tmp, ty);
3039 return builder.CreateZExt(tmp, ty);
3040 });
3041}
alan-bakere0406e72020-11-10 12:32:04 -05003042
3043bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3044 return replaceCallsWithValue(F, [](CallInst *Call) {
3045 const auto x = Call->getArgOperand(0);
3046 const auto y = Call->getArgOperand(1);
3047 IRBuilder<> builder(Call);
3048 auto sub = builder.CreateFSub(x, y);
3049 auto cmp = builder.CreateFCmpUGT(x, y);
3050 return builder.CreateSelect(cmp, sub,
3051 Constant::getNullValue(Call->getType()));
3052 });
3053}
alan-baker3e0de472020-12-08 15:57:17 -05003054
3055bool ReplaceOpenCLBuiltinPass::replaceRound(Function &F) {
3056 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3057 const auto x = Call->getArgOperand(0);
3058 const double c_halfway = 0.5;
3059 auto halfway = ConstantFP::get(Call->getType(), c_halfway);
3060
3061 const auto clspv_fract_name =
3062 Builtins::GetMangledFunctionName("clspv.fract", F.getFunctionType());
3063 Function *clspv_fract_fn = F.getParent()->getFunction(clspv_fract_name);
3064 if (!clspv_fract_fn) {
3065 // Make the clspv_fract function.
3066 clspv_fract_fn = cast<Function>(
3067 F.getParent()
3068 ->getOrInsertFunction(clspv_fract_name, F.getFunctionType())
3069 .getCallee());
3070 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3071 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3072 }
3073
3074 auto ceil = Intrinsic::getDeclaration(F.getParent(), Intrinsic::ceil,
3075 Call->getType());
3076 auto floor = Intrinsic::getDeclaration(F.getParent(), Intrinsic::floor,
3077 Call->getType());
3078 auto fabs = Intrinsic::getDeclaration(F.getParent(), Intrinsic::fabs,
3079 Call->getType());
3080 auto copysign = Intrinsic::getDeclaration(
3081 F.getParent(), Intrinsic::copysign, {Call->getType(), Call->getType()});
3082
3083 IRBuilder<> builder(Call);
3084
3085 auto fabs_call = builder.CreateCall(F.getFunctionType(), fabs, {x});
3086 auto ceil_call = builder.CreateCall(F.getFunctionType(), ceil, {fabs_call});
3087 auto floor_call =
3088 builder.CreateCall(F.getFunctionType(), floor, {fabs_call});
3089 auto fract_call =
3090 builder.CreateCall(F.getFunctionType(), clspv_fract_fn, {fabs_call});
3091 auto cmp = builder.CreateFCmpOGE(fract_call, halfway);
3092 auto sel = builder.CreateSelect(cmp, ceil_call, floor_call);
3093 return builder.CreateCall(copysign->getFunctionType(), copysign, {sel, x});
3094 });
3095}
3096
3097bool ReplaceOpenCLBuiltinPass::replaceTrigPi(Function &F,
3098 Builtins::BuiltinType type) {
3099 return replaceCallsWithValue(F, [&F, type](CallInst *Call) -> Value * {
3100 const auto x = Call->getArgOperand(0);
3101 const double k_pi = 0x1.921fb54442d18p+1;
3102 Constant *pi = ConstantFP::get(x->getType(), k_pi);
3103
3104 IRBuilder<> builder(Call);
3105 auto mul = builder.CreateFMul(x, pi);
3106 switch (type) {
3107 case Builtins::kSinpi: {
3108 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3109 x->getType());
3110 return builder.CreateCall(func->getFunctionType(), func, {mul});
3111 }
3112 case Builtins::kCospi: {
3113 auto func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3114 x->getType());
3115 return builder.CreateCall(func->getFunctionType(), func, {mul});
3116 }
3117 case Builtins::kTanpi: {
3118 auto sin = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3119 x->getType());
3120 auto sin_call = builder.CreateCall(sin->getFunctionType(), sin, {mul});
3121 auto cos = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3122 x->getType());
3123 auto cos_call = builder.CreateCall(cos->getFunctionType(), cos, {mul});
3124 return builder.CreateFDiv(sin_call, cos_call);
3125 }
3126 default:
3127 llvm_unreachable("unexpected builtin");
3128 break;
3129 }
3130 return nullptr;
3131 });
3132}
alan-baker8b968112020-12-15 15:53:29 -05003133
3134bool ReplaceOpenCLBuiltinPass::replaceSincos(Function &F) {
3135 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3136 auto sin_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::sin,
3137 Call->getType());
3138 auto cos_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::cos,
3139 Call->getType());
3140
3141 IRBuilder<> builder(Call);
3142 auto sin = builder.CreateCall(sin_func->getFunctionType(), sin_func,
3143 {Call->getArgOperand(0)});
3144 auto cos = builder.CreateCall(cos_func->getFunctionType(), cos_func,
3145 {Call->getArgOperand(0)});
3146 builder.CreateStore(cos, Call->getArgOperand(1));
3147 return sin;
3148 });
3149}
3150
3151bool ReplaceOpenCLBuiltinPass::replaceExpm1(Function &F) {
3152 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3153 auto exp_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::exp,
3154 Call->getType());
3155
3156 IRBuilder<> builder(Call);
3157 auto exp = builder.CreateCall(exp_func->getFunctionType(), exp_func,
3158 {Call->getArgOperand(0)});
3159 return builder.CreateFSub(exp, ConstantFP::get(Call->getType(), 1.0));
3160 });
3161}
3162
3163bool ReplaceOpenCLBuiltinPass::replacePown(Function &F) {
3164 return replaceCallsWithValue(F, [&F](CallInst *Call) {
3165 auto pow_func = Intrinsic::getDeclaration(F.getParent(), Intrinsic::pow,
3166 Call->getType());
3167
3168 IRBuilder<> builder(Call);
3169 auto conv = builder.CreateSIToFP(Call->getArgOperand(1), Call->getType());
3170 return builder.CreateCall(pow_func->getFunctionType(), pow_func,
3171 {Call->getArgOperand(0), conv});
3172 });
3173}