blob: 6d01f4c71c134e2c0fa036a3c7f4f1cf64c649a7 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)) for non-zero |v|. Returns 0 when |v| is 0 or 1.
// NOTE: despite its name, this does not return a count of leading zero bits.
uint32_t clz(uint32_t v) {
  uint32_t msb_index = 0;

  // Binary search for the top set bit: at each step, if anything is set at or
  // above bit |width|, discard the low |width| bits and record the shift.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      msb_index |= width;
    }
  }

  return msb_index;
}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
81 spv::MemorySemanticsMask base_semantics) {
82 enum AtomicMemoryOrder : uint32_t {
83 kMemoryOrderRelaxed = 0,
84 kMemoryOrderAcquire = 2,
85 kMemoryOrderRelease = 3,
86 kMemoryOrderAcqRel = 4,
87 kMemoryOrderSeqCst = 5
88 };
89
90 IRBuilder<> builder(InsertBefore);
91
92 // Constants for OpenCL C 2.0 memory_order.
93 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
94 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
95 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
96 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
97
98 // Constants for SPIR-V ordering memory semantics.
99 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
100 const auto AcquireSemantics =
101 builder.getInt32(spv::MemorySemanticsAcquireMask);
102 const auto ReleaseSemantics =
103 builder.getInt32(spv::MemorySemanticsReleaseMask);
104 const auto AcqRelSemantics =
105 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
106
107 // Constants for SPIR-V storage class semantics.
108 const auto UniformSemantics =
109 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
110 const auto WorkgroupSemantics =
111 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
112
113 // Instead of sequentially consistent, use acquire, release or acquire
114 // release semantics.
115 Value *base_order = nullptr;
116 switch (base_semantics) {
117 case spv::MemorySemanticsAcquireMask:
118 base_order = AcquireSemantics;
119 break;
120 case spv::MemorySemanticsReleaseMask:
121 base_order = ReleaseSemantics;
122 break;
123 default:
124 base_order = AcqRelSemantics;
125 break;
126 }
127
128 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
129 if (order == nullptr)
130 return builder.CreateOr({storage, base_order});
131
132 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
133 auto is_acquire = builder.CreateICmpEQ(order, acquire);
134 auto is_release = builder.CreateICmpEQ(order, release);
135 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
136 auto semantics =
137 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
138 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
139 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
140 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
141 return builder.CreateOr({storage, semantics});
142}
143
144Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
145 enum AtomicMemoryScope : uint32_t {
146 kMemoryScopeWorkItem = 0,
147 kMemoryScopeWorkGroup = 1,
148 kMemoryScopeDevice = 2,
149 kMemoryScopeAllSVMDevices = 3, // not supported
150 kMemoryScopeSubGroup = 4
151 };
152
153 IRBuilder<> builder(InsertBefore);
154
155 // Constants for OpenCL C 2.0 memory_scope.
156 const auto work_item =
157 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
158 const auto work_group =
159 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
160 const auto sub_group =
161 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
162 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
163
164 // Constants for SPIR-V memory scopes.
165 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
166 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
167 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
168 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
169
170 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
171 if (scope == nullptr)
172 return base_scope;
173
174 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
175 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
176 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
177 auto is_device = builder.CreateICmpEQ(scope, device);
178
179 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
180 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
181 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
182 scope = builder.CreateSelect(is_device, DeviceScope, scope);
183
184 return scope;
185}
186
SJW2c317da2020-03-23 07:39:13 -0500187bool replaceCallsWithValue(Function &F,
188 std::function<Value *(CallInst *)> Replacer) {
189
190 bool Changed = false;
191
192 SmallVector<Instruction *, 4> ToRemoves;
193
194 // Walk the users of the function.
195 for (auto &U : F.uses()) {
196 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
197
198 auto NewValue = Replacer(CI);
199
200 if (NewValue != nullptr) {
201 CI->replaceAllUsesWith(NewValue);
202
203 // Lastly, remember to remove the user.
204 ToRemoves.push_back(CI);
205 }
206 }
207 }
208
209 Changed = !ToRemoves.empty();
210
211 // And cleanup the calls we don't use anymore.
212 for (auto V : ToRemoves) {
213 V->eraseFromParent();
214 }
215
216 return Changed;
217}
218
David Neto22f144c2017-06-12 14:26:21 -0400219struct ReplaceOpenCLBuiltinPass final : public ModulePass {
220 static char ID;
221 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
222
223 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500224
225private:
SJW2c317da2020-03-23 07:39:13 -0500226 bool runOnFunction(Function &F);
227 bool replaceAbs(Function &F);
228 bool replaceAbsDiff(Function &F, bool is_signed);
229 bool replaceCopysign(Function &F);
230 bool replaceRecip(Function &F);
231 bool replaceDivide(Function &F);
232 bool replaceDot(Function &F);
233 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500234 bool replaceExp10(Function &F, const std::string &basename);
235 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100236 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400237 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500238 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100239 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500240 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500241 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
242 bool replaceIsFinite(Function &F);
243 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
244 bool replaceUpsample(Function &F);
245 bool replaceRotate(Function &F);
246 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
247 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
248 bool replaceSelect(Function &F);
249 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500250 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500251 bool replaceSignbit(Function &F, bool is_vec);
252 bool replaceMul(Function &F, bool is_float, bool is_mad);
253 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
254 bool replaceVloadHalf(Function &F);
255 bool replaceVloadHalf2(Function &F);
256 bool replaceVloadHalf4(Function &F);
257 bool replaceClspvVloadaHalf2(Function &F);
258 bool replaceClspvVloadaHalf4(Function &F);
259 bool replaceVstoreHalf(Function &F, int vec_size);
260 bool replaceVstoreHalf(Function &F);
261 bool replaceVstoreHalf2(Function &F);
262 bool replaceVstoreHalf4(Function &F);
263 bool replaceHalfReadImage(Function &F);
264 bool replaceHalfWriteImage(Function &F);
265 bool replaceSampledReadImageWithIntCoords(Function &F);
266 bool replaceAtomics(Function &F, spv::Op Op);
267 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400268 bool replaceAtomicLoad(Function &F);
269 bool replaceExplicitAtomics(Function &F, spv::Op Op,
270 spv::MemorySemanticsMask semantics =
271 spv::MemorySemanticsAcquireReleaseMask);
272 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500273 bool replaceCross(Function &F);
274 bool replaceFract(Function &F, int vec_size);
275 bool replaceVload(Function &F);
276 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500277 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000278 bool replaceHadd(Function &F, bool is_signed,
279 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500280 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500281 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500282 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500283 bool replaceIsNormal(Function &F);
alan-bakere0406e72020-11-10 12:32:04 -0500284 bool replaceFDim(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500285
286 // Caches struct types for { |type|, |type| }. This prevents
287 // getOrInsertFunction from introducing a bitcasts between structs with
288 // identical contents.
289 Type *GetPairStruct(Type *type);
290
291 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400292};
SJW2c317da2020-03-23 07:39:13 -0500293
Kévin Petit91bc72e2019-04-08 15:17:46 +0100294} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400295
296char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400297INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
298 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400299
300namespace clspv {
301ModulePass *createReplaceOpenCLBuiltinPass() {
302 return new ReplaceOpenCLBuiltinPass();
303}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400304} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400305
306bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500307 std::list<Function *> func_list;
308 for (auto &F : M.getFunctionList()) {
309 // process only function declarations
310 if (F.isDeclaration() && runOnFunction(F)) {
311 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000312 }
313 }
SJW2c317da2020-03-23 07:39:13 -0500314 if (func_list.size() != 0) {
315 // recursively convert functions, but first remove dead
316 for (auto *F : func_list) {
317 if (F->use_empty()) {
318 F->eraseFromParent();
319 }
320 }
321 runOnModule(M);
322 return true;
323 }
324 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000325}
326
SJW2c317da2020-03-23 07:39:13 -0500327bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
328 auto &FI = Builtins::Lookup(&F);
329 switch (FI.getType()) {
330 case Builtins::kAbs:
331 if (!FI.getParameter(0).is_signed) {
332 return replaceAbs(F);
333 }
334 break;
335 case Builtins::kAbsDiff:
336 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400337
338 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500339 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400340
alan-bakercc2bafb2020-11-02 08:30:18 -0500341 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500342 return replaceCountZeroes(F, true);
343
344 case Builtins::kCtz:
345 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500346
alan-bakerb6da5132020-10-29 15:59:06 -0400347 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000348 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400349 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000350 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400351
SJW2c317da2020-03-23 07:39:13 -0500352 case Builtins::kCopysign:
353 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100354
SJW2c317da2020-03-23 07:39:13 -0500355 case Builtins::kHalfRecip:
356 case Builtins::kNativeRecip:
357 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100358
SJW2c317da2020-03-23 07:39:13 -0500359 case Builtins::kHalfDivide:
360 case Builtins::kNativeDivide:
361 return replaceDivide(F);
362
363 case Builtins::kDot:
364 return replaceDot(F);
365
366 case Builtins::kExp10:
367 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500368 case Builtins::kNativeExp10:
369 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500370
371 case Builtins::kLog10:
372 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500373 case Builtins::kNativeLog10:
374 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500375
gnl21636e7992020-09-09 16:08:16 +0100376 case Builtins::kLog1p:
377 return replaceLog1p(F);
378
alan-bakere0406e72020-11-10 12:32:04 -0500379 case Builtins::kFdim:
380 return replaceFDim(F);
381
SJW2c317da2020-03-23 07:39:13 -0500382 case Builtins::kFmod:
383 return replaceFmod(F);
384
385 case Builtins::kBarrier:
386 case Builtins::kWorkGroupBarrier:
387 return replaceBarrier(F);
388
alan-baker12d2c182020-07-20 08:22:42 -0400389 case Builtins::kSubGroupBarrier:
390 return replaceBarrier(F, true);
391
SJW2c317da2020-03-23 07:39:13 -0500392 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400393 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500394 case Builtins::kReadMemFence:
395 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
396 case Builtins::kWriteMemFence:
397 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
398
399 // Relational
400 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500401 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500402 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500403 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500404 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500405 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500406 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500407 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500408 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500409 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500410 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500411 return replaceRelational(F, CmpInst::FCMP_UNE);
412 case Builtins::kIslessgreater:
413 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500414
alan-baker15106572020-11-06 15:08:10 -0500415 case Builtins::kIsordered:
416 return replaceOrdered(F, true);
417
418 case Builtins::kIsunordered:
419 return replaceOrdered(F, false);
420
SJW2c317da2020-03-23 07:39:13 -0500421 case Builtins::kIsinf: {
422 bool is_vec = FI.getParameter(0).vector_size != 0;
423 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
424 }
425 case Builtins::kIsnan: {
426 bool is_vec = FI.getParameter(0).vector_size != 0;
427 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
428 }
429
430 case Builtins::kIsfinite:
431 return replaceIsFinite(F);
432
433 case Builtins::kAll: {
434 bool is_vec = FI.getParameter(0).vector_size != 0;
435 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
436 }
437 case Builtins::kAny: {
438 bool is_vec = FI.getParameter(0).vector_size != 0;
439 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
440 }
441
alan-baker497920b2020-11-09 16:41:36 -0500442 case Builtins::kIsnormal:
443 return replaceIsNormal(F);
444
SJW2c317da2020-03-23 07:39:13 -0500445 case Builtins::kUpsample:
446 return replaceUpsample(F);
447
448 case Builtins::kRotate:
449 return replaceRotate(F);
450
451 case Builtins::kConvert:
452 return replaceConvert(F, FI.getParameter(0).is_signed,
453 FI.getReturnType().is_signed);
454
alan-baker4986eff2020-10-29 13:38:00 -0400455 // OpenCL 2.0 explicit atomics have different default scopes and semantics
456 // than legacy atomic functions.
457 case Builtins::kAtomicLoad:
458 case Builtins::kAtomicLoadExplicit:
459 return replaceAtomicLoad(F);
460 case Builtins::kAtomicStore:
461 case Builtins::kAtomicStoreExplicit:
462 return replaceExplicitAtomics(F, spv::OpAtomicStore,
463 spv::MemorySemanticsReleaseMask);
464 case Builtins::kAtomicExchange:
465 case Builtins::kAtomicExchangeExplicit:
466 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
467 case Builtins::kAtomicFetchAdd:
468 case Builtins::kAtomicFetchAddExplicit:
469 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
470 case Builtins::kAtomicFetchSub:
471 case Builtins::kAtomicFetchSubExplicit:
472 return replaceExplicitAtomics(F, spv::OpAtomicISub);
473 case Builtins::kAtomicFetchOr:
474 case Builtins::kAtomicFetchOrExplicit:
475 return replaceExplicitAtomics(F, spv::OpAtomicOr);
476 case Builtins::kAtomicFetchXor:
477 case Builtins::kAtomicFetchXorExplicit:
478 return replaceExplicitAtomics(F, spv::OpAtomicXor);
479 case Builtins::kAtomicFetchAnd:
480 case Builtins::kAtomicFetchAndExplicit:
481 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
482 case Builtins::kAtomicFetchMin:
483 case Builtins::kAtomicFetchMinExplicit:
484 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
485 ? spv::OpAtomicSMin
486 : spv::OpAtomicUMin);
487 case Builtins::kAtomicFetchMax:
488 case Builtins::kAtomicFetchMaxExplicit:
489 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
490 ? spv::OpAtomicSMax
491 : spv::OpAtomicUMax);
492 // Weak compare exchange is generated as strong compare exchange.
493 case Builtins::kAtomicCompareExchangeWeak:
494 case Builtins::kAtomicCompareExchangeWeakExplicit:
495 case Builtins::kAtomicCompareExchangeStrong:
496 case Builtins::kAtomicCompareExchangeStrongExplicit:
497 return replaceAtomicCompareExchange(F);
498
499 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500500 case Builtins::kAtomicInc:
501 return replaceAtomics(F, spv::OpAtomicIIncrement);
502 case Builtins::kAtomicDec:
503 return replaceAtomics(F, spv::OpAtomicIDecrement);
504 case Builtins::kAtomicCmpxchg:
505 return replaceAtomics(F, spv::OpAtomicCompareExchange);
506 case Builtins::kAtomicAdd:
507 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
508 case Builtins::kAtomicSub:
509 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
510 case Builtins::kAtomicXchg:
511 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
512 case Builtins::kAtomicMin:
513 return replaceAtomics(F, FI.getParameter(0).is_signed
514 ? llvm::AtomicRMWInst::Min
515 : llvm::AtomicRMWInst::UMin);
516 case Builtins::kAtomicMax:
517 return replaceAtomics(F, FI.getParameter(0).is_signed
518 ? llvm::AtomicRMWInst::Max
519 : llvm::AtomicRMWInst::UMax);
520 case Builtins::kAtomicAnd:
521 return replaceAtomics(F, llvm::AtomicRMWInst::And);
522 case Builtins::kAtomicOr:
523 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
524 case Builtins::kAtomicXor:
525 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
526
527 case Builtins::kCross:
528 if (FI.getParameter(0).vector_size == 4) {
529 return replaceCross(F);
530 }
531 break;
532
533 case Builtins::kFract:
534 if (FI.getParameterCount()) {
535 return replaceFract(F, FI.getParameter(0).vector_size);
536 }
537 break;
538
539 case Builtins::kMadHi:
540 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
541 case Builtins::kMulHi:
542 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
543
alan-baker6b9d1ee2020-11-03 23:11:32 -0500544 case Builtins::kMadSat:
545 return replaceMadSat(F, FI.getParameter(0).is_signed);
546
SJW2c317da2020-03-23 07:39:13 -0500547 case Builtins::kMad:
548 case Builtins::kMad24:
549 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
550 true);
551 case Builtins::kMul24:
552 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
553 false);
554
555 case Builtins::kSelect:
556 return replaceSelect(F);
557
558 case Builtins::kBitselect:
559 return replaceBitSelect(F);
560
561 case Builtins::kVload:
562 return replaceVload(F);
563
564 case Builtins::kVloadaHalf:
565 case Builtins::kVloadHalf:
566 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
567
568 case Builtins::kVstore:
569 return replaceVstore(F);
570
571 case Builtins::kVstoreHalf:
572 case Builtins::kVstoreaHalf:
573 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
574
575 case Builtins::kSmoothstep: {
576 int vec_size = FI.getLastParameter().vector_size;
577 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500578 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500579 }
580 break;
581 }
582 case Builtins::kStep: {
583 int vec_size = FI.getLastParameter().vector_size;
584 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500585 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500586 }
587 break;
588 }
589
590 case Builtins::kSignbit:
591 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
592
alan-baker3f1bf492020-11-05 09:07:36 -0500593 case Builtins::kSubSat:
594 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
595
SJW2c317da2020-03-23 07:39:13 -0500596 case Builtins::kReadImageh:
597 return replaceHalfReadImage(F);
598 case Builtins::kReadImagef:
599 case Builtins::kReadImagei:
600 case Builtins::kReadImageui: {
601 if (FI.getParameter(1).isSampler() &&
602 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
603 return replaceSampledReadImageWithIntCoords(F);
604 }
605 break;
606 }
607
608 case Builtins::kWriteImageh:
609 return replaceHalfWriteImage(F);
610
Kévin Petit1cb45112020-04-27 18:55:48 +0100611 case Builtins::kPrefetch:
612 return replacePrefetch(F);
613
SJW2c317da2020-03-23 07:39:13 -0500614 default:
615 break;
616 }
617
618 return false;
619}
620
alan-baker6b9d1ee2020-11-03 23:11:32 -0500621Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
622 auto iter = PairStructMap.find(type);
623 if (iter != PairStructMap.end())
624 return iter->second;
625
626 auto new_struct = StructType::get(type->getContext(), {type, type});
627 PairStructMap[type] = new_struct;
628 return new_struct;
629}
630
SJW2c317da2020-03-23 07:39:13 -0500631bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
632 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400633 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100634}
635
SJW2c317da2020-03-23 07:39:13 -0500636bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
637 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100638 auto XValue = CI->getOperand(0);
639 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100640
Kévin Petite8edce32019-04-10 14:23:32 +0100641 IRBuilder<> Builder(CI);
642 auto XmY = Builder.CreateSub(XValue, YValue);
643 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100644
SJW2c317da2020-03-23 07:39:13 -0500645 Value *Cmp = nullptr;
646 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100647 Cmp = Builder.CreateICmpSGT(YValue, XValue);
648 } else {
649 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100650 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100651
Kévin Petite8edce32019-04-10 14:23:32 +0100652 return Builder.CreateSelect(Cmp, YmX, XmY);
653 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100654}
655
SJW2c317da2020-03-23 07:39:13 -0500656bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500657 return replaceCallsWithValue(F, [&F](CallInst *Call) {
658 const auto x = Call->getArgOperand(0);
659 const auto y = Call->getArgOperand(1);
660 auto intrinsic = Intrinsic::getDeclaration(
661 F.getParent(), Intrinsic::copysign, Call->getType());
662 return CallInst::Create(intrinsic->getFunctionType(), intrinsic, {x, y}, "",
663 Call);
Kévin Petite8edce32019-04-10 14:23:32 +0100664 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100665}
666
SJW2c317da2020-03-23 07:39:13 -0500667bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
668 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100669 // Recip has one arg.
670 auto Arg = CI->getOperand(0);
671 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
672 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
673 });
David Neto22f144c2017-06-12 14:26:21 -0400674}
675
SJW2c317da2020-03-23 07:39:13 -0500676bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
677 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100678 auto Op0 = CI->getOperand(0);
679 auto Op1 = CI->getOperand(1);
680 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
681 });
David Neto22f144c2017-06-12 14:26:21 -0400682}
683
SJW2c317da2020-03-23 07:39:13 -0500684bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
685 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100686 auto Op0 = CI->getOperand(0);
687 auto Op1 = CI->getOperand(1);
688
SJW2c317da2020-03-23 07:39:13 -0500689 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100690 if (Op0->getType()->isVectorTy()) {
691 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
692 CI->getType(), {Op0, Op1});
693 } else {
694 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
695 }
696
697 return V;
698 });
699}
700
SJW2c317da2020-03-23 07:39:13 -0500701bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500702 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500703 // convert to natural
704 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500705 std::string NewFName = basename.substr(0, slen);
706 NewFName =
707 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400708
SJW2c317da2020-03-23 07:39:13 -0500709 Module &M = *F.getParent();
710 return replaceCallsWithValue(F, [&](CallInst *CI) {
711 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
712
713 auto Arg = CI->getOperand(0);
714
715 // Constant of the natural log of 10 (ln(10)).
716 const double Ln10 =
717 2.302585092994045684017991454684364207601101488628772976033;
718
719 auto Mul = BinaryOperator::Create(
720 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
721
722 return CallInst::Create(NewF, Mul, "", CI);
723 });
David Neto22f144c2017-06-12 14:26:21 -0400724}
725
SJW2c317da2020-03-23 07:39:13 -0500726bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100727 // OpenCL fmod(x,y) is x - y * trunc(x/y)
728 // The sign for a non-zero result is taken from x.
729 // (Try an example.)
730 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500731 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100732 auto Op0 = CI->getOperand(0);
733 auto Op1 = CI->getOperand(1);
734 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
735 });
736}
737
SJW2c317da2020-03-23 07:39:13 -0500738bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500739 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500740 // convert to natural
741 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500742 std::string NewFName = basename.substr(0, slen);
743 NewFName =
744 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400745
SJW2c317da2020-03-23 07:39:13 -0500746 Module &M = *F.getParent();
747 return replaceCallsWithValue(F, [&](CallInst *CI) {
748 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
749
750 auto Arg = CI->getOperand(0);
751
752 // Constant of the reciprocal of the natural log of 10 (ln(10)).
753 const double Ln10 =
754 0.434294481903251827651128918916605082294397005803666566114;
755
756 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
757
758 return BinaryOperator::Create(Instruction::FMul,
759 ConstantFP::get(Arg->getType(), Ln10), NewCI,
760 "", CI);
761 });
David Neto22f144c2017-06-12 14:26:21 -0400762}
763
gnl21636e7992020-09-09 16:08:16 +0100764bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
765 // convert to natural
766 std::string NewFName =
767 Builtins::GetMangledFunctionName("log", F.getFunctionType());
768
769 Module &M = *F.getParent();
770 return replaceCallsWithValue(F, [&](CallInst *CI) {
771 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
772
773 auto Arg = CI->getOperand(0);
774
775 auto ArgP1 = BinaryOperator::Create(
776 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
777
778 return CallInst::Create(NewF, ArgP1, "", CI);
779 });
780}
781
alan-baker12d2c182020-07-20 08:22:42 -0400782bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400783
alan-bakerf6bc8252020-09-23 14:58:55 -0400784 enum {
785 CLK_LOCAL_MEM_FENCE = 0x01,
786 CLK_GLOBAL_MEM_FENCE = 0x02,
787 CLK_IMAGE_MEM_FENCE = 0x04
788 };
David Neto22f144c2017-06-12 14:26:21 -0400789
alan-baker12d2c182020-07-20 08:22:42 -0400790 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100791 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400792
Kévin Petitc4643922019-06-17 19:32:05 +0100793 // We need to map the OpenCL constants to the SPIR-V equivalents.
794 const auto LocalMemFence =
795 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
796 const auto GlobalMemFence =
797 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400798 const auto ImageMemFence =
799 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400800 const auto ConstantAcquireRelease = ConstantInt::get(
801 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100802 const auto ConstantScopeDevice =
803 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
804 const auto ConstantScopeWorkgroup =
805 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400806 const auto ConstantScopeSubgroup =
807 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400808
Kévin Petitc4643922019-06-17 19:32:05 +0100809 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
810 const auto LocalMemFenceMask =
811 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
812 const auto WorkgroupShiftAmount =
813 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
814 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
815 Instruction::Shl, LocalMemFenceMask,
816 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400817
Kévin Petitc4643922019-06-17 19:32:05 +0100818 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
819 const auto GlobalMemFenceMask =
820 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
821 const auto UniformShiftAmount =
822 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
823 const auto MemorySemanticsUniform = BinaryOperator::Create(
824 Instruction::Shl, GlobalMemFenceMask,
825 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400826
alan-bakerf6bc8252020-09-23 14:58:55 -0400827 // OpenCL 2.0
828 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
829 const auto ImageMemFenceMask =
830 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
831 const auto ImageShiftAmount =
832 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
833 const auto MemorySemanticsImage = BinaryOperator::Create(
834 Instruction::Shl, ImageMemFenceMask,
835 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
836
Kévin Petitc4643922019-06-17 19:32:05 +0100837 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400838 // MemorySemanticsSequentiallyConsistentMask.
839 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100840 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400841 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400842 auto MemorySemantics2 = BinaryOperator::Create(
843 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
844 auto MemorySemantics = BinaryOperator::Create(
845 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400846
alan-baker12d2c182020-07-20 08:22:42 -0400847 // If the memory scope is not specified explicitly, it is either Subgroup
848 // or Workgroup depending on the type of barrier.
849 Value *MemoryScope =
850 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
851 if (CI->data_operands_size() > 1) {
852 enum {
853 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
854 CL_MEMORY_SCOPE_DEVICE = 0x2,
855 CL_MEMORY_SCOPE_SUBGROUP = 0x4
856 };
857 // The call was given an explicit memory scope.
858 const auto MemoryScopeSubgroup =
859 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
860 const auto MemoryScopeDevice =
861 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400862
alan-baker12d2c182020-07-20 08:22:42 -0400863 auto Cmp =
864 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
865 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
866 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
867 ConstantScopeWorkgroup, "", CI);
868 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
869 MemoryScopeDevice, CI->getOperand(1), "", CI);
870 MemoryScope =
871 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
872 }
873
874 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
875 // the type of barrier;
876 const auto ExecutionScope =
877 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400878
Kévin Petitc4643922019-06-17 19:32:05 +0100879 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -0400880 {Attribute::NoDuplicate, Attribute::Convergent},
881 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +0100882 {ExecutionScope, MemoryScope, MemorySemantics});
883 });
David Neto22f144c2017-06-12 14:26:21 -0400884}
885
SJW2c317da2020-03-23 07:39:13 -0500886bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
887 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400888
SJW2c317da2020-03-23 07:39:13 -0500889 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400890 enum {
891 CLK_LOCAL_MEM_FENCE = 0x01,
892 CLK_GLOBAL_MEM_FENCE = 0x02,
893 CLK_IMAGE_MEM_FENCE = 0x04,
894 };
David Neto22f144c2017-06-12 14:26:21 -0400895
SJW2c317da2020-03-23 07:39:13 -0500896 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400897
SJW2c317da2020-03-23 07:39:13 -0500898 // We need to map the OpenCL constants to the SPIR-V equivalents.
899 const auto LocalMemFence =
900 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
901 const auto GlobalMemFence =
902 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400903 const auto ImageMemFence =
904 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500905 const auto ConstantMemorySemantics =
906 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400907 const auto ConstantScopeWorkgroup =
908 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400909
SJW2c317da2020-03-23 07:39:13 -0500910 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
911 const auto LocalMemFenceMask =
912 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
913 const auto WorkgroupShiftAmount =
914 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
915 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
916 Instruction::Shl, LocalMemFenceMask,
917 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400918
SJW2c317da2020-03-23 07:39:13 -0500919 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
920 const auto GlobalMemFenceMask =
921 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
922 const auto UniformShiftAmount =
923 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
924 const auto MemorySemanticsUniform = BinaryOperator::Create(
925 Instruction::Shl, GlobalMemFenceMask,
926 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400927
alan-bakerf6bc8252020-09-23 14:58:55 -0400928 // OpenCL 2.0
929 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
930 const auto ImageMemFenceMask =
931 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
932 const auto ImageShiftAmount =
933 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
934 const auto MemorySemanticsImage = BinaryOperator::Create(
935 Instruction::Shl, ImageMemFenceMask,
936 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
937
SJW2c317da2020-03-23 07:39:13 -0500938 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400939 // |semantics|.
940 auto MemorySemantics1 =
SJW2c317da2020-03-23 07:39:13 -0500941 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
942 ConstantMemorySemantics, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400943 auto MemorySemantics2 = BinaryOperator::Create(
944 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
945 auto MemorySemantics = BinaryOperator::Create(
946 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400947
alan-baker12d2c182020-07-20 08:22:42 -0400948 // Memory Scope is always workgroup.
949 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400950
alan-baker3d905692020-10-28 14:02:37 -0400951 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
952 {Attribute::Convergent}, CI->getType(),
SJW2c317da2020-03-23 07:39:13 -0500953 {MemoryScope, MemorySemantics});
954 });
David Neto22f144c2017-06-12 14:26:21 -0400955}
956
Kévin Petit1cb45112020-04-27 18:55:48 +0100957bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
958 bool Changed = false;
959
960 SmallVector<Instruction *, 4> ToRemoves;
961
962 // Find all calls to the function
963 for (auto &U : F.uses()) {
964 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
965 ToRemoves.push_back(CI);
966 }
967 }
968
969 Changed = !ToRemoves.empty();
970
971 // Delete them
972 for (auto V : ToRemoves) {
973 V->eraseFromParent();
974 }
975
976 return Changed;
977}
978
SJW2c317da2020-03-23 07:39:13 -0500979bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -0500980 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -0500981 return replaceCallsWithValue(F, [&](CallInst *CI) {
982 // The predicate to use in the CmpInst.
983 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400984
SJW2c317da2020-03-23 07:39:13 -0500985 auto Arg1 = CI->getOperand(0);
986 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400987
SJW2c317da2020-03-23 07:39:13 -0500988 const auto Cmp =
989 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -0500990 if (isa<VectorType>(F.getReturnType()))
991 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
992 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -0500993 });
David Neto22f144c2017-06-12 14:26:21 -0400994}
995
SJW2c317da2020-03-23 07:39:13 -0500996bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
997 spv::Op SPIRVOp,
998 int32_t C) {
999 Module &M = *F.getParent();
1000 return replaceCallsWithValue(F, [&](CallInst *CI) {
1001 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001002
SJW2c317da2020-03-23 07:39:13 -05001003 // The value to return for true.
1004 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001005
SJW2c317da2020-03-23 07:39:13 -05001006 // The value to return for false.
1007 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001008
SJW2c317da2020-03-23 07:39:13 -05001009 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001010 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001011 CorrespondingBoolTy =
1012 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1013 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001014 }
David Neto22f144c2017-06-12 14:26:21 -04001015
SJW2c317da2020-03-23 07:39:13 -05001016 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1017 CorrespondingBoolTy, {CI->getOperand(0)});
1018
1019 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1020 });
David Neto22f144c2017-06-12 14:26:21 -04001021}
1022
SJW2c317da2020-03-23 07:39:13 -05001023bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1024 Module &M = *F.getParent();
1025 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001026 auto &C = M.getContext();
1027 auto Val = CI->getOperand(0);
1028 auto ValTy = Val->getType();
1029 auto RetTy = CI->getType();
1030
1031 // Get a suitable integer type to represent the number
1032 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1033
1034 // Create Mask
1035 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001036 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001037 switch (ScalarSize) {
1038 case 16:
1039 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1040 break;
1041 case 32:
1042 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1043 break;
1044 case 64:
1045 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1046 break;
1047 default:
1048 llvm_unreachable("Unsupported floating-point type");
1049 }
1050
1051 IRBuilder<> Builder(CI);
1052
1053 // Bitcast to int
1054 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1055
1056 // Mask and compare
1057 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1058 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1059
1060 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001061 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001062 if (ValTy->isVectorTy()) {
1063 RetTrue = ConstantInt::getSigned(RetTy, -1);
1064 } else {
1065 RetTrue = ConstantInt::get(RetTy, 1);
1066 }
1067 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1068 });
1069}
1070
SJW2c317da2020-03-23 07:39:13 -05001071bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1072 Module &M = *F.getParent();
1073 return replaceCallsWithValue(F, [&](CallInst *CI) {
1074 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001075
SJW2c317da2020-03-23 07:39:13 -05001076 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001077
SJW2c317da2020-03-23 07:39:13 -05001078 // If the argument is a 32-bit int, just use a shift
1079 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1080 V = BinaryOperator::Create(Instruction::LShr, Arg,
1081 ConstantInt::get(Arg->getType(), 31), "", CI);
1082 } else {
1083 // The value for zero to compare against.
1084 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001085
SJW2c317da2020-03-23 07:39:13 -05001086 // The value to return for true.
1087 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001088
SJW2c317da2020-03-23 07:39:13 -05001089 // The value to return for false.
1090 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001091
SJW2c317da2020-03-23 07:39:13 -05001092 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1093 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001094
SJW2c317da2020-03-23 07:39:13 -05001095 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001096
SJW2c317da2020-03-23 07:39:13 -05001097 // If we have a function to call, call it!
1098 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001099
SJW2c317da2020-03-23 07:39:13 -05001100 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001101
SJW2c317da2020-03-23 07:39:13 -05001102 const auto NewCI = clspv::InsertSPIRVOp(
1103 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1104 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001105
SJW2c317da2020-03-23 07:39:13 -05001106 } else {
1107 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001108 }
1109
SJW2c317da2020-03-23 07:39:13 -05001110 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001111 }
SJW2c317da2020-03-23 07:39:13 -05001112 return V;
1113 });
David Neto22f144c2017-06-12 14:26:21 -04001114}
1115
SJW2c317da2020-03-23 07:39:13 -05001116bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1117 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1118 // Get arguments
1119 auto HiValue = CI->getOperand(0);
1120 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001121
SJW2c317da2020-03-23 07:39:13 -05001122 // Don't touch overloads that aren't in OpenCL C
1123 auto HiType = HiValue->getType();
1124 auto LoType = LoValue->getType();
1125
1126 if (HiType != LoType) {
1127 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001128 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001129
SJW2c317da2020-03-23 07:39:13 -05001130 if (!HiType->isIntOrIntVectorTy()) {
1131 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001132 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001133
SJW2c317da2020-03-23 07:39:13 -05001134 if (HiType->getScalarSizeInBits() * 2 !=
1135 CI->getType()->getScalarSizeInBits()) {
1136 return nullptr;
1137 }
1138
1139 if ((HiType->getScalarSizeInBits() != 8) &&
1140 (HiType->getScalarSizeInBits() != 16) &&
1141 (HiType->getScalarSizeInBits() != 32)) {
1142 return nullptr;
1143 }
1144
James Pricecf53df42020-04-20 14:41:24 -04001145 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001146 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001147 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1148 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001149 return nullptr;
1150 }
1151 }
1152
1153 // Convert both operands to the result type
1154 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1155 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1156
1157 // Shift high operand
1158 auto ShiftAmount =
1159 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1160 auto HiShifted =
1161 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1162
1163 // OR both results
1164 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1165 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001166}
1167
SJW2c317da2020-03-23 07:39:13 -05001168bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1169 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1170 // Get arguments
1171 auto SrcValue = CI->getOperand(0);
1172 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001173
SJW2c317da2020-03-23 07:39:13 -05001174 // Don't touch overloads that aren't in OpenCL C
1175 auto SrcType = SrcValue->getType();
1176 auto RotType = RotAmount->getType();
1177
1178 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1179 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001180 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001181
SJW2c317da2020-03-23 07:39:13 -05001182 if (!SrcType->isIntOrIntVectorTy()) {
1183 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001184 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001185
SJW2c317da2020-03-23 07:39:13 -05001186 if ((SrcType->getScalarSizeInBits() != 8) &&
1187 (SrcType->getScalarSizeInBits() != 16) &&
1188 (SrcType->getScalarSizeInBits() != 32) &&
1189 (SrcType->getScalarSizeInBits() != 64)) {
1190 return nullptr;
1191 }
1192
James Pricecf53df42020-04-20 14:41:24 -04001193 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001194 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001195 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1196 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001197 return nullptr;
1198 }
1199 }
1200
alan-bakerfd22ae12020-10-29 15:59:22 -04001201 // Replace with LLVM's funnel shift left intrinsic because it is more
1202 // generic than rotate.
1203 Function *intrinsic =
1204 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1205 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1206 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001207 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001208}
1209
SJW2c317da2020-03-23 07:39:13 -05001210bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1211 bool DstIsSigned) {
1212 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1213 Value *V = nullptr;
1214 // Get arguments
1215 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001216
SJW2c317da2020-03-23 07:39:13 -05001217 // Don't touch overloads that aren't in OpenCL C
1218 auto SrcType = SrcValue->getType();
1219 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001220
SJW2c317da2020-03-23 07:39:13 -05001221 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1222 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1223 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001224 }
1225
James Pricecf53df42020-04-20 14:41:24 -04001226 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001227 unsigned SrcNumElements =
1228 SrcVecType->getElementCount().getKnownMinValue();
1229 unsigned DstNumElements =
1230 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001231 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001232 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001233 }
1234
James Pricecf53df42020-04-20 14:41:24 -04001235 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1236 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1237 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001238 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001239 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001240 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001241
SJW2c317da2020-03-23 07:39:13 -05001242 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1243 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1244
1245 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1246 bool DstIsInt = DstType->isIntOrIntVectorTy();
1247
1248 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1249 // Unnecessary cast operation.
1250 V = SrcValue;
1251 } else if (SrcIsFloat && DstIsFloat) {
1252 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1253 } else if (SrcIsFloat && DstIsInt) {
1254 if (DstIsSigned) {
1255 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1256 } else {
1257 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1258 }
1259 } else if (SrcIsInt && DstIsFloat) {
1260 if (SrcIsSigned) {
1261 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1262 } else {
1263 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1264 }
1265 } else if (SrcIsInt && DstIsInt) {
1266 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1267 } else {
1268 // Not something we're supposed to handle, just move on
1269 }
1270
1271 return V;
1272 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001273}
1274
SJW2c317da2020-03-23 07:39:13 -05001275bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1276 bool is_mad) {
1277 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1278 Value *V = nullptr;
1279 // Get arguments
1280 auto AValue = CI->getOperand(0);
1281 auto BValue = CI->getOperand(1);
1282 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001283
SJW2c317da2020-03-23 07:39:13 -05001284 // Don't touch overloads that aren't in OpenCL C
1285 auto AType = AValue->getType();
1286 auto BType = BValue->getType();
1287 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001288
SJW2c317da2020-03-23 07:39:13 -05001289 if ((AType != BType) || (CI->getType() != AType) ||
1290 (is_mad && (AType != CType))) {
1291 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001292 }
1293
SJW2c317da2020-03-23 07:39:13 -05001294 if (!AType->isIntOrIntVectorTy()) {
1295 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001296 }
Kévin Petit8a560882019-03-21 15:24:34 +00001297
SJW2c317da2020-03-23 07:39:13 -05001298 if ((AType->getScalarSizeInBits() != 8) &&
1299 (AType->getScalarSizeInBits() != 16) &&
1300 (AType->getScalarSizeInBits() != 32) &&
1301 (AType->getScalarSizeInBits() != 64)) {
1302 return V;
1303 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001304
James Pricecf53df42020-04-20 14:41:24 -04001305 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001306 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001307 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1308 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001309 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001310 }
1311 }
1312
SJW2c317da2020-03-23 07:39:13 -05001313 // Our SPIR-V op returns a struct, create a type for it
alan-baker6b9d1ee2020-11-03 23:11:32 -05001314 auto ExMulRetType = GetPairStruct(AType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001315
SJW2c317da2020-03-23 07:39:13 -05001316 // Select the appropriate signed/unsigned SPIR-V op
1317 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1318
1319 // Call the SPIR-V op
1320 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1321 ExMulRetType, {AValue, BValue});
1322
1323 // Get the high part of the result
1324 unsigned Idxs[] = {1};
1325 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1326
1327 // If we're handling a mad_hi, add the third argument to the result
1328 if (is_mad) {
1329 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001330 }
1331
SJW2c317da2020-03-23 07:39:13 -05001332 return V;
1333 });
Kévin Petit8a560882019-03-21 15:24:34 +00001334}
1335
SJW2c317da2020-03-23 07:39:13 -05001336bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1337 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1338 // Get arguments
1339 auto FalseValue = CI->getOperand(0);
1340 auto TrueValue = CI->getOperand(1);
1341 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001342
SJW2c317da2020-03-23 07:39:13 -05001343 // Don't touch overloads that aren't in OpenCL C
1344 auto FalseType = FalseValue->getType();
1345 auto TrueType = TrueValue->getType();
1346 auto PredicateType = PredicateValue->getType();
1347
1348 if (FalseType != TrueType) {
1349 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001350 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001351
SJW2c317da2020-03-23 07:39:13 -05001352 if (!PredicateType->isIntOrIntVectorTy()) {
1353 return nullptr;
1354 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001355
SJW2c317da2020-03-23 07:39:13 -05001356 if (!FalseType->isIntOrIntVectorTy() &&
1357 !FalseType->getScalarType()->isFloatingPointTy()) {
1358 return nullptr;
1359 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001360
SJW2c317da2020-03-23 07:39:13 -05001361 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1362 return nullptr;
1363 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001364
SJW2c317da2020-03-23 07:39:13 -05001365 if (FalseType->getScalarSizeInBits() !=
1366 PredicateType->getScalarSizeInBits()) {
1367 return nullptr;
1368 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001369
James Pricecf53df42020-04-20 14:41:24 -04001370 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001371 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1372 if (NumElements != cast<VectorType>(PredicateType)
1373 ->getElementCount()
1374 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001375 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001376 }
1377
James Pricecf53df42020-04-20 14:41:24 -04001378 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1379 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001380 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001381 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001382 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001383
SJW2c317da2020-03-23 07:39:13 -05001384 // Create constant
1385 const auto ZeroValue = Constant::getNullValue(PredicateType);
1386
1387 // Scalar and vector are to be treated differently
1388 CmpInst::Predicate Pred;
1389 if (PredicateType->isVectorTy()) {
1390 Pred = CmpInst::ICMP_SLT;
1391 } else {
1392 Pred = CmpInst::ICMP_NE;
1393 }
1394
1395 // Create comparison instruction
1396 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1397 ZeroValue, "", CI);
1398
1399 // Create select
1400 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1401 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001402}
1403
SJW2c317da2020-03-23 07:39:13 -05001404bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1405 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1406 Value *V = nullptr;
1407 if (CI->getNumOperands() != 4) {
1408 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001409 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001410
SJW2c317da2020-03-23 07:39:13 -05001411 // Get arguments
1412 auto FalseValue = CI->getOperand(0);
1413 auto TrueValue = CI->getOperand(1);
1414 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001415
SJW2c317da2020-03-23 07:39:13 -05001416 // Don't touch overloads that aren't in OpenCL C
1417 auto FalseType = FalseValue->getType();
1418 auto TrueType = TrueValue->getType();
1419 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001420
SJW2c317da2020-03-23 07:39:13 -05001421 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1422 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001423 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001424
James Pricecf53df42020-04-20 14:41:24 -04001425 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001426 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1427 !TrueType->getScalarType()->isIntegerTy()) {
1428 return V;
1429 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001430 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001431 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1432 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001433 return V;
1434 }
1435 }
1436
1437 // Remember the type of the operands
1438 auto OpType = TrueType;
1439
1440 // The actual bit selection will always be done on an integer type,
1441 // declare it here
1442 Type *BitType;
1443
1444 // If the operands are float, then bitcast them to int
1445 if (OpType->getScalarType()->isFloatingPointTy()) {
1446
1447 // First create the new type
1448 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1449
1450 // Then bitcast all operands
1451 PredicateValue =
1452 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1453 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1454 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1455
1456 } else {
1457 // The operands have an integer type, use it directly
1458 BitType = OpType;
1459 }
1460
1461 // All the operands are now always integers
1462 // implement as (c & b) | (~c & a)
1463
1464 // Create our negated predicate value
1465 auto AllOnes = Constant::getAllOnesValue(BitType);
1466 auto NotPredicateValue = BinaryOperator::Create(
1467 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1468
1469 // Then put everything together
1470 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1471 FalseValue, "", CI);
1472 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1473 TrueValue, "", CI);
1474
1475 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1476
1477 // If we were dealing with a floating point type, we must bitcast
1478 // the result back to that
1479 if (OpType->getScalarType()->isFloatingPointTy()) {
1480 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1481 }
1482
1483 return V;
1484 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001485}
1486
SJW61531372020-06-09 07:31:08 -05001487bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001488 // convert to vector versions
1489 Module &M = *F.getParent();
1490 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1491 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1492 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001493
SJW2c317da2020-03-23 07:39:13 -05001494 // First figure out which function we're dealing with
1495 if (is_smooth) {
1496 ArgsToSplat.push_back(CI->getOperand(1));
1497 VectorArg = CI->getOperand(2);
1498 } else {
1499 VectorArg = CI->getOperand(1);
1500 }
1501
1502 // Splat arguments that need to be
1503 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001504 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001505
1506 for (auto arg : ArgsToSplat) {
1507 Value *NewVectorArg = UndefValue::get(VecType);
alan-baker5a8c3be2020-09-09 13:44:26 -04001508 for (auto i = 0; i < VecType->getElementCount().getKnownMinValue(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001509 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1510 NewVectorArg =
1511 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1512 }
1513 SplatArgs.push_back(NewVectorArg);
1514 }
1515
1516 // Replace the call with the vector/vector flavour
1517 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1518 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1519
SJW61531372020-06-09 07:31:08 -05001520 std::string NewFName = Builtins::GetMangledFunctionName(
1521 is_smooth ? "smoothstep" : "step", NewFType);
1522
SJW2c317da2020-03-23 07:39:13 -05001523 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1524
1525 SmallVector<Value *, 3> NewArgs;
1526 for (auto arg : SplatArgs) {
1527 NewArgs.push_back(arg);
1528 }
1529 NewArgs.push_back(VectorArg);
1530
1531 return CallInst::Create(NewF, NewArgs, "", CI);
1532 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001533}
1534
SJW2c317da2020-03-23 07:39:13 -05001535bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001536 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1537 auto Arg = CI->getOperand(0);
1538 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001539
SJW2c317da2020-03-23 07:39:13 -05001540 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001541
SJW2c317da2020-03-23 07:39:13 -05001542 return BinaryOperator::Create(Op, Bitcast,
1543 ConstantInt::get(CI->getType(), 31), "", CI);
1544 });
David Neto22f144c2017-06-12 14:26:21 -04001545}
1546
SJW2c317da2020-03-23 07:39:13 -05001547bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1548 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001549 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1550 // The multiply instruction to use.
1551 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001552
SJW2c317da2020-03-23 07:39:13 -05001553 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001554
SJW2c317da2020-03-23 07:39:13 -05001555 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1556 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001557
SJW2c317da2020-03-23 07:39:13 -05001558 if (is_mad) {
1559 // The add instruction to use.
1560 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001561
SJW2c317da2020-03-23 07:39:13 -05001562 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001563 }
David Neto22f144c2017-06-12 14:26:21 -04001564
SJW2c317da2020-03-23 07:39:13 -05001565 return V;
1566 });
David Neto22f144c2017-06-12 14:26:21 -04001567}
1568
SJW2c317da2020-03-23 07:39:13 -05001569bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001570 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1571 Value *V = nullptr;
1572 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001573
SJW2c317da2020-03-23 07:39:13 -05001574 auto data_type = data->getType();
1575 if (!data_type->isVectorTy())
1576 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001577
James Pricecf53df42020-04-20 14:41:24 -04001578 auto vec_data_type = cast<VectorType>(data_type);
1579
alan-baker5a8c3be2020-09-09 13:44:26 -04001580 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001581 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1582 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001583
SJW2c317da2020-03-23 07:39:13 -05001584 auto offset = CI->getOperand(1);
1585 auto ptr = CI->getOperand(2);
1586 auto ptr_type = ptr->getType();
1587 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001588 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001589 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001590
SJW2c317da2020-03-23 07:39:13 -05001591 // Avoid pointer casts. Instead generate the correct number of stores
1592 // and rely on drivers to coalesce appropriately.
1593 IRBuilder<> builder(CI);
1594 auto elems_const = builder.getInt32(elems);
1595 auto adjust = builder.CreateMul(offset, elems_const);
1596 for (auto i = 0; i < elems; ++i) {
1597 auto idx = builder.getInt32(i);
1598 auto add = builder.CreateAdd(adjust, idx);
1599 auto gep = builder.CreateGEP(ptr, add);
1600 auto extract = builder.CreateExtractElement(data, i);
1601 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001602 }
SJW2c317da2020-03-23 07:39:13 -05001603 return V;
1604 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001605}
1606
SJW2c317da2020-03-23 07:39:13 -05001607bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001608 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1609 Value *V = nullptr;
1610 auto ret_type = F.getReturnType();
1611 if (!ret_type->isVectorTy())
1612 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001613
James Pricecf53df42020-04-20 14:41:24 -04001614 auto vec_ret_type = cast<VectorType>(ret_type);
1615
alan-baker5a8c3be2020-09-09 13:44:26 -04001616 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001617 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1618 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001619
SJW2c317da2020-03-23 07:39:13 -05001620 auto offset = CI->getOperand(0);
1621 auto ptr = CI->getOperand(1);
1622 auto ptr_type = ptr->getType();
1623 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001624 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001625 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001626
SJW2c317da2020-03-23 07:39:13 -05001627 // Avoid pointer casts. Instead generate the correct number of loads
1628 // and rely on drivers to coalesce appropriately.
1629 IRBuilder<> builder(CI);
1630 auto elems_const = builder.getInt32(elems);
1631 V = UndefValue::get(ret_type);
1632 auto adjust = builder.CreateMul(offset, elems_const);
1633 for (auto i = 0; i < elems; ++i) {
1634 auto idx = builder.getInt32(i);
1635 auto add = builder.CreateAdd(adjust, idx);
1636 auto gep = builder.CreateGEP(ptr, add);
1637 auto load = builder.CreateLoad(gep);
1638 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001639 }
SJW2c317da2020-03-23 07:39:13 -05001640 return V;
1641 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001642}
1643
SJW2c317da2020-03-23 07:39:13 -05001644bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1645 const std::string &name,
1646 int vec_size) {
1647 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1648 if (!vec_size) {
1649 // deduce vec_size from last character of name (e.g. vload_half4)
1650 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001651 }
SJW2c317da2020-03-23 07:39:13 -05001652 switch (vec_size) {
1653 case 2:
1654 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1655 case 4:
1656 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1657 case 0:
1658 if (!is_clspv_version) {
1659 return replaceVloadHalf(F);
1660 }
1661 default:
1662 llvm_unreachable("Unsupported vload_half vector size");
1663 break;
1664 }
1665 return false;
David Neto22f144c2017-06-12 14:26:21 -04001666}
1667
SJW2c317da2020-03-23 07:39:13 -05001668bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1669 Module &M = *F.getParent();
1670 return replaceCallsWithValue(F, [&](CallInst *CI) {
1671 // The index argument from vload_half.
1672 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001673
SJW2c317da2020-03-23 07:39:13 -05001674 // The pointer argument from vload_half.
1675 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001676
SJW2c317da2020-03-23 07:39:13 -05001677 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001678 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001679 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1680
1681 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001682 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001683
1684 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1685
1686 Value *V = nullptr;
1687
alan-baker7efcaaa2020-05-06 19:33:27 -04001688 bool supports_16bit_storage = true;
1689 switch (Arg1->getType()->getPointerAddressSpace()) {
1690 case clspv::AddressSpace::Global:
1691 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1692 clspv::Option::StorageClass::kSSBO);
1693 break;
1694 case clspv::AddressSpace::Constant:
1695 if (clspv::Option::ConstantArgsInUniformBuffer())
1696 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1697 clspv::Option::StorageClass::kUBO);
1698 else
1699 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1700 clspv::Option::StorageClass::kSSBO);
1701 break;
1702 default:
1703 // Clspv will emit the Float16 capability if the half type is
1704 // encountered. That capability covers private and local addressspaces.
1705 break;
1706 }
1707
1708 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001709 auto ShortTy = Type::getInt16Ty(M.getContext());
1710 auto ShortPointerTy =
1711 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1712
1713 // Cast the half* pointer to short*.
1714 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1715
1716 // Index into the correct address of the casted pointer.
1717 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1718
1719 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001720 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001721
1722 // ZExt the short -> int.
1723 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1724
1725 // Get our float2.
1726 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1727
1728 // Extract out the bottom element which is our float result.
1729 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1730 } else {
1731 // Assume the pointer argument points to storage aligned to 32bits
1732 // or more.
1733 // TODO(dneto): Do more analysis to make sure this is true?
1734 //
1735 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1736 // with:
1737 //
1738 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1739 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1740 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1741 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1742 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1743 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1744 // x float> %converted, %index_is_odd32
1745
1746 auto IntPointerTy =
1747 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1748
1749 // Cast the base pointer to int*.
1750 // In a valid call (according to assumptions), this should get
1751 // optimized away in the simplify GEP pass.
1752 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1753
1754 auto One = ConstantInt::get(IntTy, 1);
1755 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1756 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1757
1758 // Index into the correct address of the casted pointer.
1759 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1760
1761 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001762 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001763
1764 // Get our float2.
1765 auto Call = CallInst::Create(NewF, Load, "", CI);
1766
1767 // Extract out the float result, where the element number is
1768 // determined by whether the original index was even or odd.
1769 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1770 }
1771 return V;
1772 });
1773}
1774
1775bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1776 Module &M = *F.getParent();
1777 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001778 // The index argument from vload_half.
1779 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001780
Kévin Petite8edce32019-04-10 14:23:32 +01001781 // The pointer argument from vload_half.
1782 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001783
Kévin Petite8edce32019-04-10 14:23:32 +01001784 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001785 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001786 auto NewPointerTy =
1787 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001788 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001789
Kévin Petite8edce32019-04-10 14:23:32 +01001790 // Cast the half* pointer to int*.
1791 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001792
Kévin Petite8edce32019-04-10 14:23:32 +01001793 // Index into the correct address of the casted pointer.
1794 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001795
Kévin Petite8edce32019-04-10 14:23:32 +01001796 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001797 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001798
Kévin Petite8edce32019-04-10 14:23:32 +01001799 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001800 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001801
Kévin Petite8edce32019-04-10 14:23:32 +01001802 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001803
Kévin Petite8edce32019-04-10 14:23:32 +01001804 // Get our float2.
1805 return CallInst::Create(NewF, Load, "", CI);
1806 });
David Neto22f144c2017-06-12 14:26:21 -04001807}
1808
SJW2c317da2020-03-23 07:39:13 -05001809bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1810 Module &M = *F.getParent();
1811 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001812 // The index argument from vload_half.
1813 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001814
Kévin Petite8edce32019-04-10 14:23:32 +01001815 // The pointer argument from vload_half.
1816 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001817
Kévin Petite8edce32019-04-10 14:23:32 +01001818 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001819 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1820 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001821 auto NewPointerTy =
1822 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001823 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001824
Kévin Petite8edce32019-04-10 14:23:32 +01001825 // Cast the half* pointer to int2*.
1826 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001827
Kévin Petite8edce32019-04-10 14:23:32 +01001828 // Index into the correct address of the casted pointer.
1829 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001830
Kévin Petite8edce32019-04-10 14:23:32 +01001831 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001832 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001833
Kévin Petite8edce32019-04-10 14:23:32 +01001834 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001835 auto X =
1836 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1837 auto Y =
1838 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001839
Kévin Petite8edce32019-04-10 14:23:32 +01001840 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001841 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001842
Kévin Petite8edce32019-04-10 14:23:32 +01001843 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001844
Kévin Petite8edce32019-04-10 14:23:32 +01001845 // Get the lower (x & y) components of our final float4.
1846 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001847
Kévin Petite8edce32019-04-10 14:23:32 +01001848 // Get the higher (z & w) components of our final float4.
1849 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001850
Kévin Petite8edce32019-04-10 14:23:32 +01001851 Constant *ShuffleMask[4] = {
1852 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1853 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001854
Kévin Petite8edce32019-04-10 14:23:32 +01001855 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001856 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1857 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001858 });
David Neto22f144c2017-06-12 14:26:21 -04001859}
1860
SJW2c317da2020-03-23 07:39:13 -05001861bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001862
1863 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1864 //
1865 // %u = load i32 %ptr
1866 // %fxy = call <2 x float> Unpack2xHalf(u)
1867 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001868 Module &M = *F.getParent();
1869 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001870 auto Index = CI->getOperand(0);
1871 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001872
Kévin Petite8edce32019-04-10 14:23:32 +01001873 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001874 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001875 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001876
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001877 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001878 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001879
Kévin Petite8edce32019-04-10 14:23:32 +01001880 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001881 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001882
Kévin Petite8edce32019-04-10 14:23:32 +01001883 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001884
Kévin Petite8edce32019-04-10 14:23:32 +01001885 // Get our final float2.
1886 return CallInst::Create(NewF, Load, "", CI);
1887 });
David Neto6ad93232018-06-07 15:42:58 -07001888}
1889
SJW2c317da2020-03-23 07:39:13 -05001890bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001891
1892 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1893 //
1894 // %u2 = load <2 x i32> %ptr
1895 // %u2xy = extractelement %u2, 0
1896 // %u2zw = extractelement %u2, 1
1897 // %fxy = call <2 x float> Unpack2xHalf(uint)
1898 // %fzw = call <2 x float> Unpack2xHalf(uint)
1899 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001900 Module &M = *F.getParent();
1901 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001902 auto Index = CI->getOperand(0);
1903 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001904
Kévin Petite8edce32019-04-10 14:23:32 +01001905 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001906 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1907 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001908 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001909
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001910 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001911 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001912
Kévin Petite8edce32019-04-10 14:23:32 +01001913 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001914 auto X =
1915 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1916 auto Y =
1917 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001918
Kévin Petite8edce32019-04-10 14:23:32 +01001919 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001920 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001921
Kévin Petite8edce32019-04-10 14:23:32 +01001922 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001923
Kévin Petite8edce32019-04-10 14:23:32 +01001924 // Get the lower (x & y) components of our final float4.
1925 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001926
Kévin Petite8edce32019-04-10 14:23:32 +01001927 // Get the higher (z & w) components of our final float4.
1928 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001929
Kévin Petite8edce32019-04-10 14:23:32 +01001930 Constant *ShuffleMask[4] = {
1931 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1932 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001933
Kévin Petite8edce32019-04-10 14:23:32 +01001934 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001935 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1936 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001937 });
David Neto6ad93232018-06-07 15:42:58 -07001938}
1939
SJW2c317da2020-03-23 07:39:13 -05001940bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1941 switch (vec_size) {
1942 case 0:
1943 return replaceVstoreHalf(F);
1944 case 2:
1945 return replaceVstoreHalf2(F);
1946 case 4:
1947 return replaceVstoreHalf4(F);
1948 default:
1949 llvm_unreachable("Unsupported vstore_half vector size");
1950 break;
1951 }
1952 return false;
1953}
David Neto22f144c2017-06-12 14:26:21 -04001954
SJW2c317da2020-03-23 07:39:13 -05001955bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1956 Module &M = *F.getParent();
1957 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001958 // The value to store.
1959 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001960
Kévin Petite8edce32019-04-10 14:23:32 +01001961 // The index argument from vstore_half.
1962 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001963
Kévin Petite8edce32019-04-10 14:23:32 +01001964 // The pointer argument from vstore_half.
1965 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001966
Kévin Petite8edce32019-04-10 14:23:32 +01001967 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001968 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001969 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1970 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001971
Kévin Petite8edce32019-04-10 14:23:32 +01001972 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001973 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001974
Kévin Petite8edce32019-04-10 14:23:32 +01001975 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001976
Kévin Petite8edce32019-04-10 14:23:32 +01001977 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001978 auto TempVec = InsertElementInst::Create(
1979 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001980
Kévin Petite8edce32019-04-10 14:23:32 +01001981 // Pack the float2 -> half2 (in an int).
1982 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001983
alan-baker7efcaaa2020-05-06 19:33:27 -04001984 bool supports_16bit_storage = true;
1985 switch (Arg2->getType()->getPointerAddressSpace()) {
1986 case clspv::AddressSpace::Global:
1987 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1988 clspv::Option::StorageClass::kSSBO);
1989 break;
1990 case clspv::AddressSpace::Constant:
1991 if (clspv::Option::ConstantArgsInUniformBuffer())
1992 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1993 clspv::Option::StorageClass::kUBO);
1994 else
1995 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1996 clspv::Option::StorageClass::kSSBO);
1997 break;
1998 default:
1999 // Clspv will emit the Float16 capability if the half type is
2000 // encountered. That capability covers private and local addressspaces.
2001 break;
2002 }
2003
SJW2c317da2020-03-23 07:39:13 -05002004 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002005 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002006 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002007 auto ShortPointerTy =
2008 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002009
Kévin Petite8edce32019-04-10 14:23:32 +01002010 // Truncate our i32 to an i16.
2011 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002012
Kévin Petite8edce32019-04-10 14:23:32 +01002013 // Cast the half* pointer to short*.
2014 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002015
Kévin Petite8edce32019-04-10 14:23:32 +01002016 // Index into the correct address of the casted pointer.
2017 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002018
Kévin Petite8edce32019-04-10 14:23:32 +01002019 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002020 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002021 } else {
2022 // We can only write to 32-bit aligned words.
2023 //
2024 // Assuming base is aligned to 32-bits, replace the equivalent of
2025 // vstore_half(value, index, base)
2026 // with:
2027 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2028 // uint32_t write_to_upper_half = index & 1u;
2029 // uint32_t shift = write_to_upper_half << 4;
2030 //
2031 // // Pack the float value as a half number in bottom 16 bits
2032 // // of an i32.
2033 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2034 //
2035 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2036 // ^ ((packed & 0xffff) << shift)
2037 // // We only need relaxed consistency, but OpenCL 1.2 only has
2038 // // sequentially consistent atomics.
2039 // // TODO(dneto): Use relaxed consistency.
2040 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002041 auto IntPointerTy =
2042 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002043
Kévin Petite8edce32019-04-10 14:23:32 +01002044 auto Four = ConstantInt::get(IntTy, 4);
2045 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002046
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002047 auto IndexIsOdd =
2048 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002049 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002050 auto IndexIntoI32 =
2051 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2052 auto BaseI32Ptr =
2053 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2054 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2055 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002056 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002057 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002058 auto MaskBitsToWrite =
2059 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2060 auto MaskedCurrent = BinaryOperator::CreateAnd(
2061 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002062
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002063 auto XLowerBits =
2064 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2065 auto NewBitsToWrite =
2066 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2067 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2068 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002069
Kévin Petite8edce32019-04-10 14:23:32 +01002070 // Generate the call to atomi_xor.
2071 SmallVector<Type *, 5> ParamTypes;
2072 // The pointer type.
2073 ParamTypes.push_back(IntPointerTy);
2074 // The Types for memory scope, semantics, and value.
2075 ParamTypes.push_back(IntTy);
2076 ParamTypes.push_back(IntTy);
2077 ParamTypes.push_back(IntTy);
2078 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2079 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002080
Kévin Petite8edce32019-04-10 14:23:32 +01002081 const auto ConstantScopeDevice =
2082 ConstantInt::get(IntTy, spv::ScopeDevice);
2083 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2084 // (SPIR-V Workgroup).
2085 const auto AddrSpaceSemanticsBits =
2086 IntPointerTy->getPointerAddressSpace() == 1
2087 ? spv::MemorySemanticsUniformMemoryMask
2088 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002089
Kévin Petite8edce32019-04-10 14:23:32 +01002090 // We're using relaxed consistency here.
2091 const auto ConstantMemorySemantics =
2092 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2093 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002094
Kévin Petite8edce32019-04-10 14:23:32 +01002095 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2096 ConstantMemorySemantics, ValueToXor};
2097 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002098
2099 // Return a Nop so the old Call is removed
2100 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2101 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002102 }
David Neto22f144c2017-06-12 14:26:21 -04002103
SJW2c317da2020-03-23 07:39:13 -05002104 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002105 });
David Neto22f144c2017-06-12 14:26:21 -04002106}
2107
SJW2c317da2020-03-23 07:39:13 -05002108bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2109 Module &M = *F.getParent();
2110 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002111 // The value to store.
2112 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002113
Kévin Petite8edce32019-04-10 14:23:32 +01002114 // The index argument from vstore_half.
2115 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002116
Kévin Petite8edce32019-04-10 14:23:32 +01002117 // The pointer argument from vstore_half.
2118 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002119
Kévin Petite8edce32019-04-10 14:23:32 +01002120 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002121 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002122 auto NewPointerTy =
2123 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002124 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002125
Kévin Petite8edce32019-04-10 14:23:32 +01002126 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002127 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002128
Kévin Petite8edce32019-04-10 14:23:32 +01002129 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002130
Kévin Petite8edce32019-04-10 14:23:32 +01002131 // Turn the packed x & y into the final packing.
2132 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002133
Kévin Petite8edce32019-04-10 14:23:32 +01002134 // Cast the half* pointer to int*.
2135 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002136
Kévin Petite8edce32019-04-10 14:23:32 +01002137 // Index into the correct address of the casted pointer.
2138 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002139
Kévin Petite8edce32019-04-10 14:23:32 +01002140 // Store to the int* we casted to.
2141 return new StoreInst(X, Index, CI);
2142 });
David Neto22f144c2017-06-12 14:26:21 -04002143}
2144
SJW2c317da2020-03-23 07:39:13 -05002145bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2146 Module &M = *F.getParent();
2147 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002148 // The value to store.
2149 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002150
Kévin Petite8edce32019-04-10 14:23:32 +01002151 // The index argument from vstore_half.
2152 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002153
Kévin Petite8edce32019-04-10 14:23:32 +01002154 // The pointer argument from vstore_half.
2155 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002156
Kévin Petite8edce32019-04-10 14:23:32 +01002157 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002158 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2159 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002160 auto NewPointerTy =
2161 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002162 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002163
Kévin Petite8edce32019-04-10 14:23:32 +01002164 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2165 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002166
Kévin Petite8edce32019-04-10 14:23:32 +01002167 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002168 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2169 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002170
Kévin Petite8edce32019-04-10 14:23:32 +01002171 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2172 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002173
Kévin Petite8edce32019-04-10 14:23:32 +01002174 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002175 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2176 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002177
Kévin Petite8edce32019-04-10 14:23:32 +01002178 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002179 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002180
Kévin Petite8edce32019-04-10 14:23:32 +01002181 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002182
Kévin Petite8edce32019-04-10 14:23:32 +01002183 // Turn the packed x & y into the final component of our int2.
2184 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002185
Kévin Petite8edce32019-04-10 14:23:32 +01002186 // Turn the packed z & w into the final component of our int2.
2187 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002188
Kévin Petite8edce32019-04-10 14:23:32 +01002189 auto Combine = InsertElementInst::Create(
2190 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002191 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2192 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002193
Kévin Petite8edce32019-04-10 14:23:32 +01002194 // Cast the half* pointer to int2*.
2195 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002196
Kévin Petite8edce32019-04-10 14:23:32 +01002197 // Index into the correct address of the casted pointer.
2198 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002199
Kévin Petite8edce32019-04-10 14:23:32 +01002200 // Store to the int2* we casted to.
2201 return new StoreInst(Combine, Index, CI);
2202 });
David Neto22f144c2017-06-12 14:26:21 -04002203}
2204
SJW2c317da2020-03-23 07:39:13 -05002205bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2206 // convert half to float
2207 Module &M = *F.getParent();
2208 return replaceCallsWithValue(F, [&](CallInst *CI) {
2209 SmallVector<Type *, 3> types;
2210 SmallVector<Value *, 3> args;
2211 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2212 types.push_back(CI->getArgOperand(i)->getType());
2213 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002214 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002215
alan-baker5a8c3be2020-09-09 13:44:26 -04002216 auto NewFType =
2217 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2218 cast<VectorType>(CI->getType())
2219 ->getElementCount()
2220 .getKnownMinValue()),
2221 types, false);
SJW2c317da2020-03-23 07:39:13 -05002222
SJW61531372020-06-09 07:31:08 -05002223 std::string NewFName =
2224 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002225
2226 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2227
2228 auto NewCI = CallInst::Create(NewF, args, "", CI);
2229
2230 // Convert to the half type.
2231 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2232 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002233}
2234
SJW2c317da2020-03-23 07:39:13 -05002235bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2236 // convert half to float
2237 Module &M = *F.getParent();
2238 return replaceCallsWithValue(F, [&](CallInst *CI) {
2239 SmallVector<Type *, 3> types(3);
2240 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002241
SJW2c317da2020-03-23 07:39:13 -05002242 // Image
2243 types[0] = CI->getArgOperand(0)->getType();
2244 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002245
SJW2c317da2020-03-23 07:39:13 -05002246 // Coord
2247 types[1] = CI->getArgOperand(1)->getType();
2248 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002249
SJW2c317da2020-03-23 07:39:13 -05002250 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002251 types[2] =
2252 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2253 cast<VectorType>(CI->getArgOperand(2)->getType())
2254 ->getElementCount()
2255 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002256
SJW2c317da2020-03-23 07:39:13 -05002257 auto NewFType =
2258 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002259
SJW61531372020-06-09 07:31:08 -05002260 std::string NewFName =
2261 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002262
SJW2c317da2020-03-23 07:39:13 -05002263 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002264
SJW2c317da2020-03-23 07:39:13 -05002265 // Convert data to the float type.
2266 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2267 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002268
SJW2c317da2020-03-23 07:39:13 -05002269 return CallInst::Create(NewF, args, "", CI);
2270 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002271}
2272
SJW2c317da2020-03-23 07:39:13 -05002273bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2274 Function &F) {
2275 // convert read_image with int coords to float coords
2276 Module &M = *F.getParent();
2277 return replaceCallsWithValue(F, [&](CallInst *CI) {
2278 // The image.
2279 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002280
SJW2c317da2020-03-23 07:39:13 -05002281 // The sampler.
2282 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002283
SJW2c317da2020-03-23 07:39:13 -05002284 // The coordinate (integer type that we can't handle).
2285 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002286
SJW2c317da2020-03-23 07:39:13 -05002287 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2288 uint32_t components =
2289 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2290 Type *float_ty = nullptr;
2291 if (components == 1) {
2292 float_ty = Type::getFloatTy(M.getContext());
2293 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002294 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2295 cast<VectorType>(Arg2->getType())
2296 ->getElementCount()
2297 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002298 }
David Neto22f144c2017-06-12 14:26:21 -04002299
SJW2c317da2020-03-23 07:39:13 -05002300 auto NewFType = FunctionType::get(
2301 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2302
2303 std::string NewFName = F.getName().str();
2304 NewFName[NewFName.length() - 1] = 'f';
2305
2306 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2307
2308 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2309
2310 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2311 });
David Neto22f144c2017-06-12 14:26:21 -04002312}
2313
SJW2c317da2020-03-23 07:39:13 -05002314bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2315 return replaceCallsWithValue(F, [&](CallInst *CI) {
2316 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002317
SJW2c317da2020-03-23 07:39:13 -05002318 // We need to map the OpenCL constants to the SPIR-V equivalents.
2319 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2320 const auto ConstantMemorySemantics = ConstantInt::get(
2321 IntTy, spv::MemorySemanticsUniformMemoryMask |
2322 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002323
SJW2c317da2020-03-23 07:39:13 -05002324 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002325
SJW2c317da2020-03-23 07:39:13 -05002326 // The pointer.
2327 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002328
SJW2c317da2020-03-23 07:39:13 -05002329 // The memory scope.
2330 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002331
SJW2c317da2020-03-23 07:39:13 -05002332 // The memory semantics.
2333 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002334
SJW2c317da2020-03-23 07:39:13 -05002335 if (2 < CI->getNumArgOperands()) {
2336 // The unequal memory semantics.
2337 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002338
SJW2c317da2020-03-23 07:39:13 -05002339 // The value.
2340 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002341
SJW2c317da2020-03-23 07:39:13 -05002342 // The comparator.
2343 Params.push_back(CI->getArgOperand(1));
2344 } else if (1 < CI->getNumArgOperands()) {
2345 // The value.
2346 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002347 }
David Neto22f144c2017-06-12 14:26:21 -04002348
SJW2c317da2020-03-23 07:39:13 -05002349 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2350 });
David Neto22f144c2017-06-12 14:26:21 -04002351}
2352
SJW2c317da2020-03-23 07:39:13 -05002353bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2354 llvm::AtomicRMWInst::BinOp Op) {
2355 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002356 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2357 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002358 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002359 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002360 SyncScope::System, CI);
2361 });
2362}
David Neto22f144c2017-06-12 14:26:21 -04002363
SJW2c317da2020-03-23 07:39:13 -05002364bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2365 Module &M = *F.getParent();
2366 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002367 auto IntTy = Type::getInt32Ty(M.getContext());
2368 auto FloatTy = Type::getFloatTy(M.getContext());
2369
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002370 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2371 ConstantInt::get(IntTy, 1),
2372 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002373
2374 Constant *UpShuffleMask[4] = {
2375 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2376 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2377
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002378 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2379 UndefValue::get(FloatTy),
2380 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002381
Kévin Petite8edce32019-04-10 14:23:32 +01002382 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002383 auto Arg0 =
2384 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2385 ConstantVector::get(DownShuffleMask), "", CI);
2386 auto Arg1 =
2387 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2388 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002389 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002390
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002391 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002392 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002393
SJW61531372020-06-09 07:31:08 -05002394 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002395
Kévin Petite8edce32019-04-10 14:23:32 +01002396 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002397
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002398 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2399 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002400 });
David Neto22f144c2017-06-12 14:26:21 -04002401}
David Neto62653202017-10-16 19:05:18 -04002402
SJW2c317da2020-03-23 07:39:13 -05002403bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002404 // OpenCL's float result = fract(float x, float* ptr)
2405 //
2406 // In the LLVM domain:
2407 //
2408 // %floor_result = call spir_func float @floor(float %x)
2409 // store float %floor_result, float * %ptr
2410 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2411 // %result = call spir_func float
2412 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2413 //
2414 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2415 // and clspv.fract occur in the SPIR-V generator pass:
2416 //
2417 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2418 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2419 // ...
2420 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2421 // OpStore %ptr %floor_result
2422 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2423 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002424 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002425
David Neto62653202017-10-16 19:05:18 -04002426 using std::string;
2427
2428 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2429 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002430
SJW2c317da2020-03-23 07:39:13 -05002431 Module &M = *F.getParent();
2432 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002433
SJW2c317da2020-03-23 07:39:13 -05002434 // This is either float or a float vector. All the float-like
2435 // types are this type.
2436 auto result_ty = F.getReturnType();
2437
SJW61531372020-06-09 07:31:08 -05002438 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002439 Function *fmin_fn = M.getFunction(fmin_name);
2440 if (!fmin_fn) {
2441 // Make the fmin function.
2442 FunctionType *fn_ty =
2443 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2444 fmin_fn =
2445 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2446 fmin_fn->addFnAttr(Attribute::ReadNone);
2447 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2448 }
2449
SJW61531372020-06-09 07:31:08 -05002450 std::string floor_name =
2451 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002452 Function *floor_fn = M.getFunction(floor_name);
2453 if (!floor_fn) {
2454 // Make the floor function.
2455 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2456 floor_fn =
2457 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2458 floor_fn->addFnAttr(Attribute::ReadNone);
2459 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2460 }
2461
SJW61531372020-06-09 07:31:08 -05002462 std::string clspv_fract_name =
2463 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002464 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2465 if (!clspv_fract_fn) {
2466 // Make the clspv_fract function.
2467 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2468 clspv_fract_fn = cast<Function>(
2469 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2470 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2471 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2472 }
2473
2474 // Number of significant significand bits, whether represented or not.
2475 unsigned num_significand_bits;
2476 switch (result_ty->getScalarType()->getTypeID()) {
2477 case Type::HalfTyID:
2478 num_significand_bits = 11;
2479 break;
2480 case Type::FloatTyID:
2481 num_significand_bits = 24;
2482 break;
2483 case Type::DoubleTyID:
2484 num_significand_bits = 53;
2485 break;
2486 default:
2487 llvm_unreachable("Unhandled float type when processing fract builtin");
2488 break;
2489 }
2490 // Beware that the disassembler displays this value as
2491 // OpConstant %float 1
2492 // which is not quite right.
2493 const double kJustUnderOneScalar =
2494 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2495
2496 Constant *just_under_one =
2497 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2498 if (result_ty->isVectorTy()) {
2499 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002500 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002501 }
2502
2503 IRBuilder<> Builder(CI);
2504
2505 auto arg = CI->getArgOperand(0);
2506 auto ptr = CI->getArgOperand(1);
2507
2508 // Compute floor result and store it.
2509 auto floor = Builder.CreateCall(floor_fn, {arg});
2510 Builder.CreateStore(floor, ptr);
2511
2512 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2513 auto fract_result =
2514 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2515
2516 return fract_result;
2517 });
David Neto62653202017-10-16 19:05:18 -04002518}
alan-bakera52b7312020-10-26 08:58:51 -04002519
Kévin Petit8576f682020-11-02 14:51:32 +00002520bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002521 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002522 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002523 // a_shr = a >> 1
2524 // b_shr = b >> 1
2525 // add1 = a_shr + b_shr
2526 // join = a |join_opcode| b
2527 // and = join & 1
2528 // add = add1 + and
2529 const auto a = Call->getArgOperand(0);
2530 const auto b = Call->getArgOperand(1);
2531 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002532 Value *a_shift, *b_shift;
2533 if (is_signed) {
2534 a_shift = builder.CreateAShr(a, 1);
2535 b_shift = builder.CreateAShr(b, 1);
2536 } else {
2537 a_shift = builder.CreateLShr(a, 1);
2538 b_shift = builder.CreateLShr(b, 1);
2539 }
alan-bakerb6da5132020-10-29 15:59:06 -04002540 auto add = builder.CreateAdd(a_shift, b_shift);
2541 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2542 auto constant_one = ConstantInt::get(a->getType(), 1);
2543 auto and_bit = builder.CreateAnd(join, constant_one);
2544 return builder.CreateAdd(add, and_bit);
2545 });
2546}
2547
alan-baker3f1bf492020-11-05 09:07:36 -05002548bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2549 bool is_add) {
2550 return replaceCallsWithValue(F, [&F, this, is_signed,
2551 is_add](CallInst *Call) {
2552 auto ty = Call->getType();
2553 auto a = Call->getArgOperand(0);
2554 auto b = Call->getArgOperand(1);
2555 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002556 if (is_signed) {
2557 unsigned bitwidth = ty->getScalarSizeInBits();
2558 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002559 unsigned extended_width = bitwidth << 1;
2560 Type *extended_ty =
2561 IntegerType::get(Call->getContext(), extended_width);
2562 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002563 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002564 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2565 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002566 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002567 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04002568 // Don't use the type in GetMangledFunctionName to ensure we get
2569 // signed parameters.
2570 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04002571 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05002572 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2573 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2574 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2575 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2576 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04002577 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04002578 } else {
2579 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2580 }
alan-baker3f1bf492020-11-05 09:07:36 -05002581 } else {
2582 if (extended_width == 32) {
2583 sclamp_name += "iii";
2584 } else {
2585 sclamp_name += "sss";
2586 }
alan-bakera52b7312020-10-26 08:58:51 -04002587 }
alan-baker3f1bf492020-11-05 09:07:36 -05002588
2589 auto sext_a = builder.CreateSExt(a, extended_ty);
2590 auto sext_b = builder.CreateSExt(b, extended_ty);
2591 Value *op = nullptr;
2592 // Extended operations won't wrap.
2593 if (is_add)
2594 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
2595 else
2596 op = builder.CreateSub(sext_a, sext_b, "", true, true);
2597 auto clamp_ty = FunctionType::get(
2598 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2599 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
2600 auto clamp = builder.CreateCall(callee, {op, min, max});
2601 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04002602 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002603 // Add:
2604 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04002605 // if (b < 0)
2606 // c = c > a ? min : c;
2607 // else
alan-baker3f1bf492020-11-05 09:07:36 -05002608 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04002609 //
alan-baker3f1bf492020-11-05 09:07:36 -05002610 // Sub:
2611 // c = a - b;
2612 // if (b < 0)
2613 // c = c < a ? max : c;
2614 // else
2615 // c = c > a ? min : c;
2616 Constant *min = ConstantInt::get(Call->getContext(),
2617 APInt::getSignedMinValue(bitwidth));
2618 Constant *max = ConstantInt::get(Call->getContext(),
2619 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04002620 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2621 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2622 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2623 }
alan-baker3f1bf492020-11-05 09:07:36 -05002624 Value *op = nullptr;
2625 if (is_add) {
2626 op = builder.CreateAdd(a, b);
2627 } else {
2628 op = builder.CreateSub(a, b);
2629 }
2630 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
2631 auto op_gt_a = builder.CreateICmpSGT(op, a);
2632 auto op_lt_a = builder.CreateICmpSLT(op, a);
2633 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
2634 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
2635 auto neg_value = is_add ? min : max;
2636 auto pos_value = is_add ? max : min;
2637 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
2638 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
2639 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04002640 }
2641 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002642 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
2643 // carr/borrow.
2644 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
2645 auto clamp_value =
2646 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
2647 auto struct_ty = GetPairStruct(ty);
2648 auto call =
2649 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
2650 auto add_sub = builder.CreateExtractValue(call, {0});
2651 auto carry_borrow = builder.CreateExtractValue(call, {1});
2652 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
2653 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04002654 }
alan-bakera52b7312020-10-26 08:58:51 -04002655 });
2656}
alan-baker4986eff2020-10-29 13:38:00 -04002657
2658bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2659 return replaceCallsWithValue(F, [](CallInst *Call) {
2660 auto pointer = Call->getArgOperand(0);
2661 // Clang emits an address space cast to the generic address space. Skip the
2662 // cast and use the input directly.
2663 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2664 pointer = cast->getPointerOperand();
2665 }
2666 Value *order_arg =
2667 Call->getNumArgOperands() > 1 ? Call->getArgOperand(1) : nullptr;
2668 Value *scope_arg =
2669 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2670 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2671 clspv::AddressSpace::Global;
2672 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2673 spv::MemorySemanticsAcquireMask);
2674 auto scope = MemoryScope(scope_arg, is_global, Call);
2675 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2676 Call->getType(), {pointer, scope, order});
2677 });
2678}
2679
2680bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2681 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2682 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2683 auto pointer = Call->getArgOperand(0);
2684 // Clang emits an address space cast to the generic address space. Skip the
2685 // cast and use the input directly.
2686 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2687 pointer = cast->getPointerOperand();
2688 }
2689 Value *value = Call->getArgOperand(1);
2690 Value *order_arg =
2691 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2692 Value *scope_arg =
2693 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2694 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2695 clspv::AddressSpace::Global;
2696 auto scope = MemoryScope(scope_arg, is_global, Call);
2697 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2698 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2699 {pointer, scope, order, value});
2700 });
2701}
2702
2703bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2704 return replaceCallsWithValue(F, [](CallInst *Call) {
2705 auto pointer = Call->getArgOperand(0);
2706 // Clang emits an address space cast to the generic address space. Skip the
2707 // cast and use the input directly.
2708 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2709 pointer = cast->getPointerOperand();
2710 }
2711 auto expected = Call->getArgOperand(1);
2712 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2713 expected = cast->getPointerOperand();
2714 }
2715 auto value = Call->getArgOperand(2);
2716 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2717 clspv::AddressSpace::Global;
2718 Value *success_arg =
2719 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2720 Value *failure_arg =
2721 Call->getNumArgOperands() > 4 ? Call->getArgOperand(4) : nullptr;
2722 Value *scope_arg =
2723 Call->getNumArgOperands() > 5 ? Call->getArgOperand(5) : nullptr;
2724 auto scope = MemoryScope(scope_arg, is_global, Call);
2725 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2726 spv::MemorySemanticsAcquireReleaseMask);
2727 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
2728 spv::MemorySemanticsAcquireMask);
2729
2730 // If the value pointed to by |expected| equals the value pointed to by
2731 // |pointer|, |value| is written into |pointer|, otherwise the value in
2732 // |pointer| is written into |expected|. In order to avoid extra stores,
2733 // the basic block with the original atomic is split and the store is
2734 // performed in the |then| block. The condition is the inversion of the
2735 // comparison result.
2736 IRBuilder<> builder(Call);
2737 auto load = builder.CreateLoad(expected);
2738 auto cmp_xchg = InsertSPIRVOp(
2739 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
2740 value->getType(), {pointer, scope, success, failure, value, load});
2741 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
2742 auto not_cmp = builder.CreateNot(cmp);
2743 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
2744 builder.SetInsertPoint(then_branch);
2745 builder.CreateStore(cmp_xchg, expected);
2746 return cmp;
2747 });
2748}
alan-bakercc2bafb2020-11-02 08:30:18 -05002749
alan-baker2cecaa72020-11-05 14:05:20 -05002750bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05002751 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2752 return false;
2753
2754 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
alan-baker5f2e88e2020-12-07 15:24:04 -05002755 if (bitwidth > 64)
alan-bakercc2bafb2020-11-02 08:30:18 -05002756 return false;
2757
alan-baker5f2e88e2020-12-07 15:24:04 -05002758 return replaceCallsWithValue(F, [&F, leading](CallInst *Call) {
2759 Function *intrinsic = Intrinsic::getDeclaration(
2760 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz,
2761 Call->getType());
2762 const auto c_false = ConstantInt::getFalse(Call->getContext());
2763 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
2764 {Call->getArgOperand(0), c_false}, "", Call);
alan-bakercc2bafb2020-11-02 08:30:18 -05002765 });
2766}
alan-baker6b9d1ee2020-11-03 23:11:32 -05002767
2768bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
2769 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
2770 const auto ty = Call->getType();
2771 const auto a = Call->getArgOperand(0);
2772 const auto b = Call->getArgOperand(1);
2773 const auto c = Call->getArgOperand(2);
2774 IRBuilder<> builder(Call);
2775 if (is_signed) {
2776 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
2777 if (bitwidth < 32) {
2778 // mul = sext(a) * sext(b)
2779 // add = mul + sext(c)
2780 // res = clamp(add, MIN, MAX)
2781 unsigned extended_width = bitwidth << 1;
2782 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
2783 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2784 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2785 }
2786 auto a_sext = builder.CreateSExt(a, extended_ty);
2787 auto b_sext = builder.CreateSExt(b, extended_ty);
2788 auto c_sext = builder.CreateSExt(c, extended_ty);
2789 // Extended the size so no overflows occur.
2790 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
2791 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
2792 auto func_ty = FunctionType::get(
2793 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2794 // Don't use function type because we need signed parameters.
2795 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
2796 // The clamp values are the signed min and max of the original bitwidth
2797 // sign extended to the extended bitwidth.
2798 Constant *min = ConstantInt::get(
2799 Call->getContext(),
2800 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2801 Constant *max = ConstantInt::get(
2802 Call->getContext(),
2803 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
2804 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2805 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2806 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2807 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2808 if (extended_width == 32)
2809 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
2810 else
2811 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2812 } else {
2813 if (extended_width == 32)
2814 clamp_name += "iii";
2815 else
2816 clamp_name += "sss";
2817 }
2818 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
2819 auto clamp = builder.CreateCall(callee, {add, min, max});
2820 return builder.CreateTrunc(clamp, ty);
2821 } else {
2822 auto struct_ty = GetPairStruct(ty);
2823 // Compute
2824 // {hi, lo} = smul_extended(a, b)
2825 // add = lo + c
2826 auto mul_ext = InsertSPIRVOp(Call, spv::OpSMulExtended,
2827 {Attribute::ReadNone}, struct_ty, {a, b});
2828 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2829 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2830 auto add = builder.CreateAdd(mul_lo, c);
2831
2832 // Constants for use in the calculation.
2833 Constant *min = ConstantInt::get(Call->getContext(),
2834 APInt::getSignedMinValue(bitwidth));
2835 Constant *max = ConstantInt::get(Call->getContext(),
2836 APInt::getSignedMaxValue(bitwidth));
2837 Constant *max_plus_1 = ConstantInt::get(
2838 Call->getContext(),
2839 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
2840 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2841 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2842 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2843 max_plus_1 =
2844 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
2845 }
2846
2847 auto a_xor_b = builder.CreateXor(a, b);
2848 auto same_sign =
2849 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
2850 auto different_sign = builder.CreateNot(same_sign);
2851 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
2852 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
2853 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
2854 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
2855 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
2856 auto add_gt_max = builder.CreateICmpUGT(add, max);
2857 auto hi_eq_m1 =
2858 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
2859 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
2860 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
2861 auto max_sub_lo = builder.CreateSub(max, mul_lo);
2862 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
2863
2864 // Equivalent to:
2865 // if (((x < 0) == (y < 0)) && mul_hi != 0)
2866 // return MAX
2867 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
2868 // return MAX
2869 // if (((x < 0) != (y < 0)) && mul_hi != -1)
2870 // return MIN
2871 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
2872 // return MIN
2873 // return add
2874 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
2875 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
2876 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
2877 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
2878 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
2879 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
2880 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
2881 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
2882 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
2883 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
2884 auto sel = builder.CreateSelect(min_clamp, min, add);
2885 return builder.CreateSelect(max_clamp, max, sel);
2886 }
2887 } else {
2888 // {lo, hi} = mul_extended(a, b)
2889 // {add, carry} = add_carry(lo, c)
2890 // cmp = (mul_hi | carry) == 0
2891 // mad_sat = cmp ? add : MAX
2892 auto struct_ty = GetPairStruct(ty);
2893 auto mul_ext = InsertSPIRVOp(Call, spv::OpUMulExtended,
2894 {Attribute::ReadNone}, struct_ty, {a, b});
2895 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2896 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2897 auto add_carry =
2898 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
2899 struct_ty, {mul_lo, c});
2900 auto add = builder.CreateExtractValue(add_carry, {0});
2901 auto carry = builder.CreateExtractValue(add_carry, {1});
2902 auto or_value = builder.CreateOr(mul_hi, carry);
2903 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
2904 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
2905 }
2906 });
2907}
alan-baker15106572020-11-06 15:08:10 -05002908
2909bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
2910 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2911 return false;
2912
2913 if (F.getFunctionType()->getNumParams() != 2)
2914 return false;
2915
2916 if (F.getFunctionType()->getParamType(0) !=
2917 F.getFunctionType()->getParamType(1)) {
2918 return false;
2919 }
2920
2921 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
2922 case Type::FloatTyID:
2923 case Type::HalfTyID:
2924 case Type::DoubleTyID:
2925 break;
2926 default:
2927 return false;
2928 }
2929
2930 // Scalar versions all return an int, while vector versions return a vector
2931 // of an equally sized integer types (e.g. short, int or long).
2932 if (isa<VectorType>(F.getReturnType())) {
2933 if (F.getReturnType()->getScalarSizeInBits() !=
2934 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
2935 return false;
2936 }
2937 } else {
2938 if (F.getReturnType()->getScalarSizeInBits() != 32)
2939 return false;
2940 }
2941
2942 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
2943 // Replace with a floating point [un]ordered comparison followed by an
2944 // extension.
2945 auto x = Call->getArgOperand(0);
2946 auto y = Call->getArgOperand(1);
2947 IRBuilder<> builder(Call);
2948 Value *tmp = nullptr;
2949 if (is_ordered) {
2950 // This leads to a slight inefficiency in the SPIR-V that is easy for
2951 // drivers to optimize where the SPIR-V for the comparison and the
2952 // extension could be fused to drop the inversion of the OpIsNan.
2953 tmp = builder.CreateFCmpORD(x, y);
2954 } else {
2955 tmp = builder.CreateFCmpUNO(x, y);
2956 }
2957 // OpenCL CTS requires that vector versions use sign extension, but scalar
2958 // versions use zero extension.
2959 if (isa<VectorType>(Call->getType()))
2960 return builder.CreateSExt(tmp, Call->getType());
2961 return builder.CreateZExt(tmp, Call->getType());
2962 });
2963}
alan-baker497920b2020-11-09 16:41:36 -05002964
2965bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
2966 return replaceCallsWithValue(F, [this](CallInst *Call) {
2967 auto ty = Call->getType();
2968 auto x = Call->getArgOperand(0);
2969 unsigned width = x->getType()->getScalarSizeInBits();
2970 Type *int_ty = IntegerType::get(Call->getContext(), width);
2971 uint64_t abs_mask = 0x7fffffff;
2972 uint64_t exp_mask = 0x7f800000;
2973 uint64_t min_mask = 0x00800000;
2974 if (width == 16) {
2975 abs_mask = 0x7fff;
2976 exp_mask = 0x7c00;
2977 min_mask = 0x0400;
2978 } else if (width == 64) {
2979 abs_mask = 0x7fffffffffffffff;
2980 exp_mask = 0x7ff0000000000000;
2981 min_mask = 0x0010000000000000;
2982 }
2983 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
2984 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
2985 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
2986 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2987 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
2988 abs_const =
2989 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
2990 exp_const =
2991 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
2992 min_const =
2993 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
2994 }
2995 // Drop the sign bit and then check that the number is between
2996 // (exclusive) the min and max exponent values for the bit width.
2997 IRBuilder<> builder(Call);
2998 auto bitcast = builder.CreateBitCast(x, int_ty);
2999 auto abs = builder.CreateAnd(bitcast, abs_const);
3000 auto lt = builder.CreateICmpULT(abs, exp_const);
3001 auto ge = builder.CreateICmpUGE(abs, min_const);
3002 auto tmp = builder.CreateAnd(lt, ge);
3003 // OpenCL CTS requires that vector versions use sign extension, but scalar
3004 // versions use zero extension.
3005 if (isa<VectorType>(ty))
3006 return builder.CreateSExt(tmp, ty);
3007 return builder.CreateZExt(tmp, ty);
3008 });
3009}
alan-bakere0406e72020-11-10 12:32:04 -05003010
3011bool ReplaceOpenCLBuiltinPass::replaceFDim(Function &F) {
3012 return replaceCallsWithValue(F, [](CallInst *Call) {
3013 const auto x = Call->getArgOperand(0);
3014 const auto y = Call->getArgOperand(1);
3015 IRBuilder<> builder(Call);
3016 auto sub = builder.CreateFSub(x, y);
3017 auto cmp = builder.CreateFCmpUGT(x, y);
3018 return builder.CreateSelect(cmp, sub,
3019 Constant::getNullValue(Call->getType()));
3020 });
3021}