blob: c4319fecd7177b8884a3a35e39a78e7a97c4639f [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
// Despite its name, this returns the zero-based index of the most significant
// set bit of |v| (i.e. floor(log2(v))), and 0 when |v| is 0.
//
// The search halves the candidate window each step: whenever the upper part
// of the remaining window is non-empty, it is shifted down and the step size
// is accumulated into the result.
uint32_t clz(uint32_t v) {
  uint32_t highest_bit = 0;

  for (uint32_t step = 16; step != 0; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      highest_bit += step;
    }
  }

  return highest_bit;
}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
81 spv::MemorySemanticsMask base_semantics) {
82 enum AtomicMemoryOrder : uint32_t {
83 kMemoryOrderRelaxed = 0,
84 kMemoryOrderAcquire = 2,
85 kMemoryOrderRelease = 3,
86 kMemoryOrderAcqRel = 4,
87 kMemoryOrderSeqCst = 5
88 };
89
90 IRBuilder<> builder(InsertBefore);
91
92 // Constants for OpenCL C 2.0 memory_order.
93 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
94 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
95 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
96 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
97
98 // Constants for SPIR-V ordering memory semantics.
99 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
100 const auto AcquireSemantics =
101 builder.getInt32(spv::MemorySemanticsAcquireMask);
102 const auto ReleaseSemantics =
103 builder.getInt32(spv::MemorySemanticsReleaseMask);
104 const auto AcqRelSemantics =
105 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
106
107 // Constants for SPIR-V storage class semantics.
108 const auto UniformSemantics =
109 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
110 const auto WorkgroupSemantics =
111 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
112
113 // Instead of sequentially consistent, use acquire, release or acquire
114 // release semantics.
115 Value *base_order = nullptr;
116 switch (base_semantics) {
117 case spv::MemorySemanticsAcquireMask:
118 base_order = AcquireSemantics;
119 break;
120 case spv::MemorySemanticsReleaseMask:
121 base_order = ReleaseSemantics;
122 break;
123 default:
124 base_order = AcqRelSemantics;
125 break;
126 }
127
128 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
129 if (order == nullptr)
130 return builder.CreateOr({storage, base_order});
131
132 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
133 auto is_acquire = builder.CreateICmpEQ(order, acquire);
134 auto is_release = builder.CreateICmpEQ(order, release);
135 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
136 auto semantics =
137 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
138 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
139 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
140 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
141 return builder.CreateOr({storage, semantics});
142}
143
144Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
145 enum AtomicMemoryScope : uint32_t {
146 kMemoryScopeWorkItem = 0,
147 kMemoryScopeWorkGroup = 1,
148 kMemoryScopeDevice = 2,
149 kMemoryScopeAllSVMDevices = 3, // not supported
150 kMemoryScopeSubGroup = 4
151 };
152
153 IRBuilder<> builder(InsertBefore);
154
155 // Constants for OpenCL C 2.0 memory_scope.
156 const auto work_item =
157 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
158 const auto work_group =
159 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
160 const auto sub_group =
161 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
162 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
163
164 // Constants for SPIR-V memory scopes.
165 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
166 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
167 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
168 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
169
170 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
171 if (scope == nullptr)
172 return base_scope;
173
174 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
175 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
176 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
177 auto is_device = builder.CreateICmpEQ(scope, device);
178
179 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
180 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
181 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
182 scope = builder.CreateSelect(is_device, DeviceScope, scope);
183
184 return scope;
185}
186
SJW2c317da2020-03-23 07:39:13 -0500187bool replaceCallsWithValue(Function &F,
188 std::function<Value *(CallInst *)> Replacer) {
189
190 bool Changed = false;
191
192 SmallVector<Instruction *, 4> ToRemoves;
193
194 // Walk the users of the function.
195 for (auto &U : F.uses()) {
196 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
197
198 auto NewValue = Replacer(CI);
199
200 if (NewValue != nullptr) {
201 CI->replaceAllUsesWith(NewValue);
202
203 // Lastly, remember to remove the user.
204 ToRemoves.push_back(CI);
205 }
206 }
207 }
208
209 Changed = !ToRemoves.empty();
210
211 // And cleanup the calls we don't use anymore.
212 for (auto V : ToRemoves) {
213 V->eraseFromParent();
214 }
215
216 return Changed;
217}
218
David Neto22f144c2017-06-12 14:26:21 -0400219struct ReplaceOpenCLBuiltinPass final : public ModulePass {
220 static char ID;
221 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
222
223 bool runOnModule(Module &M) override;
alan-baker6b9d1ee2020-11-03 23:11:32 -0500224
225private:
SJW2c317da2020-03-23 07:39:13 -0500226 bool runOnFunction(Function &F);
227 bool replaceAbs(Function &F);
228 bool replaceAbsDiff(Function &F, bool is_signed);
229 bool replaceCopysign(Function &F);
230 bool replaceRecip(Function &F);
231 bool replaceDivide(Function &F);
232 bool replaceDot(Function &F);
233 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500234 bool replaceExp10(Function &F, const std::string &basename);
235 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100236 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400237 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500238 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100239 bool replacePrefetch(Function &F);
alan-baker3e217772020-11-07 17:29:40 -0500240 bool replaceRelational(Function &F, CmpInst::Predicate P);
SJW2c317da2020-03-23 07:39:13 -0500241 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
242 bool replaceIsFinite(Function &F);
243 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
244 bool replaceUpsample(Function &F);
245 bool replaceRotate(Function &F);
246 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
247 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
248 bool replaceSelect(Function &F);
249 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500250 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500251 bool replaceSignbit(Function &F, bool is_vec);
252 bool replaceMul(Function &F, bool is_float, bool is_mad);
253 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
254 bool replaceVloadHalf(Function &F);
255 bool replaceVloadHalf2(Function &F);
256 bool replaceVloadHalf4(Function &F);
257 bool replaceClspvVloadaHalf2(Function &F);
258 bool replaceClspvVloadaHalf4(Function &F);
259 bool replaceVstoreHalf(Function &F, int vec_size);
260 bool replaceVstoreHalf(Function &F);
261 bool replaceVstoreHalf2(Function &F);
262 bool replaceVstoreHalf4(Function &F);
263 bool replaceHalfReadImage(Function &F);
264 bool replaceHalfWriteImage(Function &F);
265 bool replaceSampledReadImageWithIntCoords(Function &F);
266 bool replaceAtomics(Function &F, spv::Op Op);
267 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400268 bool replaceAtomicLoad(Function &F);
269 bool replaceExplicitAtomics(Function &F, spv::Op Op,
270 spv::MemorySemanticsMask semantics =
271 spv::MemorySemanticsAcquireReleaseMask);
272 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500273 bool replaceCross(Function &F);
274 bool replaceFract(Function &F, int vec_size);
275 bool replaceVload(Function &F);
276 bool replaceVstore(Function &F);
alan-baker3f1bf492020-11-05 09:07:36 -0500277 bool replaceAddSubSat(Function &F, bool is_signed, bool is_add);
Kévin Petit8576f682020-11-02 14:51:32 +0000278 bool replaceHadd(Function &F, bool is_signed,
279 Instruction::BinaryOps join_opcode);
alan-baker2cecaa72020-11-05 14:05:20 -0500280 bool replaceCountZeroes(Function &F, bool leading);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500281 bool replaceMadSat(Function &F, bool is_signed);
alan-baker15106572020-11-06 15:08:10 -0500282 bool replaceOrdered(Function &F, bool is_ordered);
alan-baker497920b2020-11-09 16:41:36 -0500283 bool replaceIsNormal(Function &F);
alan-baker6b9d1ee2020-11-03 23:11:32 -0500284
285 // Caches struct types for { |type|, |type| }. This prevents
286 // getOrInsertFunction from introducing a bitcasts between structs with
287 // identical contents.
288 Type *GetPairStruct(Type *type);
289
290 DenseMap<Type *, Type *> PairStructMap;
David Neto22f144c2017-06-12 14:26:21 -0400291};
SJW2c317da2020-03-23 07:39:13 -0500292
Kévin Petit91bc72e2019-04-08 15:17:46 +0100293} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400294
295char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400296INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
297 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400298
299namespace clspv {
300ModulePass *createReplaceOpenCLBuiltinPass() {
301 return new ReplaceOpenCLBuiltinPass();
302}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400303} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400304
305bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500306 std::list<Function *> func_list;
307 for (auto &F : M.getFunctionList()) {
308 // process only function declarations
309 if (F.isDeclaration() && runOnFunction(F)) {
310 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000311 }
312 }
SJW2c317da2020-03-23 07:39:13 -0500313 if (func_list.size() != 0) {
314 // recursively convert functions, but first remove dead
315 for (auto *F : func_list) {
316 if (F->use_empty()) {
317 F->eraseFromParent();
318 }
319 }
320 runOnModule(M);
321 return true;
322 }
323 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000324}
325
SJW2c317da2020-03-23 07:39:13 -0500326bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
327 auto &FI = Builtins::Lookup(&F);
328 switch (FI.getType()) {
329 case Builtins::kAbs:
330 if (!FI.getParameter(0).is_signed) {
331 return replaceAbs(F);
332 }
333 break;
334 case Builtins::kAbsDiff:
335 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400336
337 case Builtins::kAddSat:
alan-baker3f1bf492020-11-05 09:07:36 -0500338 return replaceAddSubSat(F, FI.getParameter(0).is_signed, true);
alan-bakera52b7312020-10-26 08:58:51 -0400339
alan-bakercc2bafb2020-11-02 08:30:18 -0500340 case Builtins::kClz:
alan-baker2cecaa72020-11-05 14:05:20 -0500341 return replaceCountZeroes(F, true);
342
343 case Builtins::kCtz:
344 return replaceCountZeroes(F, false);
alan-bakercc2bafb2020-11-02 08:30:18 -0500345
alan-bakerb6da5132020-10-29 15:59:06 -0400346 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000347 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400348 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000349 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400350
SJW2c317da2020-03-23 07:39:13 -0500351 case Builtins::kCopysign:
352 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100353
SJW2c317da2020-03-23 07:39:13 -0500354 case Builtins::kHalfRecip:
355 case Builtins::kNativeRecip:
356 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100357
SJW2c317da2020-03-23 07:39:13 -0500358 case Builtins::kHalfDivide:
359 case Builtins::kNativeDivide:
360 return replaceDivide(F);
361
362 case Builtins::kDot:
363 return replaceDot(F);
364
365 case Builtins::kExp10:
366 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500367 case Builtins::kNativeExp10:
368 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500369
370 case Builtins::kLog10:
371 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500372 case Builtins::kNativeLog10:
373 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500374
gnl21636e7992020-09-09 16:08:16 +0100375 case Builtins::kLog1p:
376 return replaceLog1p(F);
377
SJW2c317da2020-03-23 07:39:13 -0500378 case Builtins::kFmod:
379 return replaceFmod(F);
380
381 case Builtins::kBarrier:
382 case Builtins::kWorkGroupBarrier:
383 return replaceBarrier(F);
384
alan-baker12d2c182020-07-20 08:22:42 -0400385 case Builtins::kSubGroupBarrier:
386 return replaceBarrier(F, true);
387
SJW2c317da2020-03-23 07:39:13 -0500388 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400389 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500390 case Builtins::kReadMemFence:
391 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
392 case Builtins::kWriteMemFence:
393 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
394
395 // Relational
396 case Builtins::kIsequal:
alan-baker3e217772020-11-07 17:29:40 -0500397 return replaceRelational(F, CmpInst::FCMP_OEQ);
SJW2c317da2020-03-23 07:39:13 -0500398 case Builtins::kIsgreater:
alan-baker3e217772020-11-07 17:29:40 -0500399 return replaceRelational(F, CmpInst::FCMP_OGT);
SJW2c317da2020-03-23 07:39:13 -0500400 case Builtins::kIsgreaterequal:
alan-baker3e217772020-11-07 17:29:40 -0500401 return replaceRelational(F, CmpInst::FCMP_OGE);
SJW2c317da2020-03-23 07:39:13 -0500402 case Builtins::kIsless:
alan-baker3e217772020-11-07 17:29:40 -0500403 return replaceRelational(F, CmpInst::FCMP_OLT);
SJW2c317da2020-03-23 07:39:13 -0500404 case Builtins::kIslessequal:
alan-baker3e217772020-11-07 17:29:40 -0500405 return replaceRelational(F, CmpInst::FCMP_OLE);
SJW2c317da2020-03-23 07:39:13 -0500406 case Builtins::kIsnotequal:
alan-baker3e217772020-11-07 17:29:40 -0500407 return replaceRelational(F, CmpInst::FCMP_UNE);
408 case Builtins::kIslessgreater:
409 return replaceRelational(F, CmpInst::FCMP_ONE);
SJW2c317da2020-03-23 07:39:13 -0500410
alan-baker15106572020-11-06 15:08:10 -0500411 case Builtins::kIsordered:
412 return replaceOrdered(F, true);
413
414 case Builtins::kIsunordered:
415 return replaceOrdered(F, false);
416
SJW2c317da2020-03-23 07:39:13 -0500417 case Builtins::kIsinf: {
418 bool is_vec = FI.getParameter(0).vector_size != 0;
419 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
420 }
421 case Builtins::kIsnan: {
422 bool is_vec = FI.getParameter(0).vector_size != 0;
423 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
424 }
425
426 case Builtins::kIsfinite:
427 return replaceIsFinite(F);
428
429 case Builtins::kAll: {
430 bool is_vec = FI.getParameter(0).vector_size != 0;
431 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
432 }
433 case Builtins::kAny: {
434 bool is_vec = FI.getParameter(0).vector_size != 0;
435 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
436 }
437
alan-baker497920b2020-11-09 16:41:36 -0500438 case Builtins::kIsnormal:
439 return replaceIsNormal(F);
440
SJW2c317da2020-03-23 07:39:13 -0500441 case Builtins::kUpsample:
442 return replaceUpsample(F);
443
444 case Builtins::kRotate:
445 return replaceRotate(F);
446
447 case Builtins::kConvert:
448 return replaceConvert(F, FI.getParameter(0).is_signed,
449 FI.getReturnType().is_signed);
450
alan-baker4986eff2020-10-29 13:38:00 -0400451 // OpenCL 2.0 explicit atomics have different default scopes and semantics
452 // than legacy atomic functions.
453 case Builtins::kAtomicLoad:
454 case Builtins::kAtomicLoadExplicit:
455 return replaceAtomicLoad(F);
456 case Builtins::kAtomicStore:
457 case Builtins::kAtomicStoreExplicit:
458 return replaceExplicitAtomics(F, spv::OpAtomicStore,
459 spv::MemorySemanticsReleaseMask);
460 case Builtins::kAtomicExchange:
461 case Builtins::kAtomicExchangeExplicit:
462 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
463 case Builtins::kAtomicFetchAdd:
464 case Builtins::kAtomicFetchAddExplicit:
465 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
466 case Builtins::kAtomicFetchSub:
467 case Builtins::kAtomicFetchSubExplicit:
468 return replaceExplicitAtomics(F, spv::OpAtomicISub);
469 case Builtins::kAtomicFetchOr:
470 case Builtins::kAtomicFetchOrExplicit:
471 return replaceExplicitAtomics(F, spv::OpAtomicOr);
472 case Builtins::kAtomicFetchXor:
473 case Builtins::kAtomicFetchXorExplicit:
474 return replaceExplicitAtomics(F, spv::OpAtomicXor);
475 case Builtins::kAtomicFetchAnd:
476 case Builtins::kAtomicFetchAndExplicit:
477 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
478 case Builtins::kAtomicFetchMin:
479 case Builtins::kAtomicFetchMinExplicit:
480 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
481 ? spv::OpAtomicSMin
482 : spv::OpAtomicUMin);
483 case Builtins::kAtomicFetchMax:
484 case Builtins::kAtomicFetchMaxExplicit:
485 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
486 ? spv::OpAtomicSMax
487 : spv::OpAtomicUMax);
488 // Weak compare exchange is generated as strong compare exchange.
489 case Builtins::kAtomicCompareExchangeWeak:
490 case Builtins::kAtomicCompareExchangeWeakExplicit:
491 case Builtins::kAtomicCompareExchangeStrong:
492 case Builtins::kAtomicCompareExchangeStrongExplicit:
493 return replaceAtomicCompareExchange(F);
494
495 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500496 case Builtins::kAtomicInc:
497 return replaceAtomics(F, spv::OpAtomicIIncrement);
498 case Builtins::kAtomicDec:
499 return replaceAtomics(F, spv::OpAtomicIDecrement);
500 case Builtins::kAtomicCmpxchg:
501 return replaceAtomics(F, spv::OpAtomicCompareExchange);
502 case Builtins::kAtomicAdd:
503 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
504 case Builtins::kAtomicSub:
505 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
506 case Builtins::kAtomicXchg:
507 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
508 case Builtins::kAtomicMin:
509 return replaceAtomics(F, FI.getParameter(0).is_signed
510 ? llvm::AtomicRMWInst::Min
511 : llvm::AtomicRMWInst::UMin);
512 case Builtins::kAtomicMax:
513 return replaceAtomics(F, FI.getParameter(0).is_signed
514 ? llvm::AtomicRMWInst::Max
515 : llvm::AtomicRMWInst::UMax);
516 case Builtins::kAtomicAnd:
517 return replaceAtomics(F, llvm::AtomicRMWInst::And);
518 case Builtins::kAtomicOr:
519 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
520 case Builtins::kAtomicXor:
521 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
522
523 case Builtins::kCross:
524 if (FI.getParameter(0).vector_size == 4) {
525 return replaceCross(F);
526 }
527 break;
528
529 case Builtins::kFract:
530 if (FI.getParameterCount()) {
531 return replaceFract(F, FI.getParameter(0).vector_size);
532 }
533 break;
534
535 case Builtins::kMadHi:
536 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
537 case Builtins::kMulHi:
538 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
539
alan-baker6b9d1ee2020-11-03 23:11:32 -0500540 case Builtins::kMadSat:
541 return replaceMadSat(F, FI.getParameter(0).is_signed);
542
SJW2c317da2020-03-23 07:39:13 -0500543 case Builtins::kMad:
544 case Builtins::kMad24:
545 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
546 true);
547 case Builtins::kMul24:
548 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
549 false);
550
551 case Builtins::kSelect:
552 return replaceSelect(F);
553
554 case Builtins::kBitselect:
555 return replaceBitSelect(F);
556
557 case Builtins::kVload:
558 return replaceVload(F);
559
560 case Builtins::kVloadaHalf:
561 case Builtins::kVloadHalf:
562 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
563
564 case Builtins::kVstore:
565 return replaceVstore(F);
566
567 case Builtins::kVstoreHalf:
568 case Builtins::kVstoreaHalf:
569 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
570
571 case Builtins::kSmoothstep: {
572 int vec_size = FI.getLastParameter().vector_size;
573 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500574 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500575 }
576 break;
577 }
578 case Builtins::kStep: {
579 int vec_size = FI.getLastParameter().vector_size;
580 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500581 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500582 }
583 break;
584 }
585
586 case Builtins::kSignbit:
587 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
588
alan-baker3f1bf492020-11-05 09:07:36 -0500589 case Builtins::kSubSat:
590 return replaceAddSubSat(F, FI.getParameter(0).is_signed, false);
591
SJW2c317da2020-03-23 07:39:13 -0500592 case Builtins::kReadImageh:
593 return replaceHalfReadImage(F);
594 case Builtins::kReadImagef:
595 case Builtins::kReadImagei:
596 case Builtins::kReadImageui: {
597 if (FI.getParameter(1).isSampler() &&
598 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
599 return replaceSampledReadImageWithIntCoords(F);
600 }
601 break;
602 }
603
604 case Builtins::kWriteImageh:
605 return replaceHalfWriteImage(F);
606
Kévin Petit1cb45112020-04-27 18:55:48 +0100607 case Builtins::kPrefetch:
608 return replacePrefetch(F);
609
SJW2c317da2020-03-23 07:39:13 -0500610 default:
611 break;
612 }
613
614 return false;
615}
616
alan-baker6b9d1ee2020-11-03 23:11:32 -0500617Type *ReplaceOpenCLBuiltinPass::GetPairStruct(Type *type) {
618 auto iter = PairStructMap.find(type);
619 if (iter != PairStructMap.end())
620 return iter->second;
621
622 auto new_struct = StructType::get(type->getContext(), {type, type});
623 PairStructMap[type] = new_struct;
624 return new_struct;
625}
626
SJW2c317da2020-03-23 07:39:13 -0500627bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
628 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400629 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100630}
631
SJW2c317da2020-03-23 07:39:13 -0500632bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
633 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100634 auto XValue = CI->getOperand(0);
635 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100636
Kévin Petite8edce32019-04-10 14:23:32 +0100637 IRBuilder<> Builder(CI);
638 auto XmY = Builder.CreateSub(XValue, YValue);
639 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100640
SJW2c317da2020-03-23 07:39:13 -0500641 Value *Cmp = nullptr;
642 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100643 Cmp = Builder.CreateICmpSGT(YValue, XValue);
644 } else {
645 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100646 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100647
Kévin Petite8edce32019-04-10 14:23:32 +0100648 return Builder.CreateSelect(Cmp, YmX, XmY);
649 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100650}
651
SJW2c317da2020-03-23 07:39:13 -0500652bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
653 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100654 auto XValue = CI->getOperand(0);
655 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100656
Kévin Petite8edce32019-04-10 14:23:32 +0100657 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100658
SJW2c317da2020-03-23 07:39:13 -0500659 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400660 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400661 IntTy = FixedVectorType::get(
662 IntTy, vec_ty->getElementCount().getKnownMinValue());
Kévin Petit8c1be282019-04-02 19:34:25 +0100663 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100664
Kévin Petite8edce32019-04-10 14:23:32 +0100665 // Return X with the sign of Y
666
667 // Sign bit masks
668 auto SignBit = IntTy->getScalarSizeInBits() - 1;
669 auto SignBitMask = 1 << SignBit;
670 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
671 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
672
673 IRBuilder<> Builder(CI);
674
675 // Extract sign of Y
676 auto YInt = Builder.CreateBitCast(YValue, IntTy);
677 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
678
679 // Clear sign bit in X
680 auto XInt = Builder.CreateBitCast(XValue, IntTy);
681 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
682
683 // Insert sign bit of Y into X
684 auto NewXInt = Builder.CreateOr(XInt, YSign);
685
686 // And cast back to floating-point
687 return Builder.CreateBitCast(NewXInt, Ty);
688 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100689}
690
SJW2c317da2020-03-23 07:39:13 -0500691bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
692 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100693 // Recip has one arg.
694 auto Arg = CI->getOperand(0);
695 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
696 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
697 });
David Neto22f144c2017-06-12 14:26:21 -0400698}
699
SJW2c317da2020-03-23 07:39:13 -0500700bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
701 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100702 auto Op0 = CI->getOperand(0);
703 auto Op1 = CI->getOperand(1);
704 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
705 });
David Neto22f144c2017-06-12 14:26:21 -0400706}
707
SJW2c317da2020-03-23 07:39:13 -0500708bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
709 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100710 auto Op0 = CI->getOperand(0);
711 auto Op1 = CI->getOperand(1);
712
SJW2c317da2020-03-23 07:39:13 -0500713 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100714 if (Op0->getType()->isVectorTy()) {
715 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
716 CI->getType(), {Op0, Op1});
717 } else {
718 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
719 }
720
721 return V;
722 });
723}
724
SJW2c317da2020-03-23 07:39:13 -0500725bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500726 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500727 // convert to natural
728 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500729 std::string NewFName = basename.substr(0, slen);
730 NewFName =
731 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400732
SJW2c317da2020-03-23 07:39:13 -0500733 Module &M = *F.getParent();
734 return replaceCallsWithValue(F, [&](CallInst *CI) {
735 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
736
737 auto Arg = CI->getOperand(0);
738
739 // Constant of the natural log of 10 (ln(10)).
740 const double Ln10 =
741 2.302585092994045684017991454684364207601101488628772976033;
742
743 auto Mul = BinaryOperator::Create(
744 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
745
746 return CallInst::Create(NewF, Mul, "", CI);
747 });
David Neto22f144c2017-06-12 14:26:21 -0400748}
749
SJW2c317da2020-03-23 07:39:13 -0500750bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100751 // OpenCL fmod(x,y) is x - y * trunc(x/y)
752 // The sign for a non-zero result is taken from x.
753 // (Try an example.)
754 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500755 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100756 auto Op0 = CI->getOperand(0);
757 auto Op1 = CI->getOperand(1);
758 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
759 });
760}
761
SJW2c317da2020-03-23 07:39:13 -0500762bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500763 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500764 // convert to natural
765 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500766 std::string NewFName = basename.substr(0, slen);
767 NewFName =
768 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400769
SJW2c317da2020-03-23 07:39:13 -0500770 Module &M = *F.getParent();
771 return replaceCallsWithValue(F, [&](CallInst *CI) {
772 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
773
774 auto Arg = CI->getOperand(0);
775
776 // Constant of the reciprocal of the natural log of 10 (ln(10)).
777 const double Ln10 =
778 0.434294481903251827651128918916605082294397005803666566114;
779
780 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
781
782 return BinaryOperator::Create(Instruction::FMul,
783 ConstantFP::get(Arg->getType(), Ln10), NewCI,
784 "", CI);
785 });
David Neto22f144c2017-06-12 14:26:21 -0400786}
787
gnl21636e7992020-09-09 16:08:16 +0100788bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
789 // convert to natural
790 std::string NewFName =
791 Builtins::GetMangledFunctionName("log", F.getFunctionType());
792
793 Module &M = *F.getParent();
794 return replaceCallsWithValue(F, [&](CallInst *CI) {
795 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
796
797 auto Arg = CI->getOperand(0);
798
799 auto ArgP1 = BinaryOperator::Create(
800 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
801
802 return CallInst::Create(NewF, ArgP1, "", CI);
803 });
804}
805
alan-baker12d2c182020-07-20 08:22:42 -0400806bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400807
alan-bakerf6bc8252020-09-23 14:58:55 -0400808 enum {
809 CLK_LOCAL_MEM_FENCE = 0x01,
810 CLK_GLOBAL_MEM_FENCE = 0x02,
811 CLK_IMAGE_MEM_FENCE = 0x04
812 };
David Neto22f144c2017-06-12 14:26:21 -0400813
alan-baker12d2c182020-07-20 08:22:42 -0400814 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100815 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400816
Kévin Petitc4643922019-06-17 19:32:05 +0100817 // We need to map the OpenCL constants to the SPIR-V equivalents.
818 const auto LocalMemFence =
819 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
820 const auto GlobalMemFence =
821 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400822 const auto ImageMemFence =
823 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400824 const auto ConstantAcquireRelease = ConstantInt::get(
825 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100826 const auto ConstantScopeDevice =
827 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
828 const auto ConstantScopeWorkgroup =
829 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400830 const auto ConstantScopeSubgroup =
831 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400832
Kévin Petitc4643922019-06-17 19:32:05 +0100833 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
834 const auto LocalMemFenceMask =
835 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
836 const auto WorkgroupShiftAmount =
837 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
838 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
839 Instruction::Shl, LocalMemFenceMask,
840 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400841
Kévin Petitc4643922019-06-17 19:32:05 +0100842 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
843 const auto GlobalMemFenceMask =
844 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
845 const auto UniformShiftAmount =
846 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
847 const auto MemorySemanticsUniform = BinaryOperator::Create(
848 Instruction::Shl, GlobalMemFenceMask,
849 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400850
alan-bakerf6bc8252020-09-23 14:58:55 -0400851 // OpenCL 2.0
852 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
853 const auto ImageMemFenceMask =
854 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
855 const auto ImageShiftAmount =
856 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
857 const auto MemorySemanticsImage = BinaryOperator::Create(
858 Instruction::Shl, ImageMemFenceMask,
859 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
860
Kévin Petitc4643922019-06-17 19:32:05 +0100861 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400862 // MemorySemanticsSequentiallyConsistentMask.
863 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100864 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400865 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400866 auto MemorySemantics2 = BinaryOperator::Create(
867 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
868 auto MemorySemantics = BinaryOperator::Create(
869 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400870
alan-baker12d2c182020-07-20 08:22:42 -0400871 // If the memory scope is not specified explicitly, it is either Subgroup
872 // or Workgroup depending on the type of barrier.
873 Value *MemoryScope =
874 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
875 if (CI->data_operands_size() > 1) {
876 enum {
877 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
878 CL_MEMORY_SCOPE_DEVICE = 0x2,
879 CL_MEMORY_SCOPE_SUBGROUP = 0x4
880 };
881 // The call was given an explicit memory scope.
882 const auto MemoryScopeSubgroup =
883 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
884 const auto MemoryScopeDevice =
885 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400886
alan-baker12d2c182020-07-20 08:22:42 -0400887 auto Cmp =
888 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
889 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
890 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
891 ConstantScopeWorkgroup, "", CI);
892 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
893 MemoryScopeDevice, CI->getOperand(1), "", CI);
894 MemoryScope =
895 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
896 }
897
898 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
899 // the type of barrier;
900 const auto ExecutionScope =
901 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400902
Kévin Petitc4643922019-06-17 19:32:05 +0100903 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -0400904 {Attribute::NoDuplicate, Attribute::Convergent},
905 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +0100906 {ExecutionScope, MemoryScope, MemorySemantics});
907 });
David Neto22f144c2017-06-12 14:26:21 -0400908}
909
SJW2c317da2020-03-23 07:39:13 -0500910bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
911 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400912
SJW2c317da2020-03-23 07:39:13 -0500913 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400914 enum {
915 CLK_LOCAL_MEM_FENCE = 0x01,
916 CLK_GLOBAL_MEM_FENCE = 0x02,
917 CLK_IMAGE_MEM_FENCE = 0x04,
918 };
David Neto22f144c2017-06-12 14:26:21 -0400919
SJW2c317da2020-03-23 07:39:13 -0500920 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400921
SJW2c317da2020-03-23 07:39:13 -0500922 // We need to map the OpenCL constants to the SPIR-V equivalents.
923 const auto LocalMemFence =
924 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
925 const auto GlobalMemFence =
926 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400927 const auto ImageMemFence =
928 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500929 const auto ConstantMemorySemantics =
930 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400931 const auto ConstantScopeWorkgroup =
932 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400933
SJW2c317da2020-03-23 07:39:13 -0500934 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
935 const auto LocalMemFenceMask =
936 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
937 const auto WorkgroupShiftAmount =
938 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
939 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
940 Instruction::Shl, LocalMemFenceMask,
941 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400942
SJW2c317da2020-03-23 07:39:13 -0500943 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
944 const auto GlobalMemFenceMask =
945 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
946 const auto UniformShiftAmount =
947 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
948 const auto MemorySemanticsUniform = BinaryOperator::Create(
949 Instruction::Shl, GlobalMemFenceMask,
950 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400951
alan-bakerf6bc8252020-09-23 14:58:55 -0400952 // OpenCL 2.0
953 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
954 const auto ImageMemFenceMask =
955 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
956 const auto ImageShiftAmount =
957 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
958 const auto MemorySemanticsImage = BinaryOperator::Create(
959 Instruction::Shl, ImageMemFenceMask,
960 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
961
SJW2c317da2020-03-23 07:39:13 -0500962 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400963 // |semantics|.
964 auto MemorySemantics1 =
SJW2c317da2020-03-23 07:39:13 -0500965 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
966 ConstantMemorySemantics, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400967 auto MemorySemantics2 = BinaryOperator::Create(
968 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
969 auto MemorySemantics = BinaryOperator::Create(
970 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400971
alan-baker12d2c182020-07-20 08:22:42 -0400972 // Memory Scope is always workgroup.
973 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400974
alan-baker3d905692020-10-28 14:02:37 -0400975 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
976 {Attribute::Convergent}, CI->getType(),
SJW2c317da2020-03-23 07:39:13 -0500977 {MemoryScope, MemorySemantics});
978 });
David Neto22f144c2017-06-12 14:26:21 -0400979}
980
Kévin Petit1cb45112020-04-27 18:55:48 +0100981bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
982 bool Changed = false;
983
984 SmallVector<Instruction *, 4> ToRemoves;
985
986 // Find all calls to the function
987 for (auto &U : F.uses()) {
988 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
989 ToRemoves.push_back(CI);
990 }
991 }
992
993 Changed = !ToRemoves.empty();
994
995 // Delete them
996 for (auto V : ToRemoves) {
997 V->eraseFromParent();
998 }
999
1000 return Changed;
1001}
1002
SJW2c317da2020-03-23 07:39:13 -05001003bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
alan-baker3e217772020-11-07 17:29:40 -05001004 CmpInst::Predicate P) {
SJW2c317da2020-03-23 07:39:13 -05001005 return replaceCallsWithValue(F, [&](CallInst *CI) {
1006 // The predicate to use in the CmpInst.
1007 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -04001008
SJW2c317da2020-03-23 07:39:13 -05001009 auto Arg1 = CI->getOperand(0);
1010 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001011
SJW2c317da2020-03-23 07:39:13 -05001012 const auto Cmp =
1013 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
alan-baker3e217772020-11-07 17:29:40 -05001014 if (isa<VectorType>(F.getReturnType()))
1015 return CastInst::Create(Instruction::SExt, Cmp, CI->getType(), "", CI);
1016 return CastInst::Create(Instruction::ZExt, Cmp, CI->getType(), "", CI);
SJW2c317da2020-03-23 07:39:13 -05001017 });
David Neto22f144c2017-06-12 14:26:21 -04001018}
1019
SJW2c317da2020-03-23 07:39:13 -05001020bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
1021 spv::Op SPIRVOp,
1022 int32_t C) {
1023 Module &M = *F.getParent();
1024 return replaceCallsWithValue(F, [&](CallInst *CI) {
1025 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -04001026
SJW2c317da2020-03-23 07:39:13 -05001027 // The value to return for true.
1028 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -04001029
SJW2c317da2020-03-23 07:39:13 -05001030 // The value to return for false.
1031 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001032
SJW2c317da2020-03-23 07:39:13 -05001033 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001034 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001035 CorrespondingBoolTy =
1036 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1037 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001038 }
David Neto22f144c2017-06-12 14:26:21 -04001039
SJW2c317da2020-03-23 07:39:13 -05001040 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1041 CorrespondingBoolTy, {CI->getOperand(0)});
1042
1043 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1044 });
David Neto22f144c2017-06-12 14:26:21 -04001045}
1046
SJW2c317da2020-03-23 07:39:13 -05001047bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1048 Module &M = *F.getParent();
1049 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001050 auto &C = M.getContext();
1051 auto Val = CI->getOperand(0);
1052 auto ValTy = Val->getType();
1053 auto RetTy = CI->getType();
1054
1055 // Get a suitable integer type to represent the number
1056 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1057
1058 // Create Mask
1059 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001060 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001061 switch (ScalarSize) {
1062 case 16:
1063 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1064 break;
1065 case 32:
1066 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1067 break;
1068 case 64:
1069 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1070 break;
1071 default:
1072 llvm_unreachable("Unsupported floating-point type");
1073 }
1074
1075 IRBuilder<> Builder(CI);
1076
1077 // Bitcast to int
1078 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1079
1080 // Mask and compare
1081 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1082 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1083
1084 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001085 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001086 if (ValTy->isVectorTy()) {
1087 RetTrue = ConstantInt::getSigned(RetTy, -1);
1088 } else {
1089 RetTrue = ConstantInt::get(RetTy, 1);
1090 }
1091 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1092 });
1093}
1094
SJW2c317da2020-03-23 07:39:13 -05001095bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1096 Module &M = *F.getParent();
1097 return replaceCallsWithValue(F, [&](CallInst *CI) {
1098 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001099
SJW2c317da2020-03-23 07:39:13 -05001100 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001101
SJW2c317da2020-03-23 07:39:13 -05001102 // If the argument is a 32-bit int, just use a shift
1103 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1104 V = BinaryOperator::Create(Instruction::LShr, Arg,
1105 ConstantInt::get(Arg->getType(), 31), "", CI);
1106 } else {
1107 // The value for zero to compare against.
1108 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001109
SJW2c317da2020-03-23 07:39:13 -05001110 // The value to return for true.
1111 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001112
SJW2c317da2020-03-23 07:39:13 -05001113 // The value to return for false.
1114 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001115
SJW2c317da2020-03-23 07:39:13 -05001116 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1117 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001118
SJW2c317da2020-03-23 07:39:13 -05001119 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001120
SJW2c317da2020-03-23 07:39:13 -05001121 // If we have a function to call, call it!
1122 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001123
SJW2c317da2020-03-23 07:39:13 -05001124 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001125
SJW2c317da2020-03-23 07:39:13 -05001126 const auto NewCI = clspv::InsertSPIRVOp(
1127 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1128 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001129
SJW2c317da2020-03-23 07:39:13 -05001130 } else {
1131 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001132 }
1133
SJW2c317da2020-03-23 07:39:13 -05001134 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001135 }
SJW2c317da2020-03-23 07:39:13 -05001136 return V;
1137 });
David Neto22f144c2017-06-12 14:26:21 -04001138}
1139
SJW2c317da2020-03-23 07:39:13 -05001140bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1141 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1142 // Get arguments
1143 auto HiValue = CI->getOperand(0);
1144 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001145
SJW2c317da2020-03-23 07:39:13 -05001146 // Don't touch overloads that aren't in OpenCL C
1147 auto HiType = HiValue->getType();
1148 auto LoType = LoValue->getType();
1149
1150 if (HiType != LoType) {
1151 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001152 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001153
SJW2c317da2020-03-23 07:39:13 -05001154 if (!HiType->isIntOrIntVectorTy()) {
1155 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001156 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001157
SJW2c317da2020-03-23 07:39:13 -05001158 if (HiType->getScalarSizeInBits() * 2 !=
1159 CI->getType()->getScalarSizeInBits()) {
1160 return nullptr;
1161 }
1162
1163 if ((HiType->getScalarSizeInBits() != 8) &&
1164 (HiType->getScalarSizeInBits() != 16) &&
1165 (HiType->getScalarSizeInBits() != 32)) {
1166 return nullptr;
1167 }
1168
James Pricecf53df42020-04-20 14:41:24 -04001169 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001170 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001171 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1172 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001173 return nullptr;
1174 }
1175 }
1176
1177 // Convert both operands to the result type
1178 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1179 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1180
1181 // Shift high operand
1182 auto ShiftAmount =
1183 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1184 auto HiShifted =
1185 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1186
1187 // OR both results
1188 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1189 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001190}
1191
SJW2c317da2020-03-23 07:39:13 -05001192bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1193 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1194 // Get arguments
1195 auto SrcValue = CI->getOperand(0);
1196 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001197
SJW2c317da2020-03-23 07:39:13 -05001198 // Don't touch overloads that aren't in OpenCL C
1199 auto SrcType = SrcValue->getType();
1200 auto RotType = RotAmount->getType();
1201
1202 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1203 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001204 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001205
SJW2c317da2020-03-23 07:39:13 -05001206 if (!SrcType->isIntOrIntVectorTy()) {
1207 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001208 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001209
SJW2c317da2020-03-23 07:39:13 -05001210 if ((SrcType->getScalarSizeInBits() != 8) &&
1211 (SrcType->getScalarSizeInBits() != 16) &&
1212 (SrcType->getScalarSizeInBits() != 32) &&
1213 (SrcType->getScalarSizeInBits() != 64)) {
1214 return nullptr;
1215 }
1216
James Pricecf53df42020-04-20 14:41:24 -04001217 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001218 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001219 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1220 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001221 return nullptr;
1222 }
1223 }
1224
alan-bakerfd22ae12020-10-29 15:59:22 -04001225 // Replace with LLVM's funnel shift left intrinsic because it is more
1226 // generic than rotate.
1227 Function *intrinsic =
1228 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1229 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1230 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001231 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001232}
1233
SJW2c317da2020-03-23 07:39:13 -05001234bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1235 bool DstIsSigned) {
1236 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1237 Value *V = nullptr;
1238 // Get arguments
1239 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001240
SJW2c317da2020-03-23 07:39:13 -05001241 // Don't touch overloads that aren't in OpenCL C
1242 auto SrcType = SrcValue->getType();
1243 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001244
SJW2c317da2020-03-23 07:39:13 -05001245 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1246 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1247 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001248 }
1249
James Pricecf53df42020-04-20 14:41:24 -04001250 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001251 unsigned SrcNumElements =
1252 SrcVecType->getElementCount().getKnownMinValue();
1253 unsigned DstNumElements =
1254 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001255 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001256 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001257 }
1258
James Pricecf53df42020-04-20 14:41:24 -04001259 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1260 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1261 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001262 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001263 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001264 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001265
SJW2c317da2020-03-23 07:39:13 -05001266 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1267 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1268
1269 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1270 bool DstIsInt = DstType->isIntOrIntVectorTy();
1271
1272 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1273 // Unnecessary cast operation.
1274 V = SrcValue;
1275 } else if (SrcIsFloat && DstIsFloat) {
1276 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1277 } else if (SrcIsFloat && DstIsInt) {
1278 if (DstIsSigned) {
1279 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1280 } else {
1281 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1282 }
1283 } else if (SrcIsInt && DstIsFloat) {
1284 if (SrcIsSigned) {
1285 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1286 } else {
1287 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1288 }
1289 } else if (SrcIsInt && DstIsInt) {
1290 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1291 } else {
1292 // Not something we're supposed to handle, just move on
1293 }
1294
1295 return V;
1296 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001297}
1298
SJW2c317da2020-03-23 07:39:13 -05001299bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1300 bool is_mad) {
1301 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1302 Value *V = nullptr;
1303 // Get arguments
1304 auto AValue = CI->getOperand(0);
1305 auto BValue = CI->getOperand(1);
1306 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001307
SJW2c317da2020-03-23 07:39:13 -05001308 // Don't touch overloads that aren't in OpenCL C
1309 auto AType = AValue->getType();
1310 auto BType = BValue->getType();
1311 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001312
SJW2c317da2020-03-23 07:39:13 -05001313 if ((AType != BType) || (CI->getType() != AType) ||
1314 (is_mad && (AType != CType))) {
1315 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001316 }
1317
SJW2c317da2020-03-23 07:39:13 -05001318 if (!AType->isIntOrIntVectorTy()) {
1319 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001320 }
Kévin Petit8a560882019-03-21 15:24:34 +00001321
SJW2c317da2020-03-23 07:39:13 -05001322 if ((AType->getScalarSizeInBits() != 8) &&
1323 (AType->getScalarSizeInBits() != 16) &&
1324 (AType->getScalarSizeInBits() != 32) &&
1325 (AType->getScalarSizeInBits() != 64)) {
1326 return V;
1327 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001328
James Pricecf53df42020-04-20 14:41:24 -04001329 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001330 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001331 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1332 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001333 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001334 }
1335 }
1336
SJW2c317da2020-03-23 07:39:13 -05001337 // Our SPIR-V op returns a struct, create a type for it
alan-baker6b9d1ee2020-11-03 23:11:32 -05001338 auto ExMulRetType = GetPairStruct(AType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001339
SJW2c317da2020-03-23 07:39:13 -05001340 // Select the appropriate signed/unsigned SPIR-V op
1341 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1342
1343 // Call the SPIR-V op
1344 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1345 ExMulRetType, {AValue, BValue});
1346
1347 // Get the high part of the result
1348 unsigned Idxs[] = {1};
1349 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1350
1351 // If we're handling a mad_hi, add the third argument to the result
1352 if (is_mad) {
1353 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001354 }
1355
SJW2c317da2020-03-23 07:39:13 -05001356 return V;
1357 });
Kévin Petit8a560882019-03-21 15:24:34 +00001358}
1359
SJW2c317da2020-03-23 07:39:13 -05001360bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1361 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1362 // Get arguments
1363 auto FalseValue = CI->getOperand(0);
1364 auto TrueValue = CI->getOperand(1);
1365 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001366
SJW2c317da2020-03-23 07:39:13 -05001367 // Don't touch overloads that aren't in OpenCL C
1368 auto FalseType = FalseValue->getType();
1369 auto TrueType = TrueValue->getType();
1370 auto PredicateType = PredicateValue->getType();
1371
1372 if (FalseType != TrueType) {
1373 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001374 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001375
SJW2c317da2020-03-23 07:39:13 -05001376 if (!PredicateType->isIntOrIntVectorTy()) {
1377 return nullptr;
1378 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001379
SJW2c317da2020-03-23 07:39:13 -05001380 if (!FalseType->isIntOrIntVectorTy() &&
1381 !FalseType->getScalarType()->isFloatingPointTy()) {
1382 return nullptr;
1383 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001384
SJW2c317da2020-03-23 07:39:13 -05001385 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1386 return nullptr;
1387 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001388
SJW2c317da2020-03-23 07:39:13 -05001389 if (FalseType->getScalarSizeInBits() !=
1390 PredicateType->getScalarSizeInBits()) {
1391 return nullptr;
1392 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001393
James Pricecf53df42020-04-20 14:41:24 -04001394 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001395 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1396 if (NumElements != cast<VectorType>(PredicateType)
1397 ->getElementCount()
1398 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001399 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001400 }
1401
James Pricecf53df42020-04-20 14:41:24 -04001402 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1403 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001404 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001405 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001406 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001407
SJW2c317da2020-03-23 07:39:13 -05001408 // Create constant
1409 const auto ZeroValue = Constant::getNullValue(PredicateType);
1410
1411 // Scalar and vector are to be treated differently
1412 CmpInst::Predicate Pred;
1413 if (PredicateType->isVectorTy()) {
1414 Pred = CmpInst::ICMP_SLT;
1415 } else {
1416 Pred = CmpInst::ICMP_NE;
1417 }
1418
1419 // Create comparison instruction
1420 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1421 ZeroValue, "", CI);
1422
1423 // Create select
1424 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1425 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001426}
1427
SJW2c317da2020-03-23 07:39:13 -05001428bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1429 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1430 Value *V = nullptr;
1431 if (CI->getNumOperands() != 4) {
1432 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001433 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001434
SJW2c317da2020-03-23 07:39:13 -05001435 // Get arguments
1436 auto FalseValue = CI->getOperand(0);
1437 auto TrueValue = CI->getOperand(1);
1438 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001439
SJW2c317da2020-03-23 07:39:13 -05001440 // Don't touch overloads that aren't in OpenCL C
1441 auto FalseType = FalseValue->getType();
1442 auto TrueType = TrueValue->getType();
1443 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001444
SJW2c317da2020-03-23 07:39:13 -05001445 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1446 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001447 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001448
James Pricecf53df42020-04-20 14:41:24 -04001449 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001450 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1451 !TrueType->getScalarType()->isIntegerTy()) {
1452 return V;
1453 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001454 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001455 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1456 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001457 return V;
1458 }
1459 }
1460
1461 // Remember the type of the operands
1462 auto OpType = TrueType;
1463
1464 // The actual bit selection will always be done on an integer type,
1465 // declare it here
1466 Type *BitType;
1467
1468 // If the operands are float, then bitcast them to int
1469 if (OpType->getScalarType()->isFloatingPointTy()) {
1470
1471 // First create the new type
1472 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1473
1474 // Then bitcast all operands
1475 PredicateValue =
1476 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1477 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1478 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1479
1480 } else {
1481 // The operands have an integer type, use it directly
1482 BitType = OpType;
1483 }
1484
1485 // All the operands are now always integers
1486 // implement as (c & b) | (~c & a)
1487
1488 // Create our negated predicate value
1489 auto AllOnes = Constant::getAllOnesValue(BitType);
1490 auto NotPredicateValue = BinaryOperator::Create(
1491 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1492
1493 // Then put everything together
1494 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1495 FalseValue, "", CI);
1496 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1497 TrueValue, "", CI);
1498
1499 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1500
1501 // If we were dealing with a floating point type, we must bitcast
1502 // the result back to that
1503 if (OpType->getScalarType()->isFloatingPointTy()) {
1504 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1505 }
1506
1507 return V;
1508 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001509}
1510
SJW61531372020-06-09 07:31:08 -05001511bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001512 // convert to vector versions
1513 Module &M = *F.getParent();
1514 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1515 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1516 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001517
SJW2c317da2020-03-23 07:39:13 -05001518 // First figure out which function we're dealing with
1519 if (is_smooth) {
1520 ArgsToSplat.push_back(CI->getOperand(1));
1521 VectorArg = CI->getOperand(2);
1522 } else {
1523 VectorArg = CI->getOperand(1);
1524 }
1525
1526 // Splat arguments that need to be
1527 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001528 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001529
1530 for (auto arg : ArgsToSplat) {
1531 Value *NewVectorArg = UndefValue::get(VecType);
alan-baker5a8c3be2020-09-09 13:44:26 -04001532 for (auto i = 0; i < VecType->getElementCount().getKnownMinValue(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001533 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1534 NewVectorArg =
1535 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1536 }
1537 SplatArgs.push_back(NewVectorArg);
1538 }
1539
1540 // Replace the call with the vector/vector flavour
1541 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1542 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1543
SJW61531372020-06-09 07:31:08 -05001544 std::string NewFName = Builtins::GetMangledFunctionName(
1545 is_smooth ? "smoothstep" : "step", NewFType);
1546
SJW2c317da2020-03-23 07:39:13 -05001547 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1548
1549 SmallVector<Value *, 3> NewArgs;
1550 for (auto arg : SplatArgs) {
1551 NewArgs.push_back(arg);
1552 }
1553 NewArgs.push_back(VectorArg);
1554
1555 return CallInst::Create(NewF, NewArgs, "", CI);
1556 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001557}
1558
SJW2c317da2020-03-23 07:39:13 -05001559bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001560 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1561 auto Arg = CI->getOperand(0);
1562 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001563
SJW2c317da2020-03-23 07:39:13 -05001564 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001565
SJW2c317da2020-03-23 07:39:13 -05001566 return BinaryOperator::Create(Op, Bitcast,
1567 ConstantInt::get(CI->getType(), 31), "", CI);
1568 });
David Neto22f144c2017-06-12 14:26:21 -04001569}
1570
SJW2c317da2020-03-23 07:39:13 -05001571bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1572 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001573 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1574 // The multiply instruction to use.
1575 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001576
SJW2c317da2020-03-23 07:39:13 -05001577 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001578
SJW2c317da2020-03-23 07:39:13 -05001579 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1580 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001581
SJW2c317da2020-03-23 07:39:13 -05001582 if (is_mad) {
1583 // The add instruction to use.
1584 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001585
SJW2c317da2020-03-23 07:39:13 -05001586 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001587 }
David Neto22f144c2017-06-12 14:26:21 -04001588
SJW2c317da2020-03-23 07:39:13 -05001589 return V;
1590 });
David Neto22f144c2017-06-12 14:26:21 -04001591}
1592
SJW2c317da2020-03-23 07:39:13 -05001593bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001594 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1595 Value *V = nullptr;
1596 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001597
SJW2c317da2020-03-23 07:39:13 -05001598 auto data_type = data->getType();
1599 if (!data_type->isVectorTy())
1600 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001601
James Pricecf53df42020-04-20 14:41:24 -04001602 auto vec_data_type = cast<VectorType>(data_type);
1603
alan-baker5a8c3be2020-09-09 13:44:26 -04001604 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001605 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1606 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001607
SJW2c317da2020-03-23 07:39:13 -05001608 auto offset = CI->getOperand(1);
1609 auto ptr = CI->getOperand(2);
1610 auto ptr_type = ptr->getType();
1611 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001612 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001613 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001614
SJW2c317da2020-03-23 07:39:13 -05001615 // Avoid pointer casts. Instead generate the correct number of stores
1616 // and rely on drivers to coalesce appropriately.
1617 IRBuilder<> builder(CI);
1618 auto elems_const = builder.getInt32(elems);
1619 auto adjust = builder.CreateMul(offset, elems_const);
1620 for (auto i = 0; i < elems; ++i) {
1621 auto idx = builder.getInt32(i);
1622 auto add = builder.CreateAdd(adjust, idx);
1623 auto gep = builder.CreateGEP(ptr, add);
1624 auto extract = builder.CreateExtractElement(data, i);
1625 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001626 }
SJW2c317da2020-03-23 07:39:13 -05001627 return V;
1628 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001629}
1630
SJW2c317da2020-03-23 07:39:13 -05001631bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001632 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1633 Value *V = nullptr;
1634 auto ret_type = F.getReturnType();
1635 if (!ret_type->isVectorTy())
1636 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001637
James Pricecf53df42020-04-20 14:41:24 -04001638 auto vec_ret_type = cast<VectorType>(ret_type);
1639
alan-baker5a8c3be2020-09-09 13:44:26 -04001640 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001641 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1642 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001643
SJW2c317da2020-03-23 07:39:13 -05001644 auto offset = CI->getOperand(0);
1645 auto ptr = CI->getOperand(1);
1646 auto ptr_type = ptr->getType();
1647 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001648 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001649 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001650
SJW2c317da2020-03-23 07:39:13 -05001651 // Avoid pointer casts. Instead generate the correct number of loads
1652 // and rely on drivers to coalesce appropriately.
1653 IRBuilder<> builder(CI);
1654 auto elems_const = builder.getInt32(elems);
1655 V = UndefValue::get(ret_type);
1656 auto adjust = builder.CreateMul(offset, elems_const);
1657 for (auto i = 0; i < elems; ++i) {
1658 auto idx = builder.getInt32(i);
1659 auto add = builder.CreateAdd(adjust, idx);
1660 auto gep = builder.CreateGEP(ptr, add);
1661 auto load = builder.CreateLoad(gep);
1662 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001663 }
SJW2c317da2020-03-23 07:39:13 -05001664 return V;
1665 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001666}
1667
SJW2c317da2020-03-23 07:39:13 -05001668bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1669 const std::string &name,
1670 int vec_size) {
1671 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1672 if (!vec_size) {
1673 // deduce vec_size from last character of name (e.g. vload_half4)
1674 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001675 }
SJW2c317da2020-03-23 07:39:13 -05001676 switch (vec_size) {
1677 case 2:
1678 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1679 case 4:
1680 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1681 case 0:
1682 if (!is_clspv_version) {
1683 return replaceVloadHalf(F);
1684 }
1685 default:
1686 llvm_unreachable("Unsupported vload_half vector size");
1687 break;
1688 }
1689 return false;
David Neto22f144c2017-06-12 14:26:21 -04001690}
1691
SJW2c317da2020-03-23 07:39:13 -05001692bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1693 Module &M = *F.getParent();
1694 return replaceCallsWithValue(F, [&](CallInst *CI) {
1695 // The index argument from vload_half.
1696 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001697
SJW2c317da2020-03-23 07:39:13 -05001698 // The pointer argument from vload_half.
1699 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001700
SJW2c317da2020-03-23 07:39:13 -05001701 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001702 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001703 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1704
1705 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001706 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001707
1708 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1709
1710 Value *V = nullptr;
1711
alan-baker7efcaaa2020-05-06 19:33:27 -04001712 bool supports_16bit_storage = true;
1713 switch (Arg1->getType()->getPointerAddressSpace()) {
1714 case clspv::AddressSpace::Global:
1715 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1716 clspv::Option::StorageClass::kSSBO);
1717 break;
1718 case clspv::AddressSpace::Constant:
1719 if (clspv::Option::ConstantArgsInUniformBuffer())
1720 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1721 clspv::Option::StorageClass::kUBO);
1722 else
1723 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1724 clspv::Option::StorageClass::kSSBO);
1725 break;
1726 default:
1727 // Clspv will emit the Float16 capability if the half type is
1728 // encountered. That capability covers private and local addressspaces.
1729 break;
1730 }
1731
1732 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001733 auto ShortTy = Type::getInt16Ty(M.getContext());
1734 auto ShortPointerTy =
1735 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1736
1737 // Cast the half* pointer to short*.
1738 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1739
1740 // Index into the correct address of the casted pointer.
1741 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1742
1743 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001744 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001745
1746 // ZExt the short -> int.
1747 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1748
1749 // Get our float2.
1750 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1751
1752 // Extract out the bottom element which is our float result.
1753 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1754 } else {
1755 // Assume the pointer argument points to storage aligned to 32bits
1756 // or more.
1757 // TODO(dneto): Do more analysis to make sure this is true?
1758 //
1759 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1760 // with:
1761 //
1762 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1763 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1764 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1765 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1766 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1767 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1768 // x float> %converted, %index_is_odd32
1769
1770 auto IntPointerTy =
1771 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1772
1773 // Cast the base pointer to int*.
1774 // In a valid call (according to assumptions), this should get
1775 // optimized away in the simplify GEP pass.
1776 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1777
1778 auto One = ConstantInt::get(IntTy, 1);
1779 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1780 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1781
1782 // Index into the correct address of the casted pointer.
1783 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1784
1785 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001786 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001787
1788 // Get our float2.
1789 auto Call = CallInst::Create(NewF, Load, "", CI);
1790
1791 // Extract out the float result, where the element number is
1792 // determined by whether the original index was even or odd.
1793 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1794 }
1795 return V;
1796 });
1797}
1798
1799bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1800 Module &M = *F.getParent();
1801 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001802 // The index argument from vload_half.
1803 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001804
Kévin Petite8edce32019-04-10 14:23:32 +01001805 // The pointer argument from vload_half.
1806 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001807
Kévin Petite8edce32019-04-10 14:23:32 +01001808 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001809 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001810 auto NewPointerTy =
1811 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001812 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001813
Kévin Petite8edce32019-04-10 14:23:32 +01001814 // Cast the half* pointer to int*.
1815 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001816
Kévin Petite8edce32019-04-10 14:23:32 +01001817 // Index into the correct address of the casted pointer.
1818 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001819
Kévin Petite8edce32019-04-10 14:23:32 +01001820 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001821 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001822
Kévin Petite8edce32019-04-10 14:23:32 +01001823 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001824 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001825
Kévin Petite8edce32019-04-10 14:23:32 +01001826 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001827
Kévin Petite8edce32019-04-10 14:23:32 +01001828 // Get our float2.
1829 return CallInst::Create(NewF, Load, "", CI);
1830 });
David Neto22f144c2017-06-12 14:26:21 -04001831}
1832
SJW2c317da2020-03-23 07:39:13 -05001833bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1834 Module &M = *F.getParent();
1835 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001836 // The index argument from vload_half.
1837 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001838
Kévin Petite8edce32019-04-10 14:23:32 +01001839 // The pointer argument from vload_half.
1840 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001841
Kévin Petite8edce32019-04-10 14:23:32 +01001842 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001843 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1844 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001845 auto NewPointerTy =
1846 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001847 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001848
Kévin Petite8edce32019-04-10 14:23:32 +01001849 // Cast the half* pointer to int2*.
1850 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001851
Kévin Petite8edce32019-04-10 14:23:32 +01001852 // Index into the correct address of the casted pointer.
1853 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001854
Kévin Petite8edce32019-04-10 14:23:32 +01001855 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001856 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001857
Kévin Petite8edce32019-04-10 14:23:32 +01001858 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001859 auto X =
1860 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1861 auto Y =
1862 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001863
Kévin Petite8edce32019-04-10 14:23:32 +01001864 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001865 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001866
Kévin Petite8edce32019-04-10 14:23:32 +01001867 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001868
Kévin Petite8edce32019-04-10 14:23:32 +01001869 // Get the lower (x & y) components of our final float4.
1870 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001871
Kévin Petite8edce32019-04-10 14:23:32 +01001872 // Get the higher (z & w) components of our final float4.
1873 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001874
Kévin Petite8edce32019-04-10 14:23:32 +01001875 Constant *ShuffleMask[4] = {
1876 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1877 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001878
Kévin Petite8edce32019-04-10 14:23:32 +01001879 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001880 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1881 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001882 });
David Neto22f144c2017-06-12 14:26:21 -04001883}
1884
SJW2c317da2020-03-23 07:39:13 -05001885bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001886
1887 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1888 //
1889 // %u = load i32 %ptr
1890 // %fxy = call <2 x float> Unpack2xHalf(u)
1891 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001892 Module &M = *F.getParent();
1893 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001894 auto Index = CI->getOperand(0);
1895 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001896
Kévin Petite8edce32019-04-10 14:23:32 +01001897 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001898 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001899 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001900
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001901 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001902 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001903
Kévin Petite8edce32019-04-10 14:23:32 +01001904 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001905 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001906
Kévin Petite8edce32019-04-10 14:23:32 +01001907 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001908
Kévin Petite8edce32019-04-10 14:23:32 +01001909 // Get our final float2.
1910 return CallInst::Create(NewF, Load, "", CI);
1911 });
David Neto6ad93232018-06-07 15:42:58 -07001912}
1913
SJW2c317da2020-03-23 07:39:13 -05001914bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001915
1916 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1917 //
1918 // %u2 = load <2 x i32> %ptr
1919 // %u2xy = extractelement %u2, 0
1920 // %u2zw = extractelement %u2, 1
1921 // %fxy = call <2 x float> Unpack2xHalf(uint)
1922 // %fzw = call <2 x float> Unpack2xHalf(uint)
1923 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001924 Module &M = *F.getParent();
1925 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001926 auto Index = CI->getOperand(0);
1927 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001928
Kévin Petite8edce32019-04-10 14:23:32 +01001929 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001930 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1931 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001932 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001933
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001934 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001935 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001936
Kévin Petite8edce32019-04-10 14:23:32 +01001937 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001938 auto X =
1939 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1940 auto Y =
1941 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001942
Kévin Petite8edce32019-04-10 14:23:32 +01001943 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001944 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001945
Kévin Petite8edce32019-04-10 14:23:32 +01001946 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001947
Kévin Petite8edce32019-04-10 14:23:32 +01001948 // Get the lower (x & y) components of our final float4.
1949 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001950
Kévin Petite8edce32019-04-10 14:23:32 +01001951 // Get the higher (z & w) components of our final float4.
1952 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001953
Kévin Petite8edce32019-04-10 14:23:32 +01001954 Constant *ShuffleMask[4] = {
1955 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1956 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001957
Kévin Petite8edce32019-04-10 14:23:32 +01001958 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001959 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1960 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001961 });
David Neto6ad93232018-06-07 15:42:58 -07001962}
1963
SJW2c317da2020-03-23 07:39:13 -05001964bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1965 switch (vec_size) {
1966 case 0:
1967 return replaceVstoreHalf(F);
1968 case 2:
1969 return replaceVstoreHalf2(F);
1970 case 4:
1971 return replaceVstoreHalf4(F);
1972 default:
1973 llvm_unreachable("Unsupported vstore_half vector size");
1974 break;
1975 }
1976 return false;
1977}
David Neto22f144c2017-06-12 14:26:21 -04001978
SJW2c317da2020-03-23 07:39:13 -05001979bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1980 Module &M = *F.getParent();
1981 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001982 // The value to store.
1983 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001984
Kévin Petite8edce32019-04-10 14:23:32 +01001985 // The index argument from vstore_half.
1986 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001987
Kévin Petite8edce32019-04-10 14:23:32 +01001988 // The pointer argument from vstore_half.
1989 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001990
Kévin Petite8edce32019-04-10 14:23:32 +01001991 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001992 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001993 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1994 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001995
Kévin Petite8edce32019-04-10 14:23:32 +01001996 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001997 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001998
Kévin Petite8edce32019-04-10 14:23:32 +01001999 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002000
Kévin Petite8edce32019-04-10 14:23:32 +01002001 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002002 auto TempVec = InsertElementInst::Create(
2003 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002004
Kévin Petite8edce32019-04-10 14:23:32 +01002005 // Pack the float2 -> half2 (in an int).
2006 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002007
alan-baker7efcaaa2020-05-06 19:33:27 -04002008 bool supports_16bit_storage = true;
2009 switch (Arg2->getType()->getPointerAddressSpace()) {
2010 case clspv::AddressSpace::Global:
2011 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2012 clspv::Option::StorageClass::kSSBO);
2013 break;
2014 case clspv::AddressSpace::Constant:
2015 if (clspv::Option::ConstantArgsInUniformBuffer())
2016 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2017 clspv::Option::StorageClass::kUBO);
2018 else
2019 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
2020 clspv::Option::StorageClass::kSSBO);
2021 break;
2022 default:
2023 // Clspv will emit the Float16 capability if the half type is
2024 // encountered. That capability covers private and local addressspaces.
2025 break;
2026 }
2027
SJW2c317da2020-03-23 07:39:13 -05002028 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002029 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002030 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002031 auto ShortPointerTy =
2032 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002033
Kévin Petite8edce32019-04-10 14:23:32 +01002034 // Truncate our i32 to an i16.
2035 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002036
Kévin Petite8edce32019-04-10 14:23:32 +01002037 // Cast the half* pointer to short*.
2038 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002039
Kévin Petite8edce32019-04-10 14:23:32 +01002040 // Index into the correct address of the casted pointer.
2041 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002042
Kévin Petite8edce32019-04-10 14:23:32 +01002043 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002044 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002045 } else {
2046 // We can only write to 32-bit aligned words.
2047 //
2048 // Assuming base is aligned to 32-bits, replace the equivalent of
2049 // vstore_half(value, index, base)
2050 // with:
2051 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2052 // uint32_t write_to_upper_half = index & 1u;
2053 // uint32_t shift = write_to_upper_half << 4;
2054 //
2055 // // Pack the float value as a half number in bottom 16 bits
2056 // // of an i32.
2057 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2058 //
2059 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2060 // ^ ((packed & 0xffff) << shift)
2061 // // We only need relaxed consistency, but OpenCL 1.2 only has
2062 // // sequentially consistent atomics.
2063 // // TODO(dneto): Use relaxed consistency.
2064 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002065 auto IntPointerTy =
2066 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002067
Kévin Petite8edce32019-04-10 14:23:32 +01002068 auto Four = ConstantInt::get(IntTy, 4);
2069 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002070
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002071 auto IndexIsOdd =
2072 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002073 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002074 auto IndexIntoI32 =
2075 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2076 auto BaseI32Ptr =
2077 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2078 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2079 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002080 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002081 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002082 auto MaskBitsToWrite =
2083 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2084 auto MaskedCurrent = BinaryOperator::CreateAnd(
2085 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002086
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002087 auto XLowerBits =
2088 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2089 auto NewBitsToWrite =
2090 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2091 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2092 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002093
Kévin Petite8edce32019-04-10 14:23:32 +01002094 // Generate the call to atomi_xor.
2095 SmallVector<Type *, 5> ParamTypes;
2096 // The pointer type.
2097 ParamTypes.push_back(IntPointerTy);
2098 // The Types for memory scope, semantics, and value.
2099 ParamTypes.push_back(IntTy);
2100 ParamTypes.push_back(IntTy);
2101 ParamTypes.push_back(IntTy);
2102 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2103 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002104
Kévin Petite8edce32019-04-10 14:23:32 +01002105 const auto ConstantScopeDevice =
2106 ConstantInt::get(IntTy, spv::ScopeDevice);
2107 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2108 // (SPIR-V Workgroup).
2109 const auto AddrSpaceSemanticsBits =
2110 IntPointerTy->getPointerAddressSpace() == 1
2111 ? spv::MemorySemanticsUniformMemoryMask
2112 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002113
Kévin Petite8edce32019-04-10 14:23:32 +01002114 // We're using relaxed consistency here.
2115 const auto ConstantMemorySemantics =
2116 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2117 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002118
Kévin Petite8edce32019-04-10 14:23:32 +01002119 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2120 ConstantMemorySemantics, ValueToXor};
2121 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002122
2123 // Return a Nop so the old Call is removed
2124 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2125 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002126 }
David Neto22f144c2017-06-12 14:26:21 -04002127
SJW2c317da2020-03-23 07:39:13 -05002128 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002129 });
David Neto22f144c2017-06-12 14:26:21 -04002130}
2131
SJW2c317da2020-03-23 07:39:13 -05002132bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2133 Module &M = *F.getParent();
2134 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002135 // The value to store.
2136 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002137
Kévin Petite8edce32019-04-10 14:23:32 +01002138 // The index argument from vstore_half.
2139 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002140
Kévin Petite8edce32019-04-10 14:23:32 +01002141 // The pointer argument from vstore_half.
2142 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002143
Kévin Petite8edce32019-04-10 14:23:32 +01002144 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002145 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002146 auto NewPointerTy =
2147 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002148 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002149
Kévin Petite8edce32019-04-10 14:23:32 +01002150 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002151 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002152
Kévin Petite8edce32019-04-10 14:23:32 +01002153 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002154
Kévin Petite8edce32019-04-10 14:23:32 +01002155 // Turn the packed x & y into the final packing.
2156 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002157
Kévin Petite8edce32019-04-10 14:23:32 +01002158 // Cast the half* pointer to int*.
2159 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002160
Kévin Petite8edce32019-04-10 14:23:32 +01002161 // Index into the correct address of the casted pointer.
2162 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002163
Kévin Petite8edce32019-04-10 14:23:32 +01002164 // Store to the int* we casted to.
2165 return new StoreInst(X, Index, CI);
2166 });
David Neto22f144c2017-06-12 14:26:21 -04002167}
2168
SJW2c317da2020-03-23 07:39:13 -05002169bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2170 Module &M = *F.getParent();
2171 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002172 // The value to store.
2173 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002174
Kévin Petite8edce32019-04-10 14:23:32 +01002175 // The index argument from vstore_half.
2176 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002177
Kévin Petite8edce32019-04-10 14:23:32 +01002178 // The pointer argument from vstore_half.
2179 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002180
Kévin Petite8edce32019-04-10 14:23:32 +01002181 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002182 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2183 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002184 auto NewPointerTy =
2185 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002186 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002187
Kévin Petite8edce32019-04-10 14:23:32 +01002188 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2189 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002190
Kévin Petite8edce32019-04-10 14:23:32 +01002191 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002192 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2193 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002194
Kévin Petite8edce32019-04-10 14:23:32 +01002195 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2196 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002197
Kévin Petite8edce32019-04-10 14:23:32 +01002198 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002199 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2200 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002201
Kévin Petite8edce32019-04-10 14:23:32 +01002202 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002203 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002204
Kévin Petite8edce32019-04-10 14:23:32 +01002205 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002206
Kévin Petite8edce32019-04-10 14:23:32 +01002207 // Turn the packed x & y into the final component of our int2.
2208 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002209
Kévin Petite8edce32019-04-10 14:23:32 +01002210 // Turn the packed z & w into the final component of our int2.
2211 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002212
Kévin Petite8edce32019-04-10 14:23:32 +01002213 auto Combine = InsertElementInst::Create(
2214 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002215 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2216 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002217
Kévin Petite8edce32019-04-10 14:23:32 +01002218 // Cast the half* pointer to int2*.
2219 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002220
Kévin Petite8edce32019-04-10 14:23:32 +01002221 // Index into the correct address of the casted pointer.
2222 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002223
Kévin Petite8edce32019-04-10 14:23:32 +01002224 // Store to the int2* we casted to.
2225 return new StoreInst(Combine, Index, CI);
2226 });
David Neto22f144c2017-06-12 14:26:21 -04002227}
2228
SJW2c317da2020-03-23 07:39:13 -05002229bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2230 // convert half to float
2231 Module &M = *F.getParent();
2232 return replaceCallsWithValue(F, [&](CallInst *CI) {
2233 SmallVector<Type *, 3> types;
2234 SmallVector<Value *, 3> args;
2235 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2236 types.push_back(CI->getArgOperand(i)->getType());
2237 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002238 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002239
alan-baker5a8c3be2020-09-09 13:44:26 -04002240 auto NewFType =
2241 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2242 cast<VectorType>(CI->getType())
2243 ->getElementCount()
2244 .getKnownMinValue()),
2245 types, false);
SJW2c317da2020-03-23 07:39:13 -05002246
SJW61531372020-06-09 07:31:08 -05002247 std::string NewFName =
2248 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002249
2250 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2251
2252 auto NewCI = CallInst::Create(NewF, args, "", CI);
2253
2254 // Convert to the half type.
2255 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2256 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002257}
2258
SJW2c317da2020-03-23 07:39:13 -05002259bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2260 // convert half to float
2261 Module &M = *F.getParent();
2262 return replaceCallsWithValue(F, [&](CallInst *CI) {
2263 SmallVector<Type *, 3> types(3);
2264 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002265
SJW2c317da2020-03-23 07:39:13 -05002266 // Image
2267 types[0] = CI->getArgOperand(0)->getType();
2268 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002269
SJW2c317da2020-03-23 07:39:13 -05002270 // Coord
2271 types[1] = CI->getArgOperand(1)->getType();
2272 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002273
SJW2c317da2020-03-23 07:39:13 -05002274 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002275 types[2] =
2276 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2277 cast<VectorType>(CI->getArgOperand(2)->getType())
2278 ->getElementCount()
2279 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002280
SJW2c317da2020-03-23 07:39:13 -05002281 auto NewFType =
2282 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002283
SJW61531372020-06-09 07:31:08 -05002284 std::string NewFName =
2285 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002286
SJW2c317da2020-03-23 07:39:13 -05002287 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002288
SJW2c317da2020-03-23 07:39:13 -05002289 // Convert data to the float type.
2290 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2291 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002292
SJW2c317da2020-03-23 07:39:13 -05002293 return CallInst::Create(NewF, args, "", CI);
2294 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002295}
2296
SJW2c317da2020-03-23 07:39:13 -05002297bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2298 Function &F) {
2299 // convert read_image with int coords to float coords
2300 Module &M = *F.getParent();
2301 return replaceCallsWithValue(F, [&](CallInst *CI) {
2302 // The image.
2303 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002304
SJW2c317da2020-03-23 07:39:13 -05002305 // The sampler.
2306 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002307
SJW2c317da2020-03-23 07:39:13 -05002308 // The coordinate (integer type that we can't handle).
2309 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002310
SJW2c317da2020-03-23 07:39:13 -05002311 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2312 uint32_t components =
2313 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2314 Type *float_ty = nullptr;
2315 if (components == 1) {
2316 float_ty = Type::getFloatTy(M.getContext());
2317 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002318 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2319 cast<VectorType>(Arg2->getType())
2320 ->getElementCount()
2321 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002322 }
David Neto22f144c2017-06-12 14:26:21 -04002323
SJW2c317da2020-03-23 07:39:13 -05002324 auto NewFType = FunctionType::get(
2325 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2326
2327 std::string NewFName = F.getName().str();
2328 NewFName[NewFName.length() - 1] = 'f';
2329
2330 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2331
2332 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2333
2334 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2335 });
David Neto22f144c2017-06-12 14:26:21 -04002336}
2337
SJW2c317da2020-03-23 07:39:13 -05002338bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2339 return replaceCallsWithValue(F, [&](CallInst *CI) {
2340 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002341
SJW2c317da2020-03-23 07:39:13 -05002342 // We need to map the OpenCL constants to the SPIR-V equivalents.
2343 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2344 const auto ConstantMemorySemantics = ConstantInt::get(
2345 IntTy, spv::MemorySemanticsUniformMemoryMask |
2346 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002347
SJW2c317da2020-03-23 07:39:13 -05002348 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002349
SJW2c317da2020-03-23 07:39:13 -05002350 // The pointer.
2351 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002352
SJW2c317da2020-03-23 07:39:13 -05002353 // The memory scope.
2354 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002355
SJW2c317da2020-03-23 07:39:13 -05002356 // The memory semantics.
2357 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002358
SJW2c317da2020-03-23 07:39:13 -05002359 if (2 < CI->getNumArgOperands()) {
2360 // The unequal memory semantics.
2361 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002362
SJW2c317da2020-03-23 07:39:13 -05002363 // The value.
2364 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002365
SJW2c317da2020-03-23 07:39:13 -05002366 // The comparator.
2367 Params.push_back(CI->getArgOperand(1));
2368 } else if (1 < CI->getNumArgOperands()) {
2369 // The value.
2370 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002371 }
David Neto22f144c2017-06-12 14:26:21 -04002372
SJW2c317da2020-03-23 07:39:13 -05002373 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2374 });
David Neto22f144c2017-06-12 14:26:21 -04002375}
2376
SJW2c317da2020-03-23 07:39:13 -05002377bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2378 llvm::AtomicRMWInst::BinOp Op) {
2379 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002380 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2381 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002382 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002383 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002384 SyncScope::System, CI);
2385 });
2386}
David Neto22f144c2017-06-12 14:26:21 -04002387
SJW2c317da2020-03-23 07:39:13 -05002388bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2389 Module &M = *F.getParent();
2390 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002391 auto IntTy = Type::getInt32Ty(M.getContext());
2392 auto FloatTy = Type::getFloatTy(M.getContext());
2393
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002394 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2395 ConstantInt::get(IntTy, 1),
2396 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002397
2398 Constant *UpShuffleMask[4] = {
2399 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2400 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2401
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002402 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2403 UndefValue::get(FloatTy),
2404 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002405
Kévin Petite8edce32019-04-10 14:23:32 +01002406 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002407 auto Arg0 =
2408 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2409 ConstantVector::get(DownShuffleMask), "", CI);
2410 auto Arg1 =
2411 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2412 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002413 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002414
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002415 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002416 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002417
SJW61531372020-06-09 07:31:08 -05002418 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002419
Kévin Petite8edce32019-04-10 14:23:32 +01002420 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002421
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002422 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2423 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002424 });
David Neto22f144c2017-06-12 14:26:21 -04002425}
David Neto62653202017-10-16 19:05:18 -04002426
SJW2c317da2020-03-23 07:39:13 -05002427bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002428 // OpenCL's float result = fract(float x, float* ptr)
2429 //
2430 // In the LLVM domain:
2431 //
2432 // %floor_result = call spir_func float @floor(float %x)
2433 // store float %floor_result, float * %ptr
2434 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2435 // %result = call spir_func float
2436 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2437 //
2438 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2439 // and clspv.fract occur in the SPIR-V generator pass:
2440 //
2441 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2442 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2443 // ...
2444 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2445 // OpStore %ptr %floor_result
2446 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2447 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002448 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002449
David Neto62653202017-10-16 19:05:18 -04002450 using std::string;
2451
2452 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2453 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002454
SJW2c317da2020-03-23 07:39:13 -05002455 Module &M = *F.getParent();
2456 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002457
SJW2c317da2020-03-23 07:39:13 -05002458 // This is either float or a float vector. All the float-like
2459 // types are this type.
2460 auto result_ty = F.getReturnType();
2461
SJW61531372020-06-09 07:31:08 -05002462 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002463 Function *fmin_fn = M.getFunction(fmin_name);
2464 if (!fmin_fn) {
2465 // Make the fmin function.
2466 FunctionType *fn_ty =
2467 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2468 fmin_fn =
2469 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2470 fmin_fn->addFnAttr(Attribute::ReadNone);
2471 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2472 }
2473
SJW61531372020-06-09 07:31:08 -05002474 std::string floor_name =
2475 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002476 Function *floor_fn = M.getFunction(floor_name);
2477 if (!floor_fn) {
2478 // Make the floor function.
2479 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2480 floor_fn =
2481 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2482 floor_fn->addFnAttr(Attribute::ReadNone);
2483 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2484 }
2485
SJW61531372020-06-09 07:31:08 -05002486 std::string clspv_fract_name =
2487 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002488 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2489 if (!clspv_fract_fn) {
2490 // Make the clspv_fract function.
2491 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2492 clspv_fract_fn = cast<Function>(
2493 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2494 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2495 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2496 }
2497
2498 // Number of significant significand bits, whether represented or not.
2499 unsigned num_significand_bits;
2500 switch (result_ty->getScalarType()->getTypeID()) {
2501 case Type::HalfTyID:
2502 num_significand_bits = 11;
2503 break;
2504 case Type::FloatTyID:
2505 num_significand_bits = 24;
2506 break;
2507 case Type::DoubleTyID:
2508 num_significand_bits = 53;
2509 break;
2510 default:
2511 llvm_unreachable("Unhandled float type when processing fract builtin");
2512 break;
2513 }
2514 // Beware that the disassembler displays this value as
2515 // OpConstant %float 1
2516 // which is not quite right.
2517 const double kJustUnderOneScalar =
2518 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2519
2520 Constant *just_under_one =
2521 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2522 if (result_ty->isVectorTy()) {
2523 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002524 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002525 }
2526
2527 IRBuilder<> Builder(CI);
2528
2529 auto arg = CI->getArgOperand(0);
2530 auto ptr = CI->getArgOperand(1);
2531
2532 // Compute floor result and store it.
2533 auto floor = Builder.CreateCall(floor_fn, {arg});
2534 Builder.CreateStore(floor, ptr);
2535
2536 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2537 auto fract_result =
2538 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2539
2540 return fract_result;
2541 });
David Neto62653202017-10-16 19:05:18 -04002542}
alan-bakera52b7312020-10-26 08:58:51 -04002543
Kévin Petit8576f682020-11-02 14:51:32 +00002544bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002545 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002546 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002547 // a_shr = a >> 1
2548 // b_shr = b >> 1
2549 // add1 = a_shr + b_shr
2550 // join = a |join_opcode| b
2551 // and = join & 1
2552 // add = add1 + and
2553 const auto a = Call->getArgOperand(0);
2554 const auto b = Call->getArgOperand(1);
2555 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002556 Value *a_shift, *b_shift;
2557 if (is_signed) {
2558 a_shift = builder.CreateAShr(a, 1);
2559 b_shift = builder.CreateAShr(b, 1);
2560 } else {
2561 a_shift = builder.CreateLShr(a, 1);
2562 b_shift = builder.CreateLShr(b, 1);
2563 }
alan-bakerb6da5132020-10-29 15:59:06 -04002564 auto add = builder.CreateAdd(a_shift, b_shift);
2565 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2566 auto constant_one = ConstantInt::get(a->getType(), 1);
2567 auto and_bit = builder.CreateAnd(join, constant_one);
2568 return builder.CreateAdd(add, and_bit);
2569 });
2570}
2571
alan-baker3f1bf492020-11-05 09:07:36 -05002572bool ReplaceOpenCLBuiltinPass::replaceAddSubSat(Function &F, bool is_signed,
2573 bool is_add) {
2574 return replaceCallsWithValue(F, [&F, this, is_signed,
2575 is_add](CallInst *Call) {
2576 auto ty = Call->getType();
2577 auto a = Call->getArgOperand(0);
2578 auto b = Call->getArgOperand(1);
2579 IRBuilder<> builder(Call);
alan-bakera52b7312020-10-26 08:58:51 -04002580 if (is_signed) {
2581 unsigned bitwidth = ty->getScalarSizeInBits();
2582 if (bitwidth < 32) {
alan-baker3f1bf492020-11-05 09:07:36 -05002583 unsigned extended_width = bitwidth << 1;
2584 Type *extended_ty =
2585 IntegerType::get(Call->getContext(), extended_width);
2586 Constant *min = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002587 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002588 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2589 Constant *max = ConstantInt::get(
alan-bakera52b7312020-10-26 08:58:51 -04002590 Call->getContext(),
alan-baker3f1bf492020-11-05 09:07:36 -05002591 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
alan-bakera52b7312020-10-26 08:58:51 -04002592 // Don't use the type in GetMangledFunctionName to ensure we get
2593 // signed parameters.
2594 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
alan-bakera52b7312020-10-26 08:58:51 -04002595 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
alan-baker3f1bf492020-11-05 09:07:36 -05002596 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2597 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2598 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2599 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2600 if (extended_width == 32) {
alan-bakera52b7312020-10-26 08:58:51 -04002601 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
alan-bakera52b7312020-10-26 08:58:51 -04002602 } else {
2603 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2604 }
alan-baker3f1bf492020-11-05 09:07:36 -05002605 } else {
2606 if (extended_width == 32) {
2607 sclamp_name += "iii";
2608 } else {
2609 sclamp_name += "sss";
2610 }
alan-bakera52b7312020-10-26 08:58:51 -04002611 }
alan-baker3f1bf492020-11-05 09:07:36 -05002612
2613 auto sext_a = builder.CreateSExt(a, extended_ty);
2614 auto sext_b = builder.CreateSExt(b, extended_ty);
2615 Value *op = nullptr;
2616 // Extended operations won't wrap.
2617 if (is_add)
2618 op = builder.CreateAdd(sext_a, sext_b, "", true, true);
2619 else
2620 op = builder.CreateSub(sext_a, sext_b, "", true, true);
2621 auto clamp_ty = FunctionType::get(
2622 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2623 auto callee = F.getParent()->getOrInsertFunction(sclamp_name, clamp_ty);
2624 auto clamp = builder.CreateCall(callee, {op, min, max});
2625 return builder.CreateTrunc(clamp, ty);
alan-bakera52b7312020-10-26 08:58:51 -04002626 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002627 // Add:
2628 // c = a + b
alan-bakera52b7312020-10-26 08:58:51 -04002629 // if (b < 0)
2630 // c = c > a ? min : c;
2631 // else
alan-baker3f1bf492020-11-05 09:07:36 -05002632 // c = c < a ? max : c;
alan-bakera52b7312020-10-26 08:58:51 -04002633 //
alan-baker3f1bf492020-11-05 09:07:36 -05002634 // Sub:
2635 // c = a - b;
2636 // if (b < 0)
2637 // c = c < a ? max : c;
2638 // else
2639 // c = c > a ? min : c;
2640 Constant *min = ConstantInt::get(Call->getContext(),
2641 APInt::getSignedMinValue(bitwidth));
2642 Constant *max = ConstantInt::get(Call->getContext(),
2643 APInt::getSignedMaxValue(bitwidth));
alan-bakera52b7312020-10-26 08:58:51 -04002644 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2645 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2646 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2647 }
alan-baker3f1bf492020-11-05 09:07:36 -05002648 Value *op = nullptr;
2649 if (is_add) {
2650 op = builder.CreateAdd(a, b);
2651 } else {
2652 op = builder.CreateSub(a, b);
2653 }
2654 auto b_lt_0 = builder.CreateICmpSLT(b, Constant::getNullValue(ty));
2655 auto op_gt_a = builder.CreateICmpSGT(op, a);
2656 auto op_lt_a = builder.CreateICmpSLT(op, a);
2657 auto neg_cmp = is_add ? op_gt_a : op_lt_a;
2658 auto pos_cmp = is_add ? op_lt_a : op_gt_a;
2659 auto neg_value = is_add ? min : max;
2660 auto pos_value = is_add ? max : min;
2661 auto neg_clamp = builder.CreateSelect(neg_cmp, neg_value, op);
2662 auto pos_clamp = builder.CreateSelect(pos_cmp, pos_value, op);
2663 return builder.CreateSelect(b_lt_0, neg_clamp, pos_clamp);
alan-bakera52b7312020-10-26 08:58:51 -04002664 }
2665 } else {
alan-baker3f1bf492020-11-05 09:07:36 -05002666 // Replace with OpIAddCarry/OpISubBorrow and clamp to max/0 on a
2667 // carr/borrow.
2668 spv::Op op = is_add ? spv::OpIAddCarry : spv::OpISubBorrow;
2669 auto clamp_value =
2670 is_add ? Constant::getAllOnesValue(ty) : Constant::getNullValue(ty);
2671 auto struct_ty = GetPairStruct(ty);
2672 auto call =
2673 InsertSPIRVOp(Call, op, {Attribute::ReadNone}, struct_ty, {a, b});
2674 auto add_sub = builder.CreateExtractValue(call, {0});
2675 auto carry_borrow = builder.CreateExtractValue(call, {1});
2676 auto cmp = builder.CreateICmpEQ(carry_borrow, Constant::getNullValue(ty));
2677 return builder.CreateSelect(cmp, add_sub, clamp_value);
alan-bakera52b7312020-10-26 08:58:51 -04002678 }
alan-bakera52b7312020-10-26 08:58:51 -04002679 });
2680}
alan-baker4986eff2020-10-29 13:38:00 -04002681
2682bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2683 return replaceCallsWithValue(F, [](CallInst *Call) {
2684 auto pointer = Call->getArgOperand(0);
2685 // Clang emits an address space cast to the generic address space. Skip the
2686 // cast and use the input directly.
2687 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2688 pointer = cast->getPointerOperand();
2689 }
2690 Value *order_arg =
2691 Call->getNumArgOperands() > 1 ? Call->getArgOperand(1) : nullptr;
2692 Value *scope_arg =
2693 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2694 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2695 clspv::AddressSpace::Global;
2696 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2697 spv::MemorySemanticsAcquireMask);
2698 auto scope = MemoryScope(scope_arg, is_global, Call);
2699 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2700 Call->getType(), {pointer, scope, order});
2701 });
2702}
2703
2704bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2705 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2706 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2707 auto pointer = Call->getArgOperand(0);
2708 // Clang emits an address space cast to the generic address space. Skip the
2709 // cast and use the input directly.
2710 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2711 pointer = cast->getPointerOperand();
2712 }
2713 Value *value = Call->getArgOperand(1);
2714 Value *order_arg =
2715 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2716 Value *scope_arg =
2717 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2718 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2719 clspv::AddressSpace::Global;
2720 auto scope = MemoryScope(scope_arg, is_global, Call);
2721 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2722 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2723 {pointer, scope, order, value});
2724 });
2725}
2726
2727bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2728 return replaceCallsWithValue(F, [](CallInst *Call) {
2729 auto pointer = Call->getArgOperand(0);
2730 // Clang emits an address space cast to the generic address space. Skip the
2731 // cast and use the input directly.
2732 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2733 pointer = cast->getPointerOperand();
2734 }
2735 auto expected = Call->getArgOperand(1);
2736 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2737 expected = cast->getPointerOperand();
2738 }
2739 auto value = Call->getArgOperand(2);
2740 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2741 clspv::AddressSpace::Global;
2742 Value *success_arg =
2743 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2744 Value *failure_arg =
2745 Call->getNumArgOperands() > 4 ? Call->getArgOperand(4) : nullptr;
2746 Value *scope_arg =
2747 Call->getNumArgOperands() > 5 ? Call->getArgOperand(5) : nullptr;
2748 auto scope = MemoryScope(scope_arg, is_global, Call);
2749 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2750 spv::MemorySemanticsAcquireReleaseMask);
2751 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
2752 spv::MemorySemanticsAcquireMask);
2753
2754 // If the value pointed to by |expected| equals the value pointed to by
2755 // |pointer|, |value| is written into |pointer|, otherwise the value in
2756 // |pointer| is written into |expected|. In order to avoid extra stores,
2757 // the basic block with the original atomic is split and the store is
2758 // performed in the |then| block. The condition is the inversion of the
2759 // comparison result.
2760 IRBuilder<> builder(Call);
2761 auto load = builder.CreateLoad(expected);
2762 auto cmp_xchg = InsertSPIRVOp(
2763 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
2764 value->getType(), {pointer, scope, success, failure, value, load});
2765 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
2766 auto not_cmp = builder.CreateNot(cmp);
2767 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
2768 builder.SetInsertPoint(then_branch);
2769 builder.CreateStore(cmp_xchg, expected);
2770 return cmp;
2771 });
2772}
alan-bakercc2bafb2020-11-02 08:30:18 -05002773
alan-baker2cecaa72020-11-05 14:05:20 -05002774bool ReplaceOpenCLBuiltinPass::replaceCountZeroes(Function &F, bool leading) {
alan-bakercc2bafb2020-11-02 08:30:18 -05002775 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2776 return false;
2777
2778 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
2779 if (bitwidth == 32 || bitwidth > 64)
2780 return false;
2781
alan-baker2cecaa72020-11-05 14:05:20 -05002782 return replaceCallsWithValue(F, [&F, bitwidth, leading](CallInst *Call) {
alan-bakercc2bafb2020-11-02 08:30:18 -05002783 auto in = Call->getArgOperand(0);
2784 IRBuilder<> builder(Call);
2785 auto int32_ty = builder.getInt32Ty();
2786 Type *ty = int32_ty;
alan-baker2cecaa72020-11-05 14:05:20 -05002787 Constant *c32 = builder.getInt32(32);
alan-bakercc2bafb2020-11-02 08:30:18 -05002788 if (auto vec_ty = dyn_cast<VectorType>(Call->getType())) {
2789 ty = VectorType::get(ty, vec_ty->getElementCount());
alan-baker2cecaa72020-11-05 14:05:20 -05002790 c32 = ConstantVector::getSplat(vec_ty->getElementCount(), c32);
alan-bakercc2bafb2020-11-02 08:30:18 -05002791 }
alan-baker2cecaa72020-11-05 14:05:20 -05002792 auto func_32bit_ty = FunctionType::get(ty, {ty}, false);
2793 std::string func_32bit_name =
2794 Builtins::GetMangledFunctionName((leading ? "clz" : "ctz"), ty);
2795 auto func_32bit =
2796 F.getParent()->getOrInsertFunction(func_32bit_name, func_32bit_ty);
alan-bakercc2bafb2020-11-02 08:30:18 -05002797 if (bitwidth < 32) {
alan-baker2cecaa72020-11-05 14:05:20 -05002798 // Extend the input to 32-bits and perform a clz/ctz.
alan-bakercc2bafb2020-11-02 08:30:18 -05002799 auto zext = builder.CreateZExt(in, ty);
alan-baker2cecaa72020-11-05 14:05:20 -05002800 Value *call_input = zext;
2801 if (!leading) {
2802 // Or the extended input value with a constant that caps the max to the
2803 // right bitwidth (e.g. 256 for i8 and 65536 for i16).
2804 Constant *mask = builder.getInt32(1 << bitwidth);
2805 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2806 mask = ConstantVector::getSplat(vec_ty->getElementCount(), mask);
2807 }
2808 call_input = builder.CreateOr(zext, mask);
alan-bakercc2bafb2020-11-02 08:30:18 -05002809 }
alan-baker2cecaa72020-11-05 14:05:20 -05002810 auto call = builder.CreateCall(func_32bit, {call_input});
2811 Value *tmp = call;
2812 if (leading) {
2813 // Clz is implemented as 31 - FindUMsb(|zext|), so adjust the result
2814 // the right bitwidth.
2815 Constant *sub_const = builder.getInt32(32 - bitwidth);
2816 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2817 sub_const =
2818 ConstantVector::getSplat(vec_ty->getElementCount(), sub_const);
2819 }
2820 tmp = builder.CreateSub(call, sub_const);
2821 }
2822 // Truncate the intermediate result to the right size.
2823 return builder.CreateTrunc(tmp, Call->getType());
alan-bakercc2bafb2020-11-02 08:30:18 -05002824 } else {
alan-baker2cecaa72020-11-05 14:05:20 -05002825 // Perform a 32-bit version of clz/ctz on each half of the 64-bit input.
alan-bakercc2bafb2020-11-02 08:30:18 -05002826 auto lshr = builder.CreateLShr(in, 32);
2827 auto top_bits = builder.CreateTrunc(lshr, ty);
2828 auto bot_bits = builder.CreateTrunc(in, ty);
alan-baker2cecaa72020-11-05 14:05:20 -05002829 auto top_func = builder.CreateCall(func_32bit, {top_bits});
2830 auto bot_func = builder.CreateCall(func_32bit, {bot_bits});
2831 Value *tmp = nullptr;
2832 if (leading) {
2833 // For clz, if clz(top) is 32, return 32 + clz(bot).
2834 auto cmp = builder.CreateICmpEQ(top_func, c32);
2835 auto adjust = builder.CreateAdd(bot_func, c32);
2836 tmp = builder.CreateSelect(cmp, adjust, top_func);
2837 } else {
2838 // For ctz, if clz(bot) is 32, return 32 + ctz(top)
2839 auto bot_cmp = builder.CreateICmpEQ(bot_func, c32);
2840 auto adjust = builder.CreateAdd(top_func, c32);
2841 tmp = builder.CreateSelect(bot_cmp, adjust, bot_func);
alan-bakercc2bafb2020-11-02 08:30:18 -05002842 }
alan-baker2cecaa72020-11-05 14:05:20 -05002843 // Extend the intermediate result to the correct size.
2844 return builder.CreateZExt(tmp, Call->getType());
alan-bakercc2bafb2020-11-02 08:30:18 -05002845 }
2846 });
2847}
alan-baker6b9d1ee2020-11-03 23:11:32 -05002848
2849bool ReplaceOpenCLBuiltinPass::replaceMadSat(Function &F, bool is_signed) {
2850 return replaceCallsWithValue(F, [&F, is_signed, this](CallInst *Call) {
2851 const auto ty = Call->getType();
2852 const auto a = Call->getArgOperand(0);
2853 const auto b = Call->getArgOperand(1);
2854 const auto c = Call->getArgOperand(2);
2855 IRBuilder<> builder(Call);
2856 if (is_signed) {
2857 unsigned bitwidth = Call->getType()->getScalarSizeInBits();
2858 if (bitwidth < 32) {
2859 // mul = sext(a) * sext(b)
2860 // add = mul + sext(c)
2861 // res = clamp(add, MIN, MAX)
2862 unsigned extended_width = bitwidth << 1;
2863 Type *extended_ty = IntegerType::get(F.getContext(), extended_width);
2864 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2865 extended_ty = VectorType::get(extended_ty, vec_ty->getElementCount());
2866 }
2867 auto a_sext = builder.CreateSExt(a, extended_ty);
2868 auto b_sext = builder.CreateSExt(b, extended_ty);
2869 auto c_sext = builder.CreateSExt(c, extended_ty);
2870 // Extended the size so no overflows occur.
2871 auto mul = builder.CreateMul(a_sext, b_sext, "", true, true);
2872 auto add = builder.CreateAdd(mul, c_sext, "", true, true);
2873 auto func_ty = FunctionType::get(
2874 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2875 // Don't use function type because we need signed parameters.
2876 std::string clamp_name = Builtins::GetMangledFunctionName("clamp");
2877 // The clamp values are the signed min and max of the original bitwidth
2878 // sign extended to the extended bitwidth.
2879 Constant *min = ConstantInt::get(
2880 Call->getContext(),
2881 APInt::getSignedMinValue(bitwidth).sext(extended_width));
2882 Constant *max = ConstantInt::get(
2883 Call->getContext(),
2884 APInt::getSignedMaxValue(bitwidth).sext(extended_width));
2885 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2886 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2887 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2888 unsigned vec_width = vec_ty->getElementCount().getKnownMinValue();
2889 if (extended_width == 32)
2890 clamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
2891 else
2892 clamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2893 } else {
2894 if (extended_width == 32)
2895 clamp_name += "iii";
2896 else
2897 clamp_name += "sss";
2898 }
2899 auto callee = F.getParent()->getOrInsertFunction(clamp_name, func_ty);
2900 auto clamp = builder.CreateCall(callee, {add, min, max});
2901 return builder.CreateTrunc(clamp, ty);
2902 } else {
2903 auto struct_ty = GetPairStruct(ty);
2904 // Compute
2905 // {hi, lo} = smul_extended(a, b)
2906 // add = lo + c
2907 auto mul_ext = InsertSPIRVOp(Call, spv::OpSMulExtended,
2908 {Attribute::ReadNone}, struct_ty, {a, b});
2909 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2910 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2911 auto add = builder.CreateAdd(mul_lo, c);
2912
2913 // Constants for use in the calculation.
2914 Constant *min = ConstantInt::get(Call->getContext(),
2915 APInt::getSignedMinValue(bitwidth));
2916 Constant *max = ConstantInt::get(Call->getContext(),
2917 APInt::getSignedMaxValue(bitwidth));
2918 Constant *max_plus_1 = ConstantInt::get(
2919 Call->getContext(),
2920 APInt::getSignedMaxValue(bitwidth) + APInt(bitwidth, 1));
2921 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2922 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2923 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2924 max_plus_1 =
2925 ConstantVector::getSplat(vec_ty->getElementCount(), max_plus_1);
2926 }
2927
2928 auto a_xor_b = builder.CreateXor(a, b);
2929 auto same_sign =
2930 builder.CreateICmpSGT(a_xor_b, Constant::getAllOnesValue(ty));
2931 auto different_sign = builder.CreateNot(same_sign);
2932 auto hi_eq_0 = builder.CreateICmpEQ(mul_hi, Constant::getNullValue(ty));
2933 auto hi_ne_0 = builder.CreateNot(hi_eq_0);
2934 auto lo_ge_max = builder.CreateICmpUGE(mul_lo, max);
2935 auto c_gt_0 = builder.CreateICmpSGT(c, Constant::getNullValue(ty));
2936 auto c_lt_0 = builder.CreateICmpSLT(c, Constant::getNullValue(ty));
2937 auto add_gt_max = builder.CreateICmpUGT(add, max);
2938 auto hi_eq_m1 =
2939 builder.CreateICmpEQ(mul_hi, Constant::getAllOnesValue(ty));
2940 auto hi_ne_m1 = builder.CreateNot(hi_eq_m1);
2941 auto lo_le_max_plus_1 = builder.CreateICmpULE(mul_lo, max_plus_1);
2942 auto max_sub_lo = builder.CreateSub(max, mul_lo);
2943 auto c_lt_max_sub_lo = builder.CreateICmpULT(c, max_sub_lo);
2944
2945 // Equivalent to:
2946 // if (((x < 0) == (y < 0)) && mul_hi != 0)
2947 // return MAX
2948 // if (mul_hi == 0 && mul_lo >= MAX && (z > 0 || add > MAX))
2949 // return MAX
2950 // if (((x < 0) != (y < 0)) && mul_hi != -1)
2951 // return MIN
2952 // if (hi == -1 && mul_lo <= (MAX + 1) && (z < 0 || z < (MAX - mul_lo))
2953 // return MIN
2954 // return add
2955 auto max_clamp_1 = builder.CreateAnd(same_sign, hi_ne_0);
2956 auto max_clamp_2 = builder.CreateOr(c_gt_0, add_gt_max);
2957 auto tmp = builder.CreateAnd(hi_eq_0, lo_ge_max);
2958 max_clamp_2 = builder.CreateAnd(tmp, max_clamp_2);
2959 auto max_clamp = builder.CreateOr(max_clamp_1, max_clamp_2);
2960 auto min_clamp_1 = builder.CreateAnd(different_sign, hi_ne_m1);
2961 auto min_clamp_2 = builder.CreateOr(c_lt_0, c_lt_max_sub_lo);
2962 tmp = builder.CreateAnd(hi_eq_m1, lo_le_max_plus_1);
2963 min_clamp_2 = builder.CreateAnd(tmp, min_clamp_2);
2964 auto min_clamp = builder.CreateOr(min_clamp_1, min_clamp_2);
2965 auto sel = builder.CreateSelect(min_clamp, min, add);
2966 return builder.CreateSelect(max_clamp, max, sel);
2967 }
2968 } else {
2969 // {lo, hi} = mul_extended(a, b)
2970 // {add, carry} = add_carry(lo, c)
2971 // cmp = (mul_hi | carry) == 0
2972 // mad_sat = cmp ? add : MAX
2973 auto struct_ty = GetPairStruct(ty);
2974 auto mul_ext = InsertSPIRVOp(Call, spv::OpUMulExtended,
2975 {Attribute::ReadNone}, struct_ty, {a, b});
2976 auto mul_lo = builder.CreateExtractValue(mul_ext, {0});
2977 auto mul_hi = builder.CreateExtractValue(mul_ext, {1});
2978 auto add_carry =
2979 InsertSPIRVOp(Call, spv::OpIAddCarry, {Attribute::ReadNone},
2980 struct_ty, {mul_lo, c});
2981 auto add = builder.CreateExtractValue(add_carry, {0});
2982 auto carry = builder.CreateExtractValue(add_carry, {1});
2983 auto or_value = builder.CreateOr(mul_hi, carry);
2984 auto cmp = builder.CreateICmpEQ(or_value, Constant::getNullValue(ty));
2985 return builder.CreateSelect(cmp, add, Constant::getAllOnesValue(ty));
2986 }
2987 });
2988}
alan-baker15106572020-11-06 15:08:10 -05002989
2990bool ReplaceOpenCLBuiltinPass::replaceOrdered(Function &F, bool is_ordered) {
2991 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2992 return false;
2993
2994 if (F.getFunctionType()->getNumParams() != 2)
2995 return false;
2996
2997 if (F.getFunctionType()->getParamType(0) !=
2998 F.getFunctionType()->getParamType(1)) {
2999 return false;
3000 }
3001
3002 switch (F.getFunctionType()->getParamType(0)->getScalarType()->getTypeID()) {
3003 case Type::FloatTyID:
3004 case Type::HalfTyID:
3005 case Type::DoubleTyID:
3006 break;
3007 default:
3008 return false;
3009 }
3010
3011 // Scalar versions all return an int, while vector versions return a vector
3012 // of an equally sized integer types (e.g. short, int or long).
3013 if (isa<VectorType>(F.getReturnType())) {
3014 if (F.getReturnType()->getScalarSizeInBits() !=
3015 F.getFunctionType()->getParamType(0)->getScalarSizeInBits()) {
3016 return false;
3017 }
3018 } else {
3019 if (F.getReturnType()->getScalarSizeInBits() != 32)
3020 return false;
3021 }
3022
3023 return replaceCallsWithValue(F, [is_ordered](CallInst *Call) {
3024 // Replace with a floating point [un]ordered comparison followed by an
3025 // extension.
3026 auto x = Call->getArgOperand(0);
3027 auto y = Call->getArgOperand(1);
3028 IRBuilder<> builder(Call);
3029 Value *tmp = nullptr;
3030 if (is_ordered) {
3031 // This leads to a slight inefficiency in the SPIR-V that is easy for
3032 // drivers to optimize where the SPIR-V for the comparison and the
3033 // extension could be fused to drop the inversion of the OpIsNan.
3034 tmp = builder.CreateFCmpORD(x, y);
3035 } else {
3036 tmp = builder.CreateFCmpUNO(x, y);
3037 }
3038 // OpenCL CTS requires that vector versions use sign extension, but scalar
3039 // versions use zero extension.
3040 if (isa<VectorType>(Call->getType()))
3041 return builder.CreateSExt(tmp, Call->getType());
3042 return builder.CreateZExt(tmp, Call->getType());
3043 });
3044}
alan-baker497920b2020-11-09 16:41:36 -05003045
3046bool ReplaceOpenCLBuiltinPass::replaceIsNormal(Function &F) {
3047 return replaceCallsWithValue(F, [this](CallInst *Call) {
3048 auto ty = Call->getType();
3049 auto x = Call->getArgOperand(0);
3050 unsigned width = x->getType()->getScalarSizeInBits();
3051 Type *int_ty = IntegerType::get(Call->getContext(), width);
3052 uint64_t abs_mask = 0x7fffffff;
3053 uint64_t exp_mask = 0x7f800000;
3054 uint64_t min_mask = 0x00800000;
3055 if (width == 16) {
3056 abs_mask = 0x7fff;
3057 exp_mask = 0x7c00;
3058 min_mask = 0x0400;
3059 } else if (width == 64) {
3060 abs_mask = 0x7fffffffffffffff;
3061 exp_mask = 0x7ff0000000000000;
3062 min_mask = 0x0010000000000000;
3063 }
3064 Constant *abs_const = ConstantInt::get(int_ty, APInt(width, abs_mask));
3065 Constant *exp_const = ConstantInt::get(int_ty, APInt(width, exp_mask));
3066 Constant *min_const = ConstantInt::get(int_ty, APInt(width, min_mask));
3067 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
3068 int_ty = VectorType::get(int_ty, vec_ty->getElementCount());
3069 abs_const =
3070 ConstantVector::getSplat(vec_ty->getElementCount(), abs_const);
3071 exp_const =
3072 ConstantVector::getSplat(vec_ty->getElementCount(), exp_const);
3073 min_const =
3074 ConstantVector::getSplat(vec_ty->getElementCount(), min_const);
3075 }
3076 // Drop the sign bit and then check that the number is between
3077 // (exclusive) the min and max exponent values for the bit width.
3078 IRBuilder<> builder(Call);
3079 auto bitcast = builder.CreateBitCast(x, int_ty);
3080 auto abs = builder.CreateAnd(bitcast, abs_const);
3081 auto lt = builder.CreateICmpULT(abs, exp_const);
3082 auto ge = builder.CreateICmpUGE(abs, min_const);
3083 auto tmp = builder.CreateAnd(lt, ge);
3084 // OpenCL CTS requires that vector versions use sign extension, but scalar
3085 // versions use zero extension.
3086 if (isa<VectorType>(ty))
3087 return builder.CreateSExt(tmp, ty);
3088 return builder.CreateZExt(tmp, ty);
3089 });
3090}