blob: 3adda2a27c18b2078d9b63524be7928576557d65 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
alan-baker4986eff2020-10-29 13:38:00 -040024#include "llvm/IR/Operator.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000025#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040026#include "llvm/Pass.h"
27#include "llvm/Support/CommandLine.h"
28#include "llvm/Support/raw_ostream.h"
alan-baker4986eff2020-10-29 13:38:00 -040029#include "llvm/Transforms/Utils/BasicBlockUtils.h"
David Neto118188e2018-08-24 11:27:54 -040030#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-bakere0902602020-03-23 08:43:40 -040032#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040033
alan-baker931d18a2019-12-12 08:21:32 -050034#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040035#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070036
SJW2c317da2020-03-23 07:39:13 -050037#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050038#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039#include "Passes.h"
40#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050041#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040042
SJW2c317da2020-03-23 07:39:13 -050043using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040044using namespace llvm;
45
46#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
47
48namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000049
// Returns the zero-based position of the most significant set bit of |v|,
// i.e. floor(log2(v)). Both 0 and 1 map to 0.
//
// Note that, despite the name, this is not a count of leading zeros. The
// callers in this file subtract two results to obtain the shift distance
// between two single-bit masks, for which the bit position is what is needed.
uint32_t clz(uint32_t v) {
  uint32_t msb_index = 0;
  // Each halving of |v| that still leaves a non-zero value means the top bit
  // sat one position higher.
  while (v > 1) {
    v >>= 1;
    ++msb_index;
  }
  return msb_index;
}
69
Kévin Petitfdfa92e2019-09-25 14:20:58 +010070Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
71 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040072 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040073 IntTy = FixedVectorType::get(IntTy,
74 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010075 }
76 return IntTy;
77}
78
alan-baker4986eff2020-10-29 13:38:00 -040079Value *MemoryOrderSemantics(Value *order, bool is_global,
80 Instruction *InsertBefore,
81 spv::MemorySemanticsMask base_semantics) {
82 enum AtomicMemoryOrder : uint32_t {
83 kMemoryOrderRelaxed = 0,
84 kMemoryOrderAcquire = 2,
85 kMemoryOrderRelease = 3,
86 kMemoryOrderAcqRel = 4,
87 kMemoryOrderSeqCst = 5
88 };
89
90 IRBuilder<> builder(InsertBefore);
91
92 // Constants for OpenCL C 2.0 memory_order.
93 const auto relaxed = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelaxed);
94 const auto acquire = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcquire);
95 const auto release = builder.getInt32(AtomicMemoryOrder::kMemoryOrderRelease);
96 const auto acq_rel = builder.getInt32(AtomicMemoryOrder::kMemoryOrderAcqRel);
97
98 // Constants for SPIR-V ordering memory semantics.
99 const auto RelaxedSemantics = builder.getInt32(spv::MemorySemanticsMaskNone);
100 const auto AcquireSemantics =
101 builder.getInt32(spv::MemorySemanticsAcquireMask);
102 const auto ReleaseSemantics =
103 builder.getInt32(spv::MemorySemanticsReleaseMask);
104 const auto AcqRelSemantics =
105 builder.getInt32(spv::MemorySemanticsAcquireReleaseMask);
106
107 // Constants for SPIR-V storage class semantics.
108 const auto UniformSemantics =
109 builder.getInt32(spv::MemorySemanticsUniformMemoryMask);
110 const auto WorkgroupSemantics =
111 builder.getInt32(spv::MemorySemanticsWorkgroupMemoryMask);
112
113 // Instead of sequentially consistent, use acquire, release or acquire
114 // release semantics.
115 Value *base_order = nullptr;
116 switch (base_semantics) {
117 case spv::MemorySemanticsAcquireMask:
118 base_order = AcquireSemantics;
119 break;
120 case spv::MemorySemanticsReleaseMask:
121 base_order = ReleaseSemantics;
122 break;
123 default:
124 base_order = AcqRelSemantics;
125 break;
126 }
127
128 Value *storage = is_global ? UniformSemantics : WorkgroupSemantics;
129 if (order == nullptr)
130 return builder.CreateOr({storage, base_order});
131
132 auto is_relaxed = builder.CreateICmpEQ(order, relaxed);
133 auto is_acquire = builder.CreateICmpEQ(order, acquire);
134 auto is_release = builder.CreateICmpEQ(order, release);
135 auto is_acq_rel = builder.CreateICmpEQ(order, acq_rel);
136 auto semantics =
137 builder.CreateSelect(is_relaxed, RelaxedSemantics, base_order);
138 semantics = builder.CreateSelect(is_acquire, AcquireSemantics, semantics);
139 semantics = builder.CreateSelect(is_release, ReleaseSemantics, semantics);
140 semantics = builder.CreateSelect(is_acq_rel, AcqRelSemantics, semantics);
141 return builder.CreateOr({storage, semantics});
142}
143
144Value *MemoryScope(Value *scope, bool is_global, Instruction *InsertBefore) {
145 enum AtomicMemoryScope : uint32_t {
146 kMemoryScopeWorkItem = 0,
147 kMemoryScopeWorkGroup = 1,
148 kMemoryScopeDevice = 2,
149 kMemoryScopeAllSVMDevices = 3, // not supported
150 kMemoryScopeSubGroup = 4
151 };
152
153 IRBuilder<> builder(InsertBefore);
154
155 // Constants for OpenCL C 2.0 memory_scope.
156 const auto work_item =
157 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkItem);
158 const auto work_group =
159 builder.getInt32(AtomicMemoryScope::kMemoryScopeWorkGroup);
160 const auto sub_group =
161 builder.getInt32(AtomicMemoryScope::kMemoryScopeSubGroup);
162 const auto device = builder.getInt32(AtomicMemoryScope::kMemoryScopeDevice);
163
164 // Constants for SPIR-V memory scopes.
165 const auto InvocationScope = builder.getInt32(spv::ScopeInvocation);
166 const auto WorkgroupScope = builder.getInt32(spv::ScopeWorkgroup);
167 const auto DeviceScope = builder.getInt32(spv::ScopeDevice);
168 const auto SubgroupScope = builder.getInt32(spv::ScopeSubgroup);
169
170 auto base_scope = is_global ? DeviceScope : WorkgroupScope;
171 if (scope == nullptr)
172 return base_scope;
173
174 auto is_work_item = builder.CreateICmpEQ(scope, work_item);
175 auto is_work_group = builder.CreateICmpEQ(scope, work_group);
176 auto is_sub_group = builder.CreateICmpEQ(scope, sub_group);
177 auto is_device = builder.CreateICmpEQ(scope, device);
178
179 scope = builder.CreateSelect(is_work_item, InvocationScope, base_scope);
180 scope = builder.CreateSelect(is_work_group, WorkgroupScope, scope);
181 scope = builder.CreateSelect(is_sub_group, SubgroupScope, scope);
182 scope = builder.CreateSelect(is_device, DeviceScope, scope);
183
184 return scope;
185}
186
SJW2c317da2020-03-23 07:39:13 -0500187bool replaceCallsWithValue(Function &F,
188 std::function<Value *(CallInst *)> Replacer) {
189
190 bool Changed = false;
191
192 SmallVector<Instruction *, 4> ToRemoves;
193
194 // Walk the users of the function.
195 for (auto &U : F.uses()) {
196 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
197
198 auto NewValue = Replacer(CI);
199
200 if (NewValue != nullptr) {
201 CI->replaceAllUsesWith(NewValue);
202
203 // Lastly, remember to remove the user.
204 ToRemoves.push_back(CI);
205 }
206 }
207 }
208
209 Changed = !ToRemoves.empty();
210
211 // And cleanup the calls we don't use anymore.
212 for (auto V : ToRemoves) {
213 V->eraseFromParent();
214 }
215
216 return Changed;
217}
218
David Neto22f144c2017-06-12 14:26:21 -0400219struct ReplaceOpenCLBuiltinPass final : public ModulePass {
220 static char ID;
221 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
222
223 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500224 bool runOnFunction(Function &F);
225 bool replaceAbs(Function &F);
226 bool replaceAbsDiff(Function &F, bool is_signed);
227 bool replaceCopysign(Function &F);
228 bool replaceRecip(Function &F);
229 bool replaceDivide(Function &F);
230 bool replaceDot(Function &F);
231 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500232 bool replaceExp10(Function &F, const std::string &basename);
233 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100234 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400235 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500236 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100237 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500238 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
239 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
240 bool replaceIsFinite(Function &F);
241 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
242 bool replaceUpsample(Function &F);
243 bool replaceRotate(Function &F);
244 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
245 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
246 bool replaceSelect(Function &F);
247 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500248 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500249 bool replaceSignbit(Function &F, bool is_vec);
250 bool replaceMul(Function &F, bool is_float, bool is_mad);
251 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
252 bool replaceVloadHalf(Function &F);
253 bool replaceVloadHalf2(Function &F);
254 bool replaceVloadHalf4(Function &F);
255 bool replaceClspvVloadaHalf2(Function &F);
256 bool replaceClspvVloadaHalf4(Function &F);
257 bool replaceVstoreHalf(Function &F, int vec_size);
258 bool replaceVstoreHalf(Function &F);
259 bool replaceVstoreHalf2(Function &F);
260 bool replaceVstoreHalf4(Function &F);
261 bool replaceHalfReadImage(Function &F);
262 bool replaceHalfWriteImage(Function &F);
263 bool replaceSampledReadImageWithIntCoords(Function &F);
264 bool replaceAtomics(Function &F, spv::Op Op);
265 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
alan-baker4986eff2020-10-29 13:38:00 -0400266 bool replaceAtomicLoad(Function &F);
267 bool replaceExplicitAtomics(Function &F, spv::Op Op,
268 spv::MemorySemanticsMask semantics =
269 spv::MemorySemanticsAcquireReleaseMask);
270 bool replaceAtomicCompareExchange(Function &);
SJW2c317da2020-03-23 07:39:13 -0500271 bool replaceCross(Function &F);
272 bool replaceFract(Function &F, int vec_size);
273 bool replaceVload(Function &F);
274 bool replaceVstore(Function &F);
alan-bakera52b7312020-10-26 08:58:51 -0400275 bool replaceAddSat(Function &F, bool is_signed);
Kévin Petit8576f682020-11-02 14:51:32 +0000276 bool replaceHadd(Function &F, bool is_signed,
277 Instruction::BinaryOps join_opcode);
alan-bakercc2bafb2020-11-02 08:30:18 -0500278 bool replaceClz(Function &F);
David Neto22f144c2017-06-12 14:26:21 -0400279};
SJW2c317da2020-03-23 07:39:13 -0500280
Kévin Petit91bc72e2019-04-08 15:17:46 +0100281} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400282
283char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400284INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
285 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400286
287namespace clspv {
288ModulePass *createReplaceOpenCLBuiltinPass() {
289 return new ReplaceOpenCLBuiltinPass();
290}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400291} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400292
293bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500294 std::list<Function *> func_list;
295 for (auto &F : M.getFunctionList()) {
296 // process only function declarations
297 if (F.isDeclaration() && runOnFunction(F)) {
298 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000299 }
300 }
SJW2c317da2020-03-23 07:39:13 -0500301 if (func_list.size() != 0) {
302 // recursively convert functions, but first remove dead
303 for (auto *F : func_list) {
304 if (F->use_empty()) {
305 F->eraseFromParent();
306 }
307 }
308 runOnModule(M);
309 return true;
310 }
311 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000312}
313
SJW2c317da2020-03-23 07:39:13 -0500314bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
315 auto &FI = Builtins::Lookup(&F);
316 switch (FI.getType()) {
317 case Builtins::kAbs:
318 if (!FI.getParameter(0).is_signed) {
319 return replaceAbs(F);
320 }
321 break;
322 case Builtins::kAbsDiff:
323 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400324
325 case Builtins::kAddSat:
326 return replaceAddSat(F, FI.getParameter(0).is_signed);
327
alan-bakercc2bafb2020-11-02 08:30:18 -0500328 case Builtins::kClz:
329 return replaceClz(F);
330
alan-bakerb6da5132020-10-29 15:59:06 -0400331 case Builtins::kHadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000332 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::And);
alan-bakerb6da5132020-10-29 15:59:06 -0400333 case Builtins::kRhadd:
Kévin Petit8576f682020-11-02 14:51:32 +0000334 return replaceHadd(F, FI.getParameter(0).is_signed, Instruction::Or);
alan-bakerb6da5132020-10-29 15:59:06 -0400335
SJW2c317da2020-03-23 07:39:13 -0500336 case Builtins::kCopysign:
337 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100338
SJW2c317da2020-03-23 07:39:13 -0500339 case Builtins::kHalfRecip:
340 case Builtins::kNativeRecip:
341 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100342
SJW2c317da2020-03-23 07:39:13 -0500343 case Builtins::kHalfDivide:
344 case Builtins::kNativeDivide:
345 return replaceDivide(F);
346
347 case Builtins::kDot:
348 return replaceDot(F);
349
350 case Builtins::kExp10:
351 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500352 case Builtins::kNativeExp10:
353 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500354
355 case Builtins::kLog10:
356 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500357 case Builtins::kNativeLog10:
358 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500359
gnl21636e7992020-09-09 16:08:16 +0100360 case Builtins::kLog1p:
361 return replaceLog1p(F);
362
SJW2c317da2020-03-23 07:39:13 -0500363 case Builtins::kFmod:
364 return replaceFmod(F);
365
366 case Builtins::kBarrier:
367 case Builtins::kWorkGroupBarrier:
368 return replaceBarrier(F);
369
alan-baker12d2c182020-07-20 08:22:42 -0400370 case Builtins::kSubGroupBarrier:
371 return replaceBarrier(F, true);
372
SJW2c317da2020-03-23 07:39:13 -0500373 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400374 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500375 case Builtins::kReadMemFence:
376 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
377 case Builtins::kWriteMemFence:
378 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
379
380 // Relational
381 case Builtins::kIsequal:
382 return replaceRelational(F, CmpInst::FCMP_OEQ,
383 FI.getParameter(0).vector_size ? -1 : 1);
384 case Builtins::kIsgreater:
385 return replaceRelational(F, CmpInst::FCMP_OGT,
386 FI.getParameter(0).vector_size ? -1 : 1);
387 case Builtins::kIsgreaterequal:
388 return replaceRelational(F, CmpInst::FCMP_OGE,
389 FI.getParameter(0).vector_size ? -1 : 1);
390 case Builtins::kIsless:
391 return replaceRelational(F, CmpInst::FCMP_OLT,
392 FI.getParameter(0).vector_size ? -1 : 1);
393 case Builtins::kIslessequal:
394 return replaceRelational(F, CmpInst::FCMP_OLE,
395 FI.getParameter(0).vector_size ? -1 : 1);
396 case Builtins::kIsnotequal:
397 return replaceRelational(F, CmpInst::FCMP_ONE,
398 FI.getParameter(0).vector_size ? -1 : 1);
399
400 case Builtins::kIsinf: {
401 bool is_vec = FI.getParameter(0).vector_size != 0;
402 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
403 }
404 case Builtins::kIsnan: {
405 bool is_vec = FI.getParameter(0).vector_size != 0;
406 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
407 }
408
409 case Builtins::kIsfinite:
410 return replaceIsFinite(F);
411
412 case Builtins::kAll: {
413 bool is_vec = FI.getParameter(0).vector_size != 0;
414 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
415 }
416 case Builtins::kAny: {
417 bool is_vec = FI.getParameter(0).vector_size != 0;
418 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
419 }
420
421 case Builtins::kUpsample:
422 return replaceUpsample(F);
423
424 case Builtins::kRotate:
425 return replaceRotate(F);
426
427 case Builtins::kConvert:
428 return replaceConvert(F, FI.getParameter(0).is_signed,
429 FI.getReturnType().is_signed);
430
alan-baker4986eff2020-10-29 13:38:00 -0400431 // OpenCL 2.0 explicit atomics have different default scopes and semantics
432 // than legacy atomic functions.
433 case Builtins::kAtomicLoad:
434 case Builtins::kAtomicLoadExplicit:
435 return replaceAtomicLoad(F);
436 case Builtins::kAtomicStore:
437 case Builtins::kAtomicStoreExplicit:
438 return replaceExplicitAtomics(F, spv::OpAtomicStore,
439 spv::MemorySemanticsReleaseMask);
440 case Builtins::kAtomicExchange:
441 case Builtins::kAtomicExchangeExplicit:
442 return replaceExplicitAtomics(F, spv::OpAtomicExchange);
443 case Builtins::kAtomicFetchAdd:
444 case Builtins::kAtomicFetchAddExplicit:
445 return replaceExplicitAtomics(F, spv::OpAtomicIAdd);
446 case Builtins::kAtomicFetchSub:
447 case Builtins::kAtomicFetchSubExplicit:
448 return replaceExplicitAtomics(F, spv::OpAtomicISub);
449 case Builtins::kAtomicFetchOr:
450 case Builtins::kAtomicFetchOrExplicit:
451 return replaceExplicitAtomics(F, spv::OpAtomicOr);
452 case Builtins::kAtomicFetchXor:
453 case Builtins::kAtomicFetchXorExplicit:
454 return replaceExplicitAtomics(F, spv::OpAtomicXor);
455 case Builtins::kAtomicFetchAnd:
456 case Builtins::kAtomicFetchAndExplicit:
457 return replaceExplicitAtomics(F, spv::OpAtomicAnd);
458 case Builtins::kAtomicFetchMin:
459 case Builtins::kAtomicFetchMinExplicit:
460 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
461 ? spv::OpAtomicSMin
462 : spv::OpAtomicUMin);
463 case Builtins::kAtomicFetchMax:
464 case Builtins::kAtomicFetchMaxExplicit:
465 return replaceExplicitAtomics(F, FI.getParameter(1).is_signed
466 ? spv::OpAtomicSMax
467 : spv::OpAtomicUMax);
468 // Weak compare exchange is generated as strong compare exchange.
469 case Builtins::kAtomicCompareExchangeWeak:
470 case Builtins::kAtomicCompareExchangeWeakExplicit:
471 case Builtins::kAtomicCompareExchangeStrong:
472 case Builtins::kAtomicCompareExchangeStrongExplicit:
473 return replaceAtomicCompareExchange(F);
474
475 // Legacy atomic functions.
SJW2c317da2020-03-23 07:39:13 -0500476 case Builtins::kAtomicInc:
477 return replaceAtomics(F, spv::OpAtomicIIncrement);
478 case Builtins::kAtomicDec:
479 return replaceAtomics(F, spv::OpAtomicIDecrement);
480 case Builtins::kAtomicCmpxchg:
481 return replaceAtomics(F, spv::OpAtomicCompareExchange);
482 case Builtins::kAtomicAdd:
483 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
484 case Builtins::kAtomicSub:
485 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
486 case Builtins::kAtomicXchg:
487 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
488 case Builtins::kAtomicMin:
489 return replaceAtomics(F, FI.getParameter(0).is_signed
490 ? llvm::AtomicRMWInst::Min
491 : llvm::AtomicRMWInst::UMin);
492 case Builtins::kAtomicMax:
493 return replaceAtomics(F, FI.getParameter(0).is_signed
494 ? llvm::AtomicRMWInst::Max
495 : llvm::AtomicRMWInst::UMax);
496 case Builtins::kAtomicAnd:
497 return replaceAtomics(F, llvm::AtomicRMWInst::And);
498 case Builtins::kAtomicOr:
499 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
500 case Builtins::kAtomicXor:
501 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
502
503 case Builtins::kCross:
504 if (FI.getParameter(0).vector_size == 4) {
505 return replaceCross(F);
506 }
507 break;
508
509 case Builtins::kFract:
510 if (FI.getParameterCount()) {
511 return replaceFract(F, FI.getParameter(0).vector_size);
512 }
513 break;
514
515 case Builtins::kMadHi:
516 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
517 case Builtins::kMulHi:
518 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
519
520 case Builtins::kMad:
521 case Builtins::kMad24:
522 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
523 true);
524 case Builtins::kMul24:
525 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
526 false);
527
528 case Builtins::kSelect:
529 return replaceSelect(F);
530
531 case Builtins::kBitselect:
532 return replaceBitSelect(F);
533
534 case Builtins::kVload:
535 return replaceVload(F);
536
537 case Builtins::kVloadaHalf:
538 case Builtins::kVloadHalf:
539 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
540
541 case Builtins::kVstore:
542 return replaceVstore(F);
543
544 case Builtins::kVstoreHalf:
545 case Builtins::kVstoreaHalf:
546 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
547
548 case Builtins::kSmoothstep: {
549 int vec_size = FI.getLastParameter().vector_size;
550 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500551 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500552 }
553 break;
554 }
555 case Builtins::kStep: {
556 int vec_size = FI.getLastParameter().vector_size;
557 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500558 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500559 }
560 break;
561 }
562
563 case Builtins::kSignbit:
564 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
565
566 case Builtins::kReadImageh:
567 return replaceHalfReadImage(F);
568 case Builtins::kReadImagef:
569 case Builtins::kReadImagei:
570 case Builtins::kReadImageui: {
571 if (FI.getParameter(1).isSampler() &&
572 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
573 return replaceSampledReadImageWithIntCoords(F);
574 }
575 break;
576 }
577
578 case Builtins::kWriteImageh:
579 return replaceHalfWriteImage(F);
580
Kévin Petit1cb45112020-04-27 18:55:48 +0100581 case Builtins::kPrefetch:
582 return replacePrefetch(F);
583
SJW2c317da2020-03-23 07:39:13 -0500584 default:
585 break;
586 }
587
588 return false;
589}
590
591bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
592 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400593 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100594}
595
SJW2c317da2020-03-23 07:39:13 -0500596bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
597 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100598 auto XValue = CI->getOperand(0);
599 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100600
Kévin Petite8edce32019-04-10 14:23:32 +0100601 IRBuilder<> Builder(CI);
602 auto XmY = Builder.CreateSub(XValue, YValue);
603 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100604
SJW2c317da2020-03-23 07:39:13 -0500605 Value *Cmp = nullptr;
606 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100607 Cmp = Builder.CreateICmpSGT(YValue, XValue);
608 } else {
609 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100610 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100611
Kévin Petite8edce32019-04-10 14:23:32 +0100612 return Builder.CreateSelect(Cmp, YmX, XmY);
613 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100614}
615
SJW2c317da2020-03-23 07:39:13 -0500616bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
617 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100618 auto XValue = CI->getOperand(0);
619 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100620
Kévin Petite8edce32019-04-10 14:23:32 +0100621 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100622
SJW2c317da2020-03-23 07:39:13 -0500623 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400624 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400625 IntTy = FixedVectorType::get(
626 IntTy, vec_ty->getElementCount().getKnownMinValue());
Kévin Petit8c1be282019-04-02 19:34:25 +0100627 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100628
Kévin Petite8edce32019-04-10 14:23:32 +0100629 // Return X with the sign of Y
630
631 // Sign bit masks
632 auto SignBit = IntTy->getScalarSizeInBits() - 1;
633 auto SignBitMask = 1 << SignBit;
634 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
635 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
636
637 IRBuilder<> Builder(CI);
638
639 // Extract sign of Y
640 auto YInt = Builder.CreateBitCast(YValue, IntTy);
641 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
642
643 // Clear sign bit in X
644 auto XInt = Builder.CreateBitCast(XValue, IntTy);
645 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
646
647 // Insert sign bit of Y into X
648 auto NewXInt = Builder.CreateOr(XInt, YSign);
649
650 // And cast back to floating-point
651 return Builder.CreateBitCast(NewXInt, Ty);
652 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100653}
654
SJW2c317da2020-03-23 07:39:13 -0500655bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
656 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100657 // Recip has one arg.
658 auto Arg = CI->getOperand(0);
659 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
660 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
661 });
David Neto22f144c2017-06-12 14:26:21 -0400662}
663
SJW2c317da2020-03-23 07:39:13 -0500664bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
665 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100666 auto Op0 = CI->getOperand(0);
667 auto Op1 = CI->getOperand(1);
668 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
669 });
David Neto22f144c2017-06-12 14:26:21 -0400670}
671
SJW2c317da2020-03-23 07:39:13 -0500672bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
673 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100674 auto Op0 = CI->getOperand(0);
675 auto Op1 = CI->getOperand(1);
676
SJW2c317da2020-03-23 07:39:13 -0500677 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100678 if (Op0->getType()->isVectorTy()) {
679 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
680 CI->getType(), {Op0, Op1});
681 } else {
682 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
683 }
684
685 return V;
686 });
687}
688
SJW2c317da2020-03-23 07:39:13 -0500689bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500690 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500691 // convert to natural
692 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500693 std::string NewFName = basename.substr(0, slen);
694 NewFName =
695 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400696
SJW2c317da2020-03-23 07:39:13 -0500697 Module &M = *F.getParent();
698 return replaceCallsWithValue(F, [&](CallInst *CI) {
699 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
700
701 auto Arg = CI->getOperand(0);
702
703 // Constant of the natural log of 10 (ln(10)).
704 const double Ln10 =
705 2.302585092994045684017991454684364207601101488628772976033;
706
707 auto Mul = BinaryOperator::Create(
708 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
709
710 return CallInst::Create(NewF, Mul, "", CI);
711 });
David Neto22f144c2017-06-12 14:26:21 -0400712}
713
SJW2c317da2020-03-23 07:39:13 -0500714bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100715 // OpenCL fmod(x,y) is x - y * trunc(x/y)
716 // The sign for a non-zero result is taken from x.
717 // (Try an example.)
718 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500719 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100720 auto Op0 = CI->getOperand(0);
721 auto Op1 = CI->getOperand(1);
722 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
723 });
724}
725
SJW2c317da2020-03-23 07:39:13 -0500726bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500727 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500728 // convert to natural
729 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500730 std::string NewFName = basename.substr(0, slen);
731 NewFName =
732 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400733
SJW2c317da2020-03-23 07:39:13 -0500734 Module &M = *F.getParent();
735 return replaceCallsWithValue(F, [&](CallInst *CI) {
736 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
737
738 auto Arg = CI->getOperand(0);
739
740 // Constant of the reciprocal of the natural log of 10 (ln(10)).
741 const double Ln10 =
742 0.434294481903251827651128918916605082294397005803666566114;
743
744 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
745
746 return BinaryOperator::Create(Instruction::FMul,
747 ConstantFP::get(Arg->getType(), Ln10), NewCI,
748 "", CI);
749 });
David Neto22f144c2017-06-12 14:26:21 -0400750}
751
gnl21636e7992020-09-09 16:08:16 +0100752bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
753 // convert to natural
754 std::string NewFName =
755 Builtins::GetMangledFunctionName("log", F.getFunctionType());
756
757 Module &M = *F.getParent();
758 return replaceCallsWithValue(F, [&](CallInst *CI) {
759 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
760
761 auto Arg = CI->getOperand(0);
762
763 auto ArgP1 = BinaryOperator::Create(
764 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
765
766 return CallInst::Create(NewF, ArgP1, "", CI);
767 });
768}
769
alan-baker12d2c182020-07-20 08:22:42 -0400770bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400771
alan-bakerf6bc8252020-09-23 14:58:55 -0400772 enum {
773 CLK_LOCAL_MEM_FENCE = 0x01,
774 CLK_GLOBAL_MEM_FENCE = 0x02,
775 CLK_IMAGE_MEM_FENCE = 0x04
776 };
David Neto22f144c2017-06-12 14:26:21 -0400777
alan-baker12d2c182020-07-20 08:22:42 -0400778 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100779 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400780
Kévin Petitc4643922019-06-17 19:32:05 +0100781 // We need to map the OpenCL constants to the SPIR-V equivalents.
782 const auto LocalMemFence =
783 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
784 const auto GlobalMemFence =
785 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400786 const auto ImageMemFence =
787 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400788 const auto ConstantAcquireRelease = ConstantInt::get(
789 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100790 const auto ConstantScopeDevice =
791 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
792 const auto ConstantScopeWorkgroup =
793 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400794 const auto ConstantScopeSubgroup =
795 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400796
Kévin Petitc4643922019-06-17 19:32:05 +0100797 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
798 const auto LocalMemFenceMask =
799 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
800 const auto WorkgroupShiftAmount =
801 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
802 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
803 Instruction::Shl, LocalMemFenceMask,
804 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400805
Kévin Petitc4643922019-06-17 19:32:05 +0100806 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
807 const auto GlobalMemFenceMask =
808 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
809 const auto UniformShiftAmount =
810 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
811 const auto MemorySemanticsUniform = BinaryOperator::Create(
812 Instruction::Shl, GlobalMemFenceMask,
813 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400814
alan-bakerf6bc8252020-09-23 14:58:55 -0400815 // OpenCL 2.0
816 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
817 const auto ImageMemFenceMask =
818 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
819 const auto ImageShiftAmount =
820 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
821 const auto MemorySemanticsImage = BinaryOperator::Create(
822 Instruction::Shl, ImageMemFenceMask,
823 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
824
Kévin Petitc4643922019-06-17 19:32:05 +0100825 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400826 // MemorySemanticsSequentiallyConsistentMask.
827 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100828 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400829 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400830 auto MemorySemantics2 = BinaryOperator::Create(
831 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
832 auto MemorySemantics = BinaryOperator::Create(
833 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400834
alan-baker12d2c182020-07-20 08:22:42 -0400835 // If the memory scope is not specified explicitly, it is either Subgroup
836 // or Workgroup depending on the type of barrier.
837 Value *MemoryScope =
838 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
839 if (CI->data_operands_size() > 1) {
840 enum {
841 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
842 CL_MEMORY_SCOPE_DEVICE = 0x2,
843 CL_MEMORY_SCOPE_SUBGROUP = 0x4
844 };
845 // The call was given an explicit memory scope.
846 const auto MemoryScopeSubgroup =
847 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
848 const auto MemoryScopeDevice =
849 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400850
alan-baker12d2c182020-07-20 08:22:42 -0400851 auto Cmp =
852 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
853 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
854 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
855 ConstantScopeWorkgroup, "", CI);
856 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
857 MemoryScopeDevice, CI->getOperand(1), "", CI);
858 MemoryScope =
859 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
860 }
861
862 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
863 // the type of barrier;
864 const auto ExecutionScope =
865 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400866
Kévin Petitc4643922019-06-17 19:32:05 +0100867 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
alan-baker3d905692020-10-28 14:02:37 -0400868 {Attribute::NoDuplicate, Attribute::Convergent},
869 CI->getType(),
Kévin Petitc4643922019-06-17 19:32:05 +0100870 {ExecutionScope, MemoryScope, MemorySemantics});
871 });
David Neto22f144c2017-06-12 14:26:21 -0400872}
873
SJW2c317da2020-03-23 07:39:13 -0500874bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
875 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400876
SJW2c317da2020-03-23 07:39:13 -0500877 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400878 enum {
879 CLK_LOCAL_MEM_FENCE = 0x01,
880 CLK_GLOBAL_MEM_FENCE = 0x02,
881 CLK_IMAGE_MEM_FENCE = 0x04,
882 };
David Neto22f144c2017-06-12 14:26:21 -0400883
SJW2c317da2020-03-23 07:39:13 -0500884 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400885
SJW2c317da2020-03-23 07:39:13 -0500886 // We need to map the OpenCL constants to the SPIR-V equivalents.
887 const auto LocalMemFence =
888 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
889 const auto GlobalMemFence =
890 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400891 const auto ImageMemFence =
892 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500893 const auto ConstantMemorySemantics =
894 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400895 const auto ConstantScopeWorkgroup =
896 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400897
SJW2c317da2020-03-23 07:39:13 -0500898 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
899 const auto LocalMemFenceMask =
900 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
901 const auto WorkgroupShiftAmount =
902 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
903 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
904 Instruction::Shl, LocalMemFenceMask,
905 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400906
SJW2c317da2020-03-23 07:39:13 -0500907 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
908 const auto GlobalMemFenceMask =
909 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
910 const auto UniformShiftAmount =
911 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
912 const auto MemorySemanticsUniform = BinaryOperator::Create(
913 Instruction::Shl, GlobalMemFenceMask,
914 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400915
alan-bakerf6bc8252020-09-23 14:58:55 -0400916 // OpenCL 2.0
917 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
918 const auto ImageMemFenceMask =
919 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
920 const auto ImageShiftAmount =
921 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
922 const auto MemorySemanticsImage = BinaryOperator::Create(
923 Instruction::Shl, ImageMemFenceMask,
924 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
925
SJW2c317da2020-03-23 07:39:13 -0500926 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400927 // |semantics|.
928 auto MemorySemantics1 =
SJW2c317da2020-03-23 07:39:13 -0500929 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
930 ConstantMemorySemantics, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400931 auto MemorySemantics2 = BinaryOperator::Create(
932 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
933 auto MemorySemantics = BinaryOperator::Create(
934 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400935
alan-baker12d2c182020-07-20 08:22:42 -0400936 // Memory Scope is always workgroup.
937 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400938
alan-baker3d905692020-10-28 14:02:37 -0400939 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier,
940 {Attribute::Convergent}, CI->getType(),
SJW2c317da2020-03-23 07:39:13 -0500941 {MemoryScope, MemorySemantics});
942 });
David Neto22f144c2017-06-12 14:26:21 -0400943}
944
Kévin Petit1cb45112020-04-27 18:55:48 +0100945bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
946 bool Changed = false;
947
948 SmallVector<Instruction *, 4> ToRemoves;
949
950 // Find all calls to the function
951 for (auto &U : F.uses()) {
952 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
953 ToRemoves.push_back(CI);
954 }
955 }
956
957 Changed = !ToRemoves.empty();
958
959 // Delete them
960 for (auto V : ToRemoves) {
961 V->eraseFromParent();
962 }
963
964 return Changed;
965}
966
SJW2c317da2020-03-23 07:39:13 -0500967bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
968 CmpInst::Predicate P,
969 int32_t C) {
970 return replaceCallsWithValue(F, [&](CallInst *CI) {
971 // The predicate to use in the CmpInst.
972 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400973
SJW2c317da2020-03-23 07:39:13 -0500974 // The value to return for true.
975 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400976
SJW2c317da2020-03-23 07:39:13 -0500977 // The value to return for false.
978 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400979
SJW2c317da2020-03-23 07:39:13 -0500980 auto Arg1 = CI->getOperand(0);
981 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400982
SJW2c317da2020-03-23 07:39:13 -0500983 const auto Cmp =
984 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400985
SJW2c317da2020-03-23 07:39:13 -0500986 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
987 });
David Neto22f144c2017-06-12 14:26:21 -0400988}
989
SJW2c317da2020-03-23 07:39:13 -0500990bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
991 spv::Op SPIRVOp,
992 int32_t C) {
993 Module &M = *F.getParent();
994 return replaceCallsWithValue(F, [&](CallInst *CI) {
995 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400996
SJW2c317da2020-03-23 07:39:13 -0500997 // The value to return for true.
998 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400999
SJW2c317da2020-03-23 07:39:13 -05001000 // The value to return for false.
1001 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -04001002
SJW2c317da2020-03-23 07:39:13 -05001003 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -04001004 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001005 CorrespondingBoolTy =
1006 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
1007 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04001008 }
David Neto22f144c2017-06-12 14:26:21 -04001009
SJW2c317da2020-03-23 07:39:13 -05001010 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
1011 CorrespondingBoolTy, {CI->getOperand(0)});
1012
1013 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1014 });
David Neto22f144c2017-06-12 14:26:21 -04001015}
1016
SJW2c317da2020-03-23 07:39:13 -05001017bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
1018 Module &M = *F.getParent();
1019 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001020 auto &C = M.getContext();
1021 auto Val = CI->getOperand(0);
1022 auto ValTy = Val->getType();
1023 auto RetTy = CI->getType();
1024
1025 // Get a suitable integer type to represent the number
1026 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
1027
1028 // Create Mask
1029 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -05001030 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001031 switch (ScalarSize) {
1032 case 16:
1033 InfMask = ConstantInt::get(IntTy, 0x7C00U);
1034 break;
1035 case 32:
1036 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
1037 break;
1038 case 64:
1039 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
1040 break;
1041 default:
1042 llvm_unreachable("Unsupported floating-point type");
1043 }
1044
1045 IRBuilder<> Builder(CI);
1046
1047 // Bitcast to int
1048 auto ValInt = Builder.CreateBitCast(Val, IntTy);
1049
1050 // Mask and compare
1051 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
1052 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
1053
1054 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -05001055 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001056 if (ValTy->isVectorTy()) {
1057 RetTrue = ConstantInt::getSigned(RetTy, -1);
1058 } else {
1059 RetTrue = ConstantInt::get(RetTy, 1);
1060 }
1061 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
1062 });
1063}
1064
SJW2c317da2020-03-23 07:39:13 -05001065bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
1066 Module &M = *F.getParent();
1067 return replaceCallsWithValue(F, [&](CallInst *CI) {
1068 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001069
SJW2c317da2020-03-23 07:39:13 -05001070 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001071
SJW2c317da2020-03-23 07:39:13 -05001072 // If the argument is a 32-bit int, just use a shift
1073 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1074 V = BinaryOperator::Create(Instruction::LShr, Arg,
1075 ConstantInt::get(Arg->getType(), 31), "", CI);
1076 } else {
1077 // The value for zero to compare against.
1078 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -04001079
SJW2c317da2020-03-23 07:39:13 -05001080 // The value to return for true.
1081 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -04001082
SJW2c317da2020-03-23 07:39:13 -05001083 // The value to return for false.
1084 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -04001085
SJW2c317da2020-03-23 07:39:13 -05001086 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
1087 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001088
SJW2c317da2020-03-23 07:39:13 -05001089 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04001090
SJW2c317da2020-03-23 07:39:13 -05001091 // If we have a function to call, call it!
1092 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -04001093
SJW2c317da2020-03-23 07:39:13 -05001094 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -04001095
SJW2c317da2020-03-23 07:39:13 -05001096 const auto NewCI = clspv::InsertSPIRVOp(
1097 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
1098 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -04001099
SJW2c317da2020-03-23 07:39:13 -05001100 } else {
1101 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -04001102 }
1103
SJW2c317da2020-03-23 07:39:13 -05001104 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001105 }
SJW2c317da2020-03-23 07:39:13 -05001106 return V;
1107 });
David Neto22f144c2017-06-12 14:26:21 -04001108}
1109
SJW2c317da2020-03-23 07:39:13 -05001110bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
1111 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1112 // Get arguments
1113 auto HiValue = CI->getOperand(0);
1114 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001115
SJW2c317da2020-03-23 07:39:13 -05001116 // Don't touch overloads that aren't in OpenCL C
1117 auto HiType = HiValue->getType();
1118 auto LoType = LoValue->getType();
1119
1120 if (HiType != LoType) {
1121 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001122 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001123
SJW2c317da2020-03-23 07:39:13 -05001124 if (!HiType->isIntOrIntVectorTy()) {
1125 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +00001126 }
Kévin Petitbf0036c2019-03-06 13:57:10 +00001127
SJW2c317da2020-03-23 07:39:13 -05001128 if (HiType->getScalarSizeInBits() * 2 !=
1129 CI->getType()->getScalarSizeInBits()) {
1130 return nullptr;
1131 }
1132
1133 if ((HiType->getScalarSizeInBits() != 8) &&
1134 (HiType->getScalarSizeInBits() != 16) &&
1135 (HiType->getScalarSizeInBits() != 32)) {
1136 return nullptr;
1137 }
1138
James Pricecf53df42020-04-20 14:41:24 -04001139 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001140 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001141 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1142 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001143 return nullptr;
1144 }
1145 }
1146
1147 // Convert both operands to the result type
1148 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1149 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
1150
1151 // Shift high operand
1152 auto ShiftAmount =
1153 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
1154 auto HiShifted =
1155 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
1156
1157 // OR both results
1158 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
1159 });
Kévin Petitbf0036c2019-03-06 13:57:10 +00001160}
1161
SJW2c317da2020-03-23 07:39:13 -05001162bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
1163 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1164 // Get arguments
1165 auto SrcValue = CI->getOperand(0);
1166 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001167
SJW2c317da2020-03-23 07:39:13 -05001168 // Don't touch overloads that aren't in OpenCL C
1169 auto SrcType = SrcValue->getType();
1170 auto RotType = RotAmount->getType();
1171
1172 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1173 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001174 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001175
SJW2c317da2020-03-23 07:39:13 -05001176 if (!SrcType->isIntOrIntVectorTy()) {
1177 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001178 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001179
SJW2c317da2020-03-23 07:39:13 -05001180 if ((SrcType->getScalarSizeInBits() != 8) &&
1181 (SrcType->getScalarSizeInBits() != 16) &&
1182 (SrcType->getScalarSizeInBits() != 32) &&
1183 (SrcType->getScalarSizeInBits() != 64)) {
1184 return nullptr;
1185 }
1186
James Pricecf53df42020-04-20 14:41:24 -04001187 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001188 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001189 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1190 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001191 return nullptr;
1192 }
1193 }
1194
alan-bakerfd22ae12020-10-29 15:59:22 -04001195 // Replace with LLVM's funnel shift left intrinsic because it is more
1196 // generic than rotate.
1197 Function *intrinsic =
1198 Intrinsic::getDeclaration(F.getParent(), Intrinsic::fshl, SrcType);
1199 return CallInst::Create(intrinsic->getFunctionType(), intrinsic,
1200 {SrcValue, SrcValue, RotAmount}, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001201 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001202}
1203
SJW2c317da2020-03-23 07:39:13 -05001204bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1205 bool DstIsSigned) {
1206 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1207 Value *V = nullptr;
1208 // Get arguments
1209 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001210
SJW2c317da2020-03-23 07:39:13 -05001211 // Don't touch overloads that aren't in OpenCL C
1212 auto SrcType = SrcValue->getType();
1213 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001214
SJW2c317da2020-03-23 07:39:13 -05001215 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1216 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1217 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001218 }
1219
James Pricecf53df42020-04-20 14:41:24 -04001220 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001221 unsigned SrcNumElements =
1222 SrcVecType->getElementCount().getKnownMinValue();
1223 unsigned DstNumElements =
1224 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001225 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001226 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001227 }
1228
James Pricecf53df42020-04-20 14:41:24 -04001229 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1230 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1231 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001232 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001233 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001234 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001235
SJW2c317da2020-03-23 07:39:13 -05001236 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1237 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1238
1239 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1240 bool DstIsInt = DstType->isIntOrIntVectorTy();
1241
1242 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1243 // Unnecessary cast operation.
1244 V = SrcValue;
1245 } else if (SrcIsFloat && DstIsFloat) {
1246 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1247 } else if (SrcIsFloat && DstIsInt) {
1248 if (DstIsSigned) {
1249 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1250 } else {
1251 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1252 }
1253 } else if (SrcIsInt && DstIsFloat) {
1254 if (SrcIsSigned) {
1255 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1256 } else {
1257 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1258 }
1259 } else if (SrcIsInt && DstIsInt) {
1260 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1261 } else {
1262 // Not something we're supposed to handle, just move on
1263 }
1264
1265 return V;
1266 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001267}
1268
SJW2c317da2020-03-23 07:39:13 -05001269bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1270 bool is_mad) {
1271 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1272 Value *V = nullptr;
1273 // Get arguments
1274 auto AValue = CI->getOperand(0);
1275 auto BValue = CI->getOperand(1);
1276 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001277
SJW2c317da2020-03-23 07:39:13 -05001278 // Don't touch overloads that aren't in OpenCL C
1279 auto AType = AValue->getType();
1280 auto BType = BValue->getType();
1281 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001282
SJW2c317da2020-03-23 07:39:13 -05001283 if ((AType != BType) || (CI->getType() != AType) ||
1284 (is_mad && (AType != CType))) {
1285 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001286 }
1287
SJW2c317da2020-03-23 07:39:13 -05001288 if (!AType->isIntOrIntVectorTy()) {
1289 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001290 }
Kévin Petit8a560882019-03-21 15:24:34 +00001291
SJW2c317da2020-03-23 07:39:13 -05001292 if ((AType->getScalarSizeInBits() != 8) &&
1293 (AType->getScalarSizeInBits() != 16) &&
1294 (AType->getScalarSizeInBits() != 32) &&
1295 (AType->getScalarSizeInBits() != 64)) {
1296 return V;
1297 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001298
James Pricecf53df42020-04-20 14:41:24 -04001299 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001300 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001301 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1302 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001303 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001304 }
1305 }
1306
SJW2c317da2020-03-23 07:39:13 -05001307 // Our SPIR-V op returns a struct, create a type for it
1308 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1309 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001310
SJW2c317da2020-03-23 07:39:13 -05001311 // Select the appropriate signed/unsigned SPIR-V op
1312 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1313
1314 // Call the SPIR-V op
1315 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1316 ExMulRetType, {AValue, BValue});
1317
1318 // Get the high part of the result
1319 unsigned Idxs[] = {1};
1320 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1321
1322 // If we're handling a mad_hi, add the third argument to the result
1323 if (is_mad) {
1324 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001325 }
1326
SJW2c317da2020-03-23 07:39:13 -05001327 return V;
1328 });
Kévin Petit8a560882019-03-21 15:24:34 +00001329}
1330
SJW2c317da2020-03-23 07:39:13 -05001331bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1332 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1333 // Get arguments
1334 auto FalseValue = CI->getOperand(0);
1335 auto TrueValue = CI->getOperand(1);
1336 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001337
SJW2c317da2020-03-23 07:39:13 -05001338 // Don't touch overloads that aren't in OpenCL C
1339 auto FalseType = FalseValue->getType();
1340 auto TrueType = TrueValue->getType();
1341 auto PredicateType = PredicateValue->getType();
1342
1343 if (FalseType != TrueType) {
1344 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001345 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001346
SJW2c317da2020-03-23 07:39:13 -05001347 if (!PredicateType->isIntOrIntVectorTy()) {
1348 return nullptr;
1349 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001350
SJW2c317da2020-03-23 07:39:13 -05001351 if (!FalseType->isIntOrIntVectorTy() &&
1352 !FalseType->getScalarType()->isFloatingPointTy()) {
1353 return nullptr;
1354 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001355
SJW2c317da2020-03-23 07:39:13 -05001356 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1357 return nullptr;
1358 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001359
SJW2c317da2020-03-23 07:39:13 -05001360 if (FalseType->getScalarSizeInBits() !=
1361 PredicateType->getScalarSizeInBits()) {
1362 return nullptr;
1363 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001364
James Pricecf53df42020-04-20 14:41:24 -04001365 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001366 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1367 if (NumElements != cast<VectorType>(PredicateType)
1368 ->getElementCount()
1369 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001370 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001371 }
1372
James Pricecf53df42020-04-20 14:41:24 -04001373 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1374 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001375 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001376 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001377 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001378
SJW2c317da2020-03-23 07:39:13 -05001379 // Create constant
1380 const auto ZeroValue = Constant::getNullValue(PredicateType);
1381
1382 // Scalar and vector are to be treated differently
1383 CmpInst::Predicate Pred;
1384 if (PredicateType->isVectorTy()) {
1385 Pred = CmpInst::ICMP_SLT;
1386 } else {
1387 Pred = CmpInst::ICMP_NE;
1388 }
1389
1390 // Create comparison instruction
1391 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1392 ZeroValue, "", CI);
1393
1394 // Create select
1395 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1396 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001397}
1398
SJW2c317da2020-03-23 07:39:13 -05001399bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1400 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1401 Value *V = nullptr;
1402 if (CI->getNumOperands() != 4) {
1403 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001404 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001405
SJW2c317da2020-03-23 07:39:13 -05001406 // Get arguments
1407 auto FalseValue = CI->getOperand(0);
1408 auto TrueValue = CI->getOperand(1);
1409 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001410
SJW2c317da2020-03-23 07:39:13 -05001411 // Don't touch overloads that aren't in OpenCL C
1412 auto FalseType = FalseValue->getType();
1413 auto TrueType = TrueValue->getType();
1414 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001415
SJW2c317da2020-03-23 07:39:13 -05001416 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1417 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001418 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001419
James Pricecf53df42020-04-20 14:41:24 -04001420 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001421 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1422 !TrueType->getScalarType()->isIntegerTy()) {
1423 return V;
1424 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001425 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001426 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1427 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001428 return V;
1429 }
1430 }
1431
1432 // Remember the type of the operands
1433 auto OpType = TrueType;
1434
1435 // The actual bit selection will always be done on an integer type,
1436 // declare it here
1437 Type *BitType;
1438
1439 // If the operands are float, then bitcast them to int
1440 if (OpType->getScalarType()->isFloatingPointTy()) {
1441
1442 // First create the new type
1443 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1444
1445 // Then bitcast all operands
1446 PredicateValue =
1447 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1448 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1449 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1450
1451 } else {
1452 // The operands have an integer type, use it directly
1453 BitType = OpType;
1454 }
1455
1456 // All the operands are now always integers
1457 // implement as (c & b) | (~c & a)
1458
1459 // Create our negated predicate value
1460 auto AllOnes = Constant::getAllOnesValue(BitType);
1461 auto NotPredicateValue = BinaryOperator::Create(
1462 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1463
1464 // Then put everything together
1465 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1466 FalseValue, "", CI);
1467 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1468 TrueValue, "", CI);
1469
1470 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1471
1472 // If we were dealing with a floating point type, we must bitcast
1473 // the result back to that
1474 if (OpType->getScalarType()->isFloatingPointTy()) {
1475 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1476 }
1477
1478 return V;
1479 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001480}
1481
SJW61531372020-06-09 07:31:08 -05001482bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001483 // convert to vector versions
1484 Module &M = *F.getParent();
1485 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1486 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1487 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001488
SJW2c317da2020-03-23 07:39:13 -05001489 // First figure out which function we're dealing with
1490 if (is_smooth) {
1491 ArgsToSplat.push_back(CI->getOperand(1));
1492 VectorArg = CI->getOperand(2);
1493 } else {
1494 VectorArg = CI->getOperand(1);
1495 }
1496
1497 // Splat arguments that need to be
1498 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001499 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001500
1501 for (auto arg : ArgsToSplat) {
1502 Value *NewVectorArg = UndefValue::get(VecType);
alan-baker5a8c3be2020-09-09 13:44:26 -04001503 for (auto i = 0; i < VecType->getElementCount().getKnownMinValue(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001504 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1505 NewVectorArg =
1506 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1507 }
1508 SplatArgs.push_back(NewVectorArg);
1509 }
1510
1511 // Replace the call with the vector/vector flavour
1512 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1513 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1514
SJW61531372020-06-09 07:31:08 -05001515 std::string NewFName = Builtins::GetMangledFunctionName(
1516 is_smooth ? "smoothstep" : "step", NewFType);
1517
SJW2c317da2020-03-23 07:39:13 -05001518 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1519
1520 SmallVector<Value *, 3> NewArgs;
1521 for (auto arg : SplatArgs) {
1522 NewArgs.push_back(arg);
1523 }
1524 NewArgs.push_back(VectorArg);
1525
1526 return CallInst::Create(NewF, NewArgs, "", CI);
1527 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001528}
1529
SJW2c317da2020-03-23 07:39:13 -05001530bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001531 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1532 auto Arg = CI->getOperand(0);
1533 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001534
SJW2c317da2020-03-23 07:39:13 -05001535 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001536
SJW2c317da2020-03-23 07:39:13 -05001537 return BinaryOperator::Create(Op, Bitcast,
1538 ConstantInt::get(CI->getType(), 31), "", CI);
1539 });
David Neto22f144c2017-06-12 14:26:21 -04001540}
1541
SJW2c317da2020-03-23 07:39:13 -05001542bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1543 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001544 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1545 // The multiply instruction to use.
1546 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001547
SJW2c317da2020-03-23 07:39:13 -05001548 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001549
SJW2c317da2020-03-23 07:39:13 -05001550 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1551 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001552
SJW2c317da2020-03-23 07:39:13 -05001553 if (is_mad) {
1554 // The add instruction to use.
1555 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001556
SJW2c317da2020-03-23 07:39:13 -05001557 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001558 }
David Neto22f144c2017-06-12 14:26:21 -04001559
SJW2c317da2020-03-23 07:39:13 -05001560 return V;
1561 });
David Neto22f144c2017-06-12 14:26:21 -04001562}
1563
SJW2c317da2020-03-23 07:39:13 -05001564bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001565 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1566 Value *V = nullptr;
1567 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001568
SJW2c317da2020-03-23 07:39:13 -05001569 auto data_type = data->getType();
1570 if (!data_type->isVectorTy())
1571 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001572
James Pricecf53df42020-04-20 14:41:24 -04001573 auto vec_data_type = cast<VectorType>(data_type);
1574
alan-baker5a8c3be2020-09-09 13:44:26 -04001575 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001576 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1577 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001578
SJW2c317da2020-03-23 07:39:13 -05001579 auto offset = CI->getOperand(1);
1580 auto ptr = CI->getOperand(2);
1581 auto ptr_type = ptr->getType();
1582 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001583 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001584 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001585
SJW2c317da2020-03-23 07:39:13 -05001586 // Avoid pointer casts. Instead generate the correct number of stores
1587 // and rely on drivers to coalesce appropriately.
1588 IRBuilder<> builder(CI);
1589 auto elems_const = builder.getInt32(elems);
1590 auto adjust = builder.CreateMul(offset, elems_const);
1591 for (auto i = 0; i < elems; ++i) {
1592 auto idx = builder.getInt32(i);
1593 auto add = builder.CreateAdd(adjust, idx);
1594 auto gep = builder.CreateGEP(ptr, add);
1595 auto extract = builder.CreateExtractElement(data, i);
1596 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001597 }
SJW2c317da2020-03-23 07:39:13 -05001598 return V;
1599 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001600}
1601
SJW2c317da2020-03-23 07:39:13 -05001602bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001603 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1604 Value *V = nullptr;
1605 auto ret_type = F.getReturnType();
1606 if (!ret_type->isVectorTy())
1607 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001608
James Pricecf53df42020-04-20 14:41:24 -04001609 auto vec_ret_type = cast<VectorType>(ret_type);
1610
alan-baker5a8c3be2020-09-09 13:44:26 -04001611 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001612 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1613 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001614
SJW2c317da2020-03-23 07:39:13 -05001615 auto offset = CI->getOperand(0);
1616 auto ptr = CI->getOperand(1);
1617 auto ptr_type = ptr->getType();
1618 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001619 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001620 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001621
SJW2c317da2020-03-23 07:39:13 -05001622 // Avoid pointer casts. Instead generate the correct number of loads
1623 // and rely on drivers to coalesce appropriately.
1624 IRBuilder<> builder(CI);
1625 auto elems_const = builder.getInt32(elems);
1626 V = UndefValue::get(ret_type);
1627 auto adjust = builder.CreateMul(offset, elems_const);
1628 for (auto i = 0; i < elems; ++i) {
1629 auto idx = builder.getInt32(i);
1630 auto add = builder.CreateAdd(adjust, idx);
1631 auto gep = builder.CreateGEP(ptr, add);
1632 auto load = builder.CreateLoad(gep);
1633 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001634 }
SJW2c317da2020-03-23 07:39:13 -05001635 return V;
1636 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001637}
1638
SJW2c317da2020-03-23 07:39:13 -05001639bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1640 const std::string &name,
1641 int vec_size) {
1642 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1643 if (!vec_size) {
1644 // deduce vec_size from last character of name (e.g. vload_half4)
1645 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001646 }
SJW2c317da2020-03-23 07:39:13 -05001647 switch (vec_size) {
1648 case 2:
1649 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1650 case 4:
1651 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1652 case 0:
1653 if (!is_clspv_version) {
1654 return replaceVloadHalf(F);
1655 }
1656 default:
1657 llvm_unreachable("Unsupported vload_half vector size");
1658 break;
1659 }
1660 return false;
David Neto22f144c2017-06-12 14:26:21 -04001661}
1662
SJW2c317da2020-03-23 07:39:13 -05001663bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1664 Module &M = *F.getParent();
1665 return replaceCallsWithValue(F, [&](CallInst *CI) {
1666 // The index argument from vload_half.
1667 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001668
SJW2c317da2020-03-23 07:39:13 -05001669 // The pointer argument from vload_half.
1670 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001671
SJW2c317da2020-03-23 07:39:13 -05001672 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001673 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001674 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1675
1676 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001677 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001678
1679 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1680
1681 Value *V = nullptr;
1682
alan-baker7efcaaa2020-05-06 19:33:27 -04001683 bool supports_16bit_storage = true;
1684 switch (Arg1->getType()->getPointerAddressSpace()) {
1685 case clspv::AddressSpace::Global:
1686 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1687 clspv::Option::StorageClass::kSSBO);
1688 break;
1689 case clspv::AddressSpace::Constant:
1690 if (clspv::Option::ConstantArgsInUniformBuffer())
1691 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1692 clspv::Option::StorageClass::kUBO);
1693 else
1694 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1695 clspv::Option::StorageClass::kSSBO);
1696 break;
1697 default:
1698 // Clspv will emit the Float16 capability if the half type is
1699 // encountered. That capability covers private and local addressspaces.
1700 break;
1701 }
1702
1703 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001704 auto ShortTy = Type::getInt16Ty(M.getContext());
1705 auto ShortPointerTy =
1706 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1707
1708 // Cast the half* pointer to short*.
1709 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1710
1711 // Index into the correct address of the casted pointer.
1712 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1713
1714 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001715 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001716
1717 // ZExt the short -> int.
1718 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1719
1720 // Get our float2.
1721 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1722
1723 // Extract out the bottom element which is our float result.
1724 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1725 } else {
1726 // Assume the pointer argument points to storage aligned to 32bits
1727 // or more.
1728 // TODO(dneto): Do more analysis to make sure this is true?
1729 //
1730 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1731 // with:
1732 //
1733 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1734 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1735 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1736 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1737 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1738 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1739 // x float> %converted, %index_is_odd32
1740
1741 auto IntPointerTy =
1742 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1743
1744 // Cast the base pointer to int*.
1745 // In a valid call (according to assumptions), this should get
1746 // optimized away in the simplify GEP pass.
1747 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1748
1749 auto One = ConstantInt::get(IntTy, 1);
1750 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1751 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1752
1753 // Index into the correct address of the casted pointer.
1754 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1755
1756 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001757 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001758
1759 // Get our float2.
1760 auto Call = CallInst::Create(NewF, Load, "", CI);
1761
1762 // Extract out the float result, where the element number is
1763 // determined by whether the original index was even or odd.
1764 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1765 }
1766 return V;
1767 });
1768}
1769
1770bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1771 Module &M = *F.getParent();
1772 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001773 // The index argument from vload_half.
1774 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001775
Kévin Petite8edce32019-04-10 14:23:32 +01001776 // The pointer argument from vload_half.
1777 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001778
Kévin Petite8edce32019-04-10 14:23:32 +01001779 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001780 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001781 auto NewPointerTy =
1782 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001783 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001784
Kévin Petite8edce32019-04-10 14:23:32 +01001785 // Cast the half* pointer to int*.
1786 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001787
Kévin Petite8edce32019-04-10 14:23:32 +01001788 // Index into the correct address of the casted pointer.
1789 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001790
Kévin Petite8edce32019-04-10 14:23:32 +01001791 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001792 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001793
Kévin Petite8edce32019-04-10 14:23:32 +01001794 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001795 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001796
Kévin Petite8edce32019-04-10 14:23:32 +01001797 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001798
Kévin Petite8edce32019-04-10 14:23:32 +01001799 // Get our float2.
1800 return CallInst::Create(NewF, Load, "", CI);
1801 });
David Neto22f144c2017-06-12 14:26:21 -04001802}
1803
SJW2c317da2020-03-23 07:39:13 -05001804bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1805 Module &M = *F.getParent();
1806 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001807 // The index argument from vload_half.
1808 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001809
Kévin Petite8edce32019-04-10 14:23:32 +01001810 // The pointer argument from vload_half.
1811 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001812
Kévin Petite8edce32019-04-10 14:23:32 +01001813 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001814 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1815 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001816 auto NewPointerTy =
1817 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001818 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001819
Kévin Petite8edce32019-04-10 14:23:32 +01001820 // Cast the half* pointer to int2*.
1821 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001822
Kévin Petite8edce32019-04-10 14:23:32 +01001823 // Index into the correct address of the casted pointer.
1824 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001825
Kévin Petite8edce32019-04-10 14:23:32 +01001826 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001827 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001828
Kévin Petite8edce32019-04-10 14:23:32 +01001829 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001830 auto X =
1831 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1832 auto Y =
1833 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001834
Kévin Petite8edce32019-04-10 14:23:32 +01001835 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001836 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001837
Kévin Petite8edce32019-04-10 14:23:32 +01001838 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001839
Kévin Petite8edce32019-04-10 14:23:32 +01001840 // Get the lower (x & y) components of our final float4.
1841 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001842
Kévin Petite8edce32019-04-10 14:23:32 +01001843 // Get the higher (z & w) components of our final float4.
1844 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001845
Kévin Petite8edce32019-04-10 14:23:32 +01001846 Constant *ShuffleMask[4] = {
1847 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1848 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001849
Kévin Petite8edce32019-04-10 14:23:32 +01001850 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001851 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1852 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001853 });
David Neto22f144c2017-06-12 14:26:21 -04001854}
1855
SJW2c317da2020-03-23 07:39:13 -05001856bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001857
1858 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1859 //
1860 // %u = load i32 %ptr
1861 // %fxy = call <2 x float> Unpack2xHalf(u)
1862 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001863 Module &M = *F.getParent();
1864 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001865 auto Index = CI->getOperand(0);
1866 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001867
Kévin Petite8edce32019-04-10 14:23:32 +01001868 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001869 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001870 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001871
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001872 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001873 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001874
Kévin Petite8edce32019-04-10 14:23:32 +01001875 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001876 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001877
Kévin Petite8edce32019-04-10 14:23:32 +01001878 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001879
Kévin Petite8edce32019-04-10 14:23:32 +01001880 // Get our final float2.
1881 return CallInst::Create(NewF, Load, "", CI);
1882 });
David Neto6ad93232018-06-07 15:42:58 -07001883}
1884
SJW2c317da2020-03-23 07:39:13 -05001885bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001886
1887 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1888 //
1889 // %u2 = load <2 x i32> %ptr
1890 // %u2xy = extractelement %u2, 0
1891 // %u2zw = extractelement %u2, 1
1892 // %fxy = call <2 x float> Unpack2xHalf(uint)
1893 // %fzw = call <2 x float> Unpack2xHalf(uint)
1894 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001895 Module &M = *F.getParent();
1896 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001897 auto Index = CI->getOperand(0);
1898 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001899
Kévin Petite8edce32019-04-10 14:23:32 +01001900 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001901 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1902 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001903 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001904
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001905 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001906 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001907
Kévin Petite8edce32019-04-10 14:23:32 +01001908 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001909 auto X =
1910 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1911 auto Y =
1912 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001913
Kévin Petite8edce32019-04-10 14:23:32 +01001914 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001915 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001916
Kévin Petite8edce32019-04-10 14:23:32 +01001917 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001918
Kévin Petite8edce32019-04-10 14:23:32 +01001919 // Get the lower (x & y) components of our final float4.
1920 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001921
Kévin Petite8edce32019-04-10 14:23:32 +01001922 // Get the higher (z & w) components of our final float4.
1923 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001924
Kévin Petite8edce32019-04-10 14:23:32 +01001925 Constant *ShuffleMask[4] = {
1926 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1927 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001928
Kévin Petite8edce32019-04-10 14:23:32 +01001929 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001930 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1931 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001932 });
David Neto6ad93232018-06-07 15:42:58 -07001933}
1934
SJW2c317da2020-03-23 07:39:13 -05001935bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1936 switch (vec_size) {
1937 case 0:
1938 return replaceVstoreHalf(F);
1939 case 2:
1940 return replaceVstoreHalf2(F);
1941 case 4:
1942 return replaceVstoreHalf4(F);
1943 default:
1944 llvm_unreachable("Unsupported vstore_half vector size");
1945 break;
1946 }
1947 return false;
1948}
David Neto22f144c2017-06-12 14:26:21 -04001949
SJW2c317da2020-03-23 07:39:13 -05001950bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1951 Module &M = *F.getParent();
1952 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001953 // The value to store.
1954 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001955
Kévin Petite8edce32019-04-10 14:23:32 +01001956 // The index argument from vstore_half.
1957 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001958
Kévin Petite8edce32019-04-10 14:23:32 +01001959 // The pointer argument from vstore_half.
1960 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001961
Kévin Petite8edce32019-04-10 14:23:32 +01001962 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001963 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001964 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1965 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001966
Kévin Petite8edce32019-04-10 14:23:32 +01001967 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001968 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001969
Kévin Petite8edce32019-04-10 14:23:32 +01001970 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001971
Kévin Petite8edce32019-04-10 14:23:32 +01001972 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001973 auto TempVec = InsertElementInst::Create(
1974 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001975
Kévin Petite8edce32019-04-10 14:23:32 +01001976 // Pack the float2 -> half2 (in an int).
1977 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001978
alan-baker7efcaaa2020-05-06 19:33:27 -04001979 bool supports_16bit_storage = true;
1980 switch (Arg2->getType()->getPointerAddressSpace()) {
1981 case clspv::AddressSpace::Global:
1982 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1983 clspv::Option::StorageClass::kSSBO);
1984 break;
1985 case clspv::AddressSpace::Constant:
1986 if (clspv::Option::ConstantArgsInUniformBuffer())
1987 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1988 clspv::Option::StorageClass::kUBO);
1989 else
1990 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1991 clspv::Option::StorageClass::kSSBO);
1992 break;
1993 default:
1994 // Clspv will emit the Float16 capability if the half type is
1995 // encountered. That capability covers private and local addressspaces.
1996 break;
1997 }
1998
SJW2c317da2020-03-23 07:39:13 -05001999 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04002000 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01002001 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002002 auto ShortPointerTy =
2003 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002004
Kévin Petite8edce32019-04-10 14:23:32 +01002005 // Truncate our i32 to an i16.
2006 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002007
Kévin Petite8edce32019-04-10 14:23:32 +01002008 // Cast the half* pointer to short*.
2009 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002010
Kévin Petite8edce32019-04-10 14:23:32 +01002011 // Index into the correct address of the casted pointer.
2012 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002013
Kévin Petite8edce32019-04-10 14:23:32 +01002014 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05002015 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002016 } else {
2017 // We can only write to 32-bit aligned words.
2018 //
2019 // Assuming base is aligned to 32-bits, replace the equivalent of
2020 // vstore_half(value, index, base)
2021 // with:
2022 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2023 // uint32_t write_to_upper_half = index & 1u;
2024 // uint32_t shift = write_to_upper_half << 4;
2025 //
2026 // // Pack the float value as a half number in bottom 16 bits
2027 // // of an i32.
2028 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2029 //
2030 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2031 // ^ ((packed & 0xffff) << shift)
2032 // // We only need relaxed consistency, but OpenCL 1.2 only has
2033 // // sequentially consistent atomics.
2034 // // TODO(dneto): Use relaxed consistency.
2035 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002036 auto IntPointerTy =
2037 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002038
Kévin Petite8edce32019-04-10 14:23:32 +01002039 auto Four = ConstantInt::get(IntTy, 4);
2040 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002041
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002042 auto IndexIsOdd =
2043 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002044 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002045 auto IndexIntoI32 =
2046 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2047 auto BaseI32Ptr =
2048 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2049 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2050 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04002051 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002052 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002053 auto MaskBitsToWrite =
2054 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2055 auto MaskedCurrent = BinaryOperator::CreateAnd(
2056 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002057
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002058 auto XLowerBits =
2059 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2060 auto NewBitsToWrite =
2061 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2062 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2063 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002064
Kévin Petite8edce32019-04-10 14:23:32 +01002065 // Generate the call to atomi_xor.
2066 SmallVector<Type *, 5> ParamTypes;
2067 // The pointer type.
2068 ParamTypes.push_back(IntPointerTy);
2069 // The Types for memory scope, semantics, and value.
2070 ParamTypes.push_back(IntTy);
2071 ParamTypes.push_back(IntTy);
2072 ParamTypes.push_back(IntTy);
2073 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2074 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002075
Kévin Petite8edce32019-04-10 14:23:32 +01002076 const auto ConstantScopeDevice =
2077 ConstantInt::get(IntTy, spv::ScopeDevice);
2078 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2079 // (SPIR-V Workgroup).
2080 const auto AddrSpaceSemanticsBits =
2081 IntPointerTy->getPointerAddressSpace() == 1
2082 ? spv::MemorySemanticsUniformMemoryMask
2083 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002084
Kévin Petite8edce32019-04-10 14:23:32 +01002085 // We're using relaxed consistency here.
2086 const auto ConstantMemorySemantics =
2087 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2088 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002089
Kévin Petite8edce32019-04-10 14:23:32 +01002090 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2091 ConstantMemorySemantics, ValueToXor};
2092 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05002093
2094 // Return a Nop so the old Call is removed
2095 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
2096 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002097 }
David Neto22f144c2017-06-12 14:26:21 -04002098
SJW2c317da2020-03-23 07:39:13 -05002099 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01002100 });
David Neto22f144c2017-06-12 14:26:21 -04002101}
2102
SJW2c317da2020-03-23 07:39:13 -05002103bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
2104 Module &M = *F.getParent();
2105 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002106 // The value to store.
2107 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002108
Kévin Petite8edce32019-04-10 14:23:32 +01002109 // The index argument from vstore_half.
2110 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002111
Kévin Petite8edce32019-04-10 14:23:32 +01002112 // The pointer argument from vstore_half.
2113 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002114
Kévin Petite8edce32019-04-10 14:23:32 +01002115 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002116 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002117 auto NewPointerTy =
2118 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002119 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002120
Kévin Petite8edce32019-04-10 14:23:32 +01002121 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002122 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002123
Kévin Petite8edce32019-04-10 14:23:32 +01002124 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002125
Kévin Petite8edce32019-04-10 14:23:32 +01002126 // Turn the packed x & y into the final packing.
2127 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002128
Kévin Petite8edce32019-04-10 14:23:32 +01002129 // Cast the half* pointer to int*.
2130 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002131
Kévin Petite8edce32019-04-10 14:23:32 +01002132 // Index into the correct address of the casted pointer.
2133 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002134
Kévin Petite8edce32019-04-10 14:23:32 +01002135 // Store to the int* we casted to.
2136 return new StoreInst(X, Index, CI);
2137 });
David Neto22f144c2017-06-12 14:26:21 -04002138}
2139
SJW2c317da2020-03-23 07:39:13 -05002140bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
2141 Module &M = *F.getParent();
2142 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002143 // The value to store.
2144 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002145
Kévin Petite8edce32019-04-10 14:23:32 +01002146 // The index argument from vstore_half.
2147 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002148
Kévin Petite8edce32019-04-10 14:23:32 +01002149 // The pointer argument from vstore_half.
2150 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002151
Kévin Petite8edce32019-04-10 14:23:32 +01002152 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002153 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2154 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002155 auto NewPointerTy =
2156 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002157 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002158
Kévin Petite8edce32019-04-10 14:23:32 +01002159 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2160 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002161
Kévin Petite8edce32019-04-10 14:23:32 +01002162 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002163 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2164 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002165
Kévin Petite8edce32019-04-10 14:23:32 +01002166 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2167 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002168
Kévin Petite8edce32019-04-10 14:23:32 +01002169 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002170 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2171 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002172
Kévin Petite8edce32019-04-10 14:23:32 +01002173 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002174 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002175
Kévin Petite8edce32019-04-10 14:23:32 +01002176 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002177
Kévin Petite8edce32019-04-10 14:23:32 +01002178 // Turn the packed x & y into the final component of our int2.
2179 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002180
Kévin Petite8edce32019-04-10 14:23:32 +01002181 // Turn the packed z & w into the final component of our int2.
2182 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002183
Kévin Petite8edce32019-04-10 14:23:32 +01002184 auto Combine = InsertElementInst::Create(
2185 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002186 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2187 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002188
Kévin Petite8edce32019-04-10 14:23:32 +01002189 // Cast the half* pointer to int2*.
2190 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002191
Kévin Petite8edce32019-04-10 14:23:32 +01002192 // Index into the correct address of the casted pointer.
2193 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002194
Kévin Petite8edce32019-04-10 14:23:32 +01002195 // Store to the int2* we casted to.
2196 return new StoreInst(Combine, Index, CI);
2197 });
David Neto22f144c2017-06-12 14:26:21 -04002198}
2199
SJW2c317da2020-03-23 07:39:13 -05002200bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2201 // convert half to float
2202 Module &M = *F.getParent();
2203 return replaceCallsWithValue(F, [&](CallInst *CI) {
2204 SmallVector<Type *, 3> types;
2205 SmallVector<Value *, 3> args;
2206 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2207 types.push_back(CI->getArgOperand(i)->getType());
2208 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002209 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002210
alan-baker5a8c3be2020-09-09 13:44:26 -04002211 auto NewFType =
2212 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2213 cast<VectorType>(CI->getType())
2214 ->getElementCount()
2215 .getKnownMinValue()),
2216 types, false);
SJW2c317da2020-03-23 07:39:13 -05002217
SJW61531372020-06-09 07:31:08 -05002218 std::string NewFName =
2219 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002220
2221 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2222
2223 auto NewCI = CallInst::Create(NewF, args, "", CI);
2224
2225 // Convert to the half type.
2226 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2227 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002228}
2229
SJW2c317da2020-03-23 07:39:13 -05002230bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2231 // convert half to float
2232 Module &M = *F.getParent();
2233 return replaceCallsWithValue(F, [&](CallInst *CI) {
2234 SmallVector<Type *, 3> types(3);
2235 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002236
SJW2c317da2020-03-23 07:39:13 -05002237 // Image
2238 types[0] = CI->getArgOperand(0)->getType();
2239 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002240
SJW2c317da2020-03-23 07:39:13 -05002241 // Coord
2242 types[1] = CI->getArgOperand(1)->getType();
2243 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002244
SJW2c317da2020-03-23 07:39:13 -05002245 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002246 types[2] =
2247 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2248 cast<VectorType>(CI->getArgOperand(2)->getType())
2249 ->getElementCount()
2250 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002251
SJW2c317da2020-03-23 07:39:13 -05002252 auto NewFType =
2253 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002254
SJW61531372020-06-09 07:31:08 -05002255 std::string NewFName =
2256 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002257
SJW2c317da2020-03-23 07:39:13 -05002258 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002259
SJW2c317da2020-03-23 07:39:13 -05002260 // Convert data to the float type.
2261 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2262 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002263
SJW2c317da2020-03-23 07:39:13 -05002264 return CallInst::Create(NewF, args, "", CI);
2265 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002266}
2267
SJW2c317da2020-03-23 07:39:13 -05002268bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2269 Function &F) {
2270 // convert read_image with int coords to float coords
2271 Module &M = *F.getParent();
2272 return replaceCallsWithValue(F, [&](CallInst *CI) {
2273 // The image.
2274 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002275
SJW2c317da2020-03-23 07:39:13 -05002276 // The sampler.
2277 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002278
SJW2c317da2020-03-23 07:39:13 -05002279 // The coordinate (integer type that we can't handle).
2280 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002281
SJW2c317da2020-03-23 07:39:13 -05002282 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2283 uint32_t components =
2284 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2285 Type *float_ty = nullptr;
2286 if (components == 1) {
2287 float_ty = Type::getFloatTy(M.getContext());
2288 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002289 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2290 cast<VectorType>(Arg2->getType())
2291 ->getElementCount()
2292 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002293 }
David Neto22f144c2017-06-12 14:26:21 -04002294
SJW2c317da2020-03-23 07:39:13 -05002295 auto NewFType = FunctionType::get(
2296 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2297
2298 std::string NewFName = F.getName().str();
2299 NewFName[NewFName.length() - 1] = 'f';
2300
2301 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2302
2303 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2304
2305 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2306 });
David Neto22f144c2017-06-12 14:26:21 -04002307}
2308
SJW2c317da2020-03-23 07:39:13 -05002309bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2310 return replaceCallsWithValue(F, [&](CallInst *CI) {
2311 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002312
SJW2c317da2020-03-23 07:39:13 -05002313 // We need to map the OpenCL constants to the SPIR-V equivalents.
2314 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2315 const auto ConstantMemorySemantics = ConstantInt::get(
2316 IntTy, spv::MemorySemanticsUniformMemoryMask |
2317 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002318
SJW2c317da2020-03-23 07:39:13 -05002319 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002320
SJW2c317da2020-03-23 07:39:13 -05002321 // The pointer.
2322 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002323
SJW2c317da2020-03-23 07:39:13 -05002324 // The memory scope.
2325 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002326
SJW2c317da2020-03-23 07:39:13 -05002327 // The memory semantics.
2328 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002329
SJW2c317da2020-03-23 07:39:13 -05002330 if (2 < CI->getNumArgOperands()) {
2331 // The unequal memory semantics.
2332 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002333
SJW2c317da2020-03-23 07:39:13 -05002334 // The value.
2335 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002336
SJW2c317da2020-03-23 07:39:13 -05002337 // The comparator.
2338 Params.push_back(CI->getArgOperand(1));
2339 } else if (1 < CI->getNumArgOperands()) {
2340 // The value.
2341 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002342 }
David Neto22f144c2017-06-12 14:26:21 -04002343
SJW2c317da2020-03-23 07:39:13 -05002344 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2345 });
David Neto22f144c2017-06-12 14:26:21 -04002346}
2347
SJW2c317da2020-03-23 07:39:13 -05002348bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2349 llvm::AtomicRMWInst::BinOp Op) {
2350 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002351 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2352 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002353 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002354 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002355 SyncScope::System, CI);
2356 });
2357}
David Neto22f144c2017-06-12 14:26:21 -04002358
SJW2c317da2020-03-23 07:39:13 -05002359bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2360 Module &M = *F.getParent();
2361 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002362 auto IntTy = Type::getInt32Ty(M.getContext());
2363 auto FloatTy = Type::getFloatTy(M.getContext());
2364
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002365 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2366 ConstantInt::get(IntTy, 1),
2367 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002368
2369 Constant *UpShuffleMask[4] = {
2370 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2371 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2372
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002373 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2374 UndefValue::get(FloatTy),
2375 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002376
Kévin Petite8edce32019-04-10 14:23:32 +01002377 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002378 auto Arg0 =
2379 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2380 ConstantVector::get(DownShuffleMask), "", CI);
2381 auto Arg1 =
2382 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2383 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002384 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002385
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002386 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002387 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002388
SJW61531372020-06-09 07:31:08 -05002389 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002390
Kévin Petite8edce32019-04-10 14:23:32 +01002391 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002392
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002393 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2394 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002395 });
David Neto22f144c2017-06-12 14:26:21 -04002396}
David Neto62653202017-10-16 19:05:18 -04002397
SJW2c317da2020-03-23 07:39:13 -05002398bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002399 // OpenCL's float result = fract(float x, float* ptr)
2400 //
2401 // In the LLVM domain:
2402 //
2403 // %floor_result = call spir_func float @floor(float %x)
2404 // store float %floor_result, float * %ptr
2405 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2406 // %result = call spir_func float
2407 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2408 //
2409 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2410 // and clspv.fract occur in the SPIR-V generator pass:
2411 //
2412 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2413 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2414 // ...
2415 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2416 // OpStore %ptr %floor_result
2417 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2418 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002419 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002420
David Neto62653202017-10-16 19:05:18 -04002421 using std::string;
2422
2423 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2424 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002425
SJW2c317da2020-03-23 07:39:13 -05002426 Module &M = *F.getParent();
2427 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002428
SJW2c317da2020-03-23 07:39:13 -05002429 // This is either float or a float vector. All the float-like
2430 // types are this type.
2431 auto result_ty = F.getReturnType();
2432
SJW61531372020-06-09 07:31:08 -05002433 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002434 Function *fmin_fn = M.getFunction(fmin_name);
2435 if (!fmin_fn) {
2436 // Make the fmin function.
2437 FunctionType *fn_ty =
2438 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2439 fmin_fn =
2440 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2441 fmin_fn->addFnAttr(Attribute::ReadNone);
2442 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2443 }
2444
SJW61531372020-06-09 07:31:08 -05002445 std::string floor_name =
2446 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002447 Function *floor_fn = M.getFunction(floor_name);
2448 if (!floor_fn) {
2449 // Make the floor function.
2450 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2451 floor_fn =
2452 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2453 floor_fn->addFnAttr(Attribute::ReadNone);
2454 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2455 }
2456
SJW61531372020-06-09 07:31:08 -05002457 std::string clspv_fract_name =
2458 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002459 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2460 if (!clspv_fract_fn) {
2461 // Make the clspv_fract function.
2462 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2463 clspv_fract_fn = cast<Function>(
2464 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2465 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2466 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2467 }
2468
2469 // Number of significant significand bits, whether represented or not.
2470 unsigned num_significand_bits;
2471 switch (result_ty->getScalarType()->getTypeID()) {
2472 case Type::HalfTyID:
2473 num_significand_bits = 11;
2474 break;
2475 case Type::FloatTyID:
2476 num_significand_bits = 24;
2477 break;
2478 case Type::DoubleTyID:
2479 num_significand_bits = 53;
2480 break;
2481 default:
2482 llvm_unreachable("Unhandled float type when processing fract builtin");
2483 break;
2484 }
2485 // Beware that the disassembler displays this value as
2486 // OpConstant %float 1
2487 // which is not quite right.
2488 const double kJustUnderOneScalar =
2489 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2490
2491 Constant *just_under_one =
2492 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2493 if (result_ty->isVectorTy()) {
2494 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002495 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002496 }
2497
2498 IRBuilder<> Builder(CI);
2499
2500 auto arg = CI->getArgOperand(0);
2501 auto ptr = CI->getArgOperand(1);
2502
2503 // Compute floor result and store it.
2504 auto floor = Builder.CreateCall(floor_fn, {arg});
2505 Builder.CreateStore(floor, ptr);
2506
2507 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2508 auto fract_result =
2509 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2510
2511 return fract_result;
2512 });
David Neto62653202017-10-16 19:05:18 -04002513}
alan-bakera52b7312020-10-26 08:58:51 -04002514
Kévin Petit8576f682020-11-02 14:51:32 +00002515bool ReplaceOpenCLBuiltinPass::replaceHadd(Function &F, bool is_signed,
alan-bakerb6da5132020-10-29 15:59:06 -04002516 Instruction::BinaryOps join_opcode) {
Kévin Petit8576f682020-11-02 14:51:32 +00002517 return replaceCallsWithValue(F, [is_signed, join_opcode](CallInst *Call) {
alan-bakerb6da5132020-10-29 15:59:06 -04002518 // a_shr = a >> 1
2519 // b_shr = b >> 1
2520 // add1 = a_shr + b_shr
2521 // join = a |join_opcode| b
2522 // and = join & 1
2523 // add = add1 + and
2524 const auto a = Call->getArgOperand(0);
2525 const auto b = Call->getArgOperand(1);
2526 IRBuilder<> builder(Call);
Kévin Petit8576f682020-11-02 14:51:32 +00002527 Value *a_shift, *b_shift;
2528 if (is_signed) {
2529 a_shift = builder.CreateAShr(a, 1);
2530 b_shift = builder.CreateAShr(b, 1);
2531 } else {
2532 a_shift = builder.CreateLShr(a, 1);
2533 b_shift = builder.CreateLShr(b, 1);
2534 }
alan-bakerb6da5132020-10-29 15:59:06 -04002535 auto add = builder.CreateAdd(a_shift, b_shift);
2536 auto join = BinaryOperator::Create(join_opcode, a, b, "", Call);
2537 auto constant_one = ConstantInt::get(a->getType(), 1);
2538 auto and_bit = builder.CreateAnd(join, constant_one);
2539 return builder.CreateAdd(add, and_bit);
2540 });
2541}
2542
alan-bakera52b7312020-10-26 08:58:51 -04002543bool ReplaceOpenCLBuiltinPass::replaceAddSat(Function &F, bool is_signed) {
2544 Module *module = F.getParent();
2545 return replaceCallsWithValue(F, [&module, is_signed](CallInst *Call) {
2546 // SPIR-V OpIAddCarry interprets inputs as unsigned. We use that
2547 // instruction for unsigned additions. For signed addition, it is more
2548 // complicated. For values with bit widths less than 32 bits, we extend
2549 // to the next power of two and perform the addition. For 32- and
2550 // 64-bit values we test the signedness of op1 to determine how to clamp
2551 // the addition.
2552 Type *ty = Call->getType();
2553 Value *op0 = Call->getArgOperand(0);
2554 Value *op1 = Call->getArgOperand(1);
2555 Value *result = nullptr;
2556 if (is_signed) {
2557 unsigned bitwidth = ty->getScalarSizeInBits();
2558 if (bitwidth < 32) {
2559 // sext_op0 = sext op0
2560 // sext_op1 = sext op1
2561 // add = add sext_op0 sext_op1
2562 // clamp = clamp(add, min, max)
2563 // result = trunc clamp
2564 unsigned extended_bits = static_cast<unsigned>(bitwidth << 1);
2565 // The clamp values are the signed min and max of the original bitwidth
2566 // sign extended to the extended bitwidth.
2567 Constant *scalar_min = ConstantInt::get(
2568 Call->getContext(),
2569 APInt::getSignedMinValue(bitwidth).sext(extended_bits));
2570 Constant *scalar_max = ConstantInt::get(
2571 Call->getContext(),
2572 APInt::getSignedMaxValue(bitwidth).sext(extended_bits));
2573 Constant *min = scalar_min;
2574 Constant *max = scalar_max;
2575 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2576 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2577 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2578 }
2579 Type *extended_scalar_ty =
2580 IntegerType::get(Call->getContext(), extended_bits);
2581 Type *extended_ty = extended_scalar_ty;
2582 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2583 extended_ty =
2584 VectorType::get(extended_scalar_ty, vec_ty->getElementCount());
2585 }
2586 auto sext_op0 =
2587 CastInst::Create(Instruction::SExt, op0, extended_ty, "", Call);
2588 auto sext_op1 =
2589 CastInst::Create(Instruction::SExt, op1, extended_ty, "", Call);
2590 // Add the nsw flag since we know no overflow can occur.
2591 auto add = BinaryOperator::CreateNSW(Instruction::Add, sext_op0,
2592 sext_op1, "", Call);
2593 FunctionType *func_ty = FunctionType::get(
2594 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2595
2596 // Don't use the type in GetMangledFunctionName to ensure we get
2597 // signed parameters.
2598 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
2599 uint32_t vec_width = 1;
2600 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2601 vec_width = vec_ty->getElementCount().getKnownMinValue();
2602 }
2603 if (extended_bits == 32) {
2604 if (vec_width == 1) {
2605 sclamp_name += "iii";
2606 } else {
2607 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
2608 }
2609 } else {
2610 if (vec_width == 1) {
2611 sclamp_name += "sss";
2612 } else {
2613 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2614 }
2615 }
2616 auto sclamp_callee = module->getOrInsertFunction(sclamp_name, func_ty);
2617 auto clamp = CallInst::Create(sclamp_callee, {add, min, max}, "", Call);
2618 result = CastInst::Create(Instruction::Trunc, clamp, ty, "", Call);
2619 } else {
2620 // Pseudo-code:
2621 // c = a + b;
2622 // if (b < 0)
2623 // c = c > a ? min : c;
2624 // else
2625 // c = c < a ? max : c;
2626 //
2627 unsigned bitwidth = ty->getScalarSizeInBits();
2628 Constant *scalar_min = ConstantInt::get(
2629 Call->getContext(), APInt::getSignedMinValue(bitwidth));
2630 Constant *scalar_max = ConstantInt::get(
2631 Call->getContext(), APInt::getSignedMaxValue(bitwidth));
2632 Constant *min = scalar_min;
2633 Constant *max = scalar_max;
2634 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2635 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2636 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2637 }
2638 auto zero = Constant::getNullValue(ty);
2639 // Cannot add the nsw flag.
2640 auto add = BinaryOperator::Create(Instruction::Add, op0, op1, "", Call);
2641 auto add_gt_op0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SGT,
2642 add, op0, "", Call);
2643 auto min_clamp = SelectInst::Create(add_gt_op0, min, add, "", Call);
2644 auto add_lt_op0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
2645 add, op0, "", Call);
2646 auto max_clamp = SelectInst::Create(add_lt_op0, max, add, "", Call);
2647 auto op1_lt_0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
2648 op1, zero, "", Call);
2649 result = SelectInst::Create(op1_lt_0, min_clamp, max_clamp, "", Call);
2650 }
2651 } else {
2652 // Just use OpIAddCarry and use the carry to clamp the result.
2653 auto ret_ty = StructType::get(Call->getContext(), {ty, ty});
2654 auto add = clspv::InsertSPIRVOp(
2655 Call, spv::OpIAddCarry, {Attribute::ReadNone}, ret_ty, {op0, op1});
2656 auto ex0 = ExtractValueInst::Create(add, {0}, "", Call);
2657 auto ex1 = ExtractValueInst::Create(add, {1}, "", Call);
2658 auto cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, ex1,
2659 Constant::getNullValue(ty), "", Call);
2660 result =
2661 SelectInst::Create(cmp, ex0, Constant::getAllOnesValue(ty), "", Call);
2662 }
2663
2664 return result;
2665 });
2666}
alan-baker4986eff2020-10-29 13:38:00 -04002667
2668bool ReplaceOpenCLBuiltinPass::replaceAtomicLoad(Function &F) {
2669 return replaceCallsWithValue(F, [](CallInst *Call) {
2670 auto pointer = Call->getArgOperand(0);
2671 // Clang emits an address space cast to the generic address space. Skip the
2672 // cast and use the input directly.
2673 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2674 pointer = cast->getPointerOperand();
2675 }
2676 Value *order_arg =
2677 Call->getNumArgOperands() > 1 ? Call->getArgOperand(1) : nullptr;
2678 Value *scope_arg =
2679 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2680 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2681 clspv::AddressSpace::Global;
2682 auto order = MemoryOrderSemantics(order_arg, is_global, Call,
2683 spv::MemorySemanticsAcquireMask);
2684 auto scope = MemoryScope(scope_arg, is_global, Call);
2685 return InsertSPIRVOp(Call, spv::OpAtomicLoad, {Attribute::Convergent},
2686 Call->getType(), {pointer, scope, order});
2687 });
2688}
2689
2690bool ReplaceOpenCLBuiltinPass::replaceExplicitAtomics(
2691 Function &F, spv::Op Op, spv::MemorySemanticsMask semantics) {
2692 return replaceCallsWithValue(F, [Op, semantics](CallInst *Call) {
2693 auto pointer = Call->getArgOperand(0);
2694 // Clang emits an address space cast to the generic address space. Skip the
2695 // cast and use the input directly.
2696 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2697 pointer = cast->getPointerOperand();
2698 }
2699 Value *value = Call->getArgOperand(1);
2700 Value *order_arg =
2701 Call->getNumArgOperands() > 2 ? Call->getArgOperand(2) : nullptr;
2702 Value *scope_arg =
2703 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2704 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2705 clspv::AddressSpace::Global;
2706 auto scope = MemoryScope(scope_arg, is_global, Call);
2707 auto order = MemoryOrderSemantics(order_arg, is_global, Call, semantics);
2708 return InsertSPIRVOp(Call, Op, {Attribute::Convergent}, Call->getType(),
2709 {pointer, scope, order, value});
2710 });
2711}
2712
2713bool ReplaceOpenCLBuiltinPass::replaceAtomicCompareExchange(Function &F) {
2714 return replaceCallsWithValue(F, [](CallInst *Call) {
2715 auto pointer = Call->getArgOperand(0);
2716 // Clang emits an address space cast to the generic address space. Skip the
2717 // cast and use the input directly.
2718 if (auto cast = dyn_cast<AddrSpaceCastOperator>(pointer)) {
2719 pointer = cast->getPointerOperand();
2720 }
2721 auto expected = Call->getArgOperand(1);
2722 if (auto cast = dyn_cast<AddrSpaceCastOperator>(expected)) {
2723 expected = cast->getPointerOperand();
2724 }
2725 auto value = Call->getArgOperand(2);
2726 bool is_global = pointer->getType()->getPointerAddressSpace() ==
2727 clspv::AddressSpace::Global;
2728 Value *success_arg =
2729 Call->getNumArgOperands() > 3 ? Call->getArgOperand(3) : nullptr;
2730 Value *failure_arg =
2731 Call->getNumArgOperands() > 4 ? Call->getArgOperand(4) : nullptr;
2732 Value *scope_arg =
2733 Call->getNumArgOperands() > 5 ? Call->getArgOperand(5) : nullptr;
2734 auto scope = MemoryScope(scope_arg, is_global, Call);
2735 auto success = MemoryOrderSemantics(success_arg, is_global, Call,
2736 spv::MemorySemanticsAcquireReleaseMask);
2737 auto failure = MemoryOrderSemantics(failure_arg, is_global, Call,
2738 spv::MemorySemanticsAcquireMask);
2739
2740 // If the value pointed to by |expected| equals the value pointed to by
2741 // |pointer|, |value| is written into |pointer|, otherwise the value in
2742 // |pointer| is written into |expected|. In order to avoid extra stores,
2743 // the basic block with the original atomic is split and the store is
2744 // performed in the |then| block. The condition is the inversion of the
2745 // comparison result.
2746 IRBuilder<> builder(Call);
2747 auto load = builder.CreateLoad(expected);
2748 auto cmp_xchg = InsertSPIRVOp(
2749 Call, spv::OpAtomicCompareExchange, {Attribute::Convergent},
2750 value->getType(), {pointer, scope, success, failure, value, load});
2751 auto cmp = builder.CreateICmpEQ(cmp_xchg, load);
2752 auto not_cmp = builder.CreateNot(cmp);
2753 auto then_branch = SplitBlockAndInsertIfThen(not_cmp, Call, false);
2754 builder.SetInsertPoint(then_branch);
2755 builder.CreateStore(cmp_xchg, expected);
2756 return cmp;
2757 });
2758}
alan-bakercc2bafb2020-11-02 08:30:18 -05002759
2760bool ReplaceOpenCLBuiltinPass::replaceClz(Function &F) {
2761 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
2762 return false;
2763
2764 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
2765 if (bitwidth == 32 || bitwidth > 64)
2766 return false;
2767
2768 return replaceCallsWithValue(F, [&F, bitwidth](CallInst *Call) {
2769 auto in = Call->getArgOperand(0);
2770 IRBuilder<> builder(Call);
2771 auto int32_ty = builder.getInt32Ty();
2772 Type *ty = int32_ty;
2773 if (auto vec_ty = dyn_cast<VectorType>(Call->getType())) {
2774 ty = VectorType::get(ty, vec_ty->getElementCount());
2775 }
2776 auto clz_32bit_ty = FunctionType::get(ty, {ty}, false);
2777 std::string clz_32bit_name = Builtins::GetMangledFunctionName("clz", ty);
2778 auto clz_32bit =
2779 F.getParent()->getOrInsertFunction(clz_32bit_name, clz_32bit_ty);
2780 if (bitwidth < 32) {
2781 // Extend the input to 32-bits and perform a clz. The clz for 32-bit is
2782 // translated as 31 - FindUMsb(in). Adjust that result to the right size.
2783 auto zext = builder.CreateZExt(in, ty);
2784 auto clz = builder.CreateCall(clz_32bit, {zext});
2785 Constant *sub_const = builder.getInt32(32 - bitwidth);
2786 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2787 sub_const =
2788 ConstantVector::getSplat(vec_ty->getElementCount(), sub_const);
2789 }
2790 auto sub = builder.CreateSub(clz, sub_const);
2791 return builder.CreateTrunc(sub, Call->getType());
2792 } else {
2793 // Split the input into top and bottom parts and perform clz on both. If
2794 // the most significant 1 is in the upper 32-bits, return the top result
2795 // directly. Otherwise return 32 + the bottom result to adjust for the
2796 // correct size.
2797 auto lshr = builder.CreateLShr(in, 32);
2798 auto top_bits = builder.CreateTrunc(lshr, ty);
2799 auto bot_bits = builder.CreateTrunc(in, ty);
2800 auto top_clz = builder.CreateCall(clz_32bit, {top_bits});
2801 auto bot_clz = builder.CreateCall(clz_32bit, {bot_bits});
2802 Constant *c32 = builder.getInt32(32);
2803 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2804 c32 = ConstantVector::getSplat(vec_ty->getElementCount(), c32);
2805 }
2806 auto cmp = builder.CreateICmpEQ(top_clz, c32);
2807 auto bot_adjust = builder.CreateAdd(bot_clz, c32);
2808 auto sel = builder.CreateSelect(cmp, bot_adjust, top_clz);
2809 return builder.CreateZExt(sel, Call->getType());
2810 }
2811 });
2812}