blob: 280030f6cc8498cdb11d7723efcc9a0d0462c0c0 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
#include <math.h>
#include <functional>
#include <string>
#include <tuple>

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "spirv/unified1/spirv.hpp"

#include "clspv/AddressSpace.h"
#include "clspv/DescriptorMap.h"
#include "clspv/Option.h"

#include "Builtins.h"
#include "Constants.h"
#include "Passes.h"
#include "SPIRVOp.h"
#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040041
SJW2c317da2020-03-23 07:39:13 -050042using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040043using namespace llvm;
44
45#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
46
47namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000048
// Returns the index of the highest set bit of |v|, i.e. floor(log2(v)).
// Returns 0 when |v| is 0 or 1.
//
// NOTE: despite its name this is NOT a count-leading-zeros. Callers in this
// file subtract two results to obtain the shift distance between two
// single-bit masks, which is exactly what a bit index provides.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;
  // Each right shift that still leaves a non-zero value means the top set bit
  // was at least one position higher.
  while ((v >>= 1) != 0) {
    ++index;
  }
  return index;
}
68
Kévin Petitfdfa92e2019-09-25 14:20:58 +010069Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
70 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040071 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -040072 IntTy = FixedVectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
SJW2c317da2020-03-23 07:39:13 -050077bool replaceCallsWithValue(Function &F,
78 std::function<Value *(CallInst *)> Replacer) {
79
80 bool Changed = false;
81
82 SmallVector<Instruction *, 4> ToRemoves;
83
84 // Walk the users of the function.
85 for (auto &U : F.uses()) {
86 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
87
88 auto NewValue = Replacer(CI);
89
90 if (NewValue != nullptr) {
91 CI->replaceAllUsesWith(NewValue);
92
93 // Lastly, remember to remove the user.
94 ToRemoves.push_back(CI);
95 }
96 }
97 }
98
99 Changed = !ToRemoves.empty();
100
101 // And cleanup the calls we don't use anymore.
102 for (auto V : ToRemoves) {
103 V->eraseFromParent();
104 }
105
106 return Changed;
107}
108
David Neto22f144c2017-06-12 14:26:21 -0400109struct ReplaceOpenCLBuiltinPass final : public ModulePass {
110 static char ID;
111 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
112
113 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500114 bool runOnFunction(Function &F);
115 bool replaceAbs(Function &F);
116 bool replaceAbsDiff(Function &F, bool is_signed);
117 bool replaceCopysign(Function &F);
118 bool replaceRecip(Function &F);
119 bool replaceDivide(Function &F);
120 bool replaceDot(Function &F);
121 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500122 bool replaceExp10(Function &F, const std::string &basename);
123 bool replaceLog10(Function &F, const std::string &basename);
alan-baker12d2c182020-07-20 08:22:42 -0400124 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500125 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100126 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500127 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
128 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
129 bool replaceIsFinite(Function &F);
130 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
131 bool replaceUpsample(Function &F);
132 bool replaceRotate(Function &F);
133 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
134 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
135 bool replaceSelect(Function &F);
136 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500137 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500138 bool replaceSignbit(Function &F, bool is_vec);
139 bool replaceMul(Function &F, bool is_float, bool is_mad);
140 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
141 bool replaceVloadHalf(Function &F);
142 bool replaceVloadHalf2(Function &F);
143 bool replaceVloadHalf4(Function &F);
144 bool replaceClspvVloadaHalf2(Function &F);
145 bool replaceClspvVloadaHalf4(Function &F);
146 bool replaceVstoreHalf(Function &F, int vec_size);
147 bool replaceVstoreHalf(Function &F);
148 bool replaceVstoreHalf2(Function &F);
149 bool replaceVstoreHalf4(Function &F);
150 bool replaceHalfReadImage(Function &F);
151 bool replaceHalfWriteImage(Function &F);
152 bool replaceSampledReadImageWithIntCoords(Function &F);
153 bool replaceAtomics(Function &F, spv::Op Op);
154 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
155 bool replaceCross(Function &F);
156 bool replaceFract(Function &F, int vec_size);
157 bool replaceVload(Function &F);
158 bool replaceVstore(Function &F);
David Neto22f144c2017-06-12 14:26:21 -0400159};
SJW2c317da2020-03-23 07:39:13 -0500160
Kévin Petit91bc72e2019-04-08 15:17:46 +0100161} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400162
163char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400164INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
165 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400166
167namespace clspv {
168ModulePass *createReplaceOpenCLBuiltinPass() {
169 return new ReplaceOpenCLBuiltinPass();
170}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400171} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400172
173bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500174 std::list<Function *> func_list;
175 for (auto &F : M.getFunctionList()) {
176 // process only function declarations
177 if (F.isDeclaration() && runOnFunction(F)) {
178 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000179 }
180 }
SJW2c317da2020-03-23 07:39:13 -0500181 if (func_list.size() != 0) {
182 // recursively convert functions, but first remove dead
183 for (auto *F : func_list) {
184 if (F->use_empty()) {
185 F->eraseFromParent();
186 }
187 }
188 runOnModule(M);
189 return true;
190 }
191 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000192}
193
SJW2c317da2020-03-23 07:39:13 -0500194bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
195 auto &FI = Builtins::Lookup(&F);
196 switch (FI.getType()) {
197 case Builtins::kAbs:
198 if (!FI.getParameter(0).is_signed) {
199 return replaceAbs(F);
200 }
201 break;
202 case Builtins::kAbsDiff:
203 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
204 case Builtins::kCopysign:
205 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100206
SJW2c317da2020-03-23 07:39:13 -0500207 case Builtins::kHalfRecip:
208 case Builtins::kNativeRecip:
209 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100210
SJW2c317da2020-03-23 07:39:13 -0500211 case Builtins::kHalfDivide:
212 case Builtins::kNativeDivide:
213 return replaceDivide(F);
214
215 case Builtins::kDot:
216 return replaceDot(F);
217
218 case Builtins::kExp10:
219 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500220 case Builtins::kNativeExp10:
221 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500222
223 case Builtins::kLog10:
224 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500225 case Builtins::kNativeLog10:
226 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500227
228 case Builtins::kFmod:
229 return replaceFmod(F);
230
231 case Builtins::kBarrier:
232 case Builtins::kWorkGroupBarrier:
233 return replaceBarrier(F);
234
alan-baker12d2c182020-07-20 08:22:42 -0400235 case Builtins::kSubGroupBarrier:
236 return replaceBarrier(F, true);
237
SJW2c317da2020-03-23 07:39:13 -0500238 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400239 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500240 case Builtins::kReadMemFence:
241 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
242 case Builtins::kWriteMemFence:
243 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
244
245 // Relational
246 case Builtins::kIsequal:
247 return replaceRelational(F, CmpInst::FCMP_OEQ,
248 FI.getParameter(0).vector_size ? -1 : 1);
249 case Builtins::kIsgreater:
250 return replaceRelational(F, CmpInst::FCMP_OGT,
251 FI.getParameter(0).vector_size ? -1 : 1);
252 case Builtins::kIsgreaterequal:
253 return replaceRelational(F, CmpInst::FCMP_OGE,
254 FI.getParameter(0).vector_size ? -1 : 1);
255 case Builtins::kIsless:
256 return replaceRelational(F, CmpInst::FCMP_OLT,
257 FI.getParameter(0).vector_size ? -1 : 1);
258 case Builtins::kIslessequal:
259 return replaceRelational(F, CmpInst::FCMP_OLE,
260 FI.getParameter(0).vector_size ? -1 : 1);
261 case Builtins::kIsnotequal:
262 return replaceRelational(F, CmpInst::FCMP_ONE,
263 FI.getParameter(0).vector_size ? -1 : 1);
264
265 case Builtins::kIsinf: {
266 bool is_vec = FI.getParameter(0).vector_size != 0;
267 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
268 }
269 case Builtins::kIsnan: {
270 bool is_vec = FI.getParameter(0).vector_size != 0;
271 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
272 }
273
274 case Builtins::kIsfinite:
275 return replaceIsFinite(F);
276
277 case Builtins::kAll: {
278 bool is_vec = FI.getParameter(0).vector_size != 0;
279 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
280 }
281 case Builtins::kAny: {
282 bool is_vec = FI.getParameter(0).vector_size != 0;
283 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
284 }
285
286 case Builtins::kUpsample:
287 return replaceUpsample(F);
288
289 case Builtins::kRotate:
290 return replaceRotate(F);
291
292 case Builtins::kConvert:
293 return replaceConvert(F, FI.getParameter(0).is_signed,
294 FI.getReturnType().is_signed);
295
296 case Builtins::kAtomicInc:
297 return replaceAtomics(F, spv::OpAtomicIIncrement);
298 case Builtins::kAtomicDec:
299 return replaceAtomics(F, spv::OpAtomicIDecrement);
300 case Builtins::kAtomicCmpxchg:
301 return replaceAtomics(F, spv::OpAtomicCompareExchange);
302 case Builtins::kAtomicAdd:
303 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
304 case Builtins::kAtomicSub:
305 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
306 case Builtins::kAtomicXchg:
307 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
308 case Builtins::kAtomicMin:
309 return replaceAtomics(F, FI.getParameter(0).is_signed
310 ? llvm::AtomicRMWInst::Min
311 : llvm::AtomicRMWInst::UMin);
312 case Builtins::kAtomicMax:
313 return replaceAtomics(F, FI.getParameter(0).is_signed
314 ? llvm::AtomicRMWInst::Max
315 : llvm::AtomicRMWInst::UMax);
316 case Builtins::kAtomicAnd:
317 return replaceAtomics(F, llvm::AtomicRMWInst::And);
318 case Builtins::kAtomicOr:
319 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
320 case Builtins::kAtomicXor:
321 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
322
323 case Builtins::kCross:
324 if (FI.getParameter(0).vector_size == 4) {
325 return replaceCross(F);
326 }
327 break;
328
329 case Builtins::kFract:
330 if (FI.getParameterCount()) {
331 return replaceFract(F, FI.getParameter(0).vector_size);
332 }
333 break;
334
335 case Builtins::kMadHi:
336 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
337 case Builtins::kMulHi:
338 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
339
340 case Builtins::kMad:
341 case Builtins::kMad24:
342 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
343 true);
344 case Builtins::kMul24:
345 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
346 false);
347
348 case Builtins::kSelect:
349 return replaceSelect(F);
350
351 case Builtins::kBitselect:
352 return replaceBitSelect(F);
353
354 case Builtins::kVload:
355 return replaceVload(F);
356
357 case Builtins::kVloadaHalf:
358 case Builtins::kVloadHalf:
359 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
360
361 case Builtins::kVstore:
362 return replaceVstore(F);
363
364 case Builtins::kVstoreHalf:
365 case Builtins::kVstoreaHalf:
366 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
367
368 case Builtins::kSmoothstep: {
369 int vec_size = FI.getLastParameter().vector_size;
370 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500371 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500372 }
373 break;
374 }
375 case Builtins::kStep: {
376 int vec_size = FI.getLastParameter().vector_size;
377 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500378 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500379 }
380 break;
381 }
382
383 case Builtins::kSignbit:
384 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
385
386 case Builtins::kReadImageh:
387 return replaceHalfReadImage(F);
388 case Builtins::kReadImagef:
389 case Builtins::kReadImagei:
390 case Builtins::kReadImageui: {
391 if (FI.getParameter(1).isSampler() &&
392 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
393 return replaceSampledReadImageWithIntCoords(F);
394 }
395 break;
396 }
397
398 case Builtins::kWriteImageh:
399 return replaceHalfWriteImage(F);
400
Kévin Petit1cb45112020-04-27 18:55:48 +0100401 case Builtins::kPrefetch:
402 return replacePrefetch(F);
403
SJW2c317da2020-03-23 07:39:13 -0500404 default:
405 break;
406 }
407
408 return false;
409}
410
411bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
412 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400413 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100414}
415
SJW2c317da2020-03-23 07:39:13 -0500416bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
417 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100418 auto XValue = CI->getOperand(0);
419 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100420
Kévin Petite8edce32019-04-10 14:23:32 +0100421 IRBuilder<> Builder(CI);
422 auto XmY = Builder.CreateSub(XValue, YValue);
423 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100424
SJW2c317da2020-03-23 07:39:13 -0500425 Value *Cmp = nullptr;
426 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100427 Cmp = Builder.CreateICmpSGT(YValue, XValue);
428 } else {
429 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100430 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100431
Kévin Petite8edce32019-04-10 14:23:32 +0100432 return Builder.CreateSelect(Cmp, YmX, XmY);
433 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100434}
435
SJW2c317da2020-03-23 07:39:13 -0500436bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
437 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100438 auto XValue = CI->getOperand(0);
439 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100440
Kévin Petite8edce32019-04-10 14:23:32 +0100441 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100442
SJW2c317da2020-03-23 07:39:13 -0500443 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400444 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400445 IntTy = FixedVectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100446 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100447
Kévin Petite8edce32019-04-10 14:23:32 +0100448 // Return X with the sign of Y
449
450 // Sign bit masks
451 auto SignBit = IntTy->getScalarSizeInBits() - 1;
452 auto SignBitMask = 1 << SignBit;
453 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
454 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
455
456 IRBuilder<> Builder(CI);
457
458 // Extract sign of Y
459 auto YInt = Builder.CreateBitCast(YValue, IntTy);
460 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
461
462 // Clear sign bit in X
463 auto XInt = Builder.CreateBitCast(XValue, IntTy);
464 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
465
466 // Insert sign bit of Y into X
467 auto NewXInt = Builder.CreateOr(XInt, YSign);
468
469 // And cast back to floating-point
470 return Builder.CreateBitCast(NewXInt, Ty);
471 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100472}
473
SJW2c317da2020-03-23 07:39:13 -0500474bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
475 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100476 // Recip has one arg.
477 auto Arg = CI->getOperand(0);
478 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
479 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
480 });
David Neto22f144c2017-06-12 14:26:21 -0400481}
482
SJW2c317da2020-03-23 07:39:13 -0500483bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
484 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100485 auto Op0 = CI->getOperand(0);
486 auto Op1 = CI->getOperand(1);
487 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
488 });
David Neto22f144c2017-06-12 14:26:21 -0400489}
490
SJW2c317da2020-03-23 07:39:13 -0500491bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
492 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100493 auto Op0 = CI->getOperand(0);
494 auto Op1 = CI->getOperand(1);
495
SJW2c317da2020-03-23 07:39:13 -0500496 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100497 if (Op0->getType()->isVectorTy()) {
498 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
499 CI->getType(), {Op0, Op1});
500 } else {
501 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
502 }
503
504 return V;
505 });
506}
507
SJW2c317da2020-03-23 07:39:13 -0500508bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500509 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500510 // convert to natural
511 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500512 std::string NewFName = basename.substr(0, slen);
513 NewFName =
514 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400515
SJW2c317da2020-03-23 07:39:13 -0500516 Module &M = *F.getParent();
517 return replaceCallsWithValue(F, [&](CallInst *CI) {
518 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
519
520 auto Arg = CI->getOperand(0);
521
522 // Constant of the natural log of 10 (ln(10)).
523 const double Ln10 =
524 2.302585092994045684017991454684364207601101488628772976033;
525
526 auto Mul = BinaryOperator::Create(
527 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
528
529 return CallInst::Create(NewF, Mul, "", CI);
530 });
David Neto22f144c2017-06-12 14:26:21 -0400531}
532
SJW2c317da2020-03-23 07:39:13 -0500533bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100534 // OpenCL fmod(x,y) is x - y * trunc(x/y)
535 // The sign for a non-zero result is taken from x.
536 // (Try an example.)
537 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500538 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100539 auto Op0 = CI->getOperand(0);
540 auto Op1 = CI->getOperand(1);
541 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
542 });
543}
544
SJW2c317da2020-03-23 07:39:13 -0500545bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500546 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500547 // convert to natural
548 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500549 std::string NewFName = basename.substr(0, slen);
550 NewFName =
551 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400552
SJW2c317da2020-03-23 07:39:13 -0500553 Module &M = *F.getParent();
554 return replaceCallsWithValue(F, [&](CallInst *CI) {
555 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
556
557 auto Arg = CI->getOperand(0);
558
559 // Constant of the reciprocal of the natural log of 10 (ln(10)).
560 const double Ln10 =
561 0.434294481903251827651128918916605082294397005803666566114;
562
563 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
564
565 return BinaryOperator::Create(Instruction::FMul,
566 ConstantFP::get(Arg->getType(), Ln10), NewCI,
567 "", CI);
568 });
David Neto22f144c2017-06-12 14:26:21 -0400569}
570
alan-baker12d2c182020-07-20 08:22:42 -0400571bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400572
573 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
574
alan-baker12d2c182020-07-20 08:22:42 -0400575 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100576 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400577
Kévin Petitc4643922019-06-17 19:32:05 +0100578 // We need to map the OpenCL constants to the SPIR-V equivalents.
579 const auto LocalMemFence =
580 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
581 const auto GlobalMemFence =
582 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400583 const auto ConstantAcquireRelease = ConstantInt::get(
584 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100585 const auto ConstantScopeDevice =
586 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
587 const auto ConstantScopeWorkgroup =
588 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400589 const auto ConstantScopeSubgroup =
590 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400591
Kévin Petitc4643922019-06-17 19:32:05 +0100592 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
593 const auto LocalMemFenceMask =
594 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
595 const auto WorkgroupShiftAmount =
596 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
597 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
598 Instruction::Shl, LocalMemFenceMask,
599 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400600
Kévin Petitc4643922019-06-17 19:32:05 +0100601 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
602 const auto GlobalMemFenceMask =
603 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
604 const auto UniformShiftAmount =
605 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
606 const auto MemorySemanticsUniform = BinaryOperator::Create(
607 Instruction::Shl, GlobalMemFenceMask,
608 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400609
Kévin Petitc4643922019-06-17 19:32:05 +0100610 // And combine the above together, also adding in
alan-baker12d2c182020-07-20 08:22:42 -0400611 // MemorySemanticsAcquireReleaseMask.
Kévin Petitc4643922019-06-17 19:32:05 +0100612 auto MemorySemantics =
613 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400614 ConstantAcquireRelease, "", CI);
Kévin Petitc4643922019-06-17 19:32:05 +0100615 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
616 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400617
alan-baker12d2c182020-07-20 08:22:42 -0400618 // If the memory scope is not specified explicitly, it is either Subgroup
619 // or Workgroup depending on the type of barrier.
620 Value *MemoryScope =
621 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
622 if (CI->data_operands_size() > 1) {
623 enum {
624 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
625 CL_MEMORY_SCOPE_DEVICE = 0x2,
626 CL_MEMORY_SCOPE_SUBGROUP = 0x4
627 };
628 // The call was given an explicit memory scope.
629 const auto MemoryScopeSubgroup =
630 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
631 const auto MemoryScopeDevice =
632 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400633
alan-baker12d2c182020-07-20 08:22:42 -0400634 auto Cmp =
635 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
636 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
637 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
638 ConstantScopeWorkgroup, "", CI);
639 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
640 MemoryScopeDevice, CI->getOperand(1), "", CI);
641 MemoryScope =
642 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
643 }
644
645 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
646 // the type of barrier;
647 const auto ExecutionScope =
648 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400649
Kévin Petitc4643922019-06-17 19:32:05 +0100650 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
651 {Attribute::NoDuplicate}, CI->getType(),
652 {ExecutionScope, MemoryScope, MemorySemantics});
653 });
David Neto22f144c2017-06-12 14:26:21 -0400654}
655
SJW2c317da2020-03-23 07:39:13 -0500656bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
657 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400658
SJW2c317da2020-03-23 07:39:13 -0500659 return replaceCallsWithValue(F, [&](CallInst *CI) {
660 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
David Neto22f144c2017-06-12 14:26:21 -0400661
SJW2c317da2020-03-23 07:39:13 -0500662 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400663
SJW2c317da2020-03-23 07:39:13 -0500664 // We need to map the OpenCL constants to the SPIR-V equivalents.
665 const auto LocalMemFence =
666 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
667 const auto GlobalMemFence =
668 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
669 const auto ConstantMemorySemantics =
670 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400671 const auto ConstantScopeWorkgroup =
672 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400673
SJW2c317da2020-03-23 07:39:13 -0500674 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
675 const auto LocalMemFenceMask =
676 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
677 const auto WorkgroupShiftAmount =
678 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
679 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
680 Instruction::Shl, LocalMemFenceMask,
681 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400682
SJW2c317da2020-03-23 07:39:13 -0500683 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
684 const auto GlobalMemFenceMask =
685 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
686 const auto UniformShiftAmount =
687 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
688 const auto MemorySemanticsUniform = BinaryOperator::Create(
689 Instruction::Shl, GlobalMemFenceMask,
690 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400691
SJW2c317da2020-03-23 07:39:13 -0500692 // And combine the above together, also adding in
693 // MemorySemanticsSequentiallyConsistentMask.
694 auto MemorySemantics =
695 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
696 ConstantMemorySemantics, "", CI);
697 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
698 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400699
alan-baker12d2c182020-07-20 08:22:42 -0400700 // Memory Scope is always workgroup.
701 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400702
SJW2c317da2020-03-23 07:39:13 -0500703 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier, {}, CI->getType(),
704 {MemoryScope, MemorySemantics});
705 });
David Neto22f144c2017-06-12 14:26:21 -0400706}
707
Kévin Petit1cb45112020-04-27 18:55:48 +0100708bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
709 bool Changed = false;
710
711 SmallVector<Instruction *, 4> ToRemoves;
712
713 // Find all calls to the function
714 for (auto &U : F.uses()) {
715 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
716 ToRemoves.push_back(CI);
717 }
718 }
719
720 Changed = !ToRemoves.empty();
721
722 // Delete them
723 for (auto V : ToRemoves) {
724 V->eraseFromParent();
725 }
726
727 return Changed;
728}
729
SJW2c317da2020-03-23 07:39:13 -0500730bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
731 CmpInst::Predicate P,
732 int32_t C) {
733 return replaceCallsWithValue(F, [&](CallInst *CI) {
734 // The predicate to use in the CmpInst.
735 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400736
SJW2c317da2020-03-23 07:39:13 -0500737 // The value to return for true.
738 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400739
SJW2c317da2020-03-23 07:39:13 -0500740 // The value to return for false.
741 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400742
SJW2c317da2020-03-23 07:39:13 -0500743 auto Arg1 = CI->getOperand(0);
744 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400745
SJW2c317da2020-03-23 07:39:13 -0500746 const auto Cmp =
747 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400748
SJW2c317da2020-03-23 07:39:13 -0500749 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
750 });
David Neto22f144c2017-06-12 14:26:21 -0400751}
752
SJW2c317da2020-03-23 07:39:13 -0500753bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
754 spv::Op SPIRVOp,
755 int32_t C) {
756 Module &M = *F.getParent();
757 return replaceCallsWithValue(F, [&](CallInst *CI) {
758 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400759
SJW2c317da2020-03-23 07:39:13 -0500760 // The value to return for true.
761 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400762
SJW2c317da2020-03-23 07:39:13 -0500763 // The value to return for false.
764 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -0400765
SJW2c317da2020-03-23 07:39:13 -0500766 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -0400767 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400768 CorrespondingBoolTy = FixedVectorType::get(
769 Type::getInt1Ty(M.getContext()), CIVecTy->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -0400770 }
David Neto22f144c2017-06-12 14:26:21 -0400771
SJW2c317da2020-03-23 07:39:13 -0500772 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
773 CorrespondingBoolTy, {CI->getOperand(0)});
774
775 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
776 });
David Neto22f144c2017-06-12 14:26:21 -0400777}
778
SJW2c317da2020-03-23 07:39:13 -0500779bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
780 Module &M = *F.getParent();
781 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100782 auto &C = M.getContext();
783 auto Val = CI->getOperand(0);
784 auto ValTy = Val->getType();
785 auto RetTy = CI->getType();
786
787 // Get a suitable integer type to represent the number
788 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
789
790 // Create Mask
791 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -0500792 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100793 switch (ScalarSize) {
794 case 16:
795 InfMask = ConstantInt::get(IntTy, 0x7C00U);
796 break;
797 case 32:
798 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
799 break;
800 case 64:
801 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
802 break;
803 default:
804 llvm_unreachable("Unsupported floating-point type");
805 }
806
807 IRBuilder<> Builder(CI);
808
809 // Bitcast to int
810 auto ValInt = Builder.CreateBitCast(Val, IntTy);
811
812 // Mask and compare
813 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
814 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
815
816 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -0500817 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100818 if (ValTy->isVectorTy()) {
819 RetTrue = ConstantInt::getSigned(RetTy, -1);
820 } else {
821 RetTrue = ConstantInt::get(RetTy, 1);
822 }
823 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
824 });
825}
826
SJW2c317da2020-03-23 07:39:13 -0500827bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
828 Module &M = *F.getParent();
829 return replaceCallsWithValue(F, [&](CallInst *CI) {
830 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400831
SJW2c317da2020-03-23 07:39:13 -0500832 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000833
SJW2c317da2020-03-23 07:39:13 -0500834 // If the argument is a 32-bit int, just use a shift
835 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
836 V = BinaryOperator::Create(Instruction::LShr, Arg,
837 ConstantInt::get(Arg->getType(), 31), "", CI);
838 } else {
839 // The value for zero to compare against.
840 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -0400841
SJW2c317da2020-03-23 07:39:13 -0500842 // The value to return for true.
843 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -0400844
SJW2c317da2020-03-23 07:39:13 -0500845 // The value to return for false.
846 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400847
SJW2c317da2020-03-23 07:39:13 -0500848 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
849 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400850
SJW2c317da2020-03-23 07:39:13 -0500851 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -0400852
SJW2c317da2020-03-23 07:39:13 -0500853 // If we have a function to call, call it!
854 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -0400855
SJW2c317da2020-03-23 07:39:13 -0500856 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -0400857
SJW2c317da2020-03-23 07:39:13 -0500858 const auto NewCI = clspv::InsertSPIRVOp(
859 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
860 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -0400861
SJW2c317da2020-03-23 07:39:13 -0500862 } else {
863 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -0400864 }
865
SJW2c317da2020-03-23 07:39:13 -0500866 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400867 }
SJW2c317da2020-03-23 07:39:13 -0500868 return V;
869 });
David Neto22f144c2017-06-12 14:26:21 -0400870}
871
SJW2c317da2020-03-23 07:39:13 -0500872bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
873 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
874 // Get arguments
875 auto HiValue = CI->getOperand(0);
876 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000877
SJW2c317da2020-03-23 07:39:13 -0500878 // Don't touch overloads that aren't in OpenCL C
879 auto HiType = HiValue->getType();
880 auto LoType = LoValue->getType();
881
882 if (HiType != LoType) {
883 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000884 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000885
SJW2c317da2020-03-23 07:39:13 -0500886 if (!HiType->isIntOrIntVectorTy()) {
887 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000888 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000889
SJW2c317da2020-03-23 07:39:13 -0500890 if (HiType->getScalarSizeInBits() * 2 !=
891 CI->getType()->getScalarSizeInBits()) {
892 return nullptr;
893 }
894
895 if ((HiType->getScalarSizeInBits() != 8) &&
896 (HiType->getScalarSizeInBits() != 16) &&
897 (HiType->getScalarSizeInBits() != 32)) {
898 return nullptr;
899 }
900
James Pricecf53df42020-04-20 14:41:24 -0400901 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
902 unsigned NumElements = HiVecType->getNumElements();
903 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
904 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500905 return nullptr;
906 }
907 }
908
909 // Convert both operands to the result type
910 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
911 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
912
913 // Shift high operand
914 auto ShiftAmount =
915 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
916 auto HiShifted =
917 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
918
919 // OR both results
920 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
921 });
Kévin Petitbf0036c2019-03-06 13:57:10 +0000922}
923
SJW2c317da2020-03-23 07:39:13 -0500924bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
925 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
926 // Get arguments
927 auto SrcValue = CI->getOperand(0);
928 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +0000929
SJW2c317da2020-03-23 07:39:13 -0500930 // Don't touch overloads that aren't in OpenCL C
931 auto SrcType = SrcValue->getType();
932 auto RotType = RotAmount->getType();
933
934 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
935 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000936 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000937
SJW2c317da2020-03-23 07:39:13 -0500938 if (!SrcType->isIntOrIntVectorTy()) {
939 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000940 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000941
SJW2c317da2020-03-23 07:39:13 -0500942 if ((SrcType->getScalarSizeInBits() != 8) &&
943 (SrcType->getScalarSizeInBits() != 16) &&
944 (SrcType->getScalarSizeInBits() != 32) &&
945 (SrcType->getScalarSizeInBits() != 64)) {
946 return nullptr;
947 }
948
James Pricecf53df42020-04-20 14:41:24 -0400949 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
950 unsigned NumElements = SrcVecType->getNumElements();
951 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
952 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500953 return nullptr;
954 }
955 }
956
957 // The approach used is to shift the top bits down, the bottom bits up
958 // and OR the two shifted values.
959
960 // The rotation amount is to be treated modulo the element size.
961 // Since SPIR-V shift ops don't support this, let's apply the
962 // modulo ahead of shifting. The element size is always a power of
963 // two so we can just AND with a mask.
964 auto ModMask =
965 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
966 RotAmount =
967 BinaryOperator::Create(Instruction::And, RotAmount, ModMask, "", CI);
968
969 // Let's calc the amount by which to shift top bits down
970 auto ScalarSize = ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
971 auto DownAmount =
972 BinaryOperator::Create(Instruction::Sub, ScalarSize, RotAmount, "", CI);
973
974 // Now shift the bottom bits up and the top bits down
975 auto LoRotated =
976 BinaryOperator::Create(Instruction::Shl, SrcValue, RotAmount, "", CI);
977 auto HiRotated =
978 BinaryOperator::Create(Instruction::LShr, SrcValue, DownAmount, "", CI);
979
980 // Finally OR the two shifted values
981 return BinaryOperator::Create(Instruction::Or, LoRotated, HiRotated, "",
982 CI);
983 });
Kévin Petitd44eef52019-03-08 13:22:14 +0000984}
985
SJW2c317da2020-03-23 07:39:13 -0500986bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
987 bool DstIsSigned) {
988 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
989 Value *V = nullptr;
990 // Get arguments
991 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000992
SJW2c317da2020-03-23 07:39:13 -0500993 // Don't touch overloads that aren't in OpenCL C
994 auto SrcType = SrcValue->getType();
995 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000996
SJW2c317da2020-03-23 07:39:13 -0500997 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
998 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
999 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001000 }
1001
James Pricecf53df42020-04-20 14:41:24 -04001002 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
1003 unsigned SrcNumElements = SrcVecType->getNumElements();
1004 unsigned DstNumElements = cast<VectorType>(DstType)->getNumElements();
1005 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001006 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001007 }
1008
James Pricecf53df42020-04-20 14:41:24 -04001009 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1010 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1011 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001012 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001013 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001014 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001015
SJW2c317da2020-03-23 07:39:13 -05001016 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1017 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1018
1019 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1020 bool DstIsInt = DstType->isIntOrIntVectorTy();
1021
1022 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1023 // Unnecessary cast operation.
1024 V = SrcValue;
1025 } else if (SrcIsFloat && DstIsFloat) {
1026 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1027 } else if (SrcIsFloat && DstIsInt) {
1028 if (DstIsSigned) {
1029 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1030 } else {
1031 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1032 }
1033 } else if (SrcIsInt && DstIsFloat) {
1034 if (SrcIsSigned) {
1035 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1036 } else {
1037 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1038 }
1039 } else if (SrcIsInt && DstIsInt) {
1040 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1041 } else {
1042 // Not something we're supposed to handle, just move on
1043 }
1044
1045 return V;
1046 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001047}
1048
SJW2c317da2020-03-23 07:39:13 -05001049bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1050 bool is_mad) {
1051 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1052 Value *V = nullptr;
1053 // Get arguments
1054 auto AValue = CI->getOperand(0);
1055 auto BValue = CI->getOperand(1);
1056 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001057
SJW2c317da2020-03-23 07:39:13 -05001058 // Don't touch overloads that aren't in OpenCL C
1059 auto AType = AValue->getType();
1060 auto BType = BValue->getType();
1061 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001062
SJW2c317da2020-03-23 07:39:13 -05001063 if ((AType != BType) || (CI->getType() != AType) ||
1064 (is_mad && (AType != CType))) {
1065 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001066 }
1067
SJW2c317da2020-03-23 07:39:13 -05001068 if (!AType->isIntOrIntVectorTy()) {
1069 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001070 }
Kévin Petit8a560882019-03-21 15:24:34 +00001071
SJW2c317da2020-03-23 07:39:13 -05001072 if ((AType->getScalarSizeInBits() != 8) &&
1073 (AType->getScalarSizeInBits() != 16) &&
1074 (AType->getScalarSizeInBits() != 32) &&
1075 (AType->getScalarSizeInBits() != 64)) {
1076 return V;
1077 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001078
James Pricecf53df42020-04-20 14:41:24 -04001079 if (auto AVecType = dyn_cast<VectorType>(AType)) {
1080 unsigned NumElements = AVecType->getNumElements();
1081 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1082 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001083 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001084 }
1085 }
1086
SJW2c317da2020-03-23 07:39:13 -05001087 // Our SPIR-V op returns a struct, create a type for it
1088 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1089 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001090
SJW2c317da2020-03-23 07:39:13 -05001091 // Select the appropriate signed/unsigned SPIR-V op
1092 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1093
1094 // Call the SPIR-V op
1095 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1096 ExMulRetType, {AValue, BValue});
1097
1098 // Get the high part of the result
1099 unsigned Idxs[] = {1};
1100 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1101
1102 // If we're handling a mad_hi, add the third argument to the result
1103 if (is_mad) {
1104 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001105 }
1106
SJW2c317da2020-03-23 07:39:13 -05001107 return V;
1108 });
Kévin Petit8a560882019-03-21 15:24:34 +00001109}
1110
SJW2c317da2020-03-23 07:39:13 -05001111bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1112 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1113 // Get arguments
1114 auto FalseValue = CI->getOperand(0);
1115 auto TrueValue = CI->getOperand(1);
1116 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001117
SJW2c317da2020-03-23 07:39:13 -05001118 // Don't touch overloads that aren't in OpenCL C
1119 auto FalseType = FalseValue->getType();
1120 auto TrueType = TrueValue->getType();
1121 auto PredicateType = PredicateValue->getType();
1122
1123 if (FalseType != TrueType) {
1124 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001125 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001126
SJW2c317da2020-03-23 07:39:13 -05001127 if (!PredicateType->isIntOrIntVectorTy()) {
1128 return nullptr;
1129 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001130
SJW2c317da2020-03-23 07:39:13 -05001131 if (!FalseType->isIntOrIntVectorTy() &&
1132 !FalseType->getScalarType()->isFloatingPointTy()) {
1133 return nullptr;
1134 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001135
SJW2c317da2020-03-23 07:39:13 -05001136 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1137 return nullptr;
1138 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001139
SJW2c317da2020-03-23 07:39:13 -05001140 if (FalseType->getScalarSizeInBits() !=
1141 PredicateType->getScalarSizeInBits()) {
1142 return nullptr;
1143 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001144
James Pricecf53df42020-04-20 14:41:24 -04001145 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
1146 unsigned NumElements = FalseVecType->getNumElements();
1147 if (NumElements != cast<VectorType>(PredicateType)->getNumElements()) {
SJW2c317da2020-03-23 07:39:13 -05001148 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001149 }
1150
James Pricecf53df42020-04-20 14:41:24 -04001151 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1152 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001153 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001154 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001155 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001156
SJW2c317da2020-03-23 07:39:13 -05001157 // Create constant
1158 const auto ZeroValue = Constant::getNullValue(PredicateType);
1159
1160 // Scalar and vector are to be treated differently
1161 CmpInst::Predicate Pred;
1162 if (PredicateType->isVectorTy()) {
1163 Pred = CmpInst::ICMP_SLT;
1164 } else {
1165 Pred = CmpInst::ICMP_NE;
1166 }
1167
1168 // Create comparison instruction
1169 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1170 ZeroValue, "", CI);
1171
1172 // Create select
1173 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1174 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001175}
1176
SJW2c317da2020-03-23 07:39:13 -05001177bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1178 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1179 Value *V = nullptr;
1180 if (CI->getNumOperands() != 4) {
1181 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001182 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001183
SJW2c317da2020-03-23 07:39:13 -05001184 // Get arguments
1185 auto FalseValue = CI->getOperand(0);
1186 auto TrueValue = CI->getOperand(1);
1187 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001188
SJW2c317da2020-03-23 07:39:13 -05001189 // Don't touch overloads that aren't in OpenCL C
1190 auto FalseType = FalseValue->getType();
1191 auto TrueType = TrueValue->getType();
1192 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001193
SJW2c317da2020-03-23 07:39:13 -05001194 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1195 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001196 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001197
James Pricecf53df42020-04-20 14:41:24 -04001198 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001199 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1200 !TrueType->getScalarType()->isIntegerTy()) {
1201 return V;
1202 }
James Pricecf53df42020-04-20 14:41:24 -04001203 unsigned NumElements = TrueVecType->getNumElements();
1204 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1205 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001206 return V;
1207 }
1208 }
1209
1210 // Remember the type of the operands
1211 auto OpType = TrueType;
1212
1213 // The actual bit selection will always be done on an integer type,
1214 // declare it here
1215 Type *BitType;
1216
1217 // If the operands are float, then bitcast them to int
1218 if (OpType->getScalarType()->isFloatingPointTy()) {
1219
1220 // First create the new type
1221 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1222
1223 // Then bitcast all operands
1224 PredicateValue =
1225 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1226 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1227 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1228
1229 } else {
1230 // The operands have an integer type, use it directly
1231 BitType = OpType;
1232 }
1233
1234 // All the operands are now always integers
1235 // implement as (c & b) | (~c & a)
1236
1237 // Create our negated predicate value
1238 auto AllOnes = Constant::getAllOnesValue(BitType);
1239 auto NotPredicateValue = BinaryOperator::Create(
1240 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1241
1242 // Then put everything together
1243 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1244 FalseValue, "", CI);
1245 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1246 TrueValue, "", CI);
1247
1248 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1249
1250 // If we were dealing with a floating point type, we must bitcast
1251 // the result back to that
1252 if (OpType->getScalarType()->isFloatingPointTy()) {
1253 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1254 }
1255
1256 return V;
1257 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001258}
1259
SJW61531372020-06-09 07:31:08 -05001260bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001261 // convert to vector versions
1262 Module &M = *F.getParent();
1263 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1264 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1265 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001266
SJW2c317da2020-03-23 07:39:13 -05001267 // First figure out which function we're dealing with
1268 if (is_smooth) {
1269 ArgsToSplat.push_back(CI->getOperand(1));
1270 VectorArg = CI->getOperand(2);
1271 } else {
1272 VectorArg = CI->getOperand(1);
1273 }
1274
1275 // Splat arguments that need to be
1276 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001277 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001278
1279 for (auto arg : ArgsToSplat) {
1280 Value *NewVectorArg = UndefValue::get(VecType);
James Pricecf53df42020-04-20 14:41:24 -04001281 for (auto i = 0; i < VecType->getNumElements(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001282 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1283 NewVectorArg =
1284 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1285 }
1286 SplatArgs.push_back(NewVectorArg);
1287 }
1288
1289 // Replace the call with the vector/vector flavour
1290 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1291 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1292
SJW61531372020-06-09 07:31:08 -05001293 std::string NewFName = Builtins::GetMangledFunctionName(
1294 is_smooth ? "smoothstep" : "step", NewFType);
1295
SJW2c317da2020-03-23 07:39:13 -05001296 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1297
1298 SmallVector<Value *, 3> NewArgs;
1299 for (auto arg : SplatArgs) {
1300 NewArgs.push_back(arg);
1301 }
1302 NewArgs.push_back(VectorArg);
1303
1304 return CallInst::Create(NewF, NewArgs, "", CI);
1305 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001306}
1307
SJW2c317da2020-03-23 07:39:13 -05001308bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001309 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1310 auto Arg = CI->getOperand(0);
1311 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001312
SJW2c317da2020-03-23 07:39:13 -05001313 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001314
SJW2c317da2020-03-23 07:39:13 -05001315 return BinaryOperator::Create(Op, Bitcast,
1316 ConstantInt::get(CI->getType(), 31), "", CI);
1317 });
David Neto22f144c2017-06-12 14:26:21 -04001318}
1319
SJW2c317da2020-03-23 07:39:13 -05001320bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1321 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001322 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1323 // The multiply instruction to use.
1324 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001325
SJW2c317da2020-03-23 07:39:13 -05001326 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001327
SJW2c317da2020-03-23 07:39:13 -05001328 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1329 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001330
SJW2c317da2020-03-23 07:39:13 -05001331 if (is_mad) {
1332 // The add instruction to use.
1333 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001334
SJW2c317da2020-03-23 07:39:13 -05001335 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001336 }
David Neto22f144c2017-06-12 14:26:21 -04001337
SJW2c317da2020-03-23 07:39:13 -05001338 return V;
1339 });
David Neto22f144c2017-06-12 14:26:21 -04001340}
1341
SJW2c317da2020-03-23 07:39:13 -05001342bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001343 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1344 Value *V = nullptr;
1345 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001346
SJW2c317da2020-03-23 07:39:13 -05001347 auto data_type = data->getType();
1348 if (!data_type->isVectorTy())
1349 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001350
James Pricecf53df42020-04-20 14:41:24 -04001351 auto vec_data_type = cast<VectorType>(data_type);
1352
1353 auto elems = vec_data_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001354 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1355 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001356
SJW2c317da2020-03-23 07:39:13 -05001357 auto offset = CI->getOperand(1);
1358 auto ptr = CI->getOperand(2);
1359 auto ptr_type = ptr->getType();
1360 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001361 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001362 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001363
SJW2c317da2020-03-23 07:39:13 -05001364 // Avoid pointer casts. Instead generate the correct number of stores
1365 // and rely on drivers to coalesce appropriately.
1366 IRBuilder<> builder(CI);
1367 auto elems_const = builder.getInt32(elems);
1368 auto adjust = builder.CreateMul(offset, elems_const);
1369 for (auto i = 0; i < elems; ++i) {
1370 auto idx = builder.getInt32(i);
1371 auto add = builder.CreateAdd(adjust, idx);
1372 auto gep = builder.CreateGEP(ptr, add);
1373 auto extract = builder.CreateExtractElement(data, i);
1374 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001375 }
SJW2c317da2020-03-23 07:39:13 -05001376 return V;
1377 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001378}
1379
SJW2c317da2020-03-23 07:39:13 -05001380bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001381 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1382 Value *V = nullptr;
1383 auto ret_type = F.getReturnType();
1384 if (!ret_type->isVectorTy())
1385 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001386
James Pricecf53df42020-04-20 14:41:24 -04001387 auto vec_ret_type = cast<VectorType>(ret_type);
1388
1389 auto elems = vec_ret_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001390 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1391 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001392
SJW2c317da2020-03-23 07:39:13 -05001393 auto offset = CI->getOperand(0);
1394 auto ptr = CI->getOperand(1);
1395 auto ptr_type = ptr->getType();
1396 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001397 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001398 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001399
SJW2c317da2020-03-23 07:39:13 -05001400 // Avoid pointer casts. Instead generate the correct number of loads
1401 // and rely on drivers to coalesce appropriately.
1402 IRBuilder<> builder(CI);
1403 auto elems_const = builder.getInt32(elems);
1404 V = UndefValue::get(ret_type);
1405 auto adjust = builder.CreateMul(offset, elems_const);
1406 for (auto i = 0; i < elems; ++i) {
1407 auto idx = builder.getInt32(i);
1408 auto add = builder.CreateAdd(adjust, idx);
1409 auto gep = builder.CreateGEP(ptr, add);
1410 auto load = builder.CreateLoad(gep);
1411 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001412 }
SJW2c317da2020-03-23 07:39:13 -05001413 return V;
1414 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001415}
1416
SJW2c317da2020-03-23 07:39:13 -05001417bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1418 const std::string &name,
1419 int vec_size) {
1420 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1421 if (!vec_size) {
1422 // deduce vec_size from last character of name (e.g. vload_half4)
1423 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001424 }
SJW2c317da2020-03-23 07:39:13 -05001425 switch (vec_size) {
1426 case 2:
1427 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1428 case 4:
1429 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1430 case 0:
1431 if (!is_clspv_version) {
1432 return replaceVloadHalf(F);
1433 }
1434 default:
1435 llvm_unreachable("Unsupported vload_half vector size");
1436 break;
1437 }
1438 return false;
David Neto22f144c2017-06-12 14:26:21 -04001439}
1440
SJW2c317da2020-03-23 07:39:13 -05001441bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1442 Module &M = *F.getParent();
1443 return replaceCallsWithValue(F, [&](CallInst *CI) {
1444 // The index argument from vload_half.
1445 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001446
SJW2c317da2020-03-23 07:39:13 -05001447 // The pointer argument from vload_half.
1448 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001449
SJW2c317da2020-03-23 07:39:13 -05001450 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001451 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001452 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1453
1454 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001455 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001456
1457 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1458
1459 Value *V = nullptr;
1460
alan-baker7efcaaa2020-05-06 19:33:27 -04001461 bool supports_16bit_storage = true;
1462 switch (Arg1->getType()->getPointerAddressSpace()) {
1463 case clspv::AddressSpace::Global:
1464 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1465 clspv::Option::StorageClass::kSSBO);
1466 break;
1467 case clspv::AddressSpace::Constant:
1468 if (clspv::Option::ConstantArgsInUniformBuffer())
1469 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1470 clspv::Option::StorageClass::kUBO);
1471 else
1472 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1473 clspv::Option::StorageClass::kSSBO);
1474 break;
1475 default:
1476 // Clspv will emit the Float16 capability if the half type is
1477 // encountered. That capability covers private and local addressspaces.
1478 break;
1479 }
1480
1481 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001482 auto ShortTy = Type::getInt16Ty(M.getContext());
1483 auto ShortPointerTy =
1484 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1485
1486 // Cast the half* pointer to short*.
1487 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1488
1489 // Index into the correct address of the casted pointer.
1490 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1491
1492 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001493 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001494
1495 // ZExt the short -> int.
1496 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1497
1498 // Get our float2.
1499 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1500
1501 // Extract out the bottom element which is our float result.
1502 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1503 } else {
1504 // Assume the pointer argument points to storage aligned to 32bits
1505 // or more.
1506 // TODO(dneto): Do more analysis to make sure this is true?
1507 //
1508 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1509 // with:
1510 //
1511 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1512 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1513 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1514 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1515 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1516 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1517 // x float> %converted, %index_is_odd32
1518
1519 auto IntPointerTy =
1520 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1521
1522 // Cast the base pointer to int*.
1523 // In a valid call (according to assumptions), this should get
1524 // optimized away in the simplify GEP pass.
1525 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1526
1527 auto One = ConstantInt::get(IntTy, 1);
1528 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1529 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1530
1531 // Index into the correct address of the casted pointer.
1532 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1533
1534 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001535 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001536
1537 // Get our float2.
1538 auto Call = CallInst::Create(NewF, Load, "", CI);
1539
1540 // Extract out the float result, where the element number is
1541 // determined by whether the original index was even or odd.
1542 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1543 }
1544 return V;
1545 });
1546}
1547
1548bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1549 Module &M = *F.getParent();
1550 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001551 // The index argument from vload_half.
1552 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001553
Kévin Petite8edce32019-04-10 14:23:32 +01001554 // The pointer argument from vload_half.
1555 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001556
Kévin Petite8edce32019-04-10 14:23:32 +01001557 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001558 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001559 auto NewPointerTy =
1560 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001561 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001562
Kévin Petite8edce32019-04-10 14:23:32 +01001563 // Cast the half* pointer to int*.
1564 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001565
Kévin Petite8edce32019-04-10 14:23:32 +01001566 // Index into the correct address of the casted pointer.
1567 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001568
Kévin Petite8edce32019-04-10 14:23:32 +01001569 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001570 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001571
Kévin Petite8edce32019-04-10 14:23:32 +01001572 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001573 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001574
Kévin Petite8edce32019-04-10 14:23:32 +01001575 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001576
Kévin Petite8edce32019-04-10 14:23:32 +01001577 // Get our float2.
1578 return CallInst::Create(NewF, Load, "", CI);
1579 });
David Neto22f144c2017-06-12 14:26:21 -04001580}
1581
SJW2c317da2020-03-23 07:39:13 -05001582bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1583 Module &M = *F.getParent();
1584 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001585 // The index argument from vload_half.
1586 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001587
Kévin Petite8edce32019-04-10 14:23:32 +01001588 // The pointer argument from vload_half.
1589 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001590
Kévin Petite8edce32019-04-10 14:23:32 +01001591 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001592 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1593 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001594 auto NewPointerTy =
1595 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001596 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001597
Kévin Petite8edce32019-04-10 14:23:32 +01001598 // Cast the half* pointer to int2*.
1599 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001600
Kévin Petite8edce32019-04-10 14:23:32 +01001601 // Index into the correct address of the casted pointer.
1602 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001603
Kévin Petite8edce32019-04-10 14:23:32 +01001604 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001605 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001606
Kévin Petite8edce32019-04-10 14:23:32 +01001607 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001608 auto X =
1609 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1610 auto Y =
1611 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001612
Kévin Petite8edce32019-04-10 14:23:32 +01001613 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001614 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001615
Kévin Petite8edce32019-04-10 14:23:32 +01001616 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001617
Kévin Petite8edce32019-04-10 14:23:32 +01001618 // Get the lower (x & y) components of our final float4.
1619 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001620
Kévin Petite8edce32019-04-10 14:23:32 +01001621 // Get the higher (z & w) components of our final float4.
1622 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001623
Kévin Petite8edce32019-04-10 14:23:32 +01001624 Constant *ShuffleMask[4] = {
1625 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1626 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001627
Kévin Petite8edce32019-04-10 14:23:32 +01001628 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001629 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1630 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001631 });
David Neto22f144c2017-06-12 14:26:21 -04001632}
1633
SJW2c317da2020-03-23 07:39:13 -05001634bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001635
1636 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1637 //
1638 // %u = load i32 %ptr
1639 // %fxy = call <2 x float> Unpack2xHalf(u)
1640 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001641 Module &M = *F.getParent();
1642 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001643 auto Index = CI->getOperand(0);
1644 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001645
Kévin Petite8edce32019-04-10 14:23:32 +01001646 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001647 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001648 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001649
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001650 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001651 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001652
Kévin Petite8edce32019-04-10 14:23:32 +01001653 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001654 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001655
Kévin Petite8edce32019-04-10 14:23:32 +01001656 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001657
Kévin Petite8edce32019-04-10 14:23:32 +01001658 // Get our final float2.
1659 return CallInst::Create(NewF, Load, "", CI);
1660 });
David Neto6ad93232018-06-07 15:42:58 -07001661}
1662
SJW2c317da2020-03-23 07:39:13 -05001663bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001664
1665 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1666 //
1667 // %u2 = load <2 x i32> %ptr
1668 // %u2xy = extractelement %u2, 0
1669 // %u2zw = extractelement %u2, 1
1670 // %fxy = call <2 x float> Unpack2xHalf(uint)
1671 // %fzw = call <2 x float> Unpack2xHalf(uint)
1672 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001673 Module &M = *F.getParent();
1674 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001675 auto Index = CI->getOperand(0);
1676 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001677
Kévin Petite8edce32019-04-10 14:23:32 +01001678 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001679 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1680 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001681 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001682
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001683 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001684 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001685
Kévin Petite8edce32019-04-10 14:23:32 +01001686 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001687 auto X =
1688 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1689 auto Y =
1690 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001691
Kévin Petite8edce32019-04-10 14:23:32 +01001692 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001693 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001694
Kévin Petite8edce32019-04-10 14:23:32 +01001695 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001696
Kévin Petite8edce32019-04-10 14:23:32 +01001697 // Get the lower (x & y) components of our final float4.
1698 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001699
Kévin Petite8edce32019-04-10 14:23:32 +01001700 // Get the higher (z & w) components of our final float4.
1701 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001702
Kévin Petite8edce32019-04-10 14:23:32 +01001703 Constant *ShuffleMask[4] = {
1704 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1705 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001706
Kévin Petite8edce32019-04-10 14:23:32 +01001707 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001708 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1709 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001710 });
David Neto6ad93232018-06-07 15:42:58 -07001711}
1712
SJW2c317da2020-03-23 07:39:13 -05001713bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1714 switch (vec_size) {
1715 case 0:
1716 return replaceVstoreHalf(F);
1717 case 2:
1718 return replaceVstoreHalf2(F);
1719 case 4:
1720 return replaceVstoreHalf4(F);
1721 default:
1722 llvm_unreachable("Unsupported vstore_half vector size");
1723 break;
1724 }
1725 return false;
1726}
David Neto22f144c2017-06-12 14:26:21 -04001727
SJW2c317da2020-03-23 07:39:13 -05001728bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1729 Module &M = *F.getParent();
1730 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001731 // The value to store.
1732 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001733
Kévin Petite8edce32019-04-10 14:23:32 +01001734 // The index argument from vstore_half.
1735 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001736
Kévin Petite8edce32019-04-10 14:23:32 +01001737 // The pointer argument from vstore_half.
1738 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001739
Kévin Petite8edce32019-04-10 14:23:32 +01001740 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001741 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001742 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1743 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001744
Kévin Petite8edce32019-04-10 14:23:32 +01001745 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001746 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001747
Kévin Petite8edce32019-04-10 14:23:32 +01001748 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001749
Kévin Petite8edce32019-04-10 14:23:32 +01001750 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001751 auto TempVec = InsertElementInst::Create(
1752 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001753
Kévin Petite8edce32019-04-10 14:23:32 +01001754 // Pack the float2 -> half2 (in an int).
1755 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001756
alan-baker7efcaaa2020-05-06 19:33:27 -04001757 bool supports_16bit_storage = true;
1758 switch (Arg2->getType()->getPointerAddressSpace()) {
1759 case clspv::AddressSpace::Global:
1760 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1761 clspv::Option::StorageClass::kSSBO);
1762 break;
1763 case clspv::AddressSpace::Constant:
1764 if (clspv::Option::ConstantArgsInUniformBuffer())
1765 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1766 clspv::Option::StorageClass::kUBO);
1767 else
1768 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1769 clspv::Option::StorageClass::kSSBO);
1770 break;
1771 default:
1772 // Clspv will emit the Float16 capability if the half type is
1773 // encountered. That capability covers private and local addressspaces.
1774 break;
1775 }
1776
SJW2c317da2020-03-23 07:39:13 -05001777 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04001778 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01001779 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001780 auto ShortPointerTy =
1781 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001782
Kévin Petite8edce32019-04-10 14:23:32 +01001783 // Truncate our i32 to an i16.
1784 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001785
Kévin Petite8edce32019-04-10 14:23:32 +01001786 // Cast the half* pointer to short*.
1787 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001788
Kévin Petite8edce32019-04-10 14:23:32 +01001789 // Index into the correct address of the casted pointer.
1790 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001791
Kévin Petite8edce32019-04-10 14:23:32 +01001792 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05001793 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001794 } else {
1795 // We can only write to 32-bit aligned words.
1796 //
1797 // Assuming base is aligned to 32-bits, replace the equivalent of
1798 // vstore_half(value, index, base)
1799 // with:
1800 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1801 // uint32_t write_to_upper_half = index & 1u;
1802 // uint32_t shift = write_to_upper_half << 4;
1803 //
1804 // // Pack the float value as a half number in bottom 16 bits
1805 // // of an i32.
1806 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1807 //
1808 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1809 // ^ ((packed & 0xffff) << shift)
1810 // // We only need relaxed consistency, but OpenCL 1.2 only has
1811 // // sequentially consistent atomics.
1812 // // TODO(dneto): Use relaxed consistency.
1813 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001814 auto IntPointerTy =
1815 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001816
Kévin Petite8edce32019-04-10 14:23:32 +01001817 auto Four = ConstantInt::get(IntTy, 4);
1818 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04001819
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001820 auto IndexIsOdd =
1821 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001822 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001823 auto IndexIntoI32 =
1824 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1825 auto BaseI32Ptr =
1826 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1827 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
1828 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001829 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001830 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001831 auto MaskBitsToWrite =
1832 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1833 auto MaskedCurrent = BinaryOperator::CreateAnd(
1834 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04001835
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001836 auto XLowerBits =
1837 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1838 auto NewBitsToWrite =
1839 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1840 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
1841 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04001842
Kévin Petite8edce32019-04-10 14:23:32 +01001843 // Generate the call to atomi_xor.
1844 SmallVector<Type *, 5> ParamTypes;
1845 // The pointer type.
1846 ParamTypes.push_back(IntPointerTy);
1847 // The Types for memory scope, semantics, and value.
1848 ParamTypes.push_back(IntTy);
1849 ParamTypes.push_back(IntTy);
1850 ParamTypes.push_back(IntTy);
1851 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1852 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04001853
Kévin Petite8edce32019-04-10 14:23:32 +01001854 const auto ConstantScopeDevice =
1855 ConstantInt::get(IntTy, spv::ScopeDevice);
1856 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1857 // (SPIR-V Workgroup).
1858 const auto AddrSpaceSemanticsBits =
1859 IntPointerTy->getPointerAddressSpace() == 1
1860 ? spv::MemorySemanticsUniformMemoryMask
1861 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04001862
Kévin Petite8edce32019-04-10 14:23:32 +01001863 // We're using relaxed consistency here.
1864 const auto ConstantMemorySemantics =
1865 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1866 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04001867
Kévin Petite8edce32019-04-10 14:23:32 +01001868 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1869 ConstantMemorySemantics, ValueToXor};
1870 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05001871
1872 // Return a Nop so the old Call is removed
1873 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
1874 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001875 }
David Neto22f144c2017-06-12 14:26:21 -04001876
SJW2c317da2020-03-23 07:39:13 -05001877 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01001878 });
David Neto22f144c2017-06-12 14:26:21 -04001879}
1880
SJW2c317da2020-03-23 07:39:13 -05001881bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
1882 Module &M = *F.getParent();
1883 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001884 // The value to store.
1885 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001886
Kévin Petite8edce32019-04-10 14:23:32 +01001887 // The index argument from vstore_half.
1888 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001889
Kévin Petite8edce32019-04-10 14:23:32 +01001890 // The pointer argument from vstore_half.
1891 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001892
Kévin Petite8edce32019-04-10 14:23:32 +01001893 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001894 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001895 auto NewPointerTy =
1896 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001897 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001898
Kévin Petite8edce32019-04-10 14:23:32 +01001899 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001900 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001901
Kévin Petite8edce32019-04-10 14:23:32 +01001902 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001903
Kévin Petite8edce32019-04-10 14:23:32 +01001904 // Turn the packed x & y into the final packing.
1905 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001906
Kévin Petite8edce32019-04-10 14:23:32 +01001907 // Cast the half* pointer to int*.
1908 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001909
Kévin Petite8edce32019-04-10 14:23:32 +01001910 // Index into the correct address of the casted pointer.
1911 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001912
Kévin Petite8edce32019-04-10 14:23:32 +01001913 // Store to the int* we casted to.
1914 return new StoreInst(X, Index, CI);
1915 });
David Neto22f144c2017-06-12 14:26:21 -04001916}
1917
SJW2c317da2020-03-23 07:39:13 -05001918bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
1919 Module &M = *F.getParent();
1920 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001921 // The value to store.
1922 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001923
Kévin Petite8edce32019-04-10 14:23:32 +01001924 // The index argument from vstore_half.
1925 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001926
Kévin Petite8edce32019-04-10 14:23:32 +01001927 // The pointer argument from vstore_half.
1928 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001929
Kévin Petite8edce32019-04-10 14:23:32 +01001930 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001931 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1932 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001933 auto NewPointerTy =
1934 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001935 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001936
Kévin Petite8edce32019-04-10 14:23:32 +01001937 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1938 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04001939
Kévin Petite8edce32019-04-10 14:23:32 +01001940 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001941 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1942 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001943
Kévin Petite8edce32019-04-10 14:23:32 +01001944 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1945 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001946
Kévin Petite8edce32019-04-10 14:23:32 +01001947 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001948 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1949 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001950
Kévin Petite8edce32019-04-10 14:23:32 +01001951 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001952 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001953
Kévin Petite8edce32019-04-10 14:23:32 +01001954 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001955
Kévin Petite8edce32019-04-10 14:23:32 +01001956 // Turn the packed x & y into the final component of our int2.
1957 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001958
Kévin Petite8edce32019-04-10 14:23:32 +01001959 // Turn the packed z & w into the final component of our int2.
1960 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001961
Kévin Petite8edce32019-04-10 14:23:32 +01001962 auto Combine = InsertElementInst::Create(
1963 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001964 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
1965 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001966
Kévin Petite8edce32019-04-10 14:23:32 +01001967 // Cast the half* pointer to int2*.
1968 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001969
Kévin Petite8edce32019-04-10 14:23:32 +01001970 // Index into the correct address of the casted pointer.
1971 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001972
Kévin Petite8edce32019-04-10 14:23:32 +01001973 // Store to the int2* we casted to.
1974 return new StoreInst(Combine, Index, CI);
1975 });
David Neto22f144c2017-06-12 14:26:21 -04001976}
1977
SJW2c317da2020-03-23 07:39:13 -05001978bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
1979 // convert half to float
1980 Module &M = *F.getParent();
1981 return replaceCallsWithValue(F, [&](CallInst *CI) {
1982 SmallVector<Type *, 3> types;
1983 SmallVector<Value *, 3> args;
1984 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
1985 types.push_back(CI->getArgOperand(i)->getType());
1986 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05001987 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05001988
SJW2c317da2020-03-23 07:39:13 -05001989 auto NewFType = FunctionType::get(
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001990 FixedVectorType::get(Type::getFloatTy(M.getContext()),
1991 cast<VectorType>(CI->getType())->getNumElements()),
SJW2c317da2020-03-23 07:39:13 -05001992 types, false);
1993
SJW61531372020-06-09 07:31:08 -05001994 std::string NewFName =
1995 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05001996
1997 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1998
1999 auto NewCI = CallInst::Create(NewF, args, "", CI);
2000
2001 // Convert to the half type.
2002 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2003 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002004}
2005
SJW2c317da2020-03-23 07:39:13 -05002006bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2007 // convert half to float
2008 Module &M = *F.getParent();
2009 return replaceCallsWithValue(F, [&](CallInst *CI) {
2010 SmallVector<Type *, 3> types(3);
2011 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002012
SJW2c317da2020-03-23 07:39:13 -05002013 // Image
2014 types[0] = CI->getArgOperand(0)->getType();
2015 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002016
SJW2c317da2020-03-23 07:39:13 -05002017 // Coord
2018 types[1] = CI->getArgOperand(1)->getType();
2019 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002020
SJW2c317da2020-03-23 07:39:13 -05002021 // Data
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002022 types[2] = FixedVectorType::get(
SJW2c317da2020-03-23 07:39:13 -05002023 Type::getFloatTy(M.getContext()),
James Pricecf53df42020-04-20 14:41:24 -04002024 cast<VectorType>(CI->getArgOperand(2)->getType())->getNumElements());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002025
SJW2c317da2020-03-23 07:39:13 -05002026 auto NewFType =
2027 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002028
SJW61531372020-06-09 07:31:08 -05002029 std::string NewFName =
2030 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002031
SJW2c317da2020-03-23 07:39:13 -05002032 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002033
SJW2c317da2020-03-23 07:39:13 -05002034 // Convert data to the float type.
2035 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2036 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002037
SJW2c317da2020-03-23 07:39:13 -05002038 return CallInst::Create(NewF, args, "", CI);
2039 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002040}
2041
SJW2c317da2020-03-23 07:39:13 -05002042bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2043 Function &F) {
2044 // convert read_image with int coords to float coords
2045 Module &M = *F.getParent();
2046 return replaceCallsWithValue(F, [&](CallInst *CI) {
2047 // The image.
2048 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002049
SJW2c317da2020-03-23 07:39:13 -05002050 // The sampler.
2051 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002052
SJW2c317da2020-03-23 07:39:13 -05002053 // The coordinate (integer type that we can't handle).
2054 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002055
SJW2c317da2020-03-23 07:39:13 -05002056 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2057 uint32_t components =
2058 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2059 Type *float_ty = nullptr;
2060 if (components == 1) {
2061 float_ty = Type::getFloatTy(M.getContext());
2062 } else {
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002063 float_ty = FixedVectorType::get(
2064 Type::getFloatTy(M.getContext()),
2065 cast<VectorType>(Arg2->getType())->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002066 }
David Neto22f144c2017-06-12 14:26:21 -04002067
SJW2c317da2020-03-23 07:39:13 -05002068 auto NewFType = FunctionType::get(
2069 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2070
2071 std::string NewFName = F.getName().str();
2072 NewFName[NewFName.length() - 1] = 'f';
2073
2074 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2075
2076 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2077
2078 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2079 });
David Neto22f144c2017-06-12 14:26:21 -04002080}
2081
SJW2c317da2020-03-23 07:39:13 -05002082bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2083 return replaceCallsWithValue(F, [&](CallInst *CI) {
2084 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002085
SJW2c317da2020-03-23 07:39:13 -05002086 // We need to map the OpenCL constants to the SPIR-V equivalents.
2087 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2088 const auto ConstantMemorySemantics = ConstantInt::get(
2089 IntTy, spv::MemorySemanticsUniformMemoryMask |
2090 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002091
SJW2c317da2020-03-23 07:39:13 -05002092 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002093
SJW2c317da2020-03-23 07:39:13 -05002094 // The pointer.
2095 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002096
SJW2c317da2020-03-23 07:39:13 -05002097 // The memory scope.
2098 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002099
SJW2c317da2020-03-23 07:39:13 -05002100 // The memory semantics.
2101 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002102
SJW2c317da2020-03-23 07:39:13 -05002103 if (2 < CI->getNumArgOperands()) {
2104 // The unequal memory semantics.
2105 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002106
SJW2c317da2020-03-23 07:39:13 -05002107 // The value.
2108 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002109
SJW2c317da2020-03-23 07:39:13 -05002110 // The comparator.
2111 Params.push_back(CI->getArgOperand(1));
2112 } else if (1 < CI->getNumArgOperands()) {
2113 // The value.
2114 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002115 }
David Neto22f144c2017-06-12 14:26:21 -04002116
SJW2c317da2020-03-23 07:39:13 -05002117 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2118 });
David Neto22f144c2017-06-12 14:26:21 -04002119}
2120
SJW2c317da2020-03-23 07:39:13 -05002121bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2122 llvm::AtomicRMWInst::BinOp Op) {
2123 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002124 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2125 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002126 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002127 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002128 SyncScope::System, CI);
2129 });
2130}
David Neto22f144c2017-06-12 14:26:21 -04002131
SJW2c317da2020-03-23 07:39:13 -05002132bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2133 Module &M = *F.getParent();
2134 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002135 auto IntTy = Type::getInt32Ty(M.getContext());
2136 auto FloatTy = Type::getFloatTy(M.getContext());
2137
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002138 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2139 ConstantInt::get(IntTy, 1),
2140 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002141
2142 Constant *UpShuffleMask[4] = {
2143 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2144 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2145
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002146 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2147 UndefValue::get(FloatTy),
2148 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002149
Kévin Petite8edce32019-04-10 14:23:32 +01002150 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002151 auto Arg0 =
2152 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2153 ConstantVector::get(DownShuffleMask), "", CI);
2154 auto Arg1 =
2155 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2156 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002157 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002158
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002159 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002160 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002161
SJW61531372020-06-09 07:31:08 -05002162 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002163
Kévin Petite8edce32019-04-10 14:23:32 +01002164 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002165
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002166 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2167 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002168 });
David Neto22f144c2017-06-12 14:26:21 -04002169}
David Neto62653202017-10-16 19:05:18 -04002170
SJW2c317da2020-03-23 07:39:13 -05002171bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002172 // OpenCL's float result = fract(float x, float* ptr)
2173 //
2174 // In the LLVM domain:
2175 //
2176 // %floor_result = call spir_func float @floor(float %x)
2177 // store float %floor_result, float * %ptr
2178 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2179 // %result = call spir_func float
2180 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2181 //
2182 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2183 // and clspv.fract occur in the SPIR-V generator pass:
2184 //
2185 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2186 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2187 // ...
2188 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2189 // OpStore %ptr %floor_result
2190 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2191 // %fract_result = OpExtInst %float
2192 // %glsl_ext Fmin %fract_intermediate %just_under_1
2193
David Neto62653202017-10-16 19:05:18 -04002194 using std::string;
2195
2196 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2197 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002198
SJW2c317da2020-03-23 07:39:13 -05002199 Module &M = *F.getParent();
2200 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002201
SJW2c317da2020-03-23 07:39:13 -05002202 // This is either float or a float vector. All the float-like
2203 // types are this type.
2204 auto result_ty = F.getReturnType();
2205
SJW61531372020-06-09 07:31:08 -05002206 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002207 Function *fmin_fn = M.getFunction(fmin_name);
2208 if (!fmin_fn) {
2209 // Make the fmin function.
2210 FunctionType *fn_ty =
2211 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2212 fmin_fn =
2213 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2214 fmin_fn->addFnAttr(Attribute::ReadNone);
2215 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2216 }
2217
SJW61531372020-06-09 07:31:08 -05002218 std::string floor_name =
2219 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002220 Function *floor_fn = M.getFunction(floor_name);
2221 if (!floor_fn) {
2222 // Make the floor function.
2223 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2224 floor_fn =
2225 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2226 floor_fn->addFnAttr(Attribute::ReadNone);
2227 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2228 }
2229
SJW61531372020-06-09 07:31:08 -05002230 std::string clspv_fract_name =
2231 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002232 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2233 if (!clspv_fract_fn) {
2234 // Make the clspv_fract function.
2235 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2236 clspv_fract_fn = cast<Function>(
2237 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2238 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2239 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2240 }
2241
2242 // Number of significant significand bits, whether represented or not.
2243 unsigned num_significand_bits;
2244 switch (result_ty->getScalarType()->getTypeID()) {
2245 case Type::HalfTyID:
2246 num_significand_bits = 11;
2247 break;
2248 case Type::FloatTyID:
2249 num_significand_bits = 24;
2250 break;
2251 case Type::DoubleTyID:
2252 num_significand_bits = 53;
2253 break;
2254 default:
2255 llvm_unreachable("Unhandled float type when processing fract builtin");
2256 break;
2257 }
2258 // Beware that the disassembler displays this value as
2259 // OpConstant %float 1
2260 // which is not quite right.
2261 const double kJustUnderOneScalar =
2262 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2263
2264 Constant *just_under_one =
2265 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2266 if (result_ty->isVectorTy()) {
2267 just_under_one = ConstantVector::getSplat(
James Pricecf53df42020-04-20 14:41:24 -04002268 {cast<VectorType>(result_ty)->getNumElements(), false},
2269 just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002270 }
2271
2272 IRBuilder<> Builder(CI);
2273
2274 auto arg = CI->getArgOperand(0);
2275 auto ptr = CI->getArgOperand(1);
2276
2277 // Compute floor result and store it.
2278 auto floor = Builder.CreateCall(floor_fn, {arg});
2279 Builder.CreateStore(floor, ptr);
2280
2281 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2282 auto fract_result =
2283 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2284
2285 return fract_result;
2286 });
David Neto62653202017-10-16 19:05:18 -04002287}