blob: 420102fe9ef7ceede400adaf29ce30157ffa5073 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
alan-bakere0902602020-03-23 08:43:40 -040030#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
James Pricec05f6052020-01-14 13:37:20 -050033#include "clspv/DescriptorMap.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040034#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070035
SJW2c317da2020-03-23 07:39:13 -050036#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050037#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040038#include "Passes.h"
39#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050040#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040041
SJW2c317da2020-03-23 07:39:13 -050042using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040043using namespace llvm;
44
45#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
46
47namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000048
// Despite its name, this does not count leading zeros: it returns the
// zero-based index of the most significant set bit of |v| (floor(log2(v))).
// Both 0 and 1 yield 0. Callers subtract two results to obtain the shift
// distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highest_bit = 0;

  // Binary search over the bit positions: whenever anything is set at or
  // above |width|, discard the low |width| bits and record the step.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      highest_bit |= width;
    }
  }

  return highest_bit;
}
68
Kévin Petitfdfa92e2019-09-25 14:20:58 +010069Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
70 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040071 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
72 IntTy = VectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
SJW2c317da2020-03-23 07:39:13 -050077bool replaceCallsWithValue(Function &F,
78 std::function<Value *(CallInst *)> Replacer) {
79
80 bool Changed = false;
81
82 SmallVector<Instruction *, 4> ToRemoves;
83
84 // Walk the users of the function.
85 for (auto &U : F.uses()) {
86 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
87
88 auto NewValue = Replacer(CI);
89
90 if (NewValue != nullptr) {
91 CI->replaceAllUsesWith(NewValue);
92
93 // Lastly, remember to remove the user.
94 ToRemoves.push_back(CI);
95 }
96 }
97 }
98
99 Changed = !ToRemoves.empty();
100
101 // And cleanup the calls we don't use anymore.
102 for (auto V : ToRemoves) {
103 V->eraseFromParent();
104 }
105
106 return Changed;
107}
108
David Neto22f144c2017-06-12 14:26:21 -0400109struct ReplaceOpenCLBuiltinPass final : public ModulePass {
110 static char ID;
111 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
112
113 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500114 bool runOnFunction(Function &F);
115 bool replaceAbs(Function &F);
116 bool replaceAbsDiff(Function &F, bool is_signed);
117 bool replaceCopysign(Function &F);
118 bool replaceRecip(Function &F);
119 bool replaceDivide(Function &F);
120 bool replaceDot(Function &F);
121 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500122 bool replaceExp10(Function &F, const std::string &basename);
123 bool replaceLog10(Function &F, const std::string &basename);
SJW2c317da2020-03-23 07:39:13 -0500124 bool replaceBarrier(Function &F);
125 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100126 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500127 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
128 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
129 bool replaceIsFinite(Function &F);
130 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
131 bool replaceUpsample(Function &F);
132 bool replaceRotate(Function &F);
133 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
134 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
135 bool replaceSelect(Function &F);
136 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500137 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500138 bool replaceSignbit(Function &F, bool is_vec);
139 bool replaceMul(Function &F, bool is_float, bool is_mad);
140 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
141 bool replaceVloadHalf(Function &F);
142 bool replaceVloadHalf2(Function &F);
143 bool replaceVloadHalf4(Function &F);
144 bool replaceClspvVloadaHalf2(Function &F);
145 bool replaceClspvVloadaHalf4(Function &F);
146 bool replaceVstoreHalf(Function &F, int vec_size);
147 bool replaceVstoreHalf(Function &F);
148 bool replaceVstoreHalf2(Function &F);
149 bool replaceVstoreHalf4(Function &F);
150 bool replaceHalfReadImage(Function &F);
151 bool replaceHalfWriteImage(Function &F);
152 bool replaceSampledReadImageWithIntCoords(Function &F);
153 bool replaceAtomics(Function &F, spv::Op Op);
154 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
155 bool replaceCross(Function &F);
156 bool replaceFract(Function &F, int vec_size);
157 bool replaceVload(Function &F);
158 bool replaceVstore(Function &F);
David Neto22f144c2017-06-12 14:26:21 -0400159};
SJW2c317da2020-03-23 07:39:13 -0500160
Kévin Petit91bc72e2019-04-08 15:17:46 +0100161} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400162
163char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400164INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
165 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400166
167namespace clspv {
168ModulePass *createReplaceOpenCLBuiltinPass() {
169 return new ReplaceOpenCLBuiltinPass();
170}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400171} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400172
173bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500174 std::list<Function *> func_list;
175 for (auto &F : M.getFunctionList()) {
176 // process only function declarations
177 if (F.isDeclaration() && runOnFunction(F)) {
178 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000179 }
180 }
SJW2c317da2020-03-23 07:39:13 -0500181 if (func_list.size() != 0) {
182 // recursively convert functions, but first remove dead
183 for (auto *F : func_list) {
184 if (F->use_empty()) {
185 F->eraseFromParent();
186 }
187 }
188 runOnModule(M);
189 return true;
190 }
191 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000192}
193
SJW2c317da2020-03-23 07:39:13 -0500194bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
195 auto &FI = Builtins::Lookup(&F);
196 switch (FI.getType()) {
197 case Builtins::kAbs:
198 if (!FI.getParameter(0).is_signed) {
199 return replaceAbs(F);
200 }
201 break;
202 case Builtins::kAbsDiff:
203 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
204 case Builtins::kCopysign:
205 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100206
SJW2c317da2020-03-23 07:39:13 -0500207 case Builtins::kHalfRecip:
208 case Builtins::kNativeRecip:
209 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100210
SJW2c317da2020-03-23 07:39:13 -0500211 case Builtins::kHalfDivide:
212 case Builtins::kNativeDivide:
213 return replaceDivide(F);
214
215 case Builtins::kDot:
216 return replaceDot(F);
217
218 case Builtins::kExp10:
219 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500220 case Builtins::kNativeExp10:
221 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500222
223 case Builtins::kLog10:
224 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500225 case Builtins::kNativeLog10:
226 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500227
228 case Builtins::kFmod:
229 return replaceFmod(F);
230
231 case Builtins::kBarrier:
232 case Builtins::kWorkGroupBarrier:
233 return replaceBarrier(F);
234
235 case Builtins::kMemFence:
236 return replaceMemFence(F, spv::MemorySemanticsSequentiallyConsistentMask);
237 case Builtins::kReadMemFence:
238 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
239 case Builtins::kWriteMemFence:
240 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
241
242 // Relational
243 case Builtins::kIsequal:
244 return replaceRelational(F, CmpInst::FCMP_OEQ,
245 FI.getParameter(0).vector_size ? -1 : 1);
246 case Builtins::kIsgreater:
247 return replaceRelational(F, CmpInst::FCMP_OGT,
248 FI.getParameter(0).vector_size ? -1 : 1);
249 case Builtins::kIsgreaterequal:
250 return replaceRelational(F, CmpInst::FCMP_OGE,
251 FI.getParameter(0).vector_size ? -1 : 1);
252 case Builtins::kIsless:
253 return replaceRelational(F, CmpInst::FCMP_OLT,
254 FI.getParameter(0).vector_size ? -1 : 1);
255 case Builtins::kIslessequal:
256 return replaceRelational(F, CmpInst::FCMP_OLE,
257 FI.getParameter(0).vector_size ? -1 : 1);
258 case Builtins::kIsnotequal:
259 return replaceRelational(F, CmpInst::FCMP_ONE,
260 FI.getParameter(0).vector_size ? -1 : 1);
261
262 case Builtins::kIsinf: {
263 bool is_vec = FI.getParameter(0).vector_size != 0;
264 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
265 }
266 case Builtins::kIsnan: {
267 bool is_vec = FI.getParameter(0).vector_size != 0;
268 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
269 }
270
271 case Builtins::kIsfinite:
272 return replaceIsFinite(F);
273
274 case Builtins::kAll: {
275 bool is_vec = FI.getParameter(0).vector_size != 0;
276 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
277 }
278 case Builtins::kAny: {
279 bool is_vec = FI.getParameter(0).vector_size != 0;
280 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
281 }
282
283 case Builtins::kUpsample:
284 return replaceUpsample(F);
285
286 case Builtins::kRotate:
287 return replaceRotate(F);
288
289 case Builtins::kConvert:
290 return replaceConvert(F, FI.getParameter(0).is_signed,
291 FI.getReturnType().is_signed);
292
293 case Builtins::kAtomicInc:
294 return replaceAtomics(F, spv::OpAtomicIIncrement);
295 case Builtins::kAtomicDec:
296 return replaceAtomics(F, spv::OpAtomicIDecrement);
297 case Builtins::kAtomicCmpxchg:
298 return replaceAtomics(F, spv::OpAtomicCompareExchange);
299 case Builtins::kAtomicAdd:
300 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
301 case Builtins::kAtomicSub:
302 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
303 case Builtins::kAtomicXchg:
304 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
305 case Builtins::kAtomicMin:
306 return replaceAtomics(F, FI.getParameter(0).is_signed
307 ? llvm::AtomicRMWInst::Min
308 : llvm::AtomicRMWInst::UMin);
309 case Builtins::kAtomicMax:
310 return replaceAtomics(F, FI.getParameter(0).is_signed
311 ? llvm::AtomicRMWInst::Max
312 : llvm::AtomicRMWInst::UMax);
313 case Builtins::kAtomicAnd:
314 return replaceAtomics(F, llvm::AtomicRMWInst::And);
315 case Builtins::kAtomicOr:
316 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
317 case Builtins::kAtomicXor:
318 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
319
320 case Builtins::kCross:
321 if (FI.getParameter(0).vector_size == 4) {
322 return replaceCross(F);
323 }
324 break;
325
326 case Builtins::kFract:
327 if (FI.getParameterCount()) {
328 return replaceFract(F, FI.getParameter(0).vector_size);
329 }
330 break;
331
332 case Builtins::kMadHi:
333 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
334 case Builtins::kMulHi:
335 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
336
337 case Builtins::kMad:
338 case Builtins::kMad24:
339 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
340 true);
341 case Builtins::kMul24:
342 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
343 false);
344
345 case Builtins::kSelect:
346 return replaceSelect(F);
347
348 case Builtins::kBitselect:
349 return replaceBitSelect(F);
350
351 case Builtins::kVload:
352 return replaceVload(F);
353
354 case Builtins::kVloadaHalf:
355 case Builtins::kVloadHalf:
356 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
357
358 case Builtins::kVstore:
359 return replaceVstore(F);
360
361 case Builtins::kVstoreHalf:
362 case Builtins::kVstoreaHalf:
363 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
364
365 case Builtins::kSmoothstep: {
366 int vec_size = FI.getLastParameter().vector_size;
367 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500368 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500369 }
370 break;
371 }
372 case Builtins::kStep: {
373 int vec_size = FI.getLastParameter().vector_size;
374 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500375 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500376 }
377 break;
378 }
379
380 case Builtins::kSignbit:
381 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
382
383 case Builtins::kReadImageh:
384 return replaceHalfReadImage(F);
385 case Builtins::kReadImagef:
386 case Builtins::kReadImagei:
387 case Builtins::kReadImageui: {
388 if (FI.getParameter(1).isSampler() &&
389 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
390 return replaceSampledReadImageWithIntCoords(F);
391 }
392 break;
393 }
394
395 case Builtins::kWriteImageh:
396 return replaceHalfWriteImage(F);
397
Kévin Petit1cb45112020-04-27 18:55:48 +0100398 case Builtins::kPrefetch:
399 return replacePrefetch(F);
400
SJW2c317da2020-03-23 07:39:13 -0500401 default:
402 break;
403 }
404
405 return false;
406}
407
408bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
409 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400410 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100411}
412
SJW2c317da2020-03-23 07:39:13 -0500413bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
414 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100415 auto XValue = CI->getOperand(0);
416 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100417
Kévin Petite8edce32019-04-10 14:23:32 +0100418 IRBuilder<> Builder(CI);
419 auto XmY = Builder.CreateSub(XValue, YValue);
420 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100421
SJW2c317da2020-03-23 07:39:13 -0500422 Value *Cmp = nullptr;
423 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100424 Cmp = Builder.CreateICmpSGT(YValue, XValue);
425 } else {
426 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100427 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100428
Kévin Petite8edce32019-04-10 14:23:32 +0100429 return Builder.CreateSelect(Cmp, YmX, XmY);
430 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100431}
432
SJW2c317da2020-03-23 07:39:13 -0500433bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
434 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100435 auto XValue = CI->getOperand(0);
436 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100437
Kévin Petite8edce32019-04-10 14:23:32 +0100438 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100439
SJW2c317da2020-03-23 07:39:13 -0500440 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400441 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
442 IntTy = VectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100443 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100444
Kévin Petite8edce32019-04-10 14:23:32 +0100445 // Return X with the sign of Y
446
447 // Sign bit masks
448 auto SignBit = IntTy->getScalarSizeInBits() - 1;
449 auto SignBitMask = 1 << SignBit;
450 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
451 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
452
453 IRBuilder<> Builder(CI);
454
455 // Extract sign of Y
456 auto YInt = Builder.CreateBitCast(YValue, IntTy);
457 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
458
459 // Clear sign bit in X
460 auto XInt = Builder.CreateBitCast(XValue, IntTy);
461 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
462
463 // Insert sign bit of Y into X
464 auto NewXInt = Builder.CreateOr(XInt, YSign);
465
466 // And cast back to floating-point
467 return Builder.CreateBitCast(NewXInt, Ty);
468 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100469}
470
SJW2c317da2020-03-23 07:39:13 -0500471bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
472 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100473 // Recip has one arg.
474 auto Arg = CI->getOperand(0);
475 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
476 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
477 });
David Neto22f144c2017-06-12 14:26:21 -0400478}
479
SJW2c317da2020-03-23 07:39:13 -0500480bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
481 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100482 auto Op0 = CI->getOperand(0);
483 auto Op1 = CI->getOperand(1);
484 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
485 });
David Neto22f144c2017-06-12 14:26:21 -0400486}
487
SJW2c317da2020-03-23 07:39:13 -0500488bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
489 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100490 auto Op0 = CI->getOperand(0);
491 auto Op1 = CI->getOperand(1);
492
SJW2c317da2020-03-23 07:39:13 -0500493 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100494 if (Op0->getType()->isVectorTy()) {
495 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
496 CI->getType(), {Op0, Op1});
497 } else {
498 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
499 }
500
501 return V;
502 });
503}
504
SJW2c317da2020-03-23 07:39:13 -0500505bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500506 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500507 // convert to natural
508 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500509 std::string NewFName = basename.substr(0, slen);
510 NewFName =
511 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400512
SJW2c317da2020-03-23 07:39:13 -0500513 Module &M = *F.getParent();
514 return replaceCallsWithValue(F, [&](CallInst *CI) {
515 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
516
517 auto Arg = CI->getOperand(0);
518
519 // Constant of the natural log of 10 (ln(10)).
520 const double Ln10 =
521 2.302585092994045684017991454684364207601101488628772976033;
522
523 auto Mul = BinaryOperator::Create(
524 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
525
526 return CallInst::Create(NewF, Mul, "", CI);
527 });
David Neto22f144c2017-06-12 14:26:21 -0400528}
529
SJW2c317da2020-03-23 07:39:13 -0500530bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100531 // OpenCL fmod(x,y) is x - y * trunc(x/y)
532 // The sign for a non-zero result is taken from x.
533 // (Try an example.)
534 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500535 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100536 auto Op0 = CI->getOperand(0);
537 auto Op1 = CI->getOperand(1);
538 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
539 });
540}
541
SJW2c317da2020-03-23 07:39:13 -0500542bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500543 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500544 // convert to natural
545 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500546 std::string NewFName = basename.substr(0, slen);
547 NewFName =
548 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400549
SJW2c317da2020-03-23 07:39:13 -0500550 Module &M = *F.getParent();
551 return replaceCallsWithValue(F, [&](CallInst *CI) {
552 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
553
554 auto Arg = CI->getOperand(0);
555
556 // Constant of the reciprocal of the natural log of 10 (ln(10)).
557 const double Ln10 =
558 0.434294481903251827651128918916605082294397005803666566114;
559
560 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
561
562 return BinaryOperator::Create(Instruction::FMul,
563 ConstantFP::get(Arg->getType(), Ln10), NewCI,
564 "", CI);
565 });
David Neto22f144c2017-06-12 14:26:21 -0400566}
567
SJW2c317da2020-03-23 07:39:13 -0500568bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F) {
David Neto22f144c2017-06-12 14:26:21 -0400569
570 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
571
SJW2c317da2020-03-23 07:39:13 -0500572 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100573 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400574
Kévin Petitc4643922019-06-17 19:32:05 +0100575 // We need to map the OpenCL constants to the SPIR-V equivalents.
576 const auto LocalMemFence =
577 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
578 const auto GlobalMemFence =
579 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
580 const auto ConstantSequentiallyConsistent = ConstantInt::get(
581 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
582 const auto ConstantScopeDevice =
583 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
584 const auto ConstantScopeWorkgroup =
585 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400586
Kévin Petitc4643922019-06-17 19:32:05 +0100587 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
588 const auto LocalMemFenceMask =
589 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
590 const auto WorkgroupShiftAmount =
591 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
592 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
593 Instruction::Shl, LocalMemFenceMask,
594 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400595
Kévin Petitc4643922019-06-17 19:32:05 +0100596 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
597 const auto GlobalMemFenceMask =
598 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
599 const auto UniformShiftAmount =
600 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
601 const auto MemorySemanticsUniform = BinaryOperator::Create(
602 Instruction::Shl, GlobalMemFenceMask,
603 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400604
Kévin Petitc4643922019-06-17 19:32:05 +0100605 // And combine the above together, also adding in
606 // MemorySemanticsSequentiallyConsistentMask.
607 auto MemorySemantics =
608 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
609 ConstantSequentiallyConsistent, "", CI);
610 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
611 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400612
Kévin Petitc4643922019-06-17 19:32:05 +0100613 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
614 // Device Scope, otherwise Workgroup Scope.
615 const auto Cmp =
616 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
617 GlobalMemFence, "", CI);
618 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
619 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400620
Kévin Petitc4643922019-06-17 19:32:05 +0100621 // Lastly, the Execution Scope is always Workgroup Scope.
622 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400623
Kévin Petitc4643922019-06-17 19:32:05 +0100624 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
625 {Attribute::NoDuplicate}, CI->getType(),
626 {ExecutionScope, MemoryScope, MemorySemantics});
627 });
David Neto22f144c2017-06-12 14:26:21 -0400628}
629
SJW2c317da2020-03-23 07:39:13 -0500630bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
631 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400632
SJW2c317da2020-03-23 07:39:13 -0500633 return replaceCallsWithValue(F, [&](CallInst *CI) {
634 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
David Neto22f144c2017-06-12 14:26:21 -0400635
SJW2c317da2020-03-23 07:39:13 -0500636 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400637
SJW2c317da2020-03-23 07:39:13 -0500638 // We need to map the OpenCL constants to the SPIR-V equivalents.
639 const auto LocalMemFence =
640 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
641 const auto GlobalMemFence =
642 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
643 const auto ConstantMemorySemantics =
644 ConstantInt::get(Arg->getType(), semantics);
645 const auto ConstantScopeDevice =
646 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -0400647
SJW2c317da2020-03-23 07:39:13 -0500648 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
649 const auto LocalMemFenceMask =
650 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
651 const auto WorkgroupShiftAmount =
652 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
653 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
654 Instruction::Shl, LocalMemFenceMask,
655 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400656
SJW2c317da2020-03-23 07:39:13 -0500657 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
658 const auto GlobalMemFenceMask =
659 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
660 const auto UniformShiftAmount =
661 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
662 const auto MemorySemanticsUniform = BinaryOperator::Create(
663 Instruction::Shl, GlobalMemFenceMask,
664 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400665
SJW2c317da2020-03-23 07:39:13 -0500666 // And combine the above together, also adding in
667 // MemorySemanticsSequentiallyConsistentMask.
668 auto MemorySemantics =
669 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
670 ConstantMemorySemantics, "", CI);
671 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
672 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400673
SJW2c317da2020-03-23 07:39:13 -0500674 // Memory Scope is always device.
675 const auto MemoryScope = ConstantScopeDevice;
David Neto22f144c2017-06-12 14:26:21 -0400676
SJW2c317da2020-03-23 07:39:13 -0500677 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier, {}, CI->getType(),
678 {MemoryScope, MemorySemantics});
679 });
David Neto22f144c2017-06-12 14:26:21 -0400680}
681
Kévin Petit1cb45112020-04-27 18:55:48 +0100682bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
683 bool Changed = false;
684
685 SmallVector<Instruction *, 4> ToRemoves;
686
687 // Find all calls to the function
688 for (auto &U : F.uses()) {
689 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
690 ToRemoves.push_back(CI);
691 }
692 }
693
694 Changed = !ToRemoves.empty();
695
696 // Delete them
697 for (auto V : ToRemoves) {
698 V->eraseFromParent();
699 }
700
701 return Changed;
702}
703
SJW2c317da2020-03-23 07:39:13 -0500704bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
705 CmpInst::Predicate P,
706 int32_t C) {
707 return replaceCallsWithValue(F, [&](CallInst *CI) {
708 // The predicate to use in the CmpInst.
709 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400710
SJW2c317da2020-03-23 07:39:13 -0500711 // The value to return for true.
712 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400713
SJW2c317da2020-03-23 07:39:13 -0500714 // The value to return for false.
715 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400716
SJW2c317da2020-03-23 07:39:13 -0500717 auto Arg1 = CI->getOperand(0);
718 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400719
SJW2c317da2020-03-23 07:39:13 -0500720 const auto Cmp =
721 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400722
SJW2c317da2020-03-23 07:39:13 -0500723 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
724 });
David Neto22f144c2017-06-12 14:26:21 -0400725}
726
SJW2c317da2020-03-23 07:39:13 -0500727bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
728 spv::Op SPIRVOp,
729 int32_t C) {
730 Module &M = *F.getParent();
731 return replaceCallsWithValue(F, [&](CallInst *CI) {
732 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400733
SJW2c317da2020-03-23 07:39:13 -0500734 // The value to return for true.
735 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400736
SJW2c317da2020-03-23 07:39:13 -0500737 // The value to return for false.
738 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -0400739
SJW2c317da2020-03-23 07:39:13 -0500740 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -0400741 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
SJW2c317da2020-03-23 07:39:13 -0500742 CorrespondingBoolTy = VectorType::get(Type::getInt1Ty(M.getContext()),
James Pricecf53df42020-04-20 14:41:24 -0400743 CIVecTy->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -0400744 }
David Neto22f144c2017-06-12 14:26:21 -0400745
SJW2c317da2020-03-23 07:39:13 -0500746 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
747 CorrespondingBoolTy, {CI->getOperand(0)});
748
749 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
750 });
David Neto22f144c2017-06-12 14:26:21 -0400751}
752
SJW2c317da2020-03-23 07:39:13 -0500753bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
754 Module &M = *F.getParent();
755 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100756 auto &C = M.getContext();
757 auto Val = CI->getOperand(0);
758 auto ValTy = Val->getType();
759 auto RetTy = CI->getType();
760
761 // Get a suitable integer type to represent the number
762 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
763
764 // Create Mask
765 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -0500766 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100767 switch (ScalarSize) {
768 case 16:
769 InfMask = ConstantInt::get(IntTy, 0x7C00U);
770 break;
771 case 32:
772 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
773 break;
774 case 64:
775 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
776 break;
777 default:
778 llvm_unreachable("Unsupported floating-point type");
779 }
780
781 IRBuilder<> Builder(CI);
782
783 // Bitcast to int
784 auto ValInt = Builder.CreateBitCast(Val, IntTy);
785
786 // Mask and compare
787 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
788 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
789
790 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -0500791 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100792 if (ValTy->isVectorTy()) {
793 RetTrue = ConstantInt::getSigned(RetTy, -1);
794 } else {
795 RetTrue = ConstantInt::get(RetTy, 1);
796 }
797 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
798 });
799}
800
SJW2c317da2020-03-23 07:39:13 -0500801bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
802 Module &M = *F.getParent();
803 return replaceCallsWithValue(F, [&](CallInst *CI) {
804 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400805
SJW2c317da2020-03-23 07:39:13 -0500806 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000807
SJW2c317da2020-03-23 07:39:13 -0500808 // If the argument is a 32-bit int, just use a shift
809 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
810 V = BinaryOperator::Create(Instruction::LShr, Arg,
811 ConstantInt::get(Arg->getType(), 31), "", CI);
812 } else {
813 // The value for zero to compare against.
814 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -0400815
SJW2c317da2020-03-23 07:39:13 -0500816 // The value to return for true.
817 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -0400818
SJW2c317da2020-03-23 07:39:13 -0500819 // The value to return for false.
820 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400821
SJW2c317da2020-03-23 07:39:13 -0500822 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
823 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400824
SJW2c317da2020-03-23 07:39:13 -0500825 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -0400826
SJW2c317da2020-03-23 07:39:13 -0500827 // If we have a function to call, call it!
828 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -0400829
SJW2c317da2020-03-23 07:39:13 -0500830 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -0400831
SJW2c317da2020-03-23 07:39:13 -0500832 const auto NewCI = clspv::InsertSPIRVOp(
833 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
834 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -0400835
SJW2c317da2020-03-23 07:39:13 -0500836 } else {
837 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -0400838 }
839
SJW2c317da2020-03-23 07:39:13 -0500840 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400841 }
SJW2c317da2020-03-23 07:39:13 -0500842 return V;
843 });
David Neto22f144c2017-06-12 14:26:21 -0400844}
845
SJW2c317da2020-03-23 07:39:13 -0500846bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
847 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
848 // Get arguments
849 auto HiValue = CI->getOperand(0);
850 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000851
SJW2c317da2020-03-23 07:39:13 -0500852 // Don't touch overloads that aren't in OpenCL C
853 auto HiType = HiValue->getType();
854 auto LoType = LoValue->getType();
855
856 if (HiType != LoType) {
857 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000858 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000859
SJW2c317da2020-03-23 07:39:13 -0500860 if (!HiType->isIntOrIntVectorTy()) {
861 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000862 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000863
SJW2c317da2020-03-23 07:39:13 -0500864 if (HiType->getScalarSizeInBits() * 2 !=
865 CI->getType()->getScalarSizeInBits()) {
866 return nullptr;
867 }
868
869 if ((HiType->getScalarSizeInBits() != 8) &&
870 (HiType->getScalarSizeInBits() != 16) &&
871 (HiType->getScalarSizeInBits() != 32)) {
872 return nullptr;
873 }
874
James Pricecf53df42020-04-20 14:41:24 -0400875 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
876 unsigned NumElements = HiVecType->getNumElements();
877 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
878 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500879 return nullptr;
880 }
881 }
882
883 // Convert both operands to the result type
884 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
885 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
886
887 // Shift high operand
888 auto ShiftAmount =
889 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
890 auto HiShifted =
891 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
892
893 // OR both results
894 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
895 });
Kévin Petitbf0036c2019-03-06 13:57:10 +0000896}
897
SJW2c317da2020-03-23 07:39:13 -0500898bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
899 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
900 // Get arguments
901 auto SrcValue = CI->getOperand(0);
902 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +0000903
SJW2c317da2020-03-23 07:39:13 -0500904 // Don't touch overloads that aren't in OpenCL C
905 auto SrcType = SrcValue->getType();
906 auto RotType = RotAmount->getType();
907
908 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
909 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000910 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000911
SJW2c317da2020-03-23 07:39:13 -0500912 if (!SrcType->isIntOrIntVectorTy()) {
913 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000914 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000915
SJW2c317da2020-03-23 07:39:13 -0500916 if ((SrcType->getScalarSizeInBits() != 8) &&
917 (SrcType->getScalarSizeInBits() != 16) &&
918 (SrcType->getScalarSizeInBits() != 32) &&
919 (SrcType->getScalarSizeInBits() != 64)) {
920 return nullptr;
921 }
922
James Pricecf53df42020-04-20 14:41:24 -0400923 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
924 unsigned NumElements = SrcVecType->getNumElements();
925 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
926 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500927 return nullptr;
928 }
929 }
930
931 // The approach used is to shift the top bits down, the bottom bits up
932 // and OR the two shifted values.
933
934 // The rotation amount is to be treated modulo the element size.
935 // Since SPIR-V shift ops don't support this, let's apply the
936 // modulo ahead of shifting. The element size is always a power of
937 // two so we can just AND with a mask.
938 auto ModMask =
939 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
940 RotAmount =
941 BinaryOperator::Create(Instruction::And, RotAmount, ModMask, "", CI);
942
943 // Let's calc the amount by which to shift top bits down
944 auto ScalarSize = ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
945 auto DownAmount =
946 BinaryOperator::Create(Instruction::Sub, ScalarSize, RotAmount, "", CI);
947
948 // Now shift the bottom bits up and the top bits down
949 auto LoRotated =
950 BinaryOperator::Create(Instruction::Shl, SrcValue, RotAmount, "", CI);
951 auto HiRotated =
952 BinaryOperator::Create(Instruction::LShr, SrcValue, DownAmount, "", CI);
953
954 // Finally OR the two shifted values
955 return BinaryOperator::Create(Instruction::Or, LoRotated, HiRotated, "",
956 CI);
957 });
Kévin Petitd44eef52019-03-08 13:22:14 +0000958}
959
SJW2c317da2020-03-23 07:39:13 -0500960bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
961 bool DstIsSigned) {
962 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
963 Value *V = nullptr;
964 // Get arguments
965 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000966
SJW2c317da2020-03-23 07:39:13 -0500967 // Don't touch overloads that aren't in OpenCL C
968 auto SrcType = SrcValue->getType();
969 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000970
SJW2c317da2020-03-23 07:39:13 -0500971 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
972 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
973 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000974 }
975
James Pricecf53df42020-04-20 14:41:24 -0400976 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
977 unsigned SrcNumElements = SrcVecType->getNumElements();
978 unsigned DstNumElements = cast<VectorType>(DstType)->getNumElements();
979 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -0500980 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000981 }
982
James Pricecf53df42020-04-20 14:41:24 -0400983 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
984 (SrcNumElements != 4) && (SrcNumElements != 8) &&
985 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500986 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000987 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000988 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000989
SJW2c317da2020-03-23 07:39:13 -0500990 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
991 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
992
993 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
994 bool DstIsInt = DstType->isIntOrIntVectorTy();
995
996 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
997 // Unnecessary cast operation.
998 V = SrcValue;
999 } else if (SrcIsFloat && DstIsFloat) {
1000 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1001 } else if (SrcIsFloat && DstIsInt) {
1002 if (DstIsSigned) {
1003 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1004 } else {
1005 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1006 }
1007 } else if (SrcIsInt && DstIsFloat) {
1008 if (SrcIsSigned) {
1009 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1010 } else {
1011 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1012 }
1013 } else if (SrcIsInt && DstIsInt) {
1014 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1015 } else {
1016 // Not something we're supposed to handle, just move on
1017 }
1018
1019 return V;
1020 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001021}
1022
SJW2c317da2020-03-23 07:39:13 -05001023bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1024 bool is_mad) {
1025 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1026 Value *V = nullptr;
1027 // Get arguments
1028 auto AValue = CI->getOperand(0);
1029 auto BValue = CI->getOperand(1);
1030 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001031
SJW2c317da2020-03-23 07:39:13 -05001032 // Don't touch overloads that aren't in OpenCL C
1033 auto AType = AValue->getType();
1034 auto BType = BValue->getType();
1035 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001036
SJW2c317da2020-03-23 07:39:13 -05001037 if ((AType != BType) || (CI->getType() != AType) ||
1038 (is_mad && (AType != CType))) {
1039 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001040 }
1041
SJW2c317da2020-03-23 07:39:13 -05001042 if (!AType->isIntOrIntVectorTy()) {
1043 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001044 }
Kévin Petit8a560882019-03-21 15:24:34 +00001045
SJW2c317da2020-03-23 07:39:13 -05001046 if ((AType->getScalarSizeInBits() != 8) &&
1047 (AType->getScalarSizeInBits() != 16) &&
1048 (AType->getScalarSizeInBits() != 32) &&
1049 (AType->getScalarSizeInBits() != 64)) {
1050 return V;
1051 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001052
James Pricecf53df42020-04-20 14:41:24 -04001053 if (auto AVecType = dyn_cast<VectorType>(AType)) {
1054 unsigned NumElements = AVecType->getNumElements();
1055 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1056 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001057 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001058 }
1059 }
1060
SJW2c317da2020-03-23 07:39:13 -05001061 // Our SPIR-V op returns a struct, create a type for it
1062 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1063 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001064
SJW2c317da2020-03-23 07:39:13 -05001065 // Select the appropriate signed/unsigned SPIR-V op
1066 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1067
1068 // Call the SPIR-V op
1069 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1070 ExMulRetType, {AValue, BValue});
1071
1072 // Get the high part of the result
1073 unsigned Idxs[] = {1};
1074 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1075
1076 // If we're handling a mad_hi, add the third argument to the result
1077 if (is_mad) {
1078 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001079 }
1080
SJW2c317da2020-03-23 07:39:13 -05001081 return V;
1082 });
Kévin Petit8a560882019-03-21 15:24:34 +00001083}
1084
SJW2c317da2020-03-23 07:39:13 -05001085bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1086 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1087 // Get arguments
1088 auto FalseValue = CI->getOperand(0);
1089 auto TrueValue = CI->getOperand(1);
1090 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001091
SJW2c317da2020-03-23 07:39:13 -05001092 // Don't touch overloads that aren't in OpenCL C
1093 auto FalseType = FalseValue->getType();
1094 auto TrueType = TrueValue->getType();
1095 auto PredicateType = PredicateValue->getType();
1096
1097 if (FalseType != TrueType) {
1098 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001099 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001100
SJW2c317da2020-03-23 07:39:13 -05001101 if (!PredicateType->isIntOrIntVectorTy()) {
1102 return nullptr;
1103 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001104
SJW2c317da2020-03-23 07:39:13 -05001105 if (!FalseType->isIntOrIntVectorTy() &&
1106 !FalseType->getScalarType()->isFloatingPointTy()) {
1107 return nullptr;
1108 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001109
SJW2c317da2020-03-23 07:39:13 -05001110 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1111 return nullptr;
1112 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001113
SJW2c317da2020-03-23 07:39:13 -05001114 if (FalseType->getScalarSizeInBits() !=
1115 PredicateType->getScalarSizeInBits()) {
1116 return nullptr;
1117 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001118
James Pricecf53df42020-04-20 14:41:24 -04001119 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
1120 unsigned NumElements = FalseVecType->getNumElements();
1121 if (NumElements != cast<VectorType>(PredicateType)->getNumElements()) {
SJW2c317da2020-03-23 07:39:13 -05001122 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001123 }
1124
James Pricecf53df42020-04-20 14:41:24 -04001125 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1126 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001127 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001128 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001129 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001130
SJW2c317da2020-03-23 07:39:13 -05001131 // Create constant
1132 const auto ZeroValue = Constant::getNullValue(PredicateType);
1133
1134 // Scalar and vector are to be treated differently
1135 CmpInst::Predicate Pred;
1136 if (PredicateType->isVectorTy()) {
1137 Pred = CmpInst::ICMP_SLT;
1138 } else {
1139 Pred = CmpInst::ICMP_NE;
1140 }
1141
1142 // Create comparison instruction
1143 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1144 ZeroValue, "", CI);
1145
1146 // Create select
1147 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1148 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001149}
1150
SJW2c317da2020-03-23 07:39:13 -05001151bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1152 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1153 Value *V = nullptr;
1154 if (CI->getNumOperands() != 4) {
1155 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001156 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001157
SJW2c317da2020-03-23 07:39:13 -05001158 // Get arguments
1159 auto FalseValue = CI->getOperand(0);
1160 auto TrueValue = CI->getOperand(1);
1161 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001162
SJW2c317da2020-03-23 07:39:13 -05001163 // Don't touch overloads that aren't in OpenCL C
1164 auto FalseType = FalseValue->getType();
1165 auto TrueType = TrueValue->getType();
1166 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001167
SJW2c317da2020-03-23 07:39:13 -05001168 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1169 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001170 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001171
James Pricecf53df42020-04-20 14:41:24 -04001172 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001173 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1174 !TrueType->getScalarType()->isIntegerTy()) {
1175 return V;
1176 }
James Pricecf53df42020-04-20 14:41:24 -04001177 unsigned NumElements = TrueVecType->getNumElements();
1178 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1179 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001180 return V;
1181 }
1182 }
1183
1184 // Remember the type of the operands
1185 auto OpType = TrueType;
1186
1187 // The actual bit selection will always be done on an integer type,
1188 // declare it here
1189 Type *BitType;
1190
1191 // If the operands are float, then bitcast them to int
1192 if (OpType->getScalarType()->isFloatingPointTy()) {
1193
1194 // First create the new type
1195 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1196
1197 // Then bitcast all operands
1198 PredicateValue =
1199 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1200 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1201 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1202
1203 } else {
1204 // The operands have an integer type, use it directly
1205 BitType = OpType;
1206 }
1207
1208 // All the operands are now always integers
1209 // implement as (c & b) | (~c & a)
1210
1211 // Create our negated predicate value
1212 auto AllOnes = Constant::getAllOnesValue(BitType);
1213 auto NotPredicateValue = BinaryOperator::Create(
1214 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1215
1216 // Then put everything together
1217 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1218 FalseValue, "", CI);
1219 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1220 TrueValue, "", CI);
1221
1222 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1223
1224 // If we were dealing with a floating point type, we must bitcast
1225 // the result back to that
1226 if (OpType->getScalarType()->isFloatingPointTy()) {
1227 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1228 }
1229
1230 return V;
1231 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001232}
1233
SJW61531372020-06-09 07:31:08 -05001234bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001235 // convert to vector versions
1236 Module &M = *F.getParent();
1237 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1238 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1239 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001240
SJW2c317da2020-03-23 07:39:13 -05001241 // First figure out which function we're dealing with
1242 if (is_smooth) {
1243 ArgsToSplat.push_back(CI->getOperand(1));
1244 VectorArg = CI->getOperand(2);
1245 } else {
1246 VectorArg = CI->getOperand(1);
1247 }
1248
1249 // Splat arguments that need to be
1250 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001251 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001252
1253 for (auto arg : ArgsToSplat) {
1254 Value *NewVectorArg = UndefValue::get(VecType);
James Pricecf53df42020-04-20 14:41:24 -04001255 for (auto i = 0; i < VecType->getNumElements(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001256 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1257 NewVectorArg =
1258 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1259 }
1260 SplatArgs.push_back(NewVectorArg);
1261 }
1262
1263 // Replace the call with the vector/vector flavour
1264 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1265 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1266
SJW61531372020-06-09 07:31:08 -05001267 std::string NewFName = Builtins::GetMangledFunctionName(
1268 is_smooth ? "smoothstep" : "step", NewFType);
1269
SJW2c317da2020-03-23 07:39:13 -05001270 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1271
1272 SmallVector<Value *, 3> NewArgs;
1273 for (auto arg : SplatArgs) {
1274 NewArgs.push_back(arg);
1275 }
1276 NewArgs.push_back(VectorArg);
1277
1278 return CallInst::Create(NewF, NewArgs, "", CI);
1279 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001280}
1281
SJW2c317da2020-03-23 07:39:13 -05001282bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001283 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1284 auto Arg = CI->getOperand(0);
1285 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001286
SJW2c317da2020-03-23 07:39:13 -05001287 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001288
SJW2c317da2020-03-23 07:39:13 -05001289 return BinaryOperator::Create(Op, Bitcast,
1290 ConstantInt::get(CI->getType(), 31), "", CI);
1291 });
David Neto22f144c2017-06-12 14:26:21 -04001292}
1293
SJW2c317da2020-03-23 07:39:13 -05001294bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1295 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001296 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1297 // The multiply instruction to use.
1298 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001299
SJW2c317da2020-03-23 07:39:13 -05001300 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001301
SJW2c317da2020-03-23 07:39:13 -05001302 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1303 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001304
SJW2c317da2020-03-23 07:39:13 -05001305 if (is_mad) {
1306 // The add instruction to use.
1307 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001308
SJW2c317da2020-03-23 07:39:13 -05001309 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001310 }
David Neto22f144c2017-06-12 14:26:21 -04001311
SJW2c317da2020-03-23 07:39:13 -05001312 return V;
1313 });
David Neto22f144c2017-06-12 14:26:21 -04001314}
1315
SJW2c317da2020-03-23 07:39:13 -05001316bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001317 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1318 Value *V = nullptr;
1319 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001320
SJW2c317da2020-03-23 07:39:13 -05001321 auto data_type = data->getType();
1322 if (!data_type->isVectorTy())
1323 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001324
James Pricecf53df42020-04-20 14:41:24 -04001325 auto vec_data_type = cast<VectorType>(data_type);
1326
1327 auto elems = vec_data_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001328 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1329 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001330
SJW2c317da2020-03-23 07:39:13 -05001331 auto offset = CI->getOperand(1);
1332 auto ptr = CI->getOperand(2);
1333 auto ptr_type = ptr->getType();
1334 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001335 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001336 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001337
SJW2c317da2020-03-23 07:39:13 -05001338 // Avoid pointer casts. Instead generate the correct number of stores
1339 // and rely on drivers to coalesce appropriately.
1340 IRBuilder<> builder(CI);
1341 auto elems_const = builder.getInt32(elems);
1342 auto adjust = builder.CreateMul(offset, elems_const);
1343 for (auto i = 0; i < elems; ++i) {
1344 auto idx = builder.getInt32(i);
1345 auto add = builder.CreateAdd(adjust, idx);
1346 auto gep = builder.CreateGEP(ptr, add);
1347 auto extract = builder.CreateExtractElement(data, i);
1348 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001349 }
SJW2c317da2020-03-23 07:39:13 -05001350 return V;
1351 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001352}
1353
SJW2c317da2020-03-23 07:39:13 -05001354bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001355 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1356 Value *V = nullptr;
1357 auto ret_type = F.getReturnType();
1358 if (!ret_type->isVectorTy())
1359 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001360
James Pricecf53df42020-04-20 14:41:24 -04001361 auto vec_ret_type = cast<VectorType>(ret_type);
1362
1363 auto elems = vec_ret_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001364 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1365 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001366
SJW2c317da2020-03-23 07:39:13 -05001367 auto offset = CI->getOperand(0);
1368 auto ptr = CI->getOperand(1);
1369 auto ptr_type = ptr->getType();
1370 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001371 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001372 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001373
SJW2c317da2020-03-23 07:39:13 -05001374 // Avoid pointer casts. Instead generate the correct number of loads
1375 // and rely on drivers to coalesce appropriately.
1376 IRBuilder<> builder(CI);
1377 auto elems_const = builder.getInt32(elems);
1378 V = UndefValue::get(ret_type);
1379 auto adjust = builder.CreateMul(offset, elems_const);
1380 for (auto i = 0; i < elems; ++i) {
1381 auto idx = builder.getInt32(i);
1382 auto add = builder.CreateAdd(adjust, idx);
1383 auto gep = builder.CreateGEP(ptr, add);
1384 auto load = builder.CreateLoad(gep);
1385 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001386 }
SJW2c317da2020-03-23 07:39:13 -05001387 return V;
1388 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001389}
1390
SJW2c317da2020-03-23 07:39:13 -05001391bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1392 const std::string &name,
1393 int vec_size) {
1394 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1395 if (!vec_size) {
1396 // deduce vec_size from last character of name (e.g. vload_half4)
1397 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001398 }
SJW2c317da2020-03-23 07:39:13 -05001399 switch (vec_size) {
1400 case 2:
1401 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1402 case 4:
1403 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1404 case 0:
1405 if (!is_clspv_version) {
1406 return replaceVloadHalf(F);
1407 }
1408 default:
1409 llvm_unreachable("Unsupported vload_half vector size");
1410 break;
1411 }
1412 return false;
David Neto22f144c2017-06-12 14:26:21 -04001413}
1414
SJW2c317da2020-03-23 07:39:13 -05001415bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1416 Module &M = *F.getParent();
1417 return replaceCallsWithValue(F, [&](CallInst *CI) {
1418 // The index argument from vload_half.
1419 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001420
SJW2c317da2020-03-23 07:39:13 -05001421 // The pointer argument from vload_half.
1422 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001423
SJW2c317da2020-03-23 07:39:13 -05001424 auto IntTy = Type::getInt32Ty(M.getContext());
1425 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1426 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1427
1428 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001429 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001430
1431 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1432
1433 Value *V = nullptr;
1434
alan-baker7efcaaa2020-05-06 19:33:27 -04001435 bool supports_16bit_storage = true;
1436 switch (Arg1->getType()->getPointerAddressSpace()) {
1437 case clspv::AddressSpace::Global:
1438 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1439 clspv::Option::StorageClass::kSSBO);
1440 break;
1441 case clspv::AddressSpace::Constant:
1442 if (clspv::Option::ConstantArgsInUniformBuffer())
1443 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1444 clspv::Option::StorageClass::kUBO);
1445 else
1446 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1447 clspv::Option::StorageClass::kSSBO);
1448 break;
1449 default:
1450 // Clspv will emit the Float16 capability if the half type is
1451 // encountered. That capability covers private and local addressspaces.
1452 break;
1453 }
1454
1455 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001456 auto ShortTy = Type::getInt16Ty(M.getContext());
1457 auto ShortPointerTy =
1458 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1459
1460 // Cast the half* pointer to short*.
1461 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1462
1463 // Index into the correct address of the casted pointer.
1464 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1465
1466 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001467 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001468
1469 // ZExt the short -> int.
1470 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1471
1472 // Get our float2.
1473 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1474
1475 // Extract out the bottom element which is our float result.
1476 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1477 } else {
1478 // Assume the pointer argument points to storage aligned to 32bits
1479 // or more.
1480 // TODO(dneto): Do more analysis to make sure this is true?
1481 //
      // Replace call vload_half(i32 %index, half addrspace(1)* %base)
      // with:
      //
      //   %base_i32_ptr = bitcast half addrspace(1)* %base
      //                       to i32 addrspace(1)*
      //   %index_is_odd32 = and i32 %index, 1
      //   %index_i32 = lshr i32 %index, 1
      //   %in_ptr = getelementptr i32, i32 addrspace(1)* %base_i32_ptr,
      //                       %index_i32
      //   %value_i32 = load i32, i32 addrspace(1)* %in_ptr
      //   %converted = call <2 x float> @spirv.unpack.v2f16(i32 %value_i32)
      //   %value = extractelement <2 x float> %converted, %index_is_odd32
1492
1493 auto IntPointerTy =
1494 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1495
1496 // Cast the base pointer to int*.
1497 // In a valid call (according to assumptions), this should get
1498 // optimized away in the simplify GEP pass.
1499 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1500
1501 auto One = ConstantInt::get(IntTy, 1);
1502 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1503 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1504
1505 // Index into the correct address of the casted pointer.
1506 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1507
1508 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001509 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001510
1511 // Get our float2.
1512 auto Call = CallInst::Create(NewF, Load, "", CI);
1513
1514 // Extract out the float result, where the element number is
1515 // determined by whether the original index was even or odd.
1516 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1517 }
1518 return V;
1519 });
1520}
1521
1522bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1523 Module &M = *F.getParent();
1524 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001525 // The index argument from vload_half.
1526 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001527
Kévin Petite8edce32019-04-10 14:23:32 +01001528 // The pointer argument from vload_half.
1529 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001530
Kévin Petite8edce32019-04-10 14:23:32 +01001531 auto IntTy = Type::getInt32Ty(M.getContext());
1532 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001533 auto NewPointerTy =
1534 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001535 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001536
Kévin Petite8edce32019-04-10 14:23:32 +01001537 // Cast the half* pointer to int*.
1538 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001539
Kévin Petite8edce32019-04-10 14:23:32 +01001540 // Index into the correct address of the casted pointer.
1541 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001542
Kévin Petite8edce32019-04-10 14:23:32 +01001543 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001544 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001545
Kévin Petite8edce32019-04-10 14:23:32 +01001546 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001547 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001548
Kévin Petite8edce32019-04-10 14:23:32 +01001549 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001550
Kévin Petite8edce32019-04-10 14:23:32 +01001551 // Get our float2.
1552 return CallInst::Create(NewF, Load, "", CI);
1553 });
David Neto22f144c2017-06-12 14:26:21 -04001554}
1555
SJW2c317da2020-03-23 07:39:13 -05001556bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1557 Module &M = *F.getParent();
1558 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001559 // The index argument from vload_half.
1560 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001561
Kévin Petite8edce32019-04-10 14:23:32 +01001562 // The pointer argument from vload_half.
1563 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001564
Kévin Petite8edce32019-04-10 14:23:32 +01001565 auto IntTy = Type::getInt32Ty(M.getContext());
1566 auto Int2Ty = VectorType::get(IntTy, 2);
1567 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001568 auto NewPointerTy =
1569 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001570 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001571
Kévin Petite8edce32019-04-10 14:23:32 +01001572 // Cast the half* pointer to int2*.
1573 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001574
Kévin Petite8edce32019-04-10 14:23:32 +01001575 // Index into the correct address of the casted pointer.
1576 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001577
Kévin Petite8edce32019-04-10 14:23:32 +01001578 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001579 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001580
Kévin Petite8edce32019-04-10 14:23:32 +01001581 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001582 auto X =
1583 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1584 auto Y =
1585 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001586
Kévin Petite8edce32019-04-10 14:23:32 +01001587 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001588 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001589
Kévin Petite8edce32019-04-10 14:23:32 +01001590 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001591
Kévin Petite8edce32019-04-10 14:23:32 +01001592 // Get the lower (x & y) components of our final float4.
1593 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001594
Kévin Petite8edce32019-04-10 14:23:32 +01001595 // Get the higher (z & w) components of our final float4.
1596 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001597
Kévin Petite8edce32019-04-10 14:23:32 +01001598 Constant *ShuffleMask[4] = {
1599 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1600 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001601
Kévin Petite8edce32019-04-10 14:23:32 +01001602 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001603 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1604 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001605 });
David Neto22f144c2017-06-12 14:26:21 -04001606}
1607
SJW2c317da2020-03-23 07:39:13 -05001608bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001609
1610 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1611 //
1612 // %u = load i32 %ptr
1613 // %fxy = call <2 x float> Unpack2xHalf(u)
1614 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001615 Module &M = *F.getParent();
1616 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001617 auto Index = CI->getOperand(0);
1618 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001619
Kévin Petite8edce32019-04-10 14:23:32 +01001620 auto IntTy = Type::getInt32Ty(M.getContext());
1621 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1622 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001623
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001624 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001625 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001626
Kévin Petite8edce32019-04-10 14:23:32 +01001627 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001628 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001629
Kévin Petite8edce32019-04-10 14:23:32 +01001630 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001631
Kévin Petite8edce32019-04-10 14:23:32 +01001632 // Get our final float2.
1633 return CallInst::Create(NewF, Load, "", CI);
1634 });
David Neto6ad93232018-06-07 15:42:58 -07001635}
1636
SJW2c317da2020-03-23 07:39:13 -05001637bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001638
1639 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1640 //
1641 // %u2 = load <2 x i32> %ptr
1642 // %u2xy = extractelement %u2, 0
1643 // %u2zw = extractelement %u2, 1
1644 // %fxy = call <2 x float> Unpack2xHalf(uint)
1645 // %fzw = call <2 x float> Unpack2xHalf(uint)
1646 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001647 Module &M = *F.getParent();
1648 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001649 auto Index = CI->getOperand(0);
1650 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001651
Kévin Petite8edce32019-04-10 14:23:32 +01001652 auto IntTy = Type::getInt32Ty(M.getContext());
1653 auto Int2Ty = VectorType::get(IntTy, 2);
1654 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1655 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001656
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001657 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001658 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001659
Kévin Petite8edce32019-04-10 14:23:32 +01001660 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001661 auto X =
1662 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1663 auto Y =
1664 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001665
Kévin Petite8edce32019-04-10 14:23:32 +01001666 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001667 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001668
Kévin Petite8edce32019-04-10 14:23:32 +01001669 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001670
Kévin Petite8edce32019-04-10 14:23:32 +01001671 // Get the lower (x & y) components of our final float4.
1672 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001673
Kévin Petite8edce32019-04-10 14:23:32 +01001674 // Get the higher (z & w) components of our final float4.
1675 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001676
Kévin Petite8edce32019-04-10 14:23:32 +01001677 Constant *ShuffleMask[4] = {
1678 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1679 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001680
Kévin Petite8edce32019-04-10 14:23:32 +01001681 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001682 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1683 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001684 });
David Neto6ad93232018-06-07 15:42:58 -07001685}
1686
SJW2c317da2020-03-23 07:39:13 -05001687bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1688 switch (vec_size) {
1689 case 0:
1690 return replaceVstoreHalf(F);
1691 case 2:
1692 return replaceVstoreHalf2(F);
1693 case 4:
1694 return replaceVstoreHalf4(F);
1695 default:
1696 llvm_unreachable("Unsupported vstore_half vector size");
1697 break;
1698 }
1699 return false;
1700}
David Neto22f144c2017-06-12 14:26:21 -04001701
SJW2c317da2020-03-23 07:39:13 -05001702bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1703 Module &M = *F.getParent();
1704 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001705 // The value to store.
1706 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001707
Kévin Petite8edce32019-04-10 14:23:32 +01001708 // The index argument from vstore_half.
1709 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001710
Kévin Petite8edce32019-04-10 14:23:32 +01001711 // The pointer argument from vstore_half.
1712 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001713
Kévin Petite8edce32019-04-10 14:23:32 +01001714 auto IntTy = Type::getInt32Ty(M.getContext());
1715 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1716 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1717 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001718
Kévin Petite8edce32019-04-10 14:23:32 +01001719 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001720 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001721
Kévin Petite8edce32019-04-10 14:23:32 +01001722 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001723
Kévin Petite8edce32019-04-10 14:23:32 +01001724 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001725 auto TempVec = InsertElementInst::Create(
1726 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001727
Kévin Petite8edce32019-04-10 14:23:32 +01001728 // Pack the float2 -> half2 (in an int).
1729 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001730
alan-baker7efcaaa2020-05-06 19:33:27 -04001731 bool supports_16bit_storage = true;
1732 switch (Arg2->getType()->getPointerAddressSpace()) {
1733 case clspv::AddressSpace::Global:
1734 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1735 clspv::Option::StorageClass::kSSBO);
1736 break;
1737 case clspv::AddressSpace::Constant:
1738 if (clspv::Option::ConstantArgsInUniformBuffer())
1739 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1740 clspv::Option::StorageClass::kUBO);
1741 else
1742 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1743 clspv::Option::StorageClass::kSSBO);
1744 break;
1745 default:
1746 // Clspv will emit the Float16 capability if the half type is
1747 // encountered. That capability covers private and local addressspaces.
1748 break;
1749 }
1750
SJW2c317da2020-03-23 07:39:13 -05001751 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04001752 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01001753 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001754 auto ShortPointerTy =
1755 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001756
Kévin Petite8edce32019-04-10 14:23:32 +01001757 // Truncate our i32 to an i16.
1758 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001759
Kévin Petite8edce32019-04-10 14:23:32 +01001760 // Cast the half* pointer to short*.
1761 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001762
Kévin Petite8edce32019-04-10 14:23:32 +01001763 // Index into the correct address of the casted pointer.
1764 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001765
Kévin Petite8edce32019-04-10 14:23:32 +01001766 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05001767 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001768 } else {
1769 // We can only write to 32-bit aligned words.
1770 //
1771 // Assuming base is aligned to 32-bits, replace the equivalent of
1772 // vstore_half(value, index, base)
1773 // with:
1774 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1775 // uint32_t write_to_upper_half = index & 1u;
1776 // uint32_t shift = write_to_upper_half << 4;
1777 //
1778 // // Pack the float value as a half number in bottom 16 bits
1779 // // of an i32.
1780 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1781 //
1782 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1783 // ^ ((packed & 0xffff) << shift)
1784 // // We only need relaxed consistency, but OpenCL 1.2 only has
1785 // // sequentially consistent atomics.
1786 // // TODO(dneto): Use relaxed consistency.
1787 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001788 auto IntPointerTy =
1789 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001790
Kévin Petite8edce32019-04-10 14:23:32 +01001791 auto Four = ConstantInt::get(IntTy, 4);
1792 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04001793
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001794 auto IndexIsOdd =
1795 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001796 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001797 auto IndexIntoI32 =
1798 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1799 auto BaseI32Ptr =
1800 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1801 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
1802 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001803 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001804 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001805 auto MaskBitsToWrite =
1806 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1807 auto MaskedCurrent = BinaryOperator::CreateAnd(
1808 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04001809
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001810 auto XLowerBits =
1811 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1812 auto NewBitsToWrite =
1813 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1814 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
1815 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04001816
Kévin Petite8edce32019-04-10 14:23:32 +01001817 // Generate the call to atomi_xor.
1818 SmallVector<Type *, 5> ParamTypes;
1819 // The pointer type.
1820 ParamTypes.push_back(IntPointerTy);
1821 // The Types for memory scope, semantics, and value.
1822 ParamTypes.push_back(IntTy);
1823 ParamTypes.push_back(IntTy);
1824 ParamTypes.push_back(IntTy);
1825 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1826 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04001827
Kévin Petite8edce32019-04-10 14:23:32 +01001828 const auto ConstantScopeDevice =
1829 ConstantInt::get(IntTy, spv::ScopeDevice);
1830 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1831 // (SPIR-V Workgroup).
1832 const auto AddrSpaceSemanticsBits =
1833 IntPointerTy->getPointerAddressSpace() == 1
1834 ? spv::MemorySemanticsUniformMemoryMask
1835 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04001836
Kévin Petite8edce32019-04-10 14:23:32 +01001837 // We're using relaxed consistency here.
1838 const auto ConstantMemorySemantics =
1839 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1840 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04001841
Kévin Petite8edce32019-04-10 14:23:32 +01001842 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1843 ConstantMemorySemantics, ValueToXor};
1844 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05001845
1846 // Return a Nop so the old Call is removed
1847 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
1848 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001849 }
David Neto22f144c2017-06-12 14:26:21 -04001850
SJW2c317da2020-03-23 07:39:13 -05001851 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01001852 });
David Neto22f144c2017-06-12 14:26:21 -04001853}
1854
SJW2c317da2020-03-23 07:39:13 -05001855bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
1856 Module &M = *F.getParent();
1857 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001858 // The value to store.
1859 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001860
Kévin Petite8edce32019-04-10 14:23:32 +01001861 // The index argument from vstore_half.
1862 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001863
Kévin Petite8edce32019-04-10 14:23:32 +01001864 // The pointer argument from vstore_half.
1865 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001866
Kévin Petite8edce32019-04-10 14:23:32 +01001867 auto IntTy = Type::getInt32Ty(M.getContext());
1868 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001869 auto NewPointerTy =
1870 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001871 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001872
Kévin Petite8edce32019-04-10 14:23:32 +01001873 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001874 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001875
Kévin Petite8edce32019-04-10 14:23:32 +01001876 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001877
Kévin Petite8edce32019-04-10 14:23:32 +01001878 // Turn the packed x & y into the final packing.
1879 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001880
Kévin Petite8edce32019-04-10 14:23:32 +01001881 // Cast the half* pointer to int*.
1882 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001883
Kévin Petite8edce32019-04-10 14:23:32 +01001884 // Index into the correct address of the casted pointer.
1885 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001886
Kévin Petite8edce32019-04-10 14:23:32 +01001887 // Store to the int* we casted to.
1888 return new StoreInst(X, Index, CI);
1889 });
David Neto22f144c2017-06-12 14:26:21 -04001890}
1891
SJW2c317da2020-03-23 07:39:13 -05001892bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
1893 Module &M = *F.getParent();
1894 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001895 // The value to store.
1896 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001897
Kévin Petite8edce32019-04-10 14:23:32 +01001898 // The index argument from vstore_half.
1899 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001900
Kévin Petite8edce32019-04-10 14:23:32 +01001901 // The pointer argument from vstore_half.
1902 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001903
Kévin Petite8edce32019-04-10 14:23:32 +01001904 auto IntTy = Type::getInt32Ty(M.getContext());
1905 auto Int2Ty = VectorType::get(IntTy, 2);
1906 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001907 auto NewPointerTy =
1908 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001909 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001910
Kévin Petite8edce32019-04-10 14:23:32 +01001911 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1912 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04001913
Kévin Petite8edce32019-04-10 14:23:32 +01001914 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001915 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1916 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001917
Kévin Petite8edce32019-04-10 14:23:32 +01001918 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1919 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001920
Kévin Petite8edce32019-04-10 14:23:32 +01001921 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001922 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1923 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001924
Kévin Petite8edce32019-04-10 14:23:32 +01001925 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001926 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001927
Kévin Petite8edce32019-04-10 14:23:32 +01001928 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001929
Kévin Petite8edce32019-04-10 14:23:32 +01001930 // Turn the packed x & y into the final component of our int2.
1931 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001932
Kévin Petite8edce32019-04-10 14:23:32 +01001933 // Turn the packed z & w into the final component of our int2.
1934 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001935
Kévin Petite8edce32019-04-10 14:23:32 +01001936 auto Combine = InsertElementInst::Create(
1937 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001938 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
1939 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001940
Kévin Petite8edce32019-04-10 14:23:32 +01001941 // Cast the half* pointer to int2*.
1942 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001943
Kévin Petite8edce32019-04-10 14:23:32 +01001944 // Index into the correct address of the casted pointer.
1945 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001946
Kévin Petite8edce32019-04-10 14:23:32 +01001947 // Store to the int2* we casted to.
1948 return new StoreInst(Combine, Index, CI);
1949 });
David Neto22f144c2017-06-12 14:26:21 -04001950}
1951
SJW2c317da2020-03-23 07:39:13 -05001952bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
1953 // convert half to float
1954 Module &M = *F.getParent();
1955 return replaceCallsWithValue(F, [&](CallInst *CI) {
1956 SmallVector<Type *, 3> types;
1957 SmallVector<Value *, 3> args;
1958 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
1959 types.push_back(CI->getArgOperand(i)->getType());
1960 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05001961 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05001962
SJW2c317da2020-03-23 07:39:13 -05001963 auto NewFType = FunctionType::get(
1964 VectorType::get(Type::getFloatTy(M.getContext()),
James Pricecf53df42020-04-20 14:41:24 -04001965 cast<VectorType>(CI->getType())->getNumElements()),
SJW2c317da2020-03-23 07:39:13 -05001966 types, false);
1967
SJW61531372020-06-09 07:31:08 -05001968 std::string NewFName =
1969 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05001970
1971 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1972
1973 auto NewCI = CallInst::Create(NewF, args, "", CI);
1974
1975 // Convert to the half type.
1976 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
1977 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05001978}
1979
SJW2c317da2020-03-23 07:39:13 -05001980bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
1981 // convert half to float
1982 Module &M = *F.getParent();
1983 return replaceCallsWithValue(F, [&](CallInst *CI) {
1984 SmallVector<Type *, 3> types(3);
1985 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05001986
SJW2c317da2020-03-23 07:39:13 -05001987 // Image
1988 types[0] = CI->getArgOperand(0)->getType();
1989 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05001990
SJW2c317da2020-03-23 07:39:13 -05001991 // Coord
1992 types[1] = CI->getArgOperand(1)->getType();
1993 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05001994
SJW2c317da2020-03-23 07:39:13 -05001995 // Data
1996 types[2] = VectorType::get(
1997 Type::getFloatTy(M.getContext()),
James Pricecf53df42020-04-20 14:41:24 -04001998 cast<VectorType>(CI->getArgOperand(2)->getType())->getNumElements());
alan-bakerf7e17cb2020-01-02 07:29:59 -05001999
SJW2c317da2020-03-23 07:39:13 -05002000 auto NewFType =
2001 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002002
SJW61531372020-06-09 07:31:08 -05002003 std::string NewFName =
2004 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002005
SJW2c317da2020-03-23 07:39:13 -05002006 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002007
SJW2c317da2020-03-23 07:39:13 -05002008 // Convert data to the float type.
2009 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2010 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002011
SJW2c317da2020-03-23 07:39:13 -05002012 return CallInst::Create(NewF, args, "", CI);
2013 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002014}
2015
SJW2c317da2020-03-23 07:39:13 -05002016bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2017 Function &F) {
2018 // convert read_image with int coords to float coords
2019 Module &M = *F.getParent();
2020 return replaceCallsWithValue(F, [&](CallInst *CI) {
2021 // The image.
2022 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002023
SJW2c317da2020-03-23 07:39:13 -05002024 // The sampler.
2025 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002026
SJW2c317da2020-03-23 07:39:13 -05002027 // The coordinate (integer type that we can't handle).
2028 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002029
SJW2c317da2020-03-23 07:39:13 -05002030 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2031 uint32_t components =
2032 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2033 Type *float_ty = nullptr;
2034 if (components == 1) {
2035 float_ty = Type::getFloatTy(M.getContext());
2036 } else {
James Pricecf53df42020-04-20 14:41:24 -04002037 float_ty =
2038 VectorType::get(Type::getFloatTy(M.getContext()),
2039 cast<VectorType>(Arg2->getType())->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002040 }
David Neto22f144c2017-06-12 14:26:21 -04002041
SJW2c317da2020-03-23 07:39:13 -05002042 auto NewFType = FunctionType::get(
2043 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2044
2045 std::string NewFName = F.getName().str();
2046 NewFName[NewFName.length() - 1] = 'f';
2047
2048 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2049
2050 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2051
2052 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2053 });
David Neto22f144c2017-06-12 14:26:21 -04002054}
2055
SJW2c317da2020-03-23 07:39:13 -05002056bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2057 return replaceCallsWithValue(F, [&](CallInst *CI) {
2058 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002059
SJW2c317da2020-03-23 07:39:13 -05002060 // We need to map the OpenCL constants to the SPIR-V equivalents.
2061 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2062 const auto ConstantMemorySemantics = ConstantInt::get(
2063 IntTy, spv::MemorySemanticsUniformMemoryMask |
2064 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002065
SJW2c317da2020-03-23 07:39:13 -05002066 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002067
SJW2c317da2020-03-23 07:39:13 -05002068 // The pointer.
2069 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002070
SJW2c317da2020-03-23 07:39:13 -05002071 // The memory scope.
2072 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002073
SJW2c317da2020-03-23 07:39:13 -05002074 // The memory semantics.
2075 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002076
SJW2c317da2020-03-23 07:39:13 -05002077 if (2 < CI->getNumArgOperands()) {
2078 // The unequal memory semantics.
2079 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002080
SJW2c317da2020-03-23 07:39:13 -05002081 // The value.
2082 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002083
SJW2c317da2020-03-23 07:39:13 -05002084 // The comparator.
2085 Params.push_back(CI->getArgOperand(1));
2086 } else if (1 < CI->getNumArgOperands()) {
2087 // The value.
2088 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002089 }
David Neto22f144c2017-06-12 14:26:21 -04002090
SJW2c317da2020-03-23 07:39:13 -05002091 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2092 });
David Neto22f144c2017-06-12 14:26:21 -04002093}
2094
SJW2c317da2020-03-23 07:39:13 -05002095bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2096 llvm::AtomicRMWInst::BinOp Op) {
2097 return replaceCallsWithValue(F, [&](CallInst *CI) {
2098 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
2099 AtomicOrdering::SequentiallyConsistent,
2100 SyncScope::System, CI);
2101 });
2102}
David Neto22f144c2017-06-12 14:26:21 -04002103
SJW2c317da2020-03-23 07:39:13 -05002104bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2105 Module &M = *F.getParent();
2106 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002107 auto IntTy = Type::getInt32Ty(M.getContext());
2108 auto FloatTy = Type::getFloatTy(M.getContext());
2109
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002110 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2111 ConstantInt::get(IntTy, 1),
2112 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002113
2114 Constant *UpShuffleMask[4] = {
2115 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2116 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2117
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002118 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2119 UndefValue::get(FloatTy),
2120 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002121
Kévin Petite8edce32019-04-10 14:23:32 +01002122 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002123 auto Arg0 =
2124 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2125 ConstantVector::get(DownShuffleMask), "", CI);
2126 auto Arg1 =
2127 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2128 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002129 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002130
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002131 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002132 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002133
SJW61531372020-06-09 07:31:08 -05002134 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002135
Kévin Petite8edce32019-04-10 14:23:32 +01002136 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002137
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002138 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2139 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002140 });
David Neto22f144c2017-06-12 14:26:21 -04002141}
David Neto62653202017-10-16 19:05:18 -04002142
SJW2c317da2020-03-23 07:39:13 -05002143bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002144 // OpenCL's float result = fract(float x, float* ptr)
2145 //
2146 // In the LLVM domain:
2147 //
2148 // %floor_result = call spir_func float @floor(float %x)
2149 // store float %floor_result, float * %ptr
2150 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2151 // %result = call spir_func float
2152 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2153 //
2154 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2155 // and clspv.fract occur in the SPIR-V generator pass:
2156 //
2157 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2158 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2159 // ...
2160 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2161 // OpStore %ptr %floor_result
2162 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2163 // %fract_result = OpExtInst %float
2164 // %glsl_ext Fmin %fract_intermediate %just_under_1
2165
David Neto62653202017-10-16 19:05:18 -04002166 using std::string;
2167
2168 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2169 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002170
SJW2c317da2020-03-23 07:39:13 -05002171 Module &M = *F.getParent();
2172 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002173
SJW2c317da2020-03-23 07:39:13 -05002174 // This is either float or a float vector. All the float-like
2175 // types are this type.
2176 auto result_ty = F.getReturnType();
2177
SJW61531372020-06-09 07:31:08 -05002178 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002179 Function *fmin_fn = M.getFunction(fmin_name);
2180 if (!fmin_fn) {
2181 // Make the fmin function.
2182 FunctionType *fn_ty =
2183 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2184 fmin_fn =
2185 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2186 fmin_fn->addFnAttr(Attribute::ReadNone);
2187 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2188 }
2189
SJW61531372020-06-09 07:31:08 -05002190 std::string floor_name =
2191 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002192 Function *floor_fn = M.getFunction(floor_name);
2193 if (!floor_fn) {
2194 // Make the floor function.
2195 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2196 floor_fn =
2197 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2198 floor_fn->addFnAttr(Attribute::ReadNone);
2199 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2200 }
2201
SJW61531372020-06-09 07:31:08 -05002202 std::string clspv_fract_name =
2203 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002204 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2205 if (!clspv_fract_fn) {
2206 // Make the clspv_fract function.
2207 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2208 clspv_fract_fn = cast<Function>(
2209 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2210 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2211 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2212 }
2213
2214 // Number of significant significand bits, whether represented or not.
2215 unsigned num_significand_bits;
2216 switch (result_ty->getScalarType()->getTypeID()) {
2217 case Type::HalfTyID:
2218 num_significand_bits = 11;
2219 break;
2220 case Type::FloatTyID:
2221 num_significand_bits = 24;
2222 break;
2223 case Type::DoubleTyID:
2224 num_significand_bits = 53;
2225 break;
2226 default:
2227 llvm_unreachable("Unhandled float type when processing fract builtin");
2228 break;
2229 }
2230 // Beware that the disassembler displays this value as
2231 // OpConstant %float 1
2232 // which is not quite right.
2233 const double kJustUnderOneScalar =
2234 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2235
2236 Constant *just_under_one =
2237 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2238 if (result_ty->isVectorTy()) {
2239 just_under_one = ConstantVector::getSplat(
James Pricecf53df42020-04-20 14:41:24 -04002240 {cast<VectorType>(result_ty)->getNumElements(), false},
2241 just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002242 }
2243
2244 IRBuilder<> Builder(CI);
2245
2246 auto arg = CI->getArgOperand(0);
2247 auto ptr = CI->getArgOperand(1);
2248
2249 // Compute floor result and store it.
2250 auto floor = Builder.CreateCall(floor_fn, {arg});
2251 Builder.CreateStore(floor, ptr);
2252
2253 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2254 auto fract_result =
2255 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2256
2257 return fract_result;
2258 });
David Neto62653202017-10-16 19:05:18 -04002259}