blob: 62da1b68f72505c361840fec1dfe6a0654ed35ab [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
David Neto482550a2018-03-24 05:21:07 -070032#include "clspv/Option.h"
Kévin Petit617a76d2019-04-04 13:54:16 +010033#include "SPIRVOp.h"
David Neto482550a2018-03-24 05:21:07 -070034
David Neto22f144c2017-06-12 14:26:21 -040035using namespace llvm;
36
37#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
38
39namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000040
// What the mangled name of a builtin tells us about one of its arguments.
struct ArgTypeInfo {
  // Signedness of the argument's scalar (or vector element) type.
  enum class SignedNess {
    None,     // Not an integer type (e.g. float).
    Unsigned, // Unsigned integer type.
    Signed    // Signed integer type.
  };

  // Defaults to None so that a default-constructed ArgTypeInfo never holds an
  // indeterminate value.
  SignedNess signedness = SignedNess::None;
};
49
50struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000051 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000052 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000053
Kévin Petit91bc72e2019-04-08 15:17:46 +010054 bool isArgSigned(size_t arg) const {
55 assert(argTypeInfos.size() > arg);
56 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000057 }
58
Kévin Petit91bc72e2019-04-08 15:17:46 +010059 static FunctionInfo getFromMangledName(StringRef name) {
60 FunctionInfo fi;
61 if (!getFromMangledNameCheck(name, &fi)) {
62 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000063 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010064 return fi;
65 }
Kévin Petit8a560882019-03-21 15:24:34 +000066
Kévin Petit91bc72e2019-04-08 15:17:46 +010067 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
68 if (!name.consume_front("_Z")) {
69 return false;
70 }
71 size_t nameLen;
72 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000073 return false;
74 }
75
Kévin Petit91bc72e2019-04-08 15:17:46 +010076 finfo->name = name.take_front(nameLen);
77 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000080
Kévin Petit91bc72e2019-04-08 15:17:46 +010081 while (name.size() != 0) {
82
83 ArgTypeInfo ti;
84
85 // Try parsing a vector prefix
86 if (name.consume_front("Dv")) {
87 int numElems;
88 if (name.consumeInteger(10, numElems)) {
89 return false;
90 }
91
92 if (!name.consume_front("_")) {
93 return false;
94 }
95 }
96
97 // Parse the base type
98 char typeCode = name.front();
99 name = name.drop_front(1);
100 switch(typeCode) {
101 case 'c': // char
102 case 'a': // signed char
103 case 's': // short
104 case 'i': // int
105 case 'l': // long
106 ti.signedness = ArgTypeInfo::SignedNess::Signed;
107 break;
108 case 'h': // unsigned char
109 case 't': // unsigned short
110 case 'j': // unsigned int
111 case 'm': // unsigned long
112 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
113 break;
114 case 'f':
115 ti.signedness = ArgTypeInfo::SignedNess::None;
116 break;
117 case 'S':
118 ti = prev_ti;
119 if (!name.consume_front("_")) {
120 return false;
121 }
122 break;
123 default:
124 return false;
125 }
126
127 finfo->argTypeInfos.push_back(ti);
128
129 prev_ti = ti;
130 }
131
132 return true;
133 };
Kévin Petit8a560882019-03-21 15:24:34 +0000134};
135
// Despite its name this does not count leading zeros: it returns the bit
// index of the highest set bit of `v` (i.e. floor(log2(v))), and 0 when `v`
// is 0. Callers subtract two results to get the shift distance between two
// single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Every right shift that leaves a non-zero value moves the top bit one
  // position down.
  while ((v >>= 1) != 0) {
    ++highestBit;
  }

  return highestBit;
}
155
156Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
157 if (1 == elements) {
158 return Type::getInt1Ty(C);
159 } else {
160 return VectorType::get(Type::getInt1Ty(C), elements);
161 }
162}
163
164struct ReplaceOpenCLBuiltinPass final : public ModulePass {
165 static char ID;
166 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
167
168 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000169 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100170 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100171 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400172 bool replaceRecip(Module &M);
173 bool replaceDivide(Module &M);
174 bool replaceExp10(Module &M);
175 bool replaceLog10(Module &M);
176 bool replaceBarrier(Module &M);
177 bool replaceMemFence(Module &M);
178 bool replaceRelational(Module &M);
179 bool replaceIsInfAndIsNan(Module &M);
180 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000181 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000182 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000183 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000184 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000185 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000186 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000187 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400188 bool replaceSignbit(Module &M);
189 bool replaceMadandMad24andMul24(Module &M);
190 bool replaceVloadHalf(Module &M);
191 bool replaceVloadHalf2(Module &M);
192 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700193 bool replaceClspvVloadaHalf2(Module &M);
194 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400195 bool replaceVstoreHalf(Module &M);
196 bool replaceVstoreHalf2(Module &M);
197 bool replaceVstoreHalf4(Module &M);
198 bool replaceReadImageF(Module &M);
199 bool replaceAtomics(Module &M);
200 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400201 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700202 bool replaceVload(Module &M);
203 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400204};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100205} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400206
207char ReplaceOpenCLBuiltinPass::ID = 0;
208static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
209 "Replace OpenCL Builtins Pass");
210
211namespace clspv {
212ModulePass *createReplaceOpenCLBuiltinPass() {
213 return new ReplaceOpenCLBuiltinPass();
214}
215}
216
217bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
218 bool Changed = false;
219
Kévin Petit2444e9b2018-11-09 14:14:37 +0000220 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100221 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100222 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400223 Changed |= replaceRecip(M);
224 Changed |= replaceDivide(M);
225 Changed |= replaceExp10(M);
226 Changed |= replaceLog10(M);
227 Changed |= replaceBarrier(M);
228 Changed |= replaceMemFence(M);
229 Changed |= replaceRelational(M);
230 Changed |= replaceIsInfAndIsNan(M);
231 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000232 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000233 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000234 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000235 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000236 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000237 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000238 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400239 Changed |= replaceSignbit(M);
240 Changed |= replaceMadandMad24andMul24(M);
241 Changed |= replaceVloadHalf(M);
242 Changed |= replaceVloadHalf2(M);
243 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700244 Changed |= replaceClspvVloadaHalf2(M);
245 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400246 Changed |= replaceVstoreHalf(M);
247 Changed |= replaceVstoreHalf2(M);
248 Changed |= replaceVstoreHalf4(M);
249 Changed |= replaceReadImageF(M);
250 Changed |= replaceAtomics(M);
251 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400252 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700253 Changed |= replaceVload(M);
254 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400255
256 return Changed;
257}
258
Kévin Petit2444e9b2018-11-09 14:14:37 +0000259bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
260 bool Changed = false;
261
262 const char *Names[] = {
Kévin Petit5ace14c2019-04-01 16:29:53 +0100263 "_Z3absh",
264 "_Z3absDv2_h",
265 "_Z3absDv3_h",
266 "_Z3absDv4_h",
Kévin Petit2444e9b2018-11-09 14:14:37 +0000267 "_Z3abst",
268 "_Z3absDv2_t",
269 "_Z3absDv3_t",
270 "_Z3absDv4_t",
271 "_Z3absj",
272 "_Z3absDv2_j",
273 "_Z3absDv3_j",
274 "_Z3absDv4_j",
275 "_Z3absm",
276 "_Z3absDv2_m",
277 "_Z3absDv3_m",
278 "_Z3absDv4_m",
279 };
280
281 for (auto Name : Names) {
282 // If we find a function with the matching name.
283 if (auto F = M.getFunction(Name)) {
284 SmallVector<Instruction *, 4> ToRemoves;
285
286 // Walk the users of the function.
287 for (auto &U : F->uses()) {
288 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
289 // Abs has one arg.
290 auto Arg = CI->getOperand(0);
291
292 // Use the argument unchanged, we know it's unsigned
293 CI->replaceAllUsesWith(Arg);
294
295 // Lastly, remember to remove the user.
296 ToRemoves.push_back(CI);
297 }
298 }
299
300 Changed = !ToRemoves.empty();
301
302 // And cleanup the calls we don't use anymore.
303 for (auto V : ToRemoves) {
304 V->eraseFromParent();
305 }
306
307 // And remove the function we don't need either too.
308 F->eraseFromParent();
309 }
310 }
311
312 return Changed;
313}
314
Kévin Petit91bc72e2019-04-08 15:17:46 +0100315bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
316 bool Changed = false;
317
318 const char *Names[] = {
319 "_Z8abs_diffcc",
320 "_Z8abs_diffDv2_cS_",
321 "_Z8abs_diffDv3_cS_",
322 "_Z8abs_diffDv4_cS_",
323 "_Z8abs_diffhh",
324 "_Z8abs_diffDv2_hS_",
325 "_Z8abs_diffDv3_hS_",
326 "_Z8abs_diffDv4_hS_",
327 "_Z8abs_diffss",
328 "_Z8abs_diffDv2_sS_",
329 "_Z8abs_diffDv3_sS_",
330 "_Z8abs_diffDv4_sS_",
331 "_Z8abs_difftt",
332 "_Z8abs_diffDv2_tS_",
333 "_Z8abs_diffDv3_tS_",
334 "_Z8abs_diffDv4_tS_",
335 "_Z8abs_diffii",
336 "_Z8abs_diffDv2_iS_",
337 "_Z8abs_diffDv3_iS_",
338 "_Z8abs_diffDv4_iS_",
339 "_Z8abs_diffjj",
340 "_Z8abs_diffDv2_jS_",
341 "_Z8abs_diffDv3_jS_",
342 "_Z8abs_diffDv4_jS_",
343 "_Z8abs_diffll",
344 "_Z8abs_diffDv2_lS_",
345 "_Z8abs_diffDv3_lS_",
346 "_Z8abs_diffDv4_lS_",
347 "_Z8abs_diffmm",
348 "_Z8abs_diffDv2_mS_",
349 "_Z8abs_diffDv3_mS_",
350 "_Z8abs_diffDv4_mS_",
351 };
352
353 for (auto Name : Names) {
354 // If we find a function with the matching name.
355 if (auto F = M.getFunction(Name)) {
356 SmallVector<Instruction *, 4> ToRemoves;
357
358 // Walk the users of the function.
359 for (auto &U : F->uses()) {
360 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
361
362 auto XValue = CI->getOperand(0);
363 auto YValue = CI->getOperand(1);
364
365 IRBuilder<> Builder(CI);
366 auto XmY = Builder.CreateSub(XValue, YValue);
367 auto YmX = Builder.CreateSub(YValue, XValue);
368
369 Value* Cmp;
370 auto finfo = FunctionInfo::getFromMangledName(F->getName());
371 if (finfo.isArgSigned(0)) {
372 Cmp = Builder.CreateICmpSGT(YValue, XValue);
373 } else {
374 Cmp = Builder.CreateICmpUGT(YValue, XValue);
375 }
376
377 auto V = Builder.CreateSelect(Cmp, YmX, XmY);
378
379 // Use the argument unchanged, we know it's unsigned
380 CI->replaceAllUsesWith(V);
381
382 // Lastly, remember to remove the user.
383 ToRemoves.push_back(CI);
384 }
385 }
386
387 Changed = !ToRemoves.empty();
388
389 // And cleanup the calls we don't use anymore.
390 for (auto V : ToRemoves) {
391 V->eraseFromParent();
392 }
393
394 // And remove the function we don't need either too.
395 F->eraseFromParent();
396 }
397 }
398
399 return Changed;
400}
401
Kévin Petit8c1be282019-04-02 19:34:25 +0100402bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
403 bool Changed = false;
404
405 const char *Names[] = {
406 "_Z8copysignff",
407 "_Z8copysignDv2_fS_",
408 "_Z8copysignDv3_fS_",
409 "_Z8copysignDv4_fS_",
410 };
411
412 for (auto Name : Names) {
413 // If we find a function with the matching name.
414 if (auto F = M.getFunction(Name)) {
415 SmallVector<Instruction *, 4> ToRemoves;
416
417 // Walk the users of the function.
418 for (auto &U : F->uses()) {
419 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
420
421 auto XValue = CI->getOperand(0);
422 auto YValue = CI->getOperand(1);
423
424 auto Ty = XValue->getType();
425
426 Type* IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
427 if (Ty->isVectorTy()) {
428 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
429 }
430
431 // Return X with the sign of Y
432
433 // Sign bit masks
434 auto SignBit = IntTy->getScalarSizeInBits() - 1;
435 auto SignBitMask = 1 << SignBit;
436 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
437 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
438
439 IRBuilder<> Builder(CI);
440
441 // Extract sign of Y
442 auto YInt = Builder.CreateBitCast(YValue, IntTy);
443 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
444
445 // Clear sign bit in X
446 auto XInt = Builder.CreateBitCast(XValue, IntTy);
447 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
448
449 // Insert sign bit of Y into X
450 auto NewXInt = Builder.CreateOr(XInt, YSign);
451
452 // And cast back to floating-point
453 auto NewX = Builder.CreateBitCast(NewXInt, Ty);
454
455 CI->replaceAllUsesWith(NewX);
456
457 // Lastly, remember to remove the user.
458 ToRemoves.push_back(CI);
459 }
460 }
461
462 Changed = !ToRemoves.empty();
463
464 // And cleanup the calls we don't use anymore.
465 for (auto V : ToRemoves) {
466 V->eraseFromParent();
467 }
468
469 // And remove the function we don't need either too.
470 F->eraseFromParent();
471 }
472 }
473
474 return Changed;
475}
476
David Neto22f144c2017-06-12 14:26:21 -0400477bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
478 bool Changed = false;
479
480 const char *Names[] = {
481 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
482 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
483 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
484 };
485
486 for (auto Name : Names) {
487 // If we find a function with the matching name.
488 if (auto F = M.getFunction(Name)) {
489 SmallVector<Instruction *, 4> ToRemoves;
490
491 // Walk the users of the function.
492 for (auto &U : F->uses()) {
493 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
494 // Recip has one arg.
495 auto Arg = CI->getOperand(0);
496
497 auto Div = BinaryOperator::Create(
498 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
499 CI);
500
501 CI->replaceAllUsesWith(Div);
502
503 // Lastly, remember to remove the user.
504 ToRemoves.push_back(CI);
505 }
506 }
507
508 Changed = !ToRemoves.empty();
509
510 // And cleanup the calls we don't use anymore.
511 for (auto V : ToRemoves) {
512 V->eraseFromParent();
513 }
514
515 // And remove the function we don't need either too.
516 F->eraseFromParent();
517 }
518 }
519
520 return Changed;
521}
522
523bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
524 bool Changed = false;
525
526 const char *Names[] = {
527 "_Z11half_divideff", "_Z13native_divideff",
528 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
529 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
530 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
531 };
532
533 for (auto Name : Names) {
534 // If we find a function with the matching name.
535 if (auto F = M.getFunction(Name)) {
536 SmallVector<Instruction *, 4> ToRemoves;
537
538 // Walk the users of the function.
539 for (auto &U : F->uses()) {
540 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
541 auto Div = BinaryOperator::Create(
542 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
543
544 CI->replaceAllUsesWith(Div);
545
546 // Lastly, remember to remove the user.
547 ToRemoves.push_back(CI);
548 }
549 }
550
551 Changed = !ToRemoves.empty();
552
553 // And cleanup the calls we don't use anymore.
554 for (auto V : ToRemoves) {
555 V->eraseFromParent();
556 }
557
558 // And remove the function we don't need either too.
559 F->eraseFromParent();
560 }
561 }
562
563 return Changed;
564}
565
566bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
567 bool Changed = false;
568
569 const std::map<const char *, const char *> Map = {
570 {"_Z5exp10f", "_Z3expf"},
571 {"_Z10half_exp10f", "_Z8half_expf"},
572 {"_Z12native_exp10f", "_Z10native_expf"},
573 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
574 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
575 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
576 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
577 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
578 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
579 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
580 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
581 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
582
583 for (auto Pair : Map) {
584 // If we find a function with the matching name.
585 if (auto F = M.getFunction(Pair.first)) {
586 SmallVector<Instruction *, 4> ToRemoves;
587
588 // Walk the users of the function.
589 for (auto &U : F->uses()) {
590 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
591 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
592
593 auto Arg = CI->getOperand(0);
594
595 // Constant of the natural log of 10 (ln(10)).
596 const double Ln10 =
597 2.302585092994045684017991454684364207601101488628772976033;
598
599 auto Mul = BinaryOperator::Create(
600 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
601 CI);
602
603 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
604
605 CI->replaceAllUsesWith(NewCI);
606
607 // Lastly, remember to remove the user.
608 ToRemoves.push_back(CI);
609 }
610 }
611
612 Changed = !ToRemoves.empty();
613
614 // And cleanup the calls we don't use anymore.
615 for (auto V : ToRemoves) {
616 V->eraseFromParent();
617 }
618
619 // And remove the function we don't need either too.
620 F->eraseFromParent();
621 }
622 }
623
624 return Changed;
625}
626
627bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
628 bool Changed = false;
629
630 const std::map<const char *, const char *> Map = {
631 {"_Z5log10f", "_Z3logf"},
632 {"_Z10half_log10f", "_Z8half_logf"},
633 {"_Z12native_log10f", "_Z10native_logf"},
634 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
635 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
636 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
637 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
638 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
639 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
640 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
641 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
642 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
643
644 for (auto Pair : Map) {
645 // If we find a function with the matching name.
646 if (auto F = M.getFunction(Pair.first)) {
647 SmallVector<Instruction *, 4> ToRemoves;
648
649 // Walk the users of the function.
650 for (auto &U : F->uses()) {
651 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
652 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
653
654 auto Arg = CI->getOperand(0);
655
656 // Constant of the reciprocal of the natural log of 10 (ln(10)).
657 const double Ln10 =
658 0.434294481903251827651128918916605082294397005803666566114;
659
660 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
661
662 auto Mul = BinaryOperator::Create(
663 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
664 "", CI);
665
666 CI->replaceAllUsesWith(Mul);
667
668 // Lastly, remember to remove the user.
669 ToRemoves.push_back(CI);
670 }
671 }
672
673 Changed = !ToRemoves.empty();
674
675 // And cleanup the calls we don't use anymore.
676 for (auto V : ToRemoves) {
677 V->eraseFromParent();
678 }
679
680 // And remove the function we don't need either too.
681 F->eraseFromParent();
682 }
683 }
684
685 return Changed;
686}
687
688bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
689 bool Changed = false;
690
691 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
692
693 const std::map<const char *, const char *> Map = {
694 {"_Z7barrierj", "__spirv_control_barrier"}};
695
696 for (auto Pair : Map) {
697 // If we find a function with the matching name.
698 if (auto F = M.getFunction(Pair.first)) {
699 SmallVector<Instruction *, 4> ToRemoves;
700
701 // Walk the users of the function.
702 for (auto &U : F->uses()) {
703 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
704 auto FType = F->getFunctionType();
705 SmallVector<Type *, 3> Params;
706 for (unsigned i = 0; i < 3; i++) {
707 Params.push_back(FType->getParamType(0));
708 }
709 auto NewFType =
710 FunctionType::get(FType->getReturnType(), Params, false);
711 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
712
713 auto Arg = CI->getOperand(0);
714
715 // We need to map the OpenCL constants to the SPIR-V equivalents.
716 const auto LocalMemFence =
717 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
718 const auto GlobalMemFence =
719 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
720 const auto ConstantSequentiallyConsistent = ConstantInt::get(
721 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
722 const auto ConstantScopeDevice =
723 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
724 const auto ConstantScopeWorkgroup =
725 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
726
727 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
728 const auto LocalMemFenceMask = BinaryOperator::Create(
729 Instruction::And, LocalMemFence, Arg, "", CI);
730 const auto WorkgroupShiftAmount =
731 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
732 clz(CLK_LOCAL_MEM_FENCE);
733 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
734 Instruction::Shl, LocalMemFenceMask,
735 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
736
737 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
738 const auto GlobalMemFenceMask = BinaryOperator::Create(
739 Instruction::And, GlobalMemFence, Arg, "", CI);
740 const auto UniformShiftAmount =
741 clz(spv::MemorySemanticsUniformMemoryMask) -
742 clz(CLK_GLOBAL_MEM_FENCE);
743 const auto MemorySemanticsUniform = BinaryOperator::Create(
744 Instruction::Shl, GlobalMemFenceMask,
745 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
746
747 // And combine the above together, also adding in
748 // MemorySemanticsSequentiallyConsistentMask.
749 auto MemorySemantics =
750 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
751 ConstantSequentiallyConsistent, "", CI);
752 MemorySemantics = BinaryOperator::Create(
753 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
754
755 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
756 // Device Scope, otherwise Workgroup Scope.
757 const auto Cmp =
758 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
759 GlobalMemFenceMask, GlobalMemFence, "", CI);
760 const auto MemoryScope = SelectInst::Create(
761 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
762
763 // Lastly, the Execution Scope is always Workgroup Scope.
764 const auto ExecutionScope = ConstantScopeWorkgroup;
765
766 auto NewCI = CallInst::Create(
767 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
768
769 CI->replaceAllUsesWith(NewCI);
770
771 // Lastly, remember to remove the user.
772 ToRemoves.push_back(CI);
773 }
774 }
775
776 Changed = !ToRemoves.empty();
777
778 // And cleanup the calls we don't use anymore.
779 for (auto V : ToRemoves) {
780 V->eraseFromParent();
781 }
782
783 // And remove the function we don't need either too.
784 F->eraseFromParent();
785 }
786 }
787
788 return Changed;
789}
790
791bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
792 bool Changed = false;
793
794 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
795
Neil Henning39672102017-09-29 14:33:13 +0100796 using Tuple = std::tuple<const char *, unsigned>;
797 const std::map<const char *, Tuple> Map = {
798 {"_Z9mem_fencej",
799 Tuple("__spirv_memory_barrier",
800 spv::MemorySemanticsSequentiallyConsistentMask)},
801 {"_Z14read_mem_fencej",
802 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
803 {"_Z15write_mem_fencej",
804 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400805
806 for (auto Pair : Map) {
807 // If we find a function with the matching name.
808 if (auto F = M.getFunction(Pair.first)) {
809 SmallVector<Instruction *, 4> ToRemoves;
810
811 // Walk the users of the function.
812 for (auto &U : F->uses()) {
813 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
814 auto FType = F->getFunctionType();
815 SmallVector<Type *, 2> Params;
816 for (unsigned i = 0; i < 2; i++) {
817 Params.push_back(FType->getParamType(0));
818 }
819 auto NewFType =
820 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100821 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400822
823 auto Arg = CI->getOperand(0);
824
825 // We need to map the OpenCL constants to the SPIR-V equivalents.
826 const auto LocalMemFence =
827 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
828 const auto GlobalMemFence =
829 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
830 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100831 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400832 const auto ConstantScopeDevice =
833 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
834
835 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
836 const auto LocalMemFenceMask = BinaryOperator::Create(
837 Instruction::And, LocalMemFence, Arg, "", CI);
838 const auto WorkgroupShiftAmount =
839 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
840 clz(CLK_LOCAL_MEM_FENCE);
841 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
842 Instruction::Shl, LocalMemFenceMask,
843 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
844
845 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
846 const auto GlobalMemFenceMask = BinaryOperator::Create(
847 Instruction::And, GlobalMemFence, Arg, "", CI);
848 const auto UniformShiftAmount =
849 clz(spv::MemorySemanticsUniformMemoryMask) -
850 clz(CLK_GLOBAL_MEM_FENCE);
851 const auto MemorySemanticsUniform = BinaryOperator::Create(
852 Instruction::Shl, GlobalMemFenceMask,
853 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
854
855 // And combine the above together, also adding in
856 // MemorySemanticsSequentiallyConsistentMask.
857 auto MemorySemantics =
858 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
859 ConstantMemorySemantics, "", CI);
860 MemorySemantics = BinaryOperator::Create(
861 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
862
863 // Memory Scope is always device.
864 const auto MemoryScope = ConstantScopeDevice;
865
866 auto NewCI =
867 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
868
869 CI->replaceAllUsesWith(NewCI);
870
871 // Lastly, remember to remove the user.
872 ToRemoves.push_back(CI);
873 }
874 }
875
876 Changed = !ToRemoves.empty();
877
878 // And cleanup the calls we don't use anymore.
879 for (auto V : ToRemoves) {
880 V->eraseFromParent();
881 }
882
883 // And remove the function we don't need either too.
884 F->eraseFromParent();
885 }
886 }
887
888 return Changed;
889}
890
891bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
892 bool Changed = false;
893
894 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
895 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
896 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
897 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
898 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
899 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
900 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
901 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
902 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
903 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
904 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
905 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
906 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
907 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
908 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
909 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
910 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
911 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
912 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
913 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
914 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
915 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
916 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
917 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
918 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
919 };
920
921 for (auto Pair : Map) {
922 // If we find a function with the matching name.
923 if (auto F = M.getFunction(Pair.first)) {
924 SmallVector<Instruction *, 4> ToRemoves;
925
926 // Walk the users of the function.
927 for (auto &U : F->uses()) {
928 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
929 // The predicate to use in the CmpInst.
930 auto Predicate = Pair.second.first;
931
932 // The value to return for true.
933 auto TrueValue =
934 ConstantInt::getSigned(CI->getType(), Pair.second.second);
935
936 // The value to return for false.
937 auto FalseValue = Constant::getNullValue(CI->getType());
938
939 auto Arg1 = CI->getOperand(0);
940 auto Arg2 = CI->getOperand(1);
941
942 const auto Cmp =
943 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
944
945 const auto Select =
946 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
947
948 CI->replaceAllUsesWith(Select);
949
950 // Lastly, remember to remove the user.
951 ToRemoves.push_back(CI);
952 }
953 }
954
955 Changed = !ToRemoves.empty();
956
957 // And cleanup the calls we don't use anymore.
958 for (auto V : ToRemoves) {
959 V->eraseFromParent();
960 }
961
962 // And remove the function we don't need either too.
963 F->eraseFromParent();
964 }
965 }
966
967 return Changed;
968}
969
970bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
971 bool Changed = false;
972
973 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
974 {"_Z5isinff", {"__spirv_isinff", 1}},
975 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
976 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
977 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
978 {"_Z5isnanf", {"__spirv_isnanf", 1}},
979 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
980 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
981 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
982 };
983
984 for (auto Pair : Map) {
985 // If we find a function with the matching name.
986 if (auto F = M.getFunction(Pair.first)) {
987 SmallVector<Instruction *, 4> ToRemoves;
988
989 // Walk the users of the function.
990 for (auto &U : F->uses()) {
991 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
992 const auto CITy = CI->getType();
993
994 // The fake SPIR-V intrinsic to generate.
995 auto SPIRVIntrinsic = Pair.second.first;
996
997 // The value to return for true.
998 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
999
1000 // The value to return for false.
1001 auto FalseValue = Constant::getNullValue(CITy);
1002
1003 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
1004 M.getContext(),
1005 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
1006
1007 auto NewFType =
1008 FunctionType::get(CorrespondingBoolTy,
1009 F->getFunctionType()->getParamType(0), false);
1010
1011 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1012
1013 auto Arg = CI->getOperand(0);
1014
1015 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
1016
1017 const auto Select =
1018 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
1019
1020 CI->replaceAllUsesWith(Select);
1021
1022 // Lastly, remember to remove the user.
1023 ToRemoves.push_back(CI);
1024 }
1025 }
1026
1027 Changed = !ToRemoves.empty();
1028
1029 // And cleanup the calls we don't use anymore.
1030 for (auto V : ToRemoves) {
1031 V->eraseFromParent();
1032 }
1033
1034 // And remove the function we don't need either too.
1035 F->eraseFromParent();
1036 }
1037 }
1038
1039 return Changed;
1040}
1041
1042bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
1043 bool Changed = false;
1044
1045 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001046 // all
alan-bakerb39c8262019-03-08 14:03:37 -05001047 {"_Z3allc", ""},
1048 {"_Z3allDv2_c", "__spirv_allDv2_c"},
1049 {"_Z3allDv3_c", "__spirv_allDv3_c"},
1050 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +00001051 {"_Z3alls", ""},
1052 {"_Z3allDv2_s", "__spirv_allDv2_s"},
1053 {"_Z3allDv3_s", "__spirv_allDv3_s"},
1054 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -04001055 {"_Z3alli", ""},
1056 {"_Z3allDv2_i", "__spirv_allDv2_i"},
1057 {"_Z3allDv3_i", "__spirv_allDv3_i"},
1058 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +00001059 {"_Z3alll", ""},
1060 {"_Z3allDv2_l", "__spirv_allDv2_l"},
1061 {"_Z3allDv3_l", "__spirv_allDv3_l"},
1062 {"_Z3allDv4_l", "__spirv_allDv4_l"},
1063
1064 // any
alan-bakerb39c8262019-03-08 14:03:37 -05001065 {"_Z3anyc", ""},
1066 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
1067 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
1068 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +00001069 {"_Z3anys", ""},
1070 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
1071 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
1072 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -04001073 {"_Z3anyi", ""},
1074 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
1075 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
1076 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +00001077 {"_Z3anyl", ""},
1078 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
1079 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
1080 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -04001081 };
1082
1083 for (auto Pair : Map) {
1084 // If we find a function with the matching name.
1085 if (auto F = M.getFunction(Pair.first)) {
1086 SmallVector<Instruction *, 4> ToRemoves;
1087
1088 // Walk the users of the function.
1089 for (auto &U : F->uses()) {
1090 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1091 // The fake SPIR-V intrinsic to generate.
1092 auto SPIRVIntrinsic = Pair.second;
1093
1094 auto Arg = CI->getOperand(0);
1095
1096 Value *V;
1097
Kévin Petitfd27cca2018-10-31 13:00:17 +00001098 // If the argument is a 32-bit int, just use a shift
1099 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1100 V = BinaryOperator::Create(Instruction::LShr, Arg,
1101 ConstantInt::get(Arg->getType(), 31), "",
1102 CI);
1103 } else {
David Neto22f144c2017-06-12 14:26:21 -04001104 // The value for zero to compare against.
1105 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1106
David Neto22f144c2017-06-12 14:26:21 -04001107 // The value to return for true.
1108 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1109
1110 // The value to return for false.
1111 const auto FalseValue = Constant::getNullValue(CI->getType());
1112
Kévin Petitfd27cca2018-10-31 13:00:17 +00001113 const auto Cmp = CmpInst::Create(
1114 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1115
1116 Value* SelectSource;
1117
1118 // If we have a function to call, call it!
1119 if (0 < strlen(SPIRVIntrinsic)) {
1120
1121 const auto NewFType = FunctionType::get(
1122 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
1123
1124 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1125
1126 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
1127
1128 SelectSource = NewCI;
1129
1130 } else {
1131 SelectSource = Cmp;
1132 }
1133
1134 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001135 }
1136
1137 CI->replaceAllUsesWith(V);
1138
1139 // Lastly, remember to remove the user.
1140 ToRemoves.push_back(CI);
1141 }
1142 }
1143
1144 Changed = !ToRemoves.empty();
1145
1146 // And cleanup the calls we don't use anymore.
1147 for (auto V : ToRemoves) {
1148 V->eraseFromParent();
1149 }
1150
1151 // And remove the function we don't need either too.
1152 F->eraseFromParent();
1153 }
1154 }
1155
1156 return Changed;
1157}
1158
Kévin Petitbf0036c2019-03-06 13:57:10 +00001159bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1160 bool Changed = false;
1161
1162 for (auto const &SymVal : M.getValueSymbolTable()) {
1163 // Skip symbols whose name doesn't match
1164 if (!SymVal.getKey().startswith("_Z8upsample")) {
1165 continue;
1166 }
1167 // Is there a function going by that name?
1168 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1169
1170 SmallVector<Instruction *, 4> ToRemoves;
1171
1172 // Walk the users of the function.
1173 for (auto &U : F->uses()) {
1174 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1175
1176 // Get arguments
1177 auto HiValue = CI->getOperand(0);
1178 auto LoValue = CI->getOperand(1);
1179
1180 // Don't touch overloads that aren't in OpenCL C
1181 auto HiType = HiValue->getType();
1182 auto LoType = LoValue->getType();
1183
1184 if (HiType != LoType) {
1185 continue;
1186 }
1187
1188 if (!HiType->isIntOrIntVectorTy()) {
1189 continue;
1190 }
1191
1192 if (HiType->getScalarSizeInBits() * 2 !=
1193 CI->getType()->getScalarSizeInBits()) {
1194 continue;
1195 }
1196
1197 if ((HiType->getScalarSizeInBits() != 8) &&
1198 (HiType->getScalarSizeInBits() != 16) &&
1199 (HiType->getScalarSizeInBits() != 32)) {
1200 continue;
1201 }
1202
1203 if (HiType->isVectorTy()) {
1204 if ((HiType->getVectorNumElements() != 2) &&
1205 (HiType->getVectorNumElements() != 3) &&
1206 (HiType->getVectorNumElements() != 4) &&
1207 (HiType->getVectorNumElements() != 8) &&
1208 (HiType->getVectorNumElements() != 16)) {
1209 continue;
1210 }
1211 }
1212
1213 // Convert both operands to the result type
1214 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(),
1215 "", CI);
1216 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(),
1217 "", CI);
1218
1219 // Shift high operand
1220 auto ShiftAmount = ConstantInt::get(CI->getType(),
1221 HiType->getScalarSizeInBits());
1222 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1223 ShiftAmount, "", CI);
1224
1225 // OR both results
1226 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1227 "", CI);
1228
1229 // Replace call with the expression
1230 CI->replaceAllUsesWith(V);
1231
1232 // Lastly, remember to remove the user.
1233 ToRemoves.push_back(CI);
1234 }
1235 }
1236
1237 Changed = !ToRemoves.empty();
1238
1239 // And cleanup the calls we don't use anymore.
1240 for (auto V : ToRemoves) {
1241 V->eraseFromParent();
1242 }
1243
1244 // And remove the function we don't need either too.
1245 F->eraseFromParent();
1246 }
1247 }
1248
1249 return Changed;
1250}
1251
Kévin Petitd44eef52019-03-08 13:22:14 +00001252bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1253 bool Changed = false;
1254
1255 for (auto const &SymVal : M.getValueSymbolTable()) {
1256 // Skip symbols whose name doesn't match
1257 if (!SymVal.getKey().startswith("_Z6rotate")) {
1258 continue;
1259 }
1260 // Is there a function going by that name?
1261 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1262
1263 SmallVector<Instruction *, 4> ToRemoves;
1264
1265 // Walk the users of the function.
1266 for (auto &U : F->uses()) {
1267 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1268
1269 // Get arguments
1270 auto SrcValue = CI->getOperand(0);
1271 auto RotAmount = CI->getOperand(1);
1272
1273 // Don't touch overloads that aren't in OpenCL C
1274 auto SrcType = SrcValue->getType();
1275 auto RotType = RotAmount->getType();
1276
1277 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1278 continue;
1279 }
1280
1281 if (!SrcType->isIntOrIntVectorTy()) {
1282 continue;
1283 }
1284
1285 if ((SrcType->getScalarSizeInBits() != 8) &&
1286 (SrcType->getScalarSizeInBits() != 16) &&
1287 (SrcType->getScalarSizeInBits() != 32) &&
1288 (SrcType->getScalarSizeInBits() != 64)) {
1289 continue;
1290 }
1291
1292 if (SrcType->isVectorTy()) {
1293 if ((SrcType->getVectorNumElements() != 2) &&
1294 (SrcType->getVectorNumElements() != 3) &&
1295 (SrcType->getVectorNumElements() != 4) &&
1296 (SrcType->getVectorNumElements() != 8) &&
1297 (SrcType->getVectorNumElements() != 16)) {
1298 continue;
1299 }
1300 }
1301
1302 // The approach used is to shift the top bits down, the bottom bits up
1303 // and OR the two shifted values.
1304
1305 // The rotation amount is to be treated modulo the element size.
1306 // Since SPIR-V shift ops don't support this, let's apply the
1307 // modulo ahead of shifting. The element size is always a power of
1308 // two so we can just AND with a mask.
1309 auto ModMask = ConstantInt::get(SrcType,
1310 SrcType->getScalarSizeInBits() - 1);
1311 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1312 ModMask, "", CI);
1313
1314 // Let's calc the amount by which to shift top bits down
1315 auto ScalarSize = ConstantInt::get(SrcType,
1316 SrcType->getScalarSizeInBits());
1317 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1318 RotAmount, "", CI);
1319
1320 // Now shift the bottom bits up and the top bits down
1321 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1322 RotAmount, "", CI);
1323 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1324 DownAmount, "", CI);
1325
1326 // Finally OR the two shifted values
1327 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1328 HiRotated, "", CI);
1329
1330 // Replace call with the expression
1331 CI->replaceAllUsesWith(V);
1332
1333 // Lastly, remember to remove the user.
1334 ToRemoves.push_back(CI);
1335 }
1336 }
1337
1338 Changed = !ToRemoves.empty();
1339
1340 // And cleanup the calls we don't use anymore.
1341 for (auto V : ToRemoves) {
1342 V->eraseFromParent();
1343 }
1344
1345 // And remove the function we don't need either too.
1346 F->eraseFromParent();
1347 }
1348 }
1349
1350 return Changed;
1351}
1352
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001353bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1354 bool Changed = false;
1355
1356 for (auto const &SymVal : M.getValueSymbolTable()) {
1357
1358 // Skip symbols whose name obviously doesn't match
1359 if (!SymVal.getKey().contains("convert_")) {
1360 continue;
1361 }
1362
1363 // Is there a function going by that name?
1364 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1365
1366 // Get info from the mangled name
1367 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001368 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001369
1370 // All functions of interest are handled by our mangled name parser
1371 if (!parsed) {
1372 continue;
1373 }
1374
1375 // Move on if this isn't a call to convert_
1376 if (!finfo.name.startswith("convert_")) {
1377 continue;
1378 }
1379
1380 // Extract the destination type from the function name
1381 StringRef DstTypeName = finfo.name;
1382 DstTypeName.consume_front("convert_");
1383
1384 auto DstSignedNess = StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1385 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1386 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1387 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1388 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1389 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1390 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1391 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1392 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1393 .Default(ArgTypeInfo::SignedNess::None);
1394
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001395 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001396 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001397
1398 SmallVector<Instruction *, 4> ToRemoves;
1399
1400 // Walk the users of the function.
1401 for (auto &U : F->uses()) {
1402 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1403
1404 // Get arguments
1405 auto SrcValue = CI->getOperand(0);
1406
1407 // Don't touch overloads that aren't in OpenCL C
1408 auto SrcType = SrcValue->getType();
1409 auto DstType = CI->getType();
1410
1411 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1412 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1413 continue;
1414 }
1415
1416 if (SrcType->isVectorTy()) {
1417
1418 if (SrcType->getVectorNumElements() !=
1419 DstType->getVectorNumElements()) {
1420 continue;
1421 }
1422
1423 if ((SrcType->getVectorNumElements() != 2) &&
1424 (SrcType->getVectorNumElements() != 3) &&
1425 (SrcType->getVectorNumElements() != 4) &&
1426 (SrcType->getVectorNumElements() != 8) &&
1427 (SrcType->getVectorNumElements() != 16)) {
1428 continue;
1429 }
1430 }
1431
1432 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1433 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1434
1435 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1436 bool DstIsInt = DstType->isIntOrIntVectorTy();
1437
1438 Value *V;
1439 if (SrcIsFloat && DstIsFloat) {
1440 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1441 } else if (SrcIsFloat && DstIsInt) {
1442 if (DstIsSigned) {
1443 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1444 } else {
1445 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1446 }
1447 } else if (SrcIsInt && DstIsFloat) {
1448 if (SrcIsSigned) {
1449 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1450 } else {
1451 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1452 }
1453 } else if (SrcIsInt && DstIsInt) {
1454 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1455 } else {
1456 // Not something we're supposed to handle, just move on
1457 continue;
1458 }
1459
1460 // Replace call with the expression
1461 CI->replaceAllUsesWith(V);
1462
1463 // Lastly, remember to remove the user.
1464 ToRemoves.push_back(CI);
1465 }
1466 }
1467
1468 Changed = !ToRemoves.empty();
1469
1470 // And cleanup the calls we don't use anymore.
1471 for (auto V : ToRemoves) {
1472 V->eraseFromParent();
1473 }
1474
1475 // And remove the function we don't need either too.
1476 F->eraseFromParent();
1477 }
1478 }
1479
1480 return Changed;
1481}
1482
Kévin Petit8a560882019-03-21 15:24:34 +00001483bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1484 bool Changed = false;
1485
Kévin Petit617a76d2019-04-04 13:54:16 +01001486 SmallVector<Function*, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001487
Kévin Petit617a76d2019-04-04 13:54:16 +01001488 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001489 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1490 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1491
1492 // Skip symbols whose name doesn't match
1493 if (!isMad && !isMul) {
1494 continue;
1495 }
1496
1497 // Is there a function going by that name?
1498 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001499 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001500 }
1501 }
1502
Kévin Petit617a76d2019-04-04 13:54:16 +01001503 for (auto F : FnWorklist) {
1504 SmallVector<Instruction *, 4> ToRemoves;
1505
1506 bool isMad = F->getName().startswith("_Z6mad_hi");
1507 // Walk the users of the function.
1508 for (auto &U : F->uses()) {
1509 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1510
1511 // Get arguments
1512 auto AValue = CI->getOperand(0);
1513 auto BValue = CI->getOperand(1);
1514 auto CValue = CI->getOperand(2);
1515
1516 // Don't touch overloads that aren't in OpenCL C
1517 auto AType = AValue->getType();
1518 auto BType = BValue->getType();
1519 auto CType = CValue->getType();
1520
1521 if ((AType != BType) || (CI->getType() != AType) ||
1522 (isMad && (AType != CType))) {
1523 continue;
1524 }
1525
1526 if (!AType->isIntOrIntVectorTy()) {
1527 continue;
1528 }
1529
1530 if ((AType->getScalarSizeInBits() != 8) &&
1531 (AType->getScalarSizeInBits() != 16) &&
1532 (AType->getScalarSizeInBits() != 32) &&
1533 (AType->getScalarSizeInBits() != 64)) {
1534 continue;
1535 }
1536
1537 if (AType->isVectorTy()) {
1538 if ((AType->getVectorNumElements() != 2) &&
1539 (AType->getVectorNumElements() != 3) &&
1540 (AType->getVectorNumElements() != 4) &&
1541 (AType->getVectorNumElements() != 8) &&
1542 (AType->getVectorNumElements() != 16)) {
1543 continue;
1544 }
1545 }
1546
1547 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001548 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001549
1550 // Select the appropriate signed/unsigned SPIR-V op
1551 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001552 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001553 opcode = spv::OpSMulExtended;
1554 } else {
1555 opcode = spv::OpUMulExtended;
1556 }
1557
1558 // Our SPIR-V op returns a struct, create a type for it
1559 SmallVector<Type*, 2> TwoValueType = {
1560 AType,
1561 AType
1562 };
1563 auto ExMulRetType = StructType::create(TwoValueType);
1564
1565 // Call the SPIR-V op
1566 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1567 ExMulRetType, {AValue, BValue});
1568
1569 // Get the high part of the result
1570 unsigned Idxs[] = {1};
1571 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1572
1573 // If we're handling a mad_hi, add the third argument to the result
1574 if (isMad) {
1575 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1576 }
1577
1578 // Replace call with the expression
1579 CI->replaceAllUsesWith(V);
1580
1581 // Lastly, remember to remove the user.
1582 ToRemoves.push_back(CI);
1583 }
1584 }
1585
1586 Changed = !ToRemoves.empty();
1587
1588 // And cleanup the calls we don't use anymore.
1589 for (auto V : ToRemoves) {
1590 V->eraseFromParent();
1591 }
1592
1593 // And remove the function we don't need either too.
1594 F->eraseFromParent();
1595 }
1596
Kévin Petit8a560882019-03-21 15:24:34 +00001597 return Changed;
1598}
1599
Kévin Petitf5b78a22018-10-25 14:32:17 +00001600bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1601 bool Changed = false;
1602
1603 for (auto const &SymVal : M.getValueSymbolTable()) {
1604 // Skip symbols whose name doesn't match
1605 if (!SymVal.getKey().startswith("_Z6select")) {
1606 continue;
1607 }
1608 // Is there a function going by that name?
1609 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1610
1611 SmallVector<Instruction *, 4> ToRemoves;
1612
1613 // Walk the users of the function.
1614 for (auto &U : F->uses()) {
1615 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1616
1617 // Get arguments
1618 auto FalseValue = CI->getOperand(0);
1619 auto TrueValue = CI->getOperand(1);
1620 auto PredicateValue = CI->getOperand(2);
1621
1622 // Don't touch overloads that aren't in OpenCL C
1623 auto FalseType = FalseValue->getType();
1624 auto TrueType = TrueValue->getType();
1625 auto PredicateType = PredicateValue->getType();
1626
1627 if (FalseType != TrueType) {
1628 continue;
1629 }
1630
1631 if (!PredicateType->isIntOrIntVectorTy()) {
1632 continue;
1633 }
1634
1635 if (!FalseType->isIntOrIntVectorTy() &&
1636 !FalseType->getScalarType()->isFloatingPointTy()) {
1637 continue;
1638 }
1639
1640 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1641 continue;
1642 }
1643
1644 if (FalseType->getScalarSizeInBits() !=
1645 PredicateType->getScalarSizeInBits()) {
1646 continue;
1647 }
1648
1649 if (FalseType->isVectorTy()) {
1650 if (FalseType->getVectorNumElements() !=
1651 PredicateType->getVectorNumElements()) {
1652 continue;
1653 }
1654
1655 if ((FalseType->getVectorNumElements() != 2) &&
1656 (FalseType->getVectorNumElements() != 3) &&
1657 (FalseType->getVectorNumElements() != 4) &&
1658 (FalseType->getVectorNumElements() != 8) &&
1659 (FalseType->getVectorNumElements() != 16)) {
1660 continue;
1661 }
1662 }
1663
1664 // Create constant
1665 const auto ZeroValue = Constant::getNullValue(PredicateType);
1666
1667 // Scalar and vector are to be treated differently
1668 CmpInst::Predicate Pred;
1669 if (PredicateType->isVectorTy()) {
1670 Pred = CmpInst::ICMP_SLT;
1671 } else {
1672 Pred = CmpInst::ICMP_NE;
1673 }
1674
1675 // Create comparison instruction
1676 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1677 ZeroValue, "", CI);
1678
1679 // Create select
1680 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1681
1682 // Replace call with the selection
1683 CI->replaceAllUsesWith(V);
1684
1685 // Lastly, remember to remove the user.
1686 ToRemoves.push_back(CI);
1687 }
1688 }
1689
1690 Changed = !ToRemoves.empty();
1691
1692 // And cleanup the calls we don't use anymore.
1693 for (auto V : ToRemoves) {
1694 V->eraseFromParent();
1695 }
1696
1697 // And remove the function we don't need either too.
1698 F->eraseFromParent();
1699 }
1700 }
1701
1702 return Changed;
1703}
1704
Kévin Petite7d0cce2018-10-31 12:38:56 +00001705bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1706 bool Changed = false;
1707
1708 for (auto const &SymVal : M.getValueSymbolTable()) {
1709 // Skip symbols whose name doesn't match
1710 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1711 continue;
1712 }
1713 // Is there a function going by that name?
1714 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1715
1716 SmallVector<Instruction *, 4> ToRemoves;
1717
1718 // Walk the users of the function.
1719 for (auto &U : F->uses()) {
1720 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1721
1722 if (CI->getNumOperands() != 4) {
1723 continue;
1724 }
1725
1726 // Get arguments
1727 auto FalseValue = CI->getOperand(0);
1728 auto TrueValue = CI->getOperand(1);
1729 auto PredicateValue = CI->getOperand(2);
1730
1731 // Don't touch overloads that aren't in OpenCL C
1732 auto FalseType = FalseValue->getType();
1733 auto TrueType = TrueValue->getType();
1734 auto PredicateType = PredicateValue->getType();
1735
1736 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1737 continue;
1738 }
1739
1740 if (TrueType->isVectorTy()) {
1741 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1742 !TrueType->getScalarType()->isIntegerTy()) {
1743 continue;
1744 }
1745 if ((TrueType->getVectorNumElements() != 2) &&
1746 (TrueType->getVectorNumElements() != 3) &&
1747 (TrueType->getVectorNumElements() != 4) &&
1748 (TrueType->getVectorNumElements() != 8) &&
1749 (TrueType->getVectorNumElements() != 16)) {
1750 continue;
1751 }
1752 }
1753
1754 // Remember the type of the operands
1755 auto OpType = TrueType;
1756
1757 // The actual bit selection will always be done on an integer type,
1758 // declare it here
1759 Type *BitType;
1760
1761 // If the operands are float, then bitcast them to int
1762 if (OpType->getScalarType()->isFloatingPointTy()) {
1763
1764 // First create the new type
1765 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1766 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1767 if (OpType->isVectorTy()) {
1768 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
1769 }
1770
1771 // Then bitcast all operands
1772 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
1773 BitType, "", CI);
1774 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
1775 BitType, "", CI);
1776 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1777
1778 } else {
1779 // The operands have an integer type, use it directly
1780 BitType = OpType;
1781 }
1782
1783 // All the operands are now always integers
1784 // implement as (c & b) | (~c & a)
1785
1786 // Create our negated predicate value
1787 auto AllOnes = Constant::getAllOnesValue(BitType);
1788 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1789 PredicateValue,
1790 AllOnes, "", CI);
1791
1792 // Then put everything together
1793 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1794 NotPredicateValue,
1795 FalseValue, "", CI);
1796 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1797 PredicateValue,
1798 TrueValue, "", CI);
1799
1800 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1801 BitsTrue, "", CI);
1802
1803 // If we were dealing with a floating point type, we must bitcast
1804 // the result back to that
1805 if (OpType->getScalarType()->isFloatingPointTy()) {
1806 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1807 }
1808
1809 // Replace call with our new code
1810 CI->replaceAllUsesWith(V);
1811
1812 // Lastly, remember to remove the user.
1813 ToRemoves.push_back(CI);
1814 }
1815 }
1816
1817 Changed = !ToRemoves.empty();
1818
1819 // And cleanup the calls we don't use anymore.
1820 for (auto V : ToRemoves) {
1821 V->eraseFromParent();
1822 }
1823
1824 // And remove the function we don't need either too.
1825 F->eraseFromParent();
1826 }
1827 }
1828
1829 return Changed;
1830}
1831
Kévin Petit6b0a9532018-10-30 20:00:39 +00001832bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1833 bool Changed = false;
1834
1835 const std::map<const char *, const char *> Map = {
1836 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1837 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1838 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1839 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1840 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1841 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1842 };
1843
1844 for (auto Pair : Map) {
1845 // If we find a function with the matching name.
1846 if (auto F = M.getFunction(Pair.first)) {
1847 SmallVector<Instruction *, 4> ToRemoves;
1848
1849 // Walk the users of the function.
1850 for (auto &U : F->uses()) {
1851 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1852
1853 auto ReplacementFn = Pair.second;
1854
1855 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1856 Value *VectorArg;
1857
1858 // First figure out which function we're dealing with
1859 if (F->getName().startswith("_Z10smoothstep")) {
1860 ArgsToSplat.push_back(CI->getOperand(1));
1861 VectorArg = CI->getOperand(2);
1862 } else {
1863 VectorArg = CI->getOperand(1);
1864 }
1865
1866 // Splat arguments that need to be
1867 SmallVector<Value*, 2> SplatArgs;
1868 auto VecType = VectorArg->getType();
1869
1870 for (auto arg : ArgsToSplat) {
1871 Value* NewVectorArg = UndefValue::get(VecType);
1872 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1873 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1874 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1875 }
1876 SplatArgs.push_back(NewVectorArg);
1877 }
1878
1879 // Replace the call with the vector/vector flavour
1880 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1881 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1882
1883 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1884
1885 SmallVector<Value*, 3> NewArgs;
1886 for (auto arg : SplatArgs) {
1887 NewArgs.push_back(arg);
1888 }
1889 NewArgs.push_back(VectorArg);
1890
1891 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1892
1893 CI->replaceAllUsesWith(NewCI);
1894
1895 // Lastly, remember to remove the user.
1896 ToRemoves.push_back(CI);
1897 }
1898 }
1899
1900 Changed = !ToRemoves.empty();
1901
1902 // And cleanup the calls we don't use anymore.
1903 for (auto V : ToRemoves) {
1904 V->eraseFromParent();
1905 }
1906
1907 // And remove the function we don't need either too.
1908 F->eraseFromParent();
1909 }
1910 }
1911
1912 return Changed;
1913}
1914
David Neto22f144c2017-06-12 14:26:21 -04001915bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1916 bool Changed = false;
1917
1918 const std::map<const char *, Instruction::BinaryOps> Map = {
1919 {"_Z7signbitf", Instruction::LShr},
1920 {"_Z7signbitDv2_f", Instruction::AShr},
1921 {"_Z7signbitDv3_f", Instruction::AShr},
1922 {"_Z7signbitDv4_f", Instruction::AShr},
1923 };
1924
1925 for (auto Pair : Map) {
1926 // If we find a function with the matching name.
1927 if (auto F = M.getFunction(Pair.first)) {
1928 SmallVector<Instruction *, 4> ToRemoves;
1929
1930 // Walk the users of the function.
1931 for (auto &U : F->uses()) {
1932 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1933 auto Arg = CI->getOperand(0);
1934
1935 auto Bitcast =
1936 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1937
1938 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1939 ConstantInt::get(CI->getType(), 31),
1940 "", CI);
1941
1942 CI->replaceAllUsesWith(Shr);
1943
1944 // Lastly, remember to remove the user.
1945 ToRemoves.push_back(CI);
1946 }
1947 }
1948
1949 Changed = !ToRemoves.empty();
1950
1951 // And cleanup the calls we don't use anymore.
1952 for (auto V : ToRemoves) {
1953 V->eraseFromParent();
1954 }
1955
1956 // And remove the function we don't need either too.
1957 F->eraseFromParent();
1958 }
1959 }
1960
1961 return Changed;
1962}
1963
1964bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1965 bool Changed = false;
1966
1967 const std::map<const char *,
1968 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1969 Map = {
1970 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1971 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1972 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1973 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1974 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1975 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1976 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1977 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1978 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1979 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1980 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1981 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1982 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1983 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1984 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1985 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1986 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1987 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1988 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1989 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1990 };
1991
1992 for (auto Pair : Map) {
1993 // If we find a function with the matching name.
1994 if (auto F = M.getFunction(Pair.first)) {
1995 SmallVector<Instruction *, 4> ToRemoves;
1996
1997 // Walk the users of the function.
1998 for (auto &U : F->uses()) {
1999 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2000 // The multiply instruction to use.
2001 auto MulInst = Pair.second.first;
2002
2003 // The add instruction to use.
2004 auto AddInst = Pair.second.second;
2005
2006 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
2007
2008 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
2009 CI->getArgOperand(1), "", CI);
2010
2011 if (Instruction::BinaryOpsEnd != AddInst) {
2012 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
2013 CI);
2014 }
2015
2016 CI->replaceAllUsesWith(I);
2017
2018 // Lastly, remember to remove the user.
2019 ToRemoves.push_back(CI);
2020 }
2021 }
2022
2023 Changed = !ToRemoves.empty();
2024
2025 // And cleanup the calls we don't use anymore.
2026 for (auto V : ToRemoves) {
2027 V->eraseFromParent();
2028 }
2029
2030 // And remove the function we don't need either too.
2031 F->eraseFromParent();
2032 }
2033 }
2034
2035 return Changed;
2036}
2037
Derek Chowcfd368b2017-10-19 20:58:45 -07002038bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
2039 bool Changed = false;
2040
2041 struct VectorStoreOps {
2042 const char* name;
2043 int n;
2044 Type* (*get_scalar_type_function)(LLVMContext&);
2045 } vector_store_ops[] = {
2046 // TODO(derekjchow): Expand this list.
2047 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
2048 };
2049
David Neto544fffc2017-11-16 18:35:14 -05002050 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002051 auto Name = Op.name;
2052 auto N = Op.n;
2053 auto TypeFn = Op.get_scalar_type_function;
2054 if (auto F = M.getFunction(Name)) {
2055 SmallVector<Instruction *, 4> ToRemoves;
2056
2057 // Walk the users of the function.
2058 for (auto &U : F->uses()) {
2059 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2060 // The value argument from vstoren.
2061 auto Arg0 = CI->getOperand(0);
2062
2063 // The index argument from vstoren.
2064 auto Arg1 = CI->getOperand(1);
2065
2066 // The pointer argument from vstoren.
2067 auto Arg2 = CI->getOperand(2);
2068
2069 // Get types.
2070 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
2071 auto ScalarNPointerTy = PointerType::get(
2072 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
2073
2074 // Cast to scalarn
2075 auto Cast = CastInst::CreatePointerCast(
2076 Arg2, ScalarNPointerTy, "", CI);
2077 // Index to correct address
2078 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
2079 // Store
2080 auto Store = new StoreInst(Arg0, Index, CI);
2081
2082 CI->replaceAllUsesWith(Store);
2083 ToRemoves.push_back(CI);
2084 }
2085 }
2086
2087 Changed = !ToRemoves.empty();
2088
2089 // And cleanup the calls we don't use anymore.
2090 for (auto V : ToRemoves) {
2091 V->eraseFromParent();
2092 }
2093
2094 // And remove the function we don't need either too.
2095 F->eraseFromParent();
2096 }
2097 }
2098
2099 return Changed;
2100}
2101
2102bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2103 bool Changed = false;
2104
2105 struct VectorLoadOps {
2106 const char* name;
2107 int n;
2108 Type* (*get_scalar_type_function)(LLVMContext&);
2109 } vector_load_ops[] = {
2110 // TODO(derekjchow): Expand this list.
2111 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
2112 };
2113
David Neto544fffc2017-11-16 18:35:14 -05002114 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002115 auto Name = Op.name;
2116 auto N = Op.n;
2117 auto TypeFn = Op.get_scalar_type_function;
2118 // If we find a function with the matching name.
2119 if (auto F = M.getFunction(Name)) {
2120 SmallVector<Instruction *, 4> ToRemoves;
2121
2122 // Walk the users of the function.
2123 for (auto &U : F->uses()) {
2124 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2125 // The index argument from vloadn.
2126 auto Arg0 = CI->getOperand(0);
2127
2128 // The pointer argument from vloadn.
2129 auto Arg1 = CI->getOperand(1);
2130
2131 // Get types.
2132 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
2133 auto ScalarNPointerTy = PointerType::get(
2134 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
2135
2136 // Cast to scalarn
2137 auto Cast = CastInst::CreatePointerCast(
2138 Arg1, ScalarNPointerTy, "", CI);
2139 // Index to correct address
2140 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2141 // Load
2142 auto Load = new LoadInst(Index, "", CI);
2143
2144 CI->replaceAllUsesWith(Load);
2145 ToRemoves.push_back(CI);
2146 }
2147 }
2148
2149 Changed = !ToRemoves.empty();
2150
2151 // And cleanup the calls we don't use anymore.
2152 for (auto V : ToRemoves) {
2153 V->eraseFromParent();
2154 }
2155
2156 // And remove the function we don't need either too.
2157 F->eraseFromParent();
2158
2159 }
2160 }
2161
2162 return Changed;
2163}
2164
David Neto22f144c2017-06-12 14:26:21 -04002165bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2166 bool Changed = false;
2167
2168 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2169 "_Z10vload_halfjPU3AS2KDh"};
2170
2171 for (auto Name : Map) {
2172 // If we find a function with the matching name.
2173 if (auto F = M.getFunction(Name)) {
2174 SmallVector<Instruction *, 4> ToRemoves;
2175
2176 // Walk the users of the function.
2177 for (auto &U : F->uses()) {
2178 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2179 // The index argument from vload_half.
2180 auto Arg0 = CI->getOperand(0);
2181
2182 // The pointer argument from vload_half.
2183 auto Arg1 = CI->getOperand(1);
2184
David Neto22f144c2017-06-12 14:26:21 -04002185 auto IntTy = Type::getInt32Ty(M.getContext());
2186 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002187 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2188
David Neto22f144c2017-06-12 14:26:21 -04002189 // Our intrinsic to unpack a float2 from an int.
2190 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2191
2192 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2193
David Neto482550a2018-03-24 05:21:07 -07002194 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002195 auto ShortTy = Type::getInt16Ty(M.getContext());
2196 auto ShortPointerTy = PointerType::get(
2197 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002198
David Netoac825b82017-05-30 12:49:01 -04002199 // Cast the half* pointer to short*.
2200 auto Cast =
2201 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002202
David Netoac825b82017-05-30 12:49:01 -04002203 // Index into the correct address of the casted pointer.
2204 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2205
2206 // Load from the short* we casted to.
2207 auto Load = new LoadInst(Index, "", CI);
2208
2209 // ZExt the short -> int.
2210 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2211
2212 // Get our float2.
2213 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2214
2215 // Extract out the bottom element which is our float result.
2216 auto Extract = ExtractElementInst::Create(
2217 Call, ConstantInt::get(IntTy, 0), "", CI);
2218
2219 CI->replaceAllUsesWith(Extract);
2220 } else {
2221 // Assume the pointer argument points to storage aligned to 32bits
2222 // or more.
2223 // TODO(dneto): Do more analysis to make sure this is true?
2224 //
2225 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2226 // with:
2227 //
2228 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2229 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2230 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2231 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2232 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2233 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2234 // x float> %converted, %index_is_odd32
2235
2236 auto IntPointerTy = PointerType::get(
2237 IntTy, Arg1->getType()->getPointerAddressSpace());
2238
David Neto973e6a82017-05-30 13:48:18 -04002239 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002240 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002241 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002242 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2243
2244 auto One = ConstantInt::get(IntTy, 1);
2245 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2246 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2247
2248 // Index into the correct address of the casted pointer.
2249 auto Ptr =
2250 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2251
2252 // Load from the int* we casted to.
2253 auto Load = new LoadInst(Ptr, "", CI);
2254
2255 // Get our float2.
2256 auto Call = CallInst::Create(NewF, Load, "", CI);
2257
2258 // Extract out the float result, where the element number is
2259 // determined by whether the original index was even or odd.
2260 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2261
2262 CI->replaceAllUsesWith(Extract);
2263 }
David Neto22f144c2017-06-12 14:26:21 -04002264
2265 // Lastly, remember to remove the user.
2266 ToRemoves.push_back(CI);
2267 }
2268 }
2269
2270 Changed = !ToRemoves.empty();
2271
2272 // And cleanup the calls we don't use anymore.
2273 for (auto V : ToRemoves) {
2274 V->eraseFromParent();
2275 }
2276
2277 // And remove the function we don't need either too.
2278 F->eraseFromParent();
2279 }
2280 }
2281
2282 return Changed;
2283}
2284
2285bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
2286 bool Changed = false;
2287
David Neto556c7e62018-06-08 13:45:55 -07002288 const std::vector<const char *> Map = {
2289 "_Z11vload_half2jPU3AS1KDh",
2290 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2291 "_Z11vload_half2jPU3AS2KDh",
2292 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2293 };
David Neto22f144c2017-06-12 14:26:21 -04002294
2295 for (auto Name : Map) {
2296 // If we find a function with the matching name.
2297 if (auto F = M.getFunction(Name)) {
2298 SmallVector<Instruction *, 4> ToRemoves;
2299
2300 // Walk the users of the function.
2301 for (auto &U : F->uses()) {
2302 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2303 // The index argument from vload_half.
2304 auto Arg0 = CI->getOperand(0);
2305
2306 // The pointer argument from vload_half.
2307 auto Arg1 = CI->getOperand(1);
2308
2309 auto IntTy = Type::getInt32Ty(M.getContext());
2310 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2311 auto NewPointerTy = PointerType::get(
2312 IntTy, Arg1->getType()->getPointerAddressSpace());
2313 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2314
2315 // Cast the half* pointer to int*.
2316 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2317
2318 // Index into the correct address of the casted pointer.
2319 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
2320
2321 // Load from the int* we casted to.
2322 auto Load = new LoadInst(Index, "", CI);
2323
2324 // Our intrinsic to unpack a float2 from an int.
2325 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2326
2327 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2328
2329 // Get our float2.
2330 auto Call = CallInst::Create(NewF, Load, "", CI);
2331
2332 CI->replaceAllUsesWith(Call);
2333
2334 // Lastly, remember to remove the user.
2335 ToRemoves.push_back(CI);
2336 }
2337 }
2338
2339 Changed = !ToRemoves.empty();
2340
2341 // And cleanup the calls we don't use anymore.
2342 for (auto V : ToRemoves) {
2343 V->eraseFromParent();
2344 }
2345
2346 // And remove the function we don't need either too.
2347 F->eraseFromParent();
2348 }
2349 }
2350
2351 return Changed;
2352}
2353
2354bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
2355 bool Changed = false;
2356
David Neto556c7e62018-06-08 13:45:55 -07002357 const std::vector<const char *> Map = {
2358 "_Z11vload_half4jPU3AS1KDh",
2359 "_Z12vloada_half4jPU3AS1KDh",
2360 "_Z11vload_half4jPU3AS2KDh",
2361 "_Z12vloada_half4jPU3AS2KDh",
2362 };
David Neto22f144c2017-06-12 14:26:21 -04002363
2364 for (auto Name : Map) {
2365 // If we find a function with the matching name.
2366 if (auto F = M.getFunction(Name)) {
2367 SmallVector<Instruction *, 4> ToRemoves;
2368
2369 // Walk the users of the function.
2370 for (auto &U : F->uses()) {
2371 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2372 // The index argument from vload_half.
2373 auto Arg0 = CI->getOperand(0);
2374
2375 // The pointer argument from vload_half.
2376 auto Arg1 = CI->getOperand(1);
2377
2378 auto IntTy = Type::getInt32Ty(M.getContext());
2379 auto Int2Ty = VectorType::get(IntTy, 2);
2380 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2381 auto NewPointerTy = PointerType::get(
2382 Int2Ty, Arg1->getType()->getPointerAddressSpace());
2383 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2384
2385 // Cast the half* pointer to int2*.
2386 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2387
2388 // Index into the correct address of the casted pointer.
2389 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2390
2391 // Load from the int2* we casted to.
2392 auto Load = new LoadInst(Index, "", CI);
2393
2394 // Extract each element from the loaded int2.
2395 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2396 "", CI);
2397 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2398 "", CI);
2399
2400 // Our intrinsic to unpack a float2 from an int.
2401 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2402
2403 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2404
2405 // Get the lower (x & y) components of our final float4.
2406 auto Lo = CallInst::Create(NewF, X, "", CI);
2407
2408 // Get the higher (z & w) components of our final float4.
2409 auto Hi = CallInst::Create(NewF, Y, "", CI);
2410
2411 Constant *ShuffleMask[4] = {
2412 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2413 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2414
2415 // Combine our two float2's into one float4.
2416 auto Combine = new ShuffleVectorInst(
2417 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2418
2419 CI->replaceAllUsesWith(Combine);
2420
2421 // Lastly, remember to remove the user.
2422 ToRemoves.push_back(CI);
2423 }
2424 }
2425
2426 Changed = !ToRemoves.empty();
2427
2428 // And cleanup the calls we don't use anymore.
2429 for (auto V : ToRemoves) {
2430 V->eraseFromParent();
2431 }
2432
2433 // And remove the function we don't need either too.
2434 F->eraseFromParent();
2435 }
2436 }
2437
2438 return Changed;
2439}
2440
David Neto6ad93232018-06-07 15:42:58 -07002441bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
2442 bool Changed = false;
2443
2444 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2445 //
2446 // %u = load i32 %ptr
2447 // %fxy = call <2 x float> Unpack2xHalf(u)
2448 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2449 const std::vector<const char *> Map = {
2450 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2451 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2452 "_Z20__clspv_vloada_half2jPKj", // private
2453 };
2454
2455 for (auto Name : Map) {
2456 // If we find a function with the matching name.
2457 if (auto F = M.getFunction(Name)) {
2458 SmallVector<Instruction *, 4> ToRemoves;
2459
2460 // Walk the users of the function.
2461 for (auto &U : F->uses()) {
2462 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
2463 auto Index = CI->getOperand(0);
2464 auto Ptr = CI->getOperand(1);
2465
2466 auto IntTy = Type::getInt32Ty(M.getContext());
2467 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2468 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2469
2470 auto IndexedPtr =
2471 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
2472 auto Load = new LoadInst(IndexedPtr, "", CI);
2473
2474 // Our intrinsic to unpack a float2 from an int.
2475 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2476
2477 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2478
2479 // Get our final float2.
2480 auto Result = CallInst::Create(NewF, Load, "", CI);
2481
2482 CI->replaceAllUsesWith(Result);
2483
2484 // Lastly, remember to remove the user.
2485 ToRemoves.push_back(CI);
2486 }
2487 }
2488
2489 Changed = true;
2490
2491 // And cleanup the calls we don't use anymore.
2492 for (auto V : ToRemoves) {
2493 V->eraseFromParent();
2494 }
2495
2496 // And remove the function we don't need either too.
2497 F->eraseFromParent();
2498 }
2499 }
2500
2501 return Changed;
2502}
2503
2504bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
2505 bool Changed = false;
2506
2507 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2508 //
2509 // %u2 = load <2 x i32> %ptr
2510 // %u2xy = extractelement %u2, 0
2511 // %u2zw = extractelement %u2, 1
2512 // %fxy = call <2 x float> Unpack2xHalf(uint)
2513 // %fzw = call <2 x float> Unpack2xHalf(uint)
2514 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2515 const std::vector<const char *> Map = {
2516 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2517 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2518 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2519 };
2520
2521 for (auto Name : Map) {
2522 // If we find a function with the matching name.
2523 if (auto F = M.getFunction(Name)) {
2524 SmallVector<Instruction *, 4> ToRemoves;
2525
2526 // Walk the users of the function.
2527 for (auto &U : F->uses()) {
2528 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2529 auto Index = CI->getOperand(0);
2530 auto Ptr = CI->getOperand(1);
2531
2532 auto IntTy = Type::getInt32Ty(M.getContext());
2533 auto Int2Ty = VectorType::get(IntTy, 2);
2534 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2535 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2536
2537 auto IndexedPtr =
2538 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
2539 auto Load = new LoadInst(IndexedPtr, "", CI);
2540
2541 // Extract each element from the loaded int2.
2542 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2543 "", CI);
2544 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2545 "", CI);
2546
2547 // Our intrinsic to unpack a float2 from an int.
2548 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2549
2550 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2551
2552 // Get the lower (x & y) components of our final float4.
2553 auto Lo = CallInst::Create(NewF, X, "", CI);
2554
2555 // Get the higher (z & w) components of our final float4.
2556 auto Hi = CallInst::Create(NewF, Y, "", CI);
2557
2558 Constant *ShuffleMask[4] = {
2559 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2560 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2561
2562 // Combine our two float2's into one float4.
2563 auto Combine = new ShuffleVectorInst(
2564 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2565
2566 CI->replaceAllUsesWith(Combine);
2567
2568 // Lastly, remember to remove the user.
2569 ToRemoves.push_back(CI);
2570 }
2571 }
2572
2573 Changed = true;
2574
2575 // And cleanup the calls we don't use anymore.
2576 for (auto V : ToRemoves) {
2577 V->eraseFromParent();
2578 }
2579
2580 // And remove the function we don't need either too.
2581 F->eraseFromParent();
2582 }
2583 }
2584
2585 return Changed;
2586}
2587
David Neto22f144c2017-06-12 14:26:21 -04002588bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
2589 bool Changed = false;
2590
2591 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
2592 "_Z15vstore_half_rtefjPU3AS1Dh",
2593 "_Z15vstore_half_rtzfjPU3AS1Dh"};
2594
2595 for (auto Name : Map) {
2596 // If we find a function with the matching name.
2597 if (auto F = M.getFunction(Name)) {
2598 SmallVector<Instruction *, 4> ToRemoves;
2599
2600 // Walk the users of the function.
2601 for (auto &U : F->uses()) {
2602 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2603 // The value to store.
2604 auto Arg0 = CI->getOperand(0);
2605
2606 // The index argument from vstore_half.
2607 auto Arg1 = CI->getOperand(1);
2608
2609 // The pointer argument from vstore_half.
2610 auto Arg2 = CI->getOperand(2);
2611
David Neto22f144c2017-06-12 14:26:21 -04002612 auto IntTy = Type::getInt32Ty(M.getContext());
2613 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002614 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04002615 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002616
2617 // Our intrinsic to pack a float2 to an int.
2618 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2619
2620 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2621
2622 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04002623 auto TempVec =
2624 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
2625 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002626
2627 // Pack the float2 -> half2 (in an int).
2628 auto X = CallInst::Create(NewF, TempVec, "", CI);
2629
David Neto482550a2018-03-24 05:21:07 -07002630 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04002631 auto ShortTy = Type::getInt16Ty(M.getContext());
2632 auto ShortPointerTy = PointerType::get(
2633 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002634
David Neto17852de2017-05-29 17:29:31 -04002635 // Truncate our i32 to an i16.
2636 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002637
David Neto17852de2017-05-29 17:29:31 -04002638 // Cast the half* pointer to short*.
2639 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002640
David Neto17852de2017-05-29 17:29:31 -04002641 // Index into the correct address of the casted pointer.
2642 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002643
David Neto17852de2017-05-29 17:29:31 -04002644 // Store to the int* we casted to.
2645 auto Store = new StoreInst(Trunc, Index, CI);
2646
2647 CI->replaceAllUsesWith(Store);
2648 } else {
2649 // We can only write to 32-bit aligned words.
2650 //
2651 // Assuming base is aligned to 32-bits, replace the equivalent of
2652 // vstore_half(value, index, base)
2653 // with:
2654 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2655 // uint32_t write_to_upper_half = index & 1u;
2656 // uint32_t shift = write_to_upper_half << 4;
2657 //
2658 // // Pack the float value as a half number in bottom 16 bits
2659 // // of an i32.
2660 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2661 //
2662 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2663 // ^ ((packed & 0xffff) << shift)
2664 // // We only need relaxed consistency, but OpenCL 1.2 only has
2665 // // sequentially consistent atomics.
2666 // // TODO(dneto): Use relaxed consistency.
2667 // atomic_xor(target_ptr, xor_value)
2668 auto IntPointerTy = PointerType::get(
2669 IntTy, Arg2->getType()->getPointerAddressSpace());
2670
2671 auto Four = ConstantInt::get(IntTy, 4);
2672 auto FFFF = ConstantInt::get(IntTy, 0xffff);
2673
2674 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
2675 // Compute index / 2
2676 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2677 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2678 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
2679 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2680 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
2681 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2682 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
2683
2684 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2685 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2686 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
2687
2688 // Generate the call to atomi_xor.
2689 SmallVector<Type *, 5> ParamTypes;
2690 // The pointer type.
2691 ParamTypes.push_back(IntPointerTy);
2692 // The Types for memory scope, semantics, and value.
2693 ParamTypes.push_back(IntTy);
2694 ParamTypes.push_back(IntTy);
2695 ParamTypes.push_back(IntTy);
2696 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2697 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
2698
2699 const auto ConstantScopeDevice =
2700 ConstantInt::get(IntTy, spv::ScopeDevice);
2701 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2702 // (SPIR-V Workgroup).
2703 const auto AddrSpaceSemanticsBits =
2704 IntPointerTy->getPointerAddressSpace() == 1
2705 ? spv::MemorySemanticsUniformMemoryMask
2706 : spv::MemorySemanticsWorkgroupMemoryMask;
2707
2708 // We're using relaxed consistency here.
2709 const auto ConstantMemorySemantics =
2710 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2711 AddrSpaceSemanticsBits);
2712
2713 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2714 ConstantMemorySemantics, ValueToXor};
2715 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2716 }
David Neto22f144c2017-06-12 14:26:21 -04002717
2718 // Lastly, remember to remove the user.
2719 ToRemoves.push_back(CI);
2720 }
2721 }
2722
2723 Changed = !ToRemoves.empty();
2724
2725 // And cleanup the calls we don't use anymore.
2726 for (auto V : ToRemoves) {
2727 V->eraseFromParent();
2728 }
2729
2730 // And remove the function we don't need either too.
2731 F->eraseFromParent();
2732 }
2733 }
2734
2735 return Changed;
2736}
2737
2738bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
2739 bool Changed = false;
2740
David Netoe2871522018-06-08 11:09:54 -07002741 const std::vector<const char *> Map = {
2742 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2743 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2744 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2745 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2746 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2747 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2748 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2749 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2750 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2751 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2752 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2753 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2754 };
David Neto22f144c2017-06-12 14:26:21 -04002755
2756 for (auto Name : Map) {
2757 // If we find a function with the matching name.
2758 if (auto F = M.getFunction(Name)) {
2759 SmallVector<Instruction *, 4> ToRemoves;
2760
2761 // Walk the users of the function.
2762 for (auto &U : F->uses()) {
2763 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2764 // The value to store.
2765 auto Arg0 = CI->getOperand(0);
2766
2767 // The index argument from vstore_half.
2768 auto Arg1 = CI->getOperand(1);
2769
2770 // The pointer argument from vstore_half.
2771 auto Arg2 = CI->getOperand(2);
2772
2773 auto IntTy = Type::getInt32Ty(M.getContext());
2774 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2775 auto NewPointerTy = PointerType::get(
2776 IntTy, Arg2->getType()->getPointerAddressSpace());
2777 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2778
2779 // Our intrinsic to pack a float2 to an int.
2780 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2781
2782 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2783
2784 // Turn the packed x & y into the final packing.
2785 auto X = CallInst::Create(NewF, Arg0, "", CI);
2786
2787 // Cast the half* pointer to int*.
2788 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2789
2790 // Index into the correct address of the casted pointer.
2791 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
2792
2793 // Store to the int* we casted to.
2794 auto Store = new StoreInst(X, Index, CI);
2795
2796 CI->replaceAllUsesWith(Store);
2797
2798 // Lastly, remember to remove the user.
2799 ToRemoves.push_back(CI);
2800 }
2801 }
2802
2803 Changed = !ToRemoves.empty();
2804
2805 // And cleanup the calls we don't use anymore.
2806 for (auto V : ToRemoves) {
2807 V->eraseFromParent();
2808 }
2809
2810 // And remove the function we don't need either too.
2811 F->eraseFromParent();
2812 }
2813 }
2814
2815 return Changed;
2816}
2817
2818bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
2819 bool Changed = false;
2820
David Netoe2871522018-06-08 11:09:54 -07002821 const std::vector<const char *> Map = {
2822 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2823 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2824 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2825 "_Z13vstorea_half4Dv4_fjPDh", // private
2826 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2827 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2828 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2829 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2830 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2831 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2832 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2833 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2834 };
David Neto22f144c2017-06-12 14:26:21 -04002835
2836 for (auto Name : Map) {
2837 // If we find a function with the matching name.
2838 if (auto F = M.getFunction(Name)) {
2839 SmallVector<Instruction *, 4> ToRemoves;
2840
2841 // Walk the users of the function.
2842 for (auto &U : F->uses()) {
2843 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2844 // The value to store.
2845 auto Arg0 = CI->getOperand(0);
2846
2847 // The index argument from vstore_half.
2848 auto Arg1 = CI->getOperand(1);
2849
2850 // The pointer argument from vstore_half.
2851 auto Arg2 = CI->getOperand(2);
2852
2853 auto IntTy = Type::getInt32Ty(M.getContext());
2854 auto Int2Ty = VectorType::get(IntTy, 2);
2855 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2856 auto NewPointerTy = PointerType::get(
2857 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2858 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2859
2860 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2861 ConstantInt::get(IntTy, 1)};
2862
2863 // Extract out the x & y components of our to store value.
2864 auto Lo =
2865 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2866 ConstantVector::get(LoShuffleMask), "", CI);
2867
2868 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2869 ConstantInt::get(IntTy, 3)};
2870
2871 // Extract out the z & w components of our to store value.
2872 auto Hi =
2873 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2874 ConstantVector::get(HiShuffleMask), "", CI);
2875
2876 // Our intrinsic to pack a float2 to an int.
2877 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2878
2879 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2880
2881 // Turn the packed x & y into the final component of our int2.
2882 auto X = CallInst::Create(NewF, Lo, "", CI);
2883
2884 // Turn the packed z & w into the final component of our int2.
2885 auto Y = CallInst::Create(NewF, Hi, "", CI);
2886
2887 auto Combine = InsertElementInst::Create(
2888 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2889 Combine = InsertElementInst::Create(
2890 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
2891
2892 // Cast the half* pointer to int2*.
2893 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2894
2895 // Index into the correct address of the casted pointer.
2896 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
2897
2898 // Store to the int2* we casted to.
2899 auto Store = new StoreInst(Combine, Index, CI);
2900
2901 CI->replaceAllUsesWith(Store);
2902
2903 // Lastly, remember to remove the user.
2904 ToRemoves.push_back(CI);
2905 }
2906 }
2907
2908 Changed = !ToRemoves.empty();
2909
2910 // And cleanup the calls we don't use anymore.
2911 for (auto V : ToRemoves) {
2912 V->eraseFromParent();
2913 }
2914
2915 // And remove the function we don't need either too.
2916 F->eraseFromParent();
2917 }
2918 }
2919
2920 return Changed;
2921}
2922
2923bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2924 bool Changed = false;
2925
2926 const std::map<const char *, const char*> Map = {
2927 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2928 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2929 };
2930
2931 for (auto Pair : Map) {
2932 // If we find a function with the matching name.
2933 if (auto F = M.getFunction(Pair.first)) {
2934 SmallVector<Instruction *, 4> ToRemoves;
2935
2936 // Walk the users of the function.
2937 for (auto &U : F->uses()) {
2938 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2939 // The image.
2940 auto Arg0 = CI->getOperand(0);
2941
2942 // The sampler.
2943 auto Arg1 = CI->getOperand(1);
2944
2945 // The coordinate (integer type that we can't handle).
2946 auto Arg2 = CI->getOperand(2);
2947
2948 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2949
2950 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2951
2952 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2953
2954 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2955
2956 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2957
2958 CI->replaceAllUsesWith(NewCI);
2959
2960 // Lastly, remember to remove the user.
2961 ToRemoves.push_back(CI);
2962 }
2963 }
2964
2965 Changed = !ToRemoves.empty();
2966
2967 // And cleanup the calls we don't use anymore.
2968 for (auto V : ToRemoves) {
2969 V->eraseFromParent();
2970 }
2971
2972 // And remove the function we don't need either too.
2973 F->eraseFromParent();
2974 }
2975 }
2976
2977 return Changed;
2978}
2979
2980bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2981 bool Changed = false;
2982
2983 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002984 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002985 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002986 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002987 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002988 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002989 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002990 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002991 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002992 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002993 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002994 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002995 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002996 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002997 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002998 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002999 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04003000 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00003001 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04003002 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00003003 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04003004 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00003005 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
3006 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
3007 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04003008
3009 for (auto Pair : Map) {
3010 // If we find a function with the matching name.
3011 if (auto F = M.getFunction(Pair.first)) {
3012 SmallVector<Instruction *, 4> ToRemoves;
3013
3014 // Walk the users of the function.
3015 for (auto &U : F->uses()) {
3016 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3017 auto FType = F->getFunctionType();
3018 SmallVector<Type *, 5> ParamTypes;
3019
3020 // The pointer type.
3021 ParamTypes.push_back(FType->getParamType(0));
3022
3023 auto IntTy = Type::getInt32Ty(M.getContext());
3024
3025 // The memory scope type.
3026 ParamTypes.push_back(IntTy);
3027
3028 // The memory semantics type.
3029 ParamTypes.push_back(IntTy);
3030
3031 if (2 < CI->getNumArgOperands()) {
3032 // The unequal memory semantics type.
3033 ParamTypes.push_back(IntTy);
3034
3035 // The value type.
3036 ParamTypes.push_back(FType->getParamType(2));
3037
3038 // The comparator type.
3039 ParamTypes.push_back(FType->getParamType(1));
3040 } else if (1 < CI->getNumArgOperands()) {
3041 // The value type.
3042 ParamTypes.push_back(FType->getParamType(1));
3043 }
3044
3045 auto NewFType =
3046 FunctionType::get(FType->getReturnType(), ParamTypes, false);
3047 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
3048
3049 // We need to map the OpenCL constants to the SPIR-V equivalents.
3050 const auto ConstantScopeDevice =
3051 ConstantInt::get(IntTy, spv::ScopeDevice);
3052 const auto ConstantMemorySemantics = ConstantInt::get(
3053 IntTy, spv::MemorySemanticsUniformMemoryMask |
3054 spv::MemorySemanticsSequentiallyConsistentMask);
3055
3056 SmallVector<Value *, 5> Params;
3057
3058 // The pointer.
3059 Params.push_back(CI->getArgOperand(0));
3060
3061 // The memory scope.
3062 Params.push_back(ConstantScopeDevice);
3063
3064 // The memory semantics.
3065 Params.push_back(ConstantMemorySemantics);
3066
3067 if (2 < CI->getNumArgOperands()) {
3068 // The unequal memory semantics.
3069 Params.push_back(ConstantMemorySemantics);
3070
3071 // The value.
3072 Params.push_back(CI->getArgOperand(2));
3073
3074 // The comparator.
3075 Params.push_back(CI->getArgOperand(1));
3076 } else if (1 < CI->getNumArgOperands()) {
3077 // The value.
3078 Params.push_back(CI->getArgOperand(1));
3079 }
3080
3081 auto NewCI = CallInst::Create(NewF, Params, "", CI);
3082
3083 CI->replaceAllUsesWith(NewCI);
3084
3085 // Lastly, remember to remove the user.
3086 ToRemoves.push_back(CI);
3087 }
3088 }
3089
3090 Changed = !ToRemoves.empty();
3091
3092 // And cleanup the calls we don't use anymore.
3093 for (auto V : ToRemoves) {
3094 V->eraseFromParent();
3095 }
3096
3097 // And remove the function we don't need either too.
3098 F->eraseFromParent();
3099 }
3100 }
3101
Neil Henning39672102017-09-29 14:33:13 +01003102 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003103 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003104 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003105 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003106 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003107 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003108 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003109 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003110 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003111 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003112 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003113 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003114 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003115 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003116 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003117 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003118 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003119 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003120 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003121 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003122 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003123 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003124 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003125 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003126 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003127 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003128 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003129 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003130 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003131 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003132 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003133 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003134 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003135 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003136 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003137 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003138 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003139 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003140 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003141 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003142 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003143 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003144 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003145 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003146 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003147 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003148 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003149 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003150 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003151 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003152 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003153 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003154 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003155 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003156 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003157 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003158 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003159 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003160 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003161 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003162 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003163 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003164 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3165 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3166 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003167
3168 for (auto Pair : Map2) {
3169 // If we find a function with the matching name.
3170 if (auto F = M.getFunction(Pair.first)) {
3171 SmallVector<Instruction *, 4> ToRemoves;
3172
3173 // Walk the users of the function.
3174 for (auto &U : F->uses()) {
3175 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3176 auto AtomicOp = new AtomicRMWInst(
3177 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3178 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3179
3180 CI->replaceAllUsesWith(AtomicOp);
3181
3182 // Lastly, remember to remove the user.
3183 ToRemoves.push_back(CI);
3184 }
3185 }
3186
3187 Changed = !ToRemoves.empty();
3188
3189 // And cleanup the calls we don't use anymore.
3190 for (auto V : ToRemoves) {
3191 V->eraseFromParent();
3192 }
3193
3194 // And remove the function we don't need either too.
3195 F->eraseFromParent();
3196 }
3197 }
3198
David Neto22f144c2017-06-12 14:26:21 -04003199 return Changed;
3200}
3201
3202bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
3203 bool Changed = false;
3204
3205 // If we find a function with the matching name.
3206 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
3207 SmallVector<Instruction *, 4> ToRemoves;
3208
3209 auto IntTy = Type::getInt32Ty(M.getContext());
3210 auto FloatTy = Type::getFloatTy(M.getContext());
3211
3212 Constant *DownShuffleMask[3] = {
3213 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3214 ConstantInt::get(IntTy, 2)};
3215
3216 Constant *UpShuffleMask[4] = {
3217 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3218 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3219
3220 Constant *FloatVec[3] = {
3221 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
3222 };
3223
3224 // Walk the users of the function.
3225 for (auto &U : F->uses()) {
3226 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3227 auto Vec4Ty = CI->getArgOperand(0)->getType();
3228 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3229 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3230 auto Vec3Ty = Arg0->getType();
3231
3232 auto NewFType =
3233 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
3234
3235 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
3236
3237 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
3238
3239 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
3240
3241 CI->replaceAllUsesWith(Result);
3242
3243 // Lastly, remember to remove the user.
3244 ToRemoves.push_back(CI);
3245 }
3246 }
3247
3248 Changed = !ToRemoves.empty();
3249
3250 // And cleanup the calls we don't use anymore.
3251 for (auto V : ToRemoves) {
3252 V->eraseFromParent();
3253 }
3254
3255 // And remove the function we don't need either too.
3256 F->eraseFromParent();
3257 }
3258
3259 return Changed;
3260}
David Neto62653202017-10-16 19:05:18 -04003261
3262bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3263 bool Changed = false;
3264
3265 // OpenCL's float result = fract(float x, float* ptr)
3266 //
3267 // In the LLVM domain:
3268 //
3269 // %floor_result = call spir_func float @floor(float %x)
3270 // store float %floor_result, float * %ptr
3271 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3272 // %result = call spir_func float
3273 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3274 //
3275 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3276 // and clspv.fract occur in the SPIR-V generator pass:
3277 //
3278 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3279 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3280 // ...
3281 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3282 // OpStore %ptr %floor_result
3283 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3284 // %fract_result = OpExtInst %float
3285 // %glsl_ext Fmin %fract_intermediate %just_under_1
3286
3287
3288 using std::string;
3289
3290 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3291 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
3292 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
3293 auto make_quad = [](const char *a, const char *b, const char *c,
3294 const char *d) {
3295 return std::tuple<const char *, const char *, const char *, const char *>(
3296 a, b, c, d);
3297 };
3298 const std::vector<QuadType> Functions = {
3299 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
3300 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
3301 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
3302 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
3303 };
3304
3305 for (auto& quad : Functions) {
3306 const StringRef fract_name(std::get<0>(quad));
3307
3308 // If we find a function with the matching name.
3309 if (auto F = M.getFunction(fract_name)) {
3310 if (F->use_begin() == F->use_end())
3311 continue;
3312
3313 // We have some uses.
3314 Changed = true;
3315
3316 auto& Context = M.getContext();
3317
3318 const StringRef floor_name(std::get<1>(quad));
3319 const StringRef fmin_name(std::get<2>(quad));
3320 const StringRef clspv_fract_name(std::get<3>(quad));
3321
3322 // This is either float or a float vector. All the float-like
3323 // types are this type.
3324 auto result_ty = F->getReturnType();
3325
3326 Function* fmin_fn = M.getFunction(fmin_name);
3327 if (!fmin_fn) {
3328 // Make the fmin function.
3329 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003330 fmin_fn =
3331 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003332 fmin_fn->addFnAttr(Attribute::ReadNone);
3333 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3334 }
3335
3336 Function* floor_fn = M.getFunction(floor_name);
3337 if (!floor_fn) {
3338 // Make the floor function.
3339 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003340 floor_fn = cast<Function>(
3341 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003342 floor_fn->addFnAttr(Attribute::ReadNone);
3343 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3344 }
3345
3346 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
3347 if (!clspv_fract_fn) {
3348 // Make the clspv_fract function.
3349 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003350 clspv_fract_fn = cast<Function>(
3351 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003352 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3353 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3354 }
3355
3356 // Number of significant significand bits, whether represented or not.
3357 unsigned num_significand_bits;
3358 switch (result_ty->getScalarType()->getTypeID()) {
3359 case Type::HalfTyID:
3360 num_significand_bits = 11;
3361 break;
3362 case Type::FloatTyID:
3363 num_significand_bits = 24;
3364 break;
3365 case Type::DoubleTyID:
3366 num_significand_bits = 53;
3367 break;
3368 default:
3369 assert(false && "Unhandled float type when processing fract builtin");
3370 break;
3371 }
3372 // Beware that the disassembler displays this value as
3373 // OpConstant %float 1
3374 // which is not quite right.
3375 const double kJustUnderOneScalar =
3376 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3377
3378 Constant *just_under_one =
3379 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3380 if (result_ty->isVectorTy()) {
3381 just_under_one = ConstantVector::getSplat(
3382 result_ty->getVectorNumElements(), just_under_one);
3383 }
3384
3385 IRBuilder<> Builder(Context);
3386
3387 SmallVector<Instruction *, 4> ToRemoves;
3388
3389 // Walk the users of the function.
3390 for (auto &U : F->uses()) {
3391 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3392
3393 Builder.SetInsertPoint(CI);
3394 auto arg = CI->getArgOperand(0);
3395 auto ptr = CI->getArgOperand(1);
3396
3397 // Compute floor result and store it.
3398 auto floor = Builder.CreateCall(floor_fn, {arg});
3399 Builder.CreateStore(floor, ptr);
3400
3401 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3402 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3403
3404 CI->replaceAllUsesWith(fract_result);
3405
3406 // Lastly, remember to remove the user.
3407 ToRemoves.push_back(CI);
3408 }
3409 }
3410
3411 // And cleanup the calls we don't use anymore.
3412 for (auto V : ToRemoves) {
3413 V->eraseFromParent();
3414 }
3415
3416 // And remove the function we don't need either too.
3417 F->eraseFromParent();
3418 }
3419 }
3420
3421 return Changed;
3422}