// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <math.h>
#include <map>
#include <string>
#include <tuple>
#include <vector>

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "spirv/1.0/spirv.hpp"

#include "clspv/Option.h"
#include "SPIRVOp.h"
David Neto482550a2018-03-24 05:21:07 -070034
David Neto22f144c2017-06-12 14:26:21 -040035using namespace llvm;
36
37#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
38
39namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000040
// Per-argument information recovered from a mangled builtin name.
struct ArgTypeInfo {
  // Signedness of the argument's base type as encoded in the mangling.
  // The mangled-name parser in this file assigns None to 'f' (float) and
  // Signed/Unsigned to the integer type codes.
  enum class SignedNess {
    None,
    Unsigned,
    Signed
  };

  // Not default-initialised; whoever creates an ArgTypeInfo must set it.
  SignedNess signedness;
};
49
50struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000051 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000052 std::vector<ArgTypeInfo> argTypeInfos;
53};
54
55bool getFunctionInfoFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
56 if (!name.consume_front("_Z")) {
57 return false;
58 }
59 size_t nameLen;
60 if (name.consumeInteger(10, nameLen)) {
61 return false;
62 }
63
Kévin Petit9d1a9d12019-03-25 15:23:46 +000064 finfo->name = name.take_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000065 name = name.drop_front(nameLen);
66
67 ArgTypeInfo prev_ti;
68
69 while (name.size() != 0) {
70
71 ArgTypeInfo ti;
72
73 // Try parsing a vector prefix
74 if (name.consume_front("Dv")) {
75 int numElems;
76 if (name.consumeInteger(10, numElems)) {
77 return false;
78 }
79
80 if (!name.consume_front("_")) {
81 return false;
82 }
83 }
84
85 // Parse the base type
86 char typeCode = name.front();
87 name = name.drop_front(1);
88 switch(typeCode) {
89 case 'c': // char
90 case 'a': // signed char
91 case 's': // short
92 case 'i': // int
93 case 'l': // long
94 ti.signedness = ArgTypeInfo::SignedNess::Signed;
95 break;
96 case 'h': // unsigned char
97 case 't': // unsigned short
98 case 'j': // unsigned int
99 case 'm': // unsigned long
100 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
101 break;
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000102 case 'f':
103 ti.signedness = ArgTypeInfo::SignedNess::None;
104 break;
Kévin Petit8a560882019-03-21 15:24:34 +0000105 case 'S':
106 ti = prev_ti;
107 if (!name.consume_front("_")) {
108 return false;
109 }
110 break;
111 default:
112 return false;
113 }
114
115 finfo->argTypeInfos.push_back(ti);
116
117 prev_ti = ti;
118 }
119
120 return true;
121};
122
123void getFunctionInfoFromMangledName(StringRef name, FunctionInfo *finfo) {
124 if (!getFunctionInfoFromMangledNameCheck(name, finfo)) {
125 llvm_unreachable("Can't parse mangled function name!");
126 }
127}
128
// Returns the bit index of the highest set bit of `v`, i.e. floor(log2(v)),
// and 0 when `v` is 0.
//
// NOTE: despite its name this is not a count of leading zeros. Callers in
// this file subtract two results to obtain the shift distance between two
// single-bit masks, which is what a highest-set-bit index provides.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Each halving that still leaves a non-zero value moves the top bit one
  // position lower.
  while ((v >>= 1) != 0) {
    ++index;
  }

  return index;
}
148
149Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
150 if (1 == elements) {
151 return Type::getInt1Ty(C);
152 } else {
153 return VectorType::get(Type::getInt1Ty(C), elements);
154 }
155}
156
157struct ReplaceOpenCLBuiltinPass final : public ModulePass {
158 static char ID;
159 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
160
161 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000162 bool replaceAbs(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100163 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400164 bool replaceRecip(Module &M);
165 bool replaceDivide(Module &M);
166 bool replaceExp10(Module &M);
167 bool replaceLog10(Module &M);
168 bool replaceBarrier(Module &M);
169 bool replaceMemFence(Module &M);
170 bool replaceRelational(Module &M);
171 bool replaceIsInfAndIsNan(Module &M);
172 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000173 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000174 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000175 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000176 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000177 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000178 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000179 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400180 bool replaceSignbit(Module &M);
181 bool replaceMadandMad24andMul24(Module &M);
182 bool replaceVloadHalf(Module &M);
183 bool replaceVloadHalf2(Module &M);
184 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700185 bool replaceClspvVloadaHalf2(Module &M);
186 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400187 bool replaceVstoreHalf(Module &M);
188 bool replaceVstoreHalf2(Module &M);
189 bool replaceVstoreHalf4(Module &M);
190 bool replaceReadImageF(Module &M);
191 bool replaceAtomics(Module &M);
192 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400193 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700194 bool replaceVload(Module &M);
195 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400196};
197}
198
199char ReplaceOpenCLBuiltinPass::ID = 0;
200static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
201 "Replace OpenCL Builtins Pass");
202
203namespace clspv {
204ModulePass *createReplaceOpenCLBuiltinPass() {
205 return new ReplaceOpenCLBuiltinPass();
206}
207}
208
209bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
210 bool Changed = false;
211
Kévin Petit2444e9b2018-11-09 14:14:37 +0000212 Changed |= replaceAbs(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100213 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400214 Changed |= replaceRecip(M);
215 Changed |= replaceDivide(M);
216 Changed |= replaceExp10(M);
217 Changed |= replaceLog10(M);
218 Changed |= replaceBarrier(M);
219 Changed |= replaceMemFence(M);
220 Changed |= replaceRelational(M);
221 Changed |= replaceIsInfAndIsNan(M);
222 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000223 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000224 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000225 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000226 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000227 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000228 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000229 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400230 Changed |= replaceSignbit(M);
231 Changed |= replaceMadandMad24andMul24(M);
232 Changed |= replaceVloadHalf(M);
233 Changed |= replaceVloadHalf2(M);
234 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700235 Changed |= replaceClspvVloadaHalf2(M);
236 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400237 Changed |= replaceVstoreHalf(M);
238 Changed |= replaceVstoreHalf2(M);
239 Changed |= replaceVstoreHalf4(M);
240 Changed |= replaceReadImageF(M);
241 Changed |= replaceAtomics(M);
242 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400243 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700244 Changed |= replaceVload(M);
245 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400246
247 return Changed;
248}
249
Kévin Petit2444e9b2018-11-09 14:14:37 +0000250bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
251 bool Changed = false;
252
253 const char *Names[] = {
Kévin Petit5ace14c2019-04-01 16:29:53 +0100254 "_Z3absh",
255 "_Z3absDv2_h",
256 "_Z3absDv3_h",
257 "_Z3absDv4_h",
Kévin Petit2444e9b2018-11-09 14:14:37 +0000258 "_Z3abst",
259 "_Z3absDv2_t",
260 "_Z3absDv3_t",
261 "_Z3absDv4_t",
262 "_Z3absj",
263 "_Z3absDv2_j",
264 "_Z3absDv3_j",
265 "_Z3absDv4_j",
266 "_Z3absm",
267 "_Z3absDv2_m",
268 "_Z3absDv3_m",
269 "_Z3absDv4_m",
270 };
271
272 for (auto Name : Names) {
273 // If we find a function with the matching name.
274 if (auto F = M.getFunction(Name)) {
275 SmallVector<Instruction *, 4> ToRemoves;
276
277 // Walk the users of the function.
278 for (auto &U : F->uses()) {
279 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
280 // Abs has one arg.
281 auto Arg = CI->getOperand(0);
282
283 // Use the argument unchanged, we know it's unsigned
284 CI->replaceAllUsesWith(Arg);
285
286 // Lastly, remember to remove the user.
287 ToRemoves.push_back(CI);
288 }
289 }
290
291 Changed = !ToRemoves.empty();
292
293 // And cleanup the calls we don't use anymore.
294 for (auto V : ToRemoves) {
295 V->eraseFromParent();
296 }
297
298 // And remove the function we don't need either too.
299 F->eraseFromParent();
300 }
301 }
302
303 return Changed;
304}
305
Kévin Petit8c1be282019-04-02 19:34:25 +0100306bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
307 bool Changed = false;
308
309 const char *Names[] = {
310 "_Z8copysignff",
311 "_Z8copysignDv2_fS_",
312 "_Z8copysignDv3_fS_",
313 "_Z8copysignDv4_fS_",
314 };
315
316 for (auto Name : Names) {
317 // If we find a function with the matching name.
318 if (auto F = M.getFunction(Name)) {
319 SmallVector<Instruction *, 4> ToRemoves;
320
321 // Walk the users of the function.
322 for (auto &U : F->uses()) {
323 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
324
325 auto XValue = CI->getOperand(0);
326 auto YValue = CI->getOperand(1);
327
328 auto Ty = XValue->getType();
329
330 Type* IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
331 if (Ty->isVectorTy()) {
332 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
333 }
334
335 // Return X with the sign of Y
336
337 // Sign bit masks
338 auto SignBit = IntTy->getScalarSizeInBits() - 1;
339 auto SignBitMask = 1 << SignBit;
340 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
341 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
342
343 IRBuilder<> Builder(CI);
344
345 // Extract sign of Y
346 auto YInt = Builder.CreateBitCast(YValue, IntTy);
347 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
348
349 // Clear sign bit in X
350 auto XInt = Builder.CreateBitCast(XValue, IntTy);
351 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
352
353 // Insert sign bit of Y into X
354 auto NewXInt = Builder.CreateOr(XInt, YSign);
355
356 // And cast back to floating-point
357 auto NewX = Builder.CreateBitCast(NewXInt, Ty);
358
359 CI->replaceAllUsesWith(NewX);
360
361 // Lastly, remember to remove the user.
362 ToRemoves.push_back(CI);
363 }
364 }
365
366 Changed = !ToRemoves.empty();
367
368 // And cleanup the calls we don't use anymore.
369 for (auto V : ToRemoves) {
370 V->eraseFromParent();
371 }
372
373 // And remove the function we don't need either too.
374 F->eraseFromParent();
375 }
376 }
377
378 return Changed;
379}
380
David Neto22f144c2017-06-12 14:26:21 -0400381bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
382 bool Changed = false;
383
384 const char *Names[] = {
385 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
386 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
387 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
388 };
389
390 for (auto Name : Names) {
391 // If we find a function with the matching name.
392 if (auto F = M.getFunction(Name)) {
393 SmallVector<Instruction *, 4> ToRemoves;
394
395 // Walk the users of the function.
396 for (auto &U : F->uses()) {
397 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
398 // Recip has one arg.
399 auto Arg = CI->getOperand(0);
400
401 auto Div = BinaryOperator::Create(
402 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
403 CI);
404
405 CI->replaceAllUsesWith(Div);
406
407 // Lastly, remember to remove the user.
408 ToRemoves.push_back(CI);
409 }
410 }
411
412 Changed = !ToRemoves.empty();
413
414 // And cleanup the calls we don't use anymore.
415 for (auto V : ToRemoves) {
416 V->eraseFromParent();
417 }
418
419 // And remove the function we don't need either too.
420 F->eraseFromParent();
421 }
422 }
423
424 return Changed;
425}
426
427bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
428 bool Changed = false;
429
430 const char *Names[] = {
431 "_Z11half_divideff", "_Z13native_divideff",
432 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
433 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
434 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
435 };
436
437 for (auto Name : Names) {
438 // If we find a function with the matching name.
439 if (auto F = M.getFunction(Name)) {
440 SmallVector<Instruction *, 4> ToRemoves;
441
442 // Walk the users of the function.
443 for (auto &U : F->uses()) {
444 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
445 auto Div = BinaryOperator::Create(
446 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
447
448 CI->replaceAllUsesWith(Div);
449
450 // Lastly, remember to remove the user.
451 ToRemoves.push_back(CI);
452 }
453 }
454
455 Changed = !ToRemoves.empty();
456
457 // And cleanup the calls we don't use anymore.
458 for (auto V : ToRemoves) {
459 V->eraseFromParent();
460 }
461
462 // And remove the function we don't need either too.
463 F->eraseFromParent();
464 }
465 }
466
467 return Changed;
468}
469
470bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
471 bool Changed = false;
472
473 const std::map<const char *, const char *> Map = {
474 {"_Z5exp10f", "_Z3expf"},
475 {"_Z10half_exp10f", "_Z8half_expf"},
476 {"_Z12native_exp10f", "_Z10native_expf"},
477 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
478 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
479 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
480 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
481 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
482 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
483 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
484 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
485 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
486
487 for (auto Pair : Map) {
488 // If we find a function with the matching name.
489 if (auto F = M.getFunction(Pair.first)) {
490 SmallVector<Instruction *, 4> ToRemoves;
491
492 // Walk the users of the function.
493 for (auto &U : F->uses()) {
494 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
495 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
496
497 auto Arg = CI->getOperand(0);
498
499 // Constant of the natural log of 10 (ln(10)).
500 const double Ln10 =
501 2.302585092994045684017991454684364207601101488628772976033;
502
503 auto Mul = BinaryOperator::Create(
504 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
505 CI);
506
507 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
508
509 CI->replaceAllUsesWith(NewCI);
510
511 // Lastly, remember to remove the user.
512 ToRemoves.push_back(CI);
513 }
514 }
515
516 Changed = !ToRemoves.empty();
517
518 // And cleanup the calls we don't use anymore.
519 for (auto V : ToRemoves) {
520 V->eraseFromParent();
521 }
522
523 // And remove the function we don't need either too.
524 F->eraseFromParent();
525 }
526 }
527
528 return Changed;
529}
530
531bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
532 bool Changed = false;
533
534 const std::map<const char *, const char *> Map = {
535 {"_Z5log10f", "_Z3logf"},
536 {"_Z10half_log10f", "_Z8half_logf"},
537 {"_Z12native_log10f", "_Z10native_logf"},
538 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
539 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
540 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
541 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
542 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
543 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
544 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
545 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
546 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
547
548 for (auto Pair : Map) {
549 // If we find a function with the matching name.
550 if (auto F = M.getFunction(Pair.first)) {
551 SmallVector<Instruction *, 4> ToRemoves;
552
553 // Walk the users of the function.
554 for (auto &U : F->uses()) {
555 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
556 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
557
558 auto Arg = CI->getOperand(0);
559
560 // Constant of the reciprocal of the natural log of 10 (ln(10)).
561 const double Ln10 =
562 0.434294481903251827651128918916605082294397005803666566114;
563
564 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
565
566 auto Mul = BinaryOperator::Create(
567 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
568 "", CI);
569
570 CI->replaceAllUsesWith(Mul);
571
572 // Lastly, remember to remove the user.
573 ToRemoves.push_back(CI);
574 }
575 }
576
577 Changed = !ToRemoves.empty();
578
579 // And cleanup the calls we don't use anymore.
580 for (auto V : ToRemoves) {
581 V->eraseFromParent();
582 }
583
584 // And remove the function we don't need either too.
585 F->eraseFromParent();
586 }
587 }
588
589 return Changed;
590}
591
592bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
593 bool Changed = false;
594
595 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
596
597 const std::map<const char *, const char *> Map = {
598 {"_Z7barrierj", "__spirv_control_barrier"}};
599
600 for (auto Pair : Map) {
601 // If we find a function with the matching name.
602 if (auto F = M.getFunction(Pair.first)) {
603 SmallVector<Instruction *, 4> ToRemoves;
604
605 // Walk the users of the function.
606 for (auto &U : F->uses()) {
607 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
608 auto FType = F->getFunctionType();
609 SmallVector<Type *, 3> Params;
610 for (unsigned i = 0; i < 3; i++) {
611 Params.push_back(FType->getParamType(0));
612 }
613 auto NewFType =
614 FunctionType::get(FType->getReturnType(), Params, false);
615 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
616
617 auto Arg = CI->getOperand(0);
618
619 // We need to map the OpenCL constants to the SPIR-V equivalents.
620 const auto LocalMemFence =
621 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
622 const auto GlobalMemFence =
623 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
624 const auto ConstantSequentiallyConsistent = ConstantInt::get(
625 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
626 const auto ConstantScopeDevice =
627 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
628 const auto ConstantScopeWorkgroup =
629 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
630
631 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
632 const auto LocalMemFenceMask = BinaryOperator::Create(
633 Instruction::And, LocalMemFence, Arg, "", CI);
634 const auto WorkgroupShiftAmount =
635 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
636 clz(CLK_LOCAL_MEM_FENCE);
637 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
638 Instruction::Shl, LocalMemFenceMask,
639 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
640
641 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
642 const auto GlobalMemFenceMask = BinaryOperator::Create(
643 Instruction::And, GlobalMemFence, Arg, "", CI);
644 const auto UniformShiftAmount =
645 clz(spv::MemorySemanticsUniformMemoryMask) -
646 clz(CLK_GLOBAL_MEM_FENCE);
647 const auto MemorySemanticsUniform = BinaryOperator::Create(
648 Instruction::Shl, GlobalMemFenceMask,
649 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
650
651 // And combine the above together, also adding in
652 // MemorySemanticsSequentiallyConsistentMask.
653 auto MemorySemantics =
654 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
655 ConstantSequentiallyConsistent, "", CI);
656 MemorySemantics = BinaryOperator::Create(
657 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
658
659 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
660 // Device Scope, otherwise Workgroup Scope.
661 const auto Cmp =
662 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
663 GlobalMemFenceMask, GlobalMemFence, "", CI);
664 const auto MemoryScope = SelectInst::Create(
665 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
666
667 // Lastly, the Execution Scope is always Workgroup Scope.
668 const auto ExecutionScope = ConstantScopeWorkgroup;
669
670 auto NewCI = CallInst::Create(
671 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
672
673 CI->replaceAllUsesWith(NewCI);
674
675 // Lastly, remember to remove the user.
676 ToRemoves.push_back(CI);
677 }
678 }
679
680 Changed = !ToRemoves.empty();
681
682 // And cleanup the calls we don't use anymore.
683 for (auto V : ToRemoves) {
684 V->eraseFromParent();
685 }
686
687 // And remove the function we don't need either too.
688 F->eraseFromParent();
689 }
690 }
691
692 return Changed;
693}
694
695bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
696 bool Changed = false;
697
698 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
699
Neil Henning39672102017-09-29 14:33:13 +0100700 using Tuple = std::tuple<const char *, unsigned>;
701 const std::map<const char *, Tuple> Map = {
702 {"_Z9mem_fencej",
703 Tuple("__spirv_memory_barrier",
704 spv::MemorySemanticsSequentiallyConsistentMask)},
705 {"_Z14read_mem_fencej",
706 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
707 {"_Z15write_mem_fencej",
708 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400709
710 for (auto Pair : Map) {
711 // If we find a function with the matching name.
712 if (auto F = M.getFunction(Pair.first)) {
713 SmallVector<Instruction *, 4> ToRemoves;
714
715 // Walk the users of the function.
716 for (auto &U : F->uses()) {
717 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
718 auto FType = F->getFunctionType();
719 SmallVector<Type *, 2> Params;
720 for (unsigned i = 0; i < 2; i++) {
721 Params.push_back(FType->getParamType(0));
722 }
723 auto NewFType =
724 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100725 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400726
727 auto Arg = CI->getOperand(0);
728
729 // We need to map the OpenCL constants to the SPIR-V equivalents.
730 const auto LocalMemFence =
731 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
732 const auto GlobalMemFence =
733 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
734 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100735 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400736 const auto ConstantScopeDevice =
737 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
738
739 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
740 const auto LocalMemFenceMask = BinaryOperator::Create(
741 Instruction::And, LocalMemFence, Arg, "", CI);
742 const auto WorkgroupShiftAmount =
743 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
744 clz(CLK_LOCAL_MEM_FENCE);
745 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
746 Instruction::Shl, LocalMemFenceMask,
747 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
748
749 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
750 const auto GlobalMemFenceMask = BinaryOperator::Create(
751 Instruction::And, GlobalMemFence, Arg, "", CI);
752 const auto UniformShiftAmount =
753 clz(spv::MemorySemanticsUniformMemoryMask) -
754 clz(CLK_GLOBAL_MEM_FENCE);
755 const auto MemorySemanticsUniform = BinaryOperator::Create(
756 Instruction::Shl, GlobalMemFenceMask,
757 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
758
759 // And combine the above together, also adding in
760 // MemorySemanticsSequentiallyConsistentMask.
761 auto MemorySemantics =
762 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
763 ConstantMemorySemantics, "", CI);
764 MemorySemantics = BinaryOperator::Create(
765 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
766
767 // Memory Scope is always device.
768 const auto MemoryScope = ConstantScopeDevice;
769
770 auto NewCI =
771 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
772
773 CI->replaceAllUsesWith(NewCI);
774
775 // Lastly, remember to remove the user.
776 ToRemoves.push_back(CI);
777 }
778 }
779
780 Changed = !ToRemoves.empty();
781
782 // And cleanup the calls we don't use anymore.
783 for (auto V : ToRemoves) {
784 V->eraseFromParent();
785 }
786
787 // And remove the function we don't need either too.
788 F->eraseFromParent();
789 }
790 }
791
792 return Changed;
793}
794
795bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
796 bool Changed = false;
797
798 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
799 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
800 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
801 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
802 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
803 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
804 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
805 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
806 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
807 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
808 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
809 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
810 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
811 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
812 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
813 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
814 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
815 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
816 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
817 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
818 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
819 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
820 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
821 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
822 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
823 };
824
825 for (auto Pair : Map) {
826 // If we find a function with the matching name.
827 if (auto F = M.getFunction(Pair.first)) {
828 SmallVector<Instruction *, 4> ToRemoves;
829
830 // Walk the users of the function.
831 for (auto &U : F->uses()) {
832 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
833 // The predicate to use in the CmpInst.
834 auto Predicate = Pair.second.first;
835
836 // The value to return for true.
837 auto TrueValue =
838 ConstantInt::getSigned(CI->getType(), Pair.second.second);
839
840 // The value to return for false.
841 auto FalseValue = Constant::getNullValue(CI->getType());
842
843 auto Arg1 = CI->getOperand(0);
844 auto Arg2 = CI->getOperand(1);
845
846 const auto Cmp =
847 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
848
849 const auto Select =
850 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
851
852 CI->replaceAllUsesWith(Select);
853
854 // Lastly, remember to remove the user.
855 ToRemoves.push_back(CI);
856 }
857 }
858
859 Changed = !ToRemoves.empty();
860
861 // And cleanup the calls we don't use anymore.
862 for (auto V : ToRemoves) {
863 V->eraseFromParent();
864 }
865
866 // And remove the function we don't need either too.
867 F->eraseFromParent();
868 }
869 }
870
871 return Changed;
872}
873
874bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
875 bool Changed = false;
876
877 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
878 {"_Z5isinff", {"__spirv_isinff", 1}},
879 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
880 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
881 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
882 {"_Z5isnanf", {"__spirv_isnanf", 1}},
883 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
884 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
885 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
886 };
887
888 for (auto Pair : Map) {
889 // If we find a function with the matching name.
890 if (auto F = M.getFunction(Pair.first)) {
891 SmallVector<Instruction *, 4> ToRemoves;
892
893 // Walk the users of the function.
894 for (auto &U : F->uses()) {
895 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
896 const auto CITy = CI->getType();
897
898 // The fake SPIR-V intrinsic to generate.
899 auto SPIRVIntrinsic = Pair.second.first;
900
901 // The value to return for true.
902 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
903
904 // The value to return for false.
905 auto FalseValue = Constant::getNullValue(CITy);
906
907 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
908 M.getContext(),
909 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
910
911 auto NewFType =
912 FunctionType::get(CorrespondingBoolTy,
913 F->getFunctionType()->getParamType(0), false);
914
915 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
916
917 auto Arg = CI->getOperand(0);
918
919 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
920
921 const auto Select =
922 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
923
924 CI->replaceAllUsesWith(Select);
925
926 // Lastly, remember to remove the user.
927 ToRemoves.push_back(CI);
928 }
929 }
930
931 Changed = !ToRemoves.empty();
932
933 // And cleanup the calls we don't use anymore.
934 for (auto V : ToRemoves) {
935 V->eraseFromParent();
936 }
937
938 // And remove the function we don't need either too.
939 F->eraseFromParent();
940 }
941 }
942
943 return Changed;
944}
945
946bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
947 bool Changed = false;
948
949 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000950 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500951 {"_Z3allc", ""},
952 {"_Z3allDv2_c", "__spirv_allDv2_c"},
953 {"_Z3allDv3_c", "__spirv_allDv3_c"},
954 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000955 {"_Z3alls", ""},
956 {"_Z3allDv2_s", "__spirv_allDv2_s"},
957 {"_Z3allDv3_s", "__spirv_allDv3_s"},
958 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400959 {"_Z3alli", ""},
960 {"_Z3allDv2_i", "__spirv_allDv2_i"},
961 {"_Z3allDv3_i", "__spirv_allDv3_i"},
962 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000963 {"_Z3alll", ""},
964 {"_Z3allDv2_l", "__spirv_allDv2_l"},
965 {"_Z3allDv3_l", "__spirv_allDv3_l"},
966 {"_Z3allDv4_l", "__spirv_allDv4_l"},
967
968 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500969 {"_Z3anyc", ""},
970 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
971 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
972 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000973 {"_Z3anys", ""},
974 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
975 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
976 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400977 {"_Z3anyi", ""},
978 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
979 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
980 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000981 {"_Z3anyl", ""},
982 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
983 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
984 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400985 };
986
987 for (auto Pair : Map) {
988 // If we find a function with the matching name.
989 if (auto F = M.getFunction(Pair.first)) {
990 SmallVector<Instruction *, 4> ToRemoves;
991
992 // Walk the users of the function.
993 for (auto &U : F->uses()) {
994 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
995 // The fake SPIR-V intrinsic to generate.
996 auto SPIRVIntrinsic = Pair.second;
997
998 auto Arg = CI->getOperand(0);
999
1000 Value *V;
1001
Kévin Petitfd27cca2018-10-31 13:00:17 +00001002 // If the argument is a 32-bit int, just use a shift
1003 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1004 V = BinaryOperator::Create(Instruction::LShr, Arg,
1005 ConstantInt::get(Arg->getType(), 31), "",
1006 CI);
1007 } else {
David Neto22f144c2017-06-12 14:26:21 -04001008 // The value for zero to compare against.
1009 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1010
David Neto22f144c2017-06-12 14:26:21 -04001011 // The value to return for true.
1012 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1013
1014 // The value to return for false.
1015 const auto FalseValue = Constant::getNullValue(CI->getType());
1016
Kévin Petitfd27cca2018-10-31 13:00:17 +00001017 const auto Cmp = CmpInst::Create(
1018 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1019
1020 Value* SelectSource;
1021
1022 // If we have a function to call, call it!
1023 if (0 < strlen(SPIRVIntrinsic)) {
1024
1025 const auto NewFType = FunctionType::get(
1026 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
1027
1028 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1029
1030 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
1031
1032 SelectSource = NewCI;
1033
1034 } else {
1035 SelectSource = Cmp;
1036 }
1037
1038 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001039 }
1040
1041 CI->replaceAllUsesWith(V);
1042
1043 // Lastly, remember to remove the user.
1044 ToRemoves.push_back(CI);
1045 }
1046 }
1047
1048 Changed = !ToRemoves.empty();
1049
1050 // And cleanup the calls we don't use anymore.
1051 for (auto V : ToRemoves) {
1052 V->eraseFromParent();
1053 }
1054
1055 // And remove the function we don't need either too.
1056 F->eraseFromParent();
1057 }
1058 }
1059
1060 return Changed;
1061}
1062
Kévin Petitbf0036c2019-03-06 13:57:10 +00001063bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1064 bool Changed = false;
1065
1066 for (auto const &SymVal : M.getValueSymbolTable()) {
1067 // Skip symbols whose name doesn't match
1068 if (!SymVal.getKey().startswith("_Z8upsample")) {
1069 continue;
1070 }
1071 // Is there a function going by that name?
1072 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1073
1074 SmallVector<Instruction *, 4> ToRemoves;
1075
1076 // Walk the users of the function.
1077 for (auto &U : F->uses()) {
1078 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1079
1080 // Get arguments
1081 auto HiValue = CI->getOperand(0);
1082 auto LoValue = CI->getOperand(1);
1083
1084 // Don't touch overloads that aren't in OpenCL C
1085 auto HiType = HiValue->getType();
1086 auto LoType = LoValue->getType();
1087
1088 if (HiType != LoType) {
1089 continue;
1090 }
1091
1092 if (!HiType->isIntOrIntVectorTy()) {
1093 continue;
1094 }
1095
1096 if (HiType->getScalarSizeInBits() * 2 !=
1097 CI->getType()->getScalarSizeInBits()) {
1098 continue;
1099 }
1100
1101 if ((HiType->getScalarSizeInBits() != 8) &&
1102 (HiType->getScalarSizeInBits() != 16) &&
1103 (HiType->getScalarSizeInBits() != 32)) {
1104 continue;
1105 }
1106
1107 if (HiType->isVectorTy()) {
1108 if ((HiType->getVectorNumElements() != 2) &&
1109 (HiType->getVectorNumElements() != 3) &&
1110 (HiType->getVectorNumElements() != 4) &&
1111 (HiType->getVectorNumElements() != 8) &&
1112 (HiType->getVectorNumElements() != 16)) {
1113 continue;
1114 }
1115 }
1116
1117 // Convert both operands to the result type
1118 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(),
1119 "", CI);
1120 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(),
1121 "", CI);
1122
1123 // Shift high operand
1124 auto ShiftAmount = ConstantInt::get(CI->getType(),
1125 HiType->getScalarSizeInBits());
1126 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1127 ShiftAmount, "", CI);
1128
1129 // OR both results
1130 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1131 "", CI);
1132
1133 // Replace call with the expression
1134 CI->replaceAllUsesWith(V);
1135
1136 // Lastly, remember to remove the user.
1137 ToRemoves.push_back(CI);
1138 }
1139 }
1140
1141 Changed = !ToRemoves.empty();
1142
1143 // And cleanup the calls we don't use anymore.
1144 for (auto V : ToRemoves) {
1145 V->eraseFromParent();
1146 }
1147
1148 // And remove the function we don't need either too.
1149 F->eraseFromParent();
1150 }
1151 }
1152
1153 return Changed;
1154}
1155
Kévin Petitd44eef52019-03-08 13:22:14 +00001156bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1157 bool Changed = false;
1158
1159 for (auto const &SymVal : M.getValueSymbolTable()) {
1160 // Skip symbols whose name doesn't match
1161 if (!SymVal.getKey().startswith("_Z6rotate")) {
1162 continue;
1163 }
1164 // Is there a function going by that name?
1165 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1166
1167 SmallVector<Instruction *, 4> ToRemoves;
1168
1169 // Walk the users of the function.
1170 for (auto &U : F->uses()) {
1171 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1172
1173 // Get arguments
1174 auto SrcValue = CI->getOperand(0);
1175 auto RotAmount = CI->getOperand(1);
1176
1177 // Don't touch overloads that aren't in OpenCL C
1178 auto SrcType = SrcValue->getType();
1179 auto RotType = RotAmount->getType();
1180
1181 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1182 continue;
1183 }
1184
1185 if (!SrcType->isIntOrIntVectorTy()) {
1186 continue;
1187 }
1188
1189 if ((SrcType->getScalarSizeInBits() != 8) &&
1190 (SrcType->getScalarSizeInBits() != 16) &&
1191 (SrcType->getScalarSizeInBits() != 32) &&
1192 (SrcType->getScalarSizeInBits() != 64)) {
1193 continue;
1194 }
1195
1196 if (SrcType->isVectorTy()) {
1197 if ((SrcType->getVectorNumElements() != 2) &&
1198 (SrcType->getVectorNumElements() != 3) &&
1199 (SrcType->getVectorNumElements() != 4) &&
1200 (SrcType->getVectorNumElements() != 8) &&
1201 (SrcType->getVectorNumElements() != 16)) {
1202 continue;
1203 }
1204 }
1205
1206 // The approach used is to shift the top bits down, the bottom bits up
1207 // and OR the two shifted values.
1208
1209 // The rotation amount is to be treated modulo the element size.
1210 // Since SPIR-V shift ops don't support this, let's apply the
1211 // modulo ahead of shifting. The element size is always a power of
1212 // two so we can just AND with a mask.
1213 auto ModMask = ConstantInt::get(SrcType,
1214 SrcType->getScalarSizeInBits() - 1);
1215 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1216 ModMask, "", CI);
1217
1218 // Let's calc the amount by which to shift top bits down
1219 auto ScalarSize = ConstantInt::get(SrcType,
1220 SrcType->getScalarSizeInBits());
1221 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1222 RotAmount, "", CI);
1223
1224 // Now shift the bottom bits up and the top bits down
1225 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1226 RotAmount, "", CI);
1227 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1228 DownAmount, "", CI);
1229
1230 // Finally OR the two shifted values
1231 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1232 HiRotated, "", CI);
1233
1234 // Replace call with the expression
1235 CI->replaceAllUsesWith(V);
1236
1237 // Lastly, remember to remove the user.
1238 ToRemoves.push_back(CI);
1239 }
1240 }
1241
1242 Changed = !ToRemoves.empty();
1243
1244 // And cleanup the calls we don't use anymore.
1245 for (auto V : ToRemoves) {
1246 V->eraseFromParent();
1247 }
1248
1249 // And remove the function we don't need either too.
1250 F->eraseFromParent();
1251 }
1252 }
1253
1254 return Changed;
1255}
1256
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001257bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1258 bool Changed = false;
1259
1260 for (auto const &SymVal : M.getValueSymbolTable()) {
1261
1262 // Skip symbols whose name obviously doesn't match
1263 if (!SymVal.getKey().contains("convert_")) {
1264 continue;
1265 }
1266
1267 // Is there a function going by that name?
1268 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1269
1270 // Get info from the mangled name
1271 FunctionInfo finfo;
1272 bool parsed = getFunctionInfoFromMangledNameCheck(F->getName(), &finfo);
1273
1274 // All functions of interest are handled by our mangled name parser
1275 if (!parsed) {
1276 continue;
1277 }
1278
1279 // Move on if this isn't a call to convert_
1280 if (!finfo.name.startswith("convert_")) {
1281 continue;
1282 }
1283
1284 // Extract the destination type from the function name
1285 StringRef DstTypeName = finfo.name;
1286 DstTypeName.consume_front("convert_");
1287
1288 auto DstSignedNess = StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1289 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1290 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1291 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1292 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1293 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1294 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1295 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1296 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1297 .Default(ArgTypeInfo::SignedNess::None);
1298
1299 auto SrcSignedNess = finfo.argTypeInfos[0].signedness;
1300
1301 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
1302 bool SrcIsSigned = SrcSignedNess == ArgTypeInfo::SignedNess::Signed;
1303
1304 SmallVector<Instruction *, 4> ToRemoves;
1305
1306 // Walk the users of the function.
1307 for (auto &U : F->uses()) {
1308 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1309
1310 // Get arguments
1311 auto SrcValue = CI->getOperand(0);
1312
1313 // Don't touch overloads that aren't in OpenCL C
1314 auto SrcType = SrcValue->getType();
1315 auto DstType = CI->getType();
1316
1317 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1318 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1319 continue;
1320 }
1321
1322 if (SrcType->isVectorTy()) {
1323
1324 if (SrcType->getVectorNumElements() !=
1325 DstType->getVectorNumElements()) {
1326 continue;
1327 }
1328
1329 if ((SrcType->getVectorNumElements() != 2) &&
1330 (SrcType->getVectorNumElements() != 3) &&
1331 (SrcType->getVectorNumElements() != 4) &&
1332 (SrcType->getVectorNumElements() != 8) &&
1333 (SrcType->getVectorNumElements() != 16)) {
1334 continue;
1335 }
1336 }
1337
1338 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1339 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1340
1341 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1342 bool DstIsInt = DstType->isIntOrIntVectorTy();
1343
1344 Value *V;
1345 if (SrcIsFloat && DstIsFloat) {
1346 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1347 } else if (SrcIsFloat && DstIsInt) {
1348 if (DstIsSigned) {
1349 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1350 } else {
1351 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1352 }
1353 } else if (SrcIsInt && DstIsFloat) {
1354 if (SrcIsSigned) {
1355 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1356 } else {
1357 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1358 }
1359 } else if (SrcIsInt && DstIsInt) {
1360 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1361 } else {
1362 // Not something we're supposed to handle, just move on
1363 continue;
1364 }
1365
1366 // Replace call with the expression
1367 CI->replaceAllUsesWith(V);
1368
1369 // Lastly, remember to remove the user.
1370 ToRemoves.push_back(CI);
1371 }
1372 }
1373
1374 Changed = !ToRemoves.empty();
1375
1376 // And cleanup the calls we don't use anymore.
1377 for (auto V : ToRemoves) {
1378 V->eraseFromParent();
1379 }
1380
1381 // And remove the function we don't need either too.
1382 F->eraseFromParent();
1383 }
1384 }
1385
1386 return Changed;
1387}
1388
Kévin Petit8a560882019-03-21 15:24:34 +00001389bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1390 bool Changed = false;
1391
Kévin Petit617a76d2019-04-04 13:54:16 +01001392 SmallVector<Function*, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001393
Kévin Petit617a76d2019-04-04 13:54:16 +01001394 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001395 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1396 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1397
1398 // Skip symbols whose name doesn't match
1399 if (!isMad && !isMul) {
1400 continue;
1401 }
1402
1403 // Is there a function going by that name?
1404 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001405 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001406 }
1407 }
1408
Kévin Petit617a76d2019-04-04 13:54:16 +01001409 for (auto F : FnWorklist) {
1410 SmallVector<Instruction *, 4> ToRemoves;
1411
1412 bool isMad = F->getName().startswith("_Z6mad_hi");
1413 // Walk the users of the function.
1414 for (auto &U : F->uses()) {
1415 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1416
1417 // Get arguments
1418 auto AValue = CI->getOperand(0);
1419 auto BValue = CI->getOperand(1);
1420 auto CValue = CI->getOperand(2);
1421
1422 // Don't touch overloads that aren't in OpenCL C
1423 auto AType = AValue->getType();
1424 auto BType = BValue->getType();
1425 auto CType = CValue->getType();
1426
1427 if ((AType != BType) || (CI->getType() != AType) ||
1428 (isMad && (AType != CType))) {
1429 continue;
1430 }
1431
1432 if (!AType->isIntOrIntVectorTy()) {
1433 continue;
1434 }
1435
1436 if ((AType->getScalarSizeInBits() != 8) &&
1437 (AType->getScalarSizeInBits() != 16) &&
1438 (AType->getScalarSizeInBits() != 32) &&
1439 (AType->getScalarSizeInBits() != 64)) {
1440 continue;
1441 }
1442
1443 if (AType->isVectorTy()) {
1444 if ((AType->getVectorNumElements() != 2) &&
1445 (AType->getVectorNumElements() != 3) &&
1446 (AType->getVectorNumElements() != 4) &&
1447 (AType->getVectorNumElements() != 8) &&
1448 (AType->getVectorNumElements() != 16)) {
1449 continue;
1450 }
1451 }
1452
1453 // Get infos from the mangled OpenCL built-in function name
1454 FunctionInfo finfo;
1455 getFunctionInfoFromMangledName(F->getName(), &finfo);
1456
1457 // Select the appropriate signed/unsigned SPIR-V op
1458 spv::Op opcode;
1459 if (finfo.argTypeInfos[0].signedness == ArgTypeInfo::SignedNess::Signed) {
1460 opcode = spv::OpSMulExtended;
1461 } else {
1462 opcode = spv::OpUMulExtended;
1463 }
1464
1465 // Our SPIR-V op returns a struct, create a type for it
1466 SmallVector<Type*, 2> TwoValueType = {
1467 AType,
1468 AType
1469 };
1470 auto ExMulRetType = StructType::create(TwoValueType);
1471
1472 // Call the SPIR-V op
1473 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1474 ExMulRetType, {AValue, BValue});
1475
1476 // Get the high part of the result
1477 unsigned Idxs[] = {1};
1478 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1479
1480 // If we're handling a mad_hi, add the third argument to the result
1481 if (isMad) {
1482 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1483 }
1484
1485 // Replace call with the expression
1486 CI->replaceAllUsesWith(V);
1487
1488 // Lastly, remember to remove the user.
1489 ToRemoves.push_back(CI);
1490 }
1491 }
1492
1493 Changed = !ToRemoves.empty();
1494
1495 // And cleanup the calls we don't use anymore.
1496 for (auto V : ToRemoves) {
1497 V->eraseFromParent();
1498 }
1499
1500 // And remove the function we don't need either too.
1501 F->eraseFromParent();
1502 }
1503
Kévin Petit8a560882019-03-21 15:24:34 +00001504 return Changed;
1505}
1506
Kévin Petitf5b78a22018-10-25 14:32:17 +00001507bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1508 bool Changed = false;
1509
1510 for (auto const &SymVal : M.getValueSymbolTable()) {
1511 // Skip symbols whose name doesn't match
1512 if (!SymVal.getKey().startswith("_Z6select")) {
1513 continue;
1514 }
1515 // Is there a function going by that name?
1516 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1517
1518 SmallVector<Instruction *, 4> ToRemoves;
1519
1520 // Walk the users of the function.
1521 for (auto &U : F->uses()) {
1522 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1523
1524 // Get arguments
1525 auto FalseValue = CI->getOperand(0);
1526 auto TrueValue = CI->getOperand(1);
1527 auto PredicateValue = CI->getOperand(2);
1528
1529 // Don't touch overloads that aren't in OpenCL C
1530 auto FalseType = FalseValue->getType();
1531 auto TrueType = TrueValue->getType();
1532 auto PredicateType = PredicateValue->getType();
1533
1534 if (FalseType != TrueType) {
1535 continue;
1536 }
1537
1538 if (!PredicateType->isIntOrIntVectorTy()) {
1539 continue;
1540 }
1541
1542 if (!FalseType->isIntOrIntVectorTy() &&
1543 !FalseType->getScalarType()->isFloatingPointTy()) {
1544 continue;
1545 }
1546
1547 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1548 continue;
1549 }
1550
1551 if (FalseType->getScalarSizeInBits() !=
1552 PredicateType->getScalarSizeInBits()) {
1553 continue;
1554 }
1555
1556 if (FalseType->isVectorTy()) {
1557 if (FalseType->getVectorNumElements() !=
1558 PredicateType->getVectorNumElements()) {
1559 continue;
1560 }
1561
1562 if ((FalseType->getVectorNumElements() != 2) &&
1563 (FalseType->getVectorNumElements() != 3) &&
1564 (FalseType->getVectorNumElements() != 4) &&
1565 (FalseType->getVectorNumElements() != 8) &&
1566 (FalseType->getVectorNumElements() != 16)) {
1567 continue;
1568 }
1569 }
1570
1571 // Create constant
1572 const auto ZeroValue = Constant::getNullValue(PredicateType);
1573
1574 // Scalar and vector are to be treated differently
1575 CmpInst::Predicate Pred;
1576 if (PredicateType->isVectorTy()) {
1577 Pred = CmpInst::ICMP_SLT;
1578 } else {
1579 Pred = CmpInst::ICMP_NE;
1580 }
1581
1582 // Create comparison instruction
1583 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1584 ZeroValue, "", CI);
1585
1586 // Create select
1587 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1588
1589 // Replace call with the selection
1590 CI->replaceAllUsesWith(V);
1591
1592 // Lastly, remember to remove the user.
1593 ToRemoves.push_back(CI);
1594 }
1595 }
1596
1597 Changed = !ToRemoves.empty();
1598
1599 // And cleanup the calls we don't use anymore.
1600 for (auto V : ToRemoves) {
1601 V->eraseFromParent();
1602 }
1603
1604 // And remove the function we don't need either too.
1605 F->eraseFromParent();
1606 }
1607 }
1608
1609 return Changed;
1610}
1611
Kévin Petite7d0cce2018-10-31 12:38:56 +00001612bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1613 bool Changed = false;
1614
1615 for (auto const &SymVal : M.getValueSymbolTable()) {
1616 // Skip symbols whose name doesn't match
1617 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1618 continue;
1619 }
1620 // Is there a function going by that name?
1621 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1622
1623 SmallVector<Instruction *, 4> ToRemoves;
1624
1625 // Walk the users of the function.
1626 for (auto &U : F->uses()) {
1627 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1628
1629 if (CI->getNumOperands() != 4) {
1630 continue;
1631 }
1632
1633 // Get arguments
1634 auto FalseValue = CI->getOperand(0);
1635 auto TrueValue = CI->getOperand(1);
1636 auto PredicateValue = CI->getOperand(2);
1637
1638 // Don't touch overloads that aren't in OpenCL C
1639 auto FalseType = FalseValue->getType();
1640 auto TrueType = TrueValue->getType();
1641 auto PredicateType = PredicateValue->getType();
1642
1643 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1644 continue;
1645 }
1646
1647 if (TrueType->isVectorTy()) {
1648 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1649 !TrueType->getScalarType()->isIntegerTy()) {
1650 continue;
1651 }
1652 if ((TrueType->getVectorNumElements() != 2) &&
1653 (TrueType->getVectorNumElements() != 3) &&
1654 (TrueType->getVectorNumElements() != 4) &&
1655 (TrueType->getVectorNumElements() != 8) &&
1656 (TrueType->getVectorNumElements() != 16)) {
1657 continue;
1658 }
1659 }
1660
1661 // Remember the type of the operands
1662 auto OpType = TrueType;
1663
1664 // The actual bit selection will always be done on an integer type,
1665 // declare it here
1666 Type *BitType;
1667
1668 // If the operands are float, then bitcast them to int
1669 if (OpType->getScalarType()->isFloatingPointTy()) {
1670
1671 // First create the new type
1672 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1673 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1674 if (OpType->isVectorTy()) {
1675 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
1676 }
1677
1678 // Then bitcast all operands
1679 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
1680 BitType, "", CI);
1681 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
1682 BitType, "", CI);
1683 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1684
1685 } else {
1686 // The operands have an integer type, use it directly
1687 BitType = OpType;
1688 }
1689
1690 // All the operands are now always integers
1691 // implement as (c & b) | (~c & a)
1692
1693 // Create our negated predicate value
1694 auto AllOnes = Constant::getAllOnesValue(BitType);
1695 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1696 PredicateValue,
1697 AllOnes, "", CI);
1698
1699 // Then put everything together
1700 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1701 NotPredicateValue,
1702 FalseValue, "", CI);
1703 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1704 PredicateValue,
1705 TrueValue, "", CI);
1706
1707 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1708 BitsTrue, "", CI);
1709
1710 // If we were dealing with a floating point type, we must bitcast
1711 // the result back to that
1712 if (OpType->getScalarType()->isFloatingPointTy()) {
1713 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1714 }
1715
1716 // Replace call with our new code
1717 CI->replaceAllUsesWith(V);
1718
1719 // Lastly, remember to remove the user.
1720 ToRemoves.push_back(CI);
1721 }
1722 }
1723
1724 Changed = !ToRemoves.empty();
1725
1726 // And cleanup the calls we don't use anymore.
1727 for (auto V : ToRemoves) {
1728 V->eraseFromParent();
1729 }
1730
1731 // And remove the function we don't need either too.
1732 F->eraseFromParent();
1733 }
1734 }
1735
1736 return Changed;
1737}
1738
Kévin Petit6b0a9532018-10-30 20:00:39 +00001739bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1740 bool Changed = false;
1741
1742 const std::map<const char *, const char *> Map = {
1743 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1744 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1745 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1746 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1747 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1748 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1749 };
1750
1751 for (auto Pair : Map) {
1752 // If we find a function with the matching name.
1753 if (auto F = M.getFunction(Pair.first)) {
1754 SmallVector<Instruction *, 4> ToRemoves;
1755
1756 // Walk the users of the function.
1757 for (auto &U : F->uses()) {
1758 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1759
1760 auto ReplacementFn = Pair.second;
1761
1762 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1763 Value *VectorArg;
1764
1765 // First figure out which function we're dealing with
1766 if (F->getName().startswith("_Z10smoothstep")) {
1767 ArgsToSplat.push_back(CI->getOperand(1));
1768 VectorArg = CI->getOperand(2);
1769 } else {
1770 VectorArg = CI->getOperand(1);
1771 }
1772
1773 // Splat arguments that need to be
1774 SmallVector<Value*, 2> SplatArgs;
1775 auto VecType = VectorArg->getType();
1776
1777 for (auto arg : ArgsToSplat) {
1778 Value* NewVectorArg = UndefValue::get(VecType);
1779 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1780 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1781 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1782 }
1783 SplatArgs.push_back(NewVectorArg);
1784 }
1785
1786 // Replace the call with the vector/vector flavour
1787 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1788 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1789
1790 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1791
1792 SmallVector<Value*, 3> NewArgs;
1793 for (auto arg : SplatArgs) {
1794 NewArgs.push_back(arg);
1795 }
1796 NewArgs.push_back(VectorArg);
1797
1798 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1799
1800 CI->replaceAllUsesWith(NewCI);
1801
1802 // Lastly, remember to remove the user.
1803 ToRemoves.push_back(CI);
1804 }
1805 }
1806
1807 Changed = !ToRemoves.empty();
1808
1809 // And cleanup the calls we don't use anymore.
1810 for (auto V : ToRemoves) {
1811 V->eraseFromParent();
1812 }
1813
1814 // And remove the function we don't need either too.
1815 F->eraseFromParent();
1816 }
1817 }
1818
1819 return Changed;
1820}
1821
David Neto22f144c2017-06-12 14:26:21 -04001822bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1823 bool Changed = false;
1824
1825 const std::map<const char *, Instruction::BinaryOps> Map = {
1826 {"_Z7signbitf", Instruction::LShr},
1827 {"_Z7signbitDv2_f", Instruction::AShr},
1828 {"_Z7signbitDv3_f", Instruction::AShr},
1829 {"_Z7signbitDv4_f", Instruction::AShr},
1830 };
1831
1832 for (auto Pair : Map) {
1833 // If we find a function with the matching name.
1834 if (auto F = M.getFunction(Pair.first)) {
1835 SmallVector<Instruction *, 4> ToRemoves;
1836
1837 // Walk the users of the function.
1838 for (auto &U : F->uses()) {
1839 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1840 auto Arg = CI->getOperand(0);
1841
1842 auto Bitcast =
1843 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1844
1845 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1846 ConstantInt::get(CI->getType(), 31),
1847 "", CI);
1848
1849 CI->replaceAllUsesWith(Shr);
1850
1851 // Lastly, remember to remove the user.
1852 ToRemoves.push_back(CI);
1853 }
1854 }
1855
1856 Changed = !ToRemoves.empty();
1857
1858 // And cleanup the calls we don't use anymore.
1859 for (auto V : ToRemoves) {
1860 V->eraseFromParent();
1861 }
1862
1863 // And remove the function we don't need either too.
1864 F->eraseFromParent();
1865 }
1866 }
1867
1868 return Changed;
1869}
1870
1871bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1872 bool Changed = false;
1873
1874 const std::map<const char *,
1875 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1876 Map = {
1877 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1878 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1879 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1880 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1881 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1882 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1883 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1884 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1885 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1886 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1887 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1888 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1889 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1890 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1891 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1892 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1893 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1894 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1895 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1896 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1897 };
1898
1899 for (auto Pair : Map) {
1900 // If we find a function with the matching name.
1901 if (auto F = M.getFunction(Pair.first)) {
1902 SmallVector<Instruction *, 4> ToRemoves;
1903
1904 // Walk the users of the function.
1905 for (auto &U : F->uses()) {
1906 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1907 // The multiply instruction to use.
1908 auto MulInst = Pair.second.first;
1909
1910 // The add instruction to use.
1911 auto AddInst = Pair.second.second;
1912
1913 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1914
1915 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1916 CI->getArgOperand(1), "", CI);
1917
1918 if (Instruction::BinaryOpsEnd != AddInst) {
1919 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1920 CI);
1921 }
1922
1923 CI->replaceAllUsesWith(I);
1924
1925 // Lastly, remember to remove the user.
1926 ToRemoves.push_back(CI);
1927 }
1928 }
1929
1930 Changed = !ToRemoves.empty();
1931
1932 // And cleanup the calls we don't use anymore.
1933 for (auto V : ToRemoves) {
1934 V->eraseFromParent();
1935 }
1936
1937 // And remove the function we don't need either too.
1938 F->eraseFromParent();
1939 }
1940 }
1941
1942 return Changed;
1943}
1944
Derek Chowcfd368b2017-10-19 20:58:45 -07001945bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1946 bool Changed = false;
1947
1948 struct VectorStoreOps {
1949 const char* name;
1950 int n;
1951 Type* (*get_scalar_type_function)(LLVMContext&);
1952 } vector_store_ops[] = {
1953 // TODO(derekjchow): Expand this list.
1954 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1955 };
1956
David Neto544fffc2017-11-16 18:35:14 -05001957 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001958 auto Name = Op.name;
1959 auto N = Op.n;
1960 auto TypeFn = Op.get_scalar_type_function;
1961 if (auto F = M.getFunction(Name)) {
1962 SmallVector<Instruction *, 4> ToRemoves;
1963
1964 // Walk the users of the function.
1965 for (auto &U : F->uses()) {
1966 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1967 // The value argument from vstoren.
1968 auto Arg0 = CI->getOperand(0);
1969
1970 // The index argument from vstoren.
1971 auto Arg1 = CI->getOperand(1);
1972
1973 // The pointer argument from vstoren.
1974 auto Arg2 = CI->getOperand(2);
1975
1976 // Get types.
1977 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1978 auto ScalarNPointerTy = PointerType::get(
1979 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1980
1981 // Cast to scalarn
1982 auto Cast = CastInst::CreatePointerCast(
1983 Arg2, ScalarNPointerTy, "", CI);
1984 // Index to correct address
1985 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1986 // Store
1987 auto Store = new StoreInst(Arg0, Index, CI);
1988
1989 CI->replaceAllUsesWith(Store);
1990 ToRemoves.push_back(CI);
1991 }
1992 }
1993
1994 Changed = !ToRemoves.empty();
1995
1996 // And cleanup the calls we don't use anymore.
1997 for (auto V : ToRemoves) {
1998 V->eraseFromParent();
1999 }
2000
2001 // And remove the function we don't need either too.
2002 F->eraseFromParent();
2003 }
2004 }
2005
2006 return Changed;
2007}
2008
2009bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2010 bool Changed = false;
2011
2012 struct VectorLoadOps {
2013 const char* name;
2014 int n;
2015 Type* (*get_scalar_type_function)(LLVMContext&);
2016 } vector_load_ops[] = {
2017 // TODO(derekjchow): Expand this list.
2018 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
2019 };
2020
David Neto544fffc2017-11-16 18:35:14 -05002021 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002022 auto Name = Op.name;
2023 auto N = Op.n;
2024 auto TypeFn = Op.get_scalar_type_function;
2025 // If we find a function with the matching name.
2026 if (auto F = M.getFunction(Name)) {
2027 SmallVector<Instruction *, 4> ToRemoves;
2028
2029 // Walk the users of the function.
2030 for (auto &U : F->uses()) {
2031 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2032 // The index argument from vloadn.
2033 auto Arg0 = CI->getOperand(0);
2034
2035 // The pointer argument from vloadn.
2036 auto Arg1 = CI->getOperand(1);
2037
2038 // Get types.
2039 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
2040 auto ScalarNPointerTy = PointerType::get(
2041 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
2042
2043 // Cast to scalarn
2044 auto Cast = CastInst::CreatePointerCast(
2045 Arg1, ScalarNPointerTy, "", CI);
2046 // Index to correct address
2047 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2048 // Load
2049 auto Load = new LoadInst(Index, "", CI);
2050
2051 CI->replaceAllUsesWith(Load);
2052 ToRemoves.push_back(CI);
2053 }
2054 }
2055
2056 Changed = !ToRemoves.empty();
2057
2058 // And cleanup the calls we don't use anymore.
2059 for (auto V : ToRemoves) {
2060 V->eraseFromParent();
2061 }
2062
2063 // And remove the function we don't need either too.
2064 F->eraseFromParent();
2065
2066 }
2067 }
2068
2069 return Changed;
2070}
2071
David Neto22f144c2017-06-12 14:26:21 -04002072bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2073 bool Changed = false;
2074
2075 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2076 "_Z10vload_halfjPU3AS2KDh"};
2077
2078 for (auto Name : Map) {
2079 // If we find a function with the matching name.
2080 if (auto F = M.getFunction(Name)) {
2081 SmallVector<Instruction *, 4> ToRemoves;
2082
2083 // Walk the users of the function.
2084 for (auto &U : F->uses()) {
2085 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2086 // The index argument from vload_half.
2087 auto Arg0 = CI->getOperand(0);
2088
2089 // The pointer argument from vload_half.
2090 auto Arg1 = CI->getOperand(1);
2091
David Neto22f144c2017-06-12 14:26:21 -04002092 auto IntTy = Type::getInt32Ty(M.getContext());
2093 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002094 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2095
David Neto22f144c2017-06-12 14:26:21 -04002096 // Our intrinsic to unpack a float2 from an int.
2097 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2098
2099 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2100
David Neto482550a2018-03-24 05:21:07 -07002101 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002102 auto ShortTy = Type::getInt16Ty(M.getContext());
2103 auto ShortPointerTy = PointerType::get(
2104 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002105
David Netoac825b82017-05-30 12:49:01 -04002106 // Cast the half* pointer to short*.
2107 auto Cast =
2108 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002109
David Netoac825b82017-05-30 12:49:01 -04002110 // Index into the correct address of the casted pointer.
2111 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2112
2113 // Load from the short* we casted to.
2114 auto Load = new LoadInst(Index, "", CI);
2115
2116 // ZExt the short -> int.
2117 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2118
2119 // Get our float2.
2120 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2121
2122 // Extract out the bottom element which is our float result.
2123 auto Extract = ExtractElementInst::Create(
2124 Call, ConstantInt::get(IntTy, 0), "", CI);
2125
2126 CI->replaceAllUsesWith(Extract);
2127 } else {
2128 // Assume the pointer argument points to storage aligned to 32bits
2129 // or more.
2130 // TODO(dneto): Do more analysis to make sure this is true?
2131 //
2132 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2133 // with:
2134 //
2135 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2136 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2137 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2138 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2139 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2140 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2141 // x float> %converted, %index_is_odd32
2142
2143 auto IntPointerTy = PointerType::get(
2144 IntTy, Arg1->getType()->getPointerAddressSpace());
2145
David Neto973e6a82017-05-30 13:48:18 -04002146 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002147 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002148 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002149 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2150
2151 auto One = ConstantInt::get(IntTy, 1);
2152 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2153 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2154
2155 // Index into the correct address of the casted pointer.
2156 auto Ptr =
2157 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2158
2159 // Load from the int* we casted to.
2160 auto Load = new LoadInst(Ptr, "", CI);
2161
2162 // Get our float2.
2163 auto Call = CallInst::Create(NewF, Load, "", CI);
2164
2165 // Extract out the float result, where the element number is
2166 // determined by whether the original index was even or odd.
2167 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2168
2169 CI->replaceAllUsesWith(Extract);
2170 }
David Neto22f144c2017-06-12 14:26:21 -04002171
2172 // Lastly, remember to remove the user.
2173 ToRemoves.push_back(CI);
2174 }
2175 }
2176
2177 Changed = !ToRemoves.empty();
2178
2179 // And cleanup the calls we don't use anymore.
2180 for (auto V : ToRemoves) {
2181 V->eraseFromParent();
2182 }
2183
2184 // And remove the function we don't need either too.
2185 F->eraseFromParent();
2186 }
2187 }
2188
2189 return Changed;
2190}
2191
2192bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
2193 bool Changed = false;
2194
David Neto556c7e62018-06-08 13:45:55 -07002195 const std::vector<const char *> Map = {
2196 "_Z11vload_half2jPU3AS1KDh",
2197 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2198 "_Z11vload_half2jPU3AS2KDh",
2199 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2200 };
David Neto22f144c2017-06-12 14:26:21 -04002201
2202 for (auto Name : Map) {
2203 // If we find a function with the matching name.
2204 if (auto F = M.getFunction(Name)) {
2205 SmallVector<Instruction *, 4> ToRemoves;
2206
2207 // Walk the users of the function.
2208 for (auto &U : F->uses()) {
2209 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2210 // The index argument from vload_half.
2211 auto Arg0 = CI->getOperand(0);
2212
2213 // The pointer argument from vload_half.
2214 auto Arg1 = CI->getOperand(1);
2215
2216 auto IntTy = Type::getInt32Ty(M.getContext());
2217 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2218 auto NewPointerTy = PointerType::get(
2219 IntTy, Arg1->getType()->getPointerAddressSpace());
2220 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2221
2222 // Cast the half* pointer to int*.
2223 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2224
2225 // Index into the correct address of the casted pointer.
2226 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
2227
2228 // Load from the int* we casted to.
2229 auto Load = new LoadInst(Index, "", CI);
2230
2231 // Our intrinsic to unpack a float2 from an int.
2232 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2233
2234 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2235
2236 // Get our float2.
2237 auto Call = CallInst::Create(NewF, Load, "", CI);
2238
2239 CI->replaceAllUsesWith(Call);
2240
2241 // Lastly, remember to remove the user.
2242 ToRemoves.push_back(CI);
2243 }
2244 }
2245
2246 Changed = !ToRemoves.empty();
2247
2248 // And cleanup the calls we don't use anymore.
2249 for (auto V : ToRemoves) {
2250 V->eraseFromParent();
2251 }
2252
2253 // And remove the function we don't need either too.
2254 F->eraseFromParent();
2255 }
2256 }
2257
2258 return Changed;
2259}
2260
2261bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
2262 bool Changed = false;
2263
David Neto556c7e62018-06-08 13:45:55 -07002264 const std::vector<const char *> Map = {
2265 "_Z11vload_half4jPU3AS1KDh",
2266 "_Z12vloada_half4jPU3AS1KDh",
2267 "_Z11vload_half4jPU3AS2KDh",
2268 "_Z12vloada_half4jPU3AS2KDh",
2269 };
David Neto22f144c2017-06-12 14:26:21 -04002270
2271 for (auto Name : Map) {
2272 // If we find a function with the matching name.
2273 if (auto F = M.getFunction(Name)) {
2274 SmallVector<Instruction *, 4> ToRemoves;
2275
2276 // Walk the users of the function.
2277 for (auto &U : F->uses()) {
2278 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2279 // The index argument from vload_half.
2280 auto Arg0 = CI->getOperand(0);
2281
2282 // The pointer argument from vload_half.
2283 auto Arg1 = CI->getOperand(1);
2284
2285 auto IntTy = Type::getInt32Ty(M.getContext());
2286 auto Int2Ty = VectorType::get(IntTy, 2);
2287 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2288 auto NewPointerTy = PointerType::get(
2289 Int2Ty, Arg1->getType()->getPointerAddressSpace());
2290 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2291
2292 // Cast the half* pointer to int2*.
2293 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2294
2295 // Index into the correct address of the casted pointer.
2296 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2297
2298 // Load from the int2* we casted to.
2299 auto Load = new LoadInst(Index, "", CI);
2300
2301 // Extract each element from the loaded int2.
2302 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2303 "", CI);
2304 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2305 "", CI);
2306
2307 // Our intrinsic to unpack a float2 from an int.
2308 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2309
2310 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2311
2312 // Get the lower (x & y) components of our final float4.
2313 auto Lo = CallInst::Create(NewF, X, "", CI);
2314
2315 // Get the higher (z & w) components of our final float4.
2316 auto Hi = CallInst::Create(NewF, Y, "", CI);
2317
2318 Constant *ShuffleMask[4] = {
2319 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2320 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2321
2322 // Combine our two float2's into one float4.
2323 auto Combine = new ShuffleVectorInst(
2324 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2325
2326 CI->replaceAllUsesWith(Combine);
2327
2328 // Lastly, remember to remove the user.
2329 ToRemoves.push_back(CI);
2330 }
2331 }
2332
2333 Changed = !ToRemoves.empty();
2334
2335 // And cleanup the calls we don't use anymore.
2336 for (auto V : ToRemoves) {
2337 V->eraseFromParent();
2338 }
2339
2340 // And remove the function we don't need either too.
2341 F->eraseFromParent();
2342 }
2343 }
2344
2345 return Changed;
2346}
2347
David Neto6ad93232018-06-07 15:42:58 -07002348bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
2349 bool Changed = false;
2350
2351 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2352 //
2353 // %u = load i32 %ptr
2354 // %fxy = call <2 x float> Unpack2xHalf(u)
2355 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2356 const std::vector<const char *> Map = {
2357 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2358 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2359 "_Z20__clspv_vloada_half2jPKj", // private
2360 };
2361
2362 for (auto Name : Map) {
2363 // If we find a function with the matching name.
2364 if (auto F = M.getFunction(Name)) {
2365 SmallVector<Instruction *, 4> ToRemoves;
2366
2367 // Walk the users of the function.
2368 for (auto &U : F->uses()) {
2369 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
2370 auto Index = CI->getOperand(0);
2371 auto Ptr = CI->getOperand(1);
2372
2373 auto IntTy = Type::getInt32Ty(M.getContext());
2374 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2375 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2376
2377 auto IndexedPtr =
2378 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
2379 auto Load = new LoadInst(IndexedPtr, "", CI);
2380
2381 // Our intrinsic to unpack a float2 from an int.
2382 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2383
2384 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2385
2386 // Get our final float2.
2387 auto Result = CallInst::Create(NewF, Load, "", CI);
2388
2389 CI->replaceAllUsesWith(Result);
2390
2391 // Lastly, remember to remove the user.
2392 ToRemoves.push_back(CI);
2393 }
2394 }
2395
2396 Changed = true;
2397
2398 // And cleanup the calls we don't use anymore.
2399 for (auto V : ToRemoves) {
2400 V->eraseFromParent();
2401 }
2402
2403 // And remove the function we don't need either too.
2404 F->eraseFromParent();
2405 }
2406 }
2407
2408 return Changed;
2409}
2410
2411bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
2412 bool Changed = false;
2413
2414 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2415 //
2416 // %u2 = load <2 x i32> %ptr
2417 // %u2xy = extractelement %u2, 0
2418 // %u2zw = extractelement %u2, 1
2419 // %fxy = call <2 x float> Unpack2xHalf(uint)
2420 // %fzw = call <2 x float> Unpack2xHalf(uint)
2421 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2422 const std::vector<const char *> Map = {
2423 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2424 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2425 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2426 };
2427
2428 for (auto Name : Map) {
2429 // If we find a function with the matching name.
2430 if (auto F = M.getFunction(Name)) {
2431 SmallVector<Instruction *, 4> ToRemoves;
2432
2433 // Walk the users of the function.
2434 for (auto &U : F->uses()) {
2435 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2436 auto Index = CI->getOperand(0);
2437 auto Ptr = CI->getOperand(1);
2438
2439 auto IntTy = Type::getInt32Ty(M.getContext());
2440 auto Int2Ty = VectorType::get(IntTy, 2);
2441 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2442 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2443
2444 auto IndexedPtr =
2445 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
2446 auto Load = new LoadInst(IndexedPtr, "", CI);
2447
2448 // Extract each element from the loaded int2.
2449 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2450 "", CI);
2451 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2452 "", CI);
2453
2454 // Our intrinsic to unpack a float2 from an int.
2455 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2456
2457 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2458
2459 // Get the lower (x & y) components of our final float4.
2460 auto Lo = CallInst::Create(NewF, X, "", CI);
2461
2462 // Get the higher (z & w) components of our final float4.
2463 auto Hi = CallInst::Create(NewF, Y, "", CI);
2464
2465 Constant *ShuffleMask[4] = {
2466 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2467 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2468
2469 // Combine our two float2's into one float4.
2470 auto Combine = new ShuffleVectorInst(
2471 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2472
2473 CI->replaceAllUsesWith(Combine);
2474
2475 // Lastly, remember to remove the user.
2476 ToRemoves.push_back(CI);
2477 }
2478 }
2479
2480 Changed = true;
2481
2482 // And cleanup the calls we don't use anymore.
2483 for (auto V : ToRemoves) {
2484 V->eraseFromParent();
2485 }
2486
2487 // And remove the function we don't need either too.
2488 F->eraseFromParent();
2489 }
2490 }
2491
2492 return Changed;
2493}
2494
David Neto22f144c2017-06-12 14:26:21 -04002495bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
2496 bool Changed = false;
2497
2498 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
2499 "_Z15vstore_half_rtefjPU3AS1Dh",
2500 "_Z15vstore_half_rtzfjPU3AS1Dh"};
2501
2502 for (auto Name : Map) {
2503 // If we find a function with the matching name.
2504 if (auto F = M.getFunction(Name)) {
2505 SmallVector<Instruction *, 4> ToRemoves;
2506
2507 // Walk the users of the function.
2508 for (auto &U : F->uses()) {
2509 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2510 // The value to store.
2511 auto Arg0 = CI->getOperand(0);
2512
2513 // The index argument from vstore_half.
2514 auto Arg1 = CI->getOperand(1);
2515
2516 // The pointer argument from vstore_half.
2517 auto Arg2 = CI->getOperand(2);
2518
David Neto22f144c2017-06-12 14:26:21 -04002519 auto IntTy = Type::getInt32Ty(M.getContext());
2520 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002521 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04002522 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002523
2524 // Our intrinsic to pack a float2 to an int.
2525 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2526
2527 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2528
2529 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04002530 auto TempVec =
2531 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
2532 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002533
2534 // Pack the float2 -> half2 (in an int).
2535 auto X = CallInst::Create(NewF, TempVec, "", CI);
2536
David Neto482550a2018-03-24 05:21:07 -07002537 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04002538 auto ShortTy = Type::getInt16Ty(M.getContext());
2539 auto ShortPointerTy = PointerType::get(
2540 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002541
David Neto17852de2017-05-29 17:29:31 -04002542 // Truncate our i32 to an i16.
2543 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002544
David Neto17852de2017-05-29 17:29:31 -04002545 // Cast the half* pointer to short*.
2546 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002547
David Neto17852de2017-05-29 17:29:31 -04002548 // Index into the correct address of the casted pointer.
2549 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002550
David Neto17852de2017-05-29 17:29:31 -04002551 // Store to the int* we casted to.
2552 auto Store = new StoreInst(Trunc, Index, CI);
2553
2554 CI->replaceAllUsesWith(Store);
2555 } else {
2556 // We can only write to 32-bit aligned words.
2557 //
2558 // Assuming base is aligned to 32-bits, replace the equivalent of
2559 // vstore_half(value, index, base)
2560 // with:
2561 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2562 // uint32_t write_to_upper_half = index & 1u;
2563 // uint32_t shift = write_to_upper_half << 4;
2564 //
2565 // // Pack the float value as a half number in bottom 16 bits
2566 // // of an i32.
2567 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2568 //
2569 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2570 // ^ ((packed & 0xffff) << shift)
2571 // // We only need relaxed consistency, but OpenCL 1.2 only has
2572 // // sequentially consistent atomics.
2573 // // TODO(dneto): Use relaxed consistency.
2574 // atomic_xor(target_ptr, xor_value)
2575 auto IntPointerTy = PointerType::get(
2576 IntTy, Arg2->getType()->getPointerAddressSpace());
2577
2578 auto Four = ConstantInt::get(IntTy, 4);
2579 auto FFFF = ConstantInt::get(IntTy, 0xffff);
2580
2581 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
2582 // Compute index / 2
2583 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2584 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2585 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
2586 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2587 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
2588 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2589 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
2590
2591 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2592 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2593 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
2594
2595 // Generate the call to atomi_xor.
2596 SmallVector<Type *, 5> ParamTypes;
2597 // The pointer type.
2598 ParamTypes.push_back(IntPointerTy);
2599 // The Types for memory scope, semantics, and value.
2600 ParamTypes.push_back(IntTy);
2601 ParamTypes.push_back(IntTy);
2602 ParamTypes.push_back(IntTy);
2603 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2604 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
2605
2606 const auto ConstantScopeDevice =
2607 ConstantInt::get(IntTy, spv::ScopeDevice);
2608 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2609 // (SPIR-V Workgroup).
2610 const auto AddrSpaceSemanticsBits =
2611 IntPointerTy->getPointerAddressSpace() == 1
2612 ? spv::MemorySemanticsUniformMemoryMask
2613 : spv::MemorySemanticsWorkgroupMemoryMask;
2614
2615 // We're using relaxed consistency here.
2616 const auto ConstantMemorySemantics =
2617 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2618 AddrSpaceSemanticsBits);
2619
2620 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2621 ConstantMemorySemantics, ValueToXor};
2622 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2623 }
David Neto22f144c2017-06-12 14:26:21 -04002624
2625 // Lastly, remember to remove the user.
2626 ToRemoves.push_back(CI);
2627 }
2628 }
2629
2630 Changed = !ToRemoves.empty();
2631
2632 // And cleanup the calls we don't use anymore.
2633 for (auto V : ToRemoves) {
2634 V->eraseFromParent();
2635 }
2636
2637 // And remove the function we don't need either too.
2638 F->eraseFromParent();
2639 }
2640 }
2641
2642 return Changed;
2643}
2644
2645bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
2646 bool Changed = false;
2647
David Netoe2871522018-06-08 11:09:54 -07002648 const std::vector<const char *> Map = {
2649 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2650 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2651 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2652 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2653 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2654 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2655 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2656 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2657 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2658 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2659 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2660 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2661 };
David Neto22f144c2017-06-12 14:26:21 -04002662
2663 for (auto Name : Map) {
2664 // If we find a function with the matching name.
2665 if (auto F = M.getFunction(Name)) {
2666 SmallVector<Instruction *, 4> ToRemoves;
2667
2668 // Walk the users of the function.
2669 for (auto &U : F->uses()) {
2670 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2671 // The value to store.
2672 auto Arg0 = CI->getOperand(0);
2673
2674 // The index argument from vstore_half.
2675 auto Arg1 = CI->getOperand(1);
2676
2677 // The pointer argument from vstore_half.
2678 auto Arg2 = CI->getOperand(2);
2679
2680 auto IntTy = Type::getInt32Ty(M.getContext());
2681 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2682 auto NewPointerTy = PointerType::get(
2683 IntTy, Arg2->getType()->getPointerAddressSpace());
2684 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2685
2686 // Our intrinsic to pack a float2 to an int.
2687 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2688
2689 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2690
2691 // Turn the packed x & y into the final packing.
2692 auto X = CallInst::Create(NewF, Arg0, "", CI);
2693
2694 // Cast the half* pointer to int*.
2695 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2696
2697 // Index into the correct address of the casted pointer.
2698 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
2699
2700 // Store to the int* we casted to.
2701 auto Store = new StoreInst(X, Index, CI);
2702
2703 CI->replaceAllUsesWith(Store);
2704
2705 // Lastly, remember to remove the user.
2706 ToRemoves.push_back(CI);
2707 }
2708 }
2709
2710 Changed = !ToRemoves.empty();
2711
2712 // And cleanup the calls we don't use anymore.
2713 for (auto V : ToRemoves) {
2714 V->eraseFromParent();
2715 }
2716
2717 // And remove the function we don't need either too.
2718 F->eraseFromParent();
2719 }
2720 }
2721
2722 return Changed;
2723}
2724
2725bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
2726 bool Changed = false;
2727
David Netoe2871522018-06-08 11:09:54 -07002728 const std::vector<const char *> Map = {
2729 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2730 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2731 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2732 "_Z13vstorea_half4Dv4_fjPDh", // private
2733 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2734 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2735 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2736 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2737 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2738 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2739 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2740 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2741 };
David Neto22f144c2017-06-12 14:26:21 -04002742
2743 for (auto Name : Map) {
2744 // If we find a function with the matching name.
2745 if (auto F = M.getFunction(Name)) {
2746 SmallVector<Instruction *, 4> ToRemoves;
2747
2748 // Walk the users of the function.
2749 for (auto &U : F->uses()) {
2750 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2751 // The value to store.
2752 auto Arg0 = CI->getOperand(0);
2753
2754 // The index argument from vstore_half.
2755 auto Arg1 = CI->getOperand(1);
2756
2757 // The pointer argument from vstore_half.
2758 auto Arg2 = CI->getOperand(2);
2759
2760 auto IntTy = Type::getInt32Ty(M.getContext());
2761 auto Int2Ty = VectorType::get(IntTy, 2);
2762 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2763 auto NewPointerTy = PointerType::get(
2764 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2765 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2766
2767 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2768 ConstantInt::get(IntTy, 1)};
2769
2770 // Extract out the x & y components of our to store value.
2771 auto Lo =
2772 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2773 ConstantVector::get(LoShuffleMask), "", CI);
2774
2775 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2776 ConstantInt::get(IntTy, 3)};
2777
2778 // Extract out the z & w components of our to store value.
2779 auto Hi =
2780 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2781 ConstantVector::get(HiShuffleMask), "", CI);
2782
2783 // Our intrinsic to pack a float2 to an int.
2784 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2785
2786 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2787
2788 // Turn the packed x & y into the final component of our int2.
2789 auto X = CallInst::Create(NewF, Lo, "", CI);
2790
2791 // Turn the packed z & w into the final component of our int2.
2792 auto Y = CallInst::Create(NewF, Hi, "", CI);
2793
2794 auto Combine = InsertElementInst::Create(
2795 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2796 Combine = InsertElementInst::Create(
2797 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
2798
2799 // Cast the half* pointer to int2*.
2800 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2801
2802 // Index into the correct address of the casted pointer.
2803 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
2804
2805 // Store to the int2* we casted to.
2806 auto Store = new StoreInst(Combine, Index, CI);
2807
2808 CI->replaceAllUsesWith(Store);
2809
2810 // Lastly, remember to remove the user.
2811 ToRemoves.push_back(CI);
2812 }
2813 }
2814
2815 Changed = !ToRemoves.empty();
2816
2817 // And cleanup the calls we don't use anymore.
2818 for (auto V : ToRemoves) {
2819 V->eraseFromParent();
2820 }
2821
2822 // And remove the function we don't need either too.
2823 F->eraseFromParent();
2824 }
2825 }
2826
2827 return Changed;
2828}
2829
2830bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2831 bool Changed = false;
2832
2833 const std::map<const char *, const char*> Map = {
2834 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2835 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2836 };
2837
2838 for (auto Pair : Map) {
2839 // If we find a function with the matching name.
2840 if (auto F = M.getFunction(Pair.first)) {
2841 SmallVector<Instruction *, 4> ToRemoves;
2842
2843 // Walk the users of the function.
2844 for (auto &U : F->uses()) {
2845 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2846 // The image.
2847 auto Arg0 = CI->getOperand(0);
2848
2849 // The sampler.
2850 auto Arg1 = CI->getOperand(1);
2851
2852 // The coordinate (integer type that we can't handle).
2853 auto Arg2 = CI->getOperand(2);
2854
2855 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2856
2857 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2858
2859 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2860
2861 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2862
2863 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2864
2865 CI->replaceAllUsesWith(NewCI);
2866
2867 // Lastly, remember to remove the user.
2868 ToRemoves.push_back(CI);
2869 }
2870 }
2871
2872 Changed = !ToRemoves.empty();
2873
2874 // And cleanup the calls we don't use anymore.
2875 for (auto V : ToRemoves) {
2876 V->eraseFromParent();
2877 }
2878
2879 // And remove the function we don't need either too.
2880 F->eraseFromParent();
2881 }
2882 }
2883
2884 return Changed;
2885}
2886
2887bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2888 bool Changed = false;
2889
2890 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002891 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002892 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002893 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002894 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002895 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002896 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002897 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002898 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002899 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002900 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002901 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002902 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002903 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002904 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002905 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002906 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002907 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002908 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002909 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002910 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002911 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002912 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2913 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2914 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002915
2916 for (auto Pair : Map) {
2917 // If we find a function with the matching name.
2918 if (auto F = M.getFunction(Pair.first)) {
2919 SmallVector<Instruction *, 4> ToRemoves;
2920
2921 // Walk the users of the function.
2922 for (auto &U : F->uses()) {
2923 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2924 auto FType = F->getFunctionType();
2925 SmallVector<Type *, 5> ParamTypes;
2926
2927 // The pointer type.
2928 ParamTypes.push_back(FType->getParamType(0));
2929
2930 auto IntTy = Type::getInt32Ty(M.getContext());
2931
2932 // The memory scope type.
2933 ParamTypes.push_back(IntTy);
2934
2935 // The memory semantics type.
2936 ParamTypes.push_back(IntTy);
2937
2938 if (2 < CI->getNumArgOperands()) {
2939 // The unequal memory semantics type.
2940 ParamTypes.push_back(IntTy);
2941
2942 // The value type.
2943 ParamTypes.push_back(FType->getParamType(2));
2944
2945 // The comparator type.
2946 ParamTypes.push_back(FType->getParamType(1));
2947 } else if (1 < CI->getNumArgOperands()) {
2948 // The value type.
2949 ParamTypes.push_back(FType->getParamType(1));
2950 }
2951
2952 auto NewFType =
2953 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2954 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2955
2956 // We need to map the OpenCL constants to the SPIR-V equivalents.
2957 const auto ConstantScopeDevice =
2958 ConstantInt::get(IntTy, spv::ScopeDevice);
2959 const auto ConstantMemorySemantics = ConstantInt::get(
2960 IntTy, spv::MemorySemanticsUniformMemoryMask |
2961 spv::MemorySemanticsSequentiallyConsistentMask);
2962
2963 SmallVector<Value *, 5> Params;
2964
2965 // The pointer.
2966 Params.push_back(CI->getArgOperand(0));
2967
2968 // The memory scope.
2969 Params.push_back(ConstantScopeDevice);
2970
2971 // The memory semantics.
2972 Params.push_back(ConstantMemorySemantics);
2973
2974 if (2 < CI->getNumArgOperands()) {
2975 // The unequal memory semantics.
2976 Params.push_back(ConstantMemorySemantics);
2977
2978 // The value.
2979 Params.push_back(CI->getArgOperand(2));
2980
2981 // The comparator.
2982 Params.push_back(CI->getArgOperand(1));
2983 } else if (1 < CI->getNumArgOperands()) {
2984 // The value.
2985 Params.push_back(CI->getArgOperand(1));
2986 }
2987
2988 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2989
2990 CI->replaceAllUsesWith(NewCI);
2991
2992 // Lastly, remember to remove the user.
2993 ToRemoves.push_back(CI);
2994 }
2995 }
2996
2997 Changed = !ToRemoves.empty();
2998
2999 // And cleanup the calls we don't use anymore.
3000 for (auto V : ToRemoves) {
3001 V->eraseFromParent();
3002 }
3003
3004 // And remove the function we don't need either too.
3005 F->eraseFromParent();
3006 }
3007 }
3008
Neil Henning39672102017-09-29 14:33:13 +01003009 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003010 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003011 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003012 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003013 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003014 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003015 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003016 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003017 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003018 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003019 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003020 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003021 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003022 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003023 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003024 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003025 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003026 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003027 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003028 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003029 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003030 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003031 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003032 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003033 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003034 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003035 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003036 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003037 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003038 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003039 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003040 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003041 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003042 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003043 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003044 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003045 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003046 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003047 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003048 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003049 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003050 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003051 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003052 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003053 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003054 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003055 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003056 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003057 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003058 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003059 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003060 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003061 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003062 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003063 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003064 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003065 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003066 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003067 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003068 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003069 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003070 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003071 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3072 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3073 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003074
3075 for (auto Pair : Map2) {
3076 // If we find a function with the matching name.
3077 if (auto F = M.getFunction(Pair.first)) {
3078 SmallVector<Instruction *, 4> ToRemoves;
3079
3080 // Walk the users of the function.
3081 for (auto &U : F->uses()) {
3082 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3083 auto AtomicOp = new AtomicRMWInst(
3084 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3085 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3086
3087 CI->replaceAllUsesWith(AtomicOp);
3088
3089 // Lastly, remember to remove the user.
3090 ToRemoves.push_back(CI);
3091 }
3092 }
3093
3094 Changed = !ToRemoves.empty();
3095
3096 // And cleanup the calls we don't use anymore.
3097 for (auto V : ToRemoves) {
3098 V->eraseFromParent();
3099 }
3100
3101 // And remove the function we don't need either too.
3102 F->eraseFromParent();
3103 }
3104 }
3105
David Neto22f144c2017-06-12 14:26:21 -04003106 return Changed;
3107}
3108
3109bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
3110 bool Changed = false;
3111
3112 // If we find a function with the matching name.
3113 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
3114 SmallVector<Instruction *, 4> ToRemoves;
3115
3116 auto IntTy = Type::getInt32Ty(M.getContext());
3117 auto FloatTy = Type::getFloatTy(M.getContext());
3118
3119 Constant *DownShuffleMask[3] = {
3120 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3121 ConstantInt::get(IntTy, 2)};
3122
3123 Constant *UpShuffleMask[4] = {
3124 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3125 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3126
3127 Constant *FloatVec[3] = {
3128 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
3129 };
3130
3131 // Walk the users of the function.
3132 for (auto &U : F->uses()) {
3133 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3134 auto Vec4Ty = CI->getArgOperand(0)->getType();
3135 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3136 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3137 auto Vec3Ty = Arg0->getType();
3138
3139 auto NewFType =
3140 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
3141
3142 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
3143
3144 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
3145
3146 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
3147
3148 CI->replaceAllUsesWith(Result);
3149
3150 // Lastly, remember to remove the user.
3151 ToRemoves.push_back(CI);
3152 }
3153 }
3154
3155 Changed = !ToRemoves.empty();
3156
3157 // And cleanup the calls we don't use anymore.
3158 for (auto V : ToRemoves) {
3159 V->eraseFromParent();
3160 }
3161
3162 // And remove the function we don't need either too.
3163 F->eraseFromParent();
3164 }
3165
3166 return Changed;
3167}
David Neto62653202017-10-16 19:05:18 -04003168
3169bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3170 bool Changed = false;
3171
3172 // OpenCL's float result = fract(float x, float* ptr)
3173 //
3174 // In the LLVM domain:
3175 //
3176 // %floor_result = call spir_func float @floor(float %x)
3177 // store float %floor_result, float * %ptr
3178 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3179 // %result = call spir_func float
3180 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3181 //
3182 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3183 // and clspv.fract occur in the SPIR-V generator pass:
3184 //
3185 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3186 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3187 // ...
3188 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3189 // OpStore %ptr %floor_result
3190 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3191 // %fract_result = OpExtInst %float
3192 // %glsl_ext Fmin %fract_intermediate %just_under_1
3193
3194
3195 using std::string;
3196
3197 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3198 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
3199 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
3200 auto make_quad = [](const char *a, const char *b, const char *c,
3201 const char *d) {
3202 return std::tuple<const char *, const char *, const char *, const char *>(
3203 a, b, c, d);
3204 };
3205 const std::vector<QuadType> Functions = {
3206 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
3207 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
3208 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
3209 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
3210 };
3211
3212 for (auto& quad : Functions) {
3213 const StringRef fract_name(std::get<0>(quad));
3214
3215 // If we find a function with the matching name.
3216 if (auto F = M.getFunction(fract_name)) {
3217 if (F->use_begin() == F->use_end())
3218 continue;
3219
3220 // We have some uses.
3221 Changed = true;
3222
3223 auto& Context = M.getContext();
3224
3225 const StringRef floor_name(std::get<1>(quad));
3226 const StringRef fmin_name(std::get<2>(quad));
3227 const StringRef clspv_fract_name(std::get<3>(quad));
3228
3229 // This is either float or a float vector. All the float-like
3230 // types are this type.
3231 auto result_ty = F->getReturnType();
3232
3233 Function* fmin_fn = M.getFunction(fmin_name);
3234 if (!fmin_fn) {
3235 // Make the fmin function.
3236 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003237 fmin_fn =
3238 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003239 fmin_fn->addFnAttr(Attribute::ReadNone);
3240 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3241 }
3242
3243 Function* floor_fn = M.getFunction(floor_name);
3244 if (!floor_fn) {
3245 // Make the floor function.
3246 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003247 floor_fn = cast<Function>(
3248 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003249 floor_fn->addFnAttr(Attribute::ReadNone);
3250 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3251 }
3252
3253 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
3254 if (!clspv_fract_fn) {
3255 // Make the clspv_fract function.
3256 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003257 clspv_fract_fn = cast<Function>(
3258 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003259 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3260 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3261 }
3262
3263 // Number of significant significand bits, whether represented or not.
3264 unsigned num_significand_bits;
3265 switch (result_ty->getScalarType()->getTypeID()) {
3266 case Type::HalfTyID:
3267 num_significand_bits = 11;
3268 break;
3269 case Type::FloatTyID:
3270 num_significand_bits = 24;
3271 break;
3272 case Type::DoubleTyID:
3273 num_significand_bits = 53;
3274 break;
3275 default:
3276 assert(false && "Unhandled float type when processing fract builtin");
3277 break;
3278 }
3279 // Beware that the disassembler displays this value as
3280 // OpConstant %float 1
3281 // which is not quite right.
3282 const double kJustUnderOneScalar =
3283 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3284
3285 Constant *just_under_one =
3286 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3287 if (result_ty->isVectorTy()) {
3288 just_under_one = ConstantVector::getSplat(
3289 result_ty->getVectorNumElements(), just_under_one);
3290 }
3291
3292 IRBuilder<> Builder(Context);
3293
3294 SmallVector<Instruction *, 4> ToRemoves;
3295
3296 // Walk the users of the function.
3297 for (auto &U : F->uses()) {
3298 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3299
3300 Builder.SetInsertPoint(CI);
3301 auto arg = CI->getArgOperand(0);
3302 auto ptr = CI->getArgOperand(1);
3303
3304 // Compute floor result and store it.
3305 auto floor = Builder.CreateCall(floor_fn, {arg});
3306 Builder.CreateStore(floor, ptr);
3307
3308 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3309 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3310
3311 CI->replaceAllUsesWith(fract_result);
3312
3313 // Lastly, remember to remove the user.
3314 ToRemoves.push_back(CI);
3315 }
3316 }
3317
3318 // And cleanup the calls we don't use anymore.
3319 for (auto V : ToRemoves) {
3320 V->eraseFromParent();
3321 }
3322
3323 // And remove the function we don't need either too.
3324 F->eraseFromParent();
3325 }
3326 }
3327
3328 return Changed;
3329}