blob: f5831af9028e605fa20bd71dfc4bcbd616169ce0 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
21#include "llvm/IR/Instructions.h"
22#include "llvm/IR/IRBuilder.h"
23#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
David Neto482550a2018-03-24 05:21:07 -070032#include "clspv/Option.h"
33
David Neto22f144c2017-06-12 14:26:21 -040034using namespace llvm;
35
36#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
37
38namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000039
/// Per-argument information recovered from a mangled builtin name.
struct ArgTypeInfo {
  /// Signedness class of the argument's base (element) type, as assigned by
  /// the mangled-name parser in this file.
  enum class SignedNess {
    None,     ///< Used for the 'f' (float) type code.
    Unsigned, ///< Used for the 'h', 't', 'j' and 'm' type codes.
    Signed    ///< Used for the 'c', 'a', 's', 'i' and 'l' type codes.
  };

  // Default to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value (the parser copies such objects around).
  SignedNess signedness = SignedNess::None;
};
48
49struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000050 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000051 std::vector<ArgTypeInfo> argTypeInfos;
52};
53
54bool getFunctionInfoFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
55 if (!name.consume_front("_Z")) {
56 return false;
57 }
58 size_t nameLen;
59 if (name.consumeInteger(10, nameLen)) {
60 return false;
61 }
62
Kévin Petit9d1a9d12019-03-25 15:23:46 +000063 finfo->name = name.take_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000064 name = name.drop_front(nameLen);
65
66 ArgTypeInfo prev_ti;
67
68 while (name.size() != 0) {
69
70 ArgTypeInfo ti;
71
72 // Try parsing a vector prefix
73 if (name.consume_front("Dv")) {
74 int numElems;
75 if (name.consumeInteger(10, numElems)) {
76 return false;
77 }
78
79 if (!name.consume_front("_")) {
80 return false;
81 }
82 }
83
84 // Parse the base type
85 char typeCode = name.front();
86 name = name.drop_front(1);
87 switch(typeCode) {
88 case 'c': // char
89 case 'a': // signed char
90 case 's': // short
91 case 'i': // int
92 case 'l': // long
93 ti.signedness = ArgTypeInfo::SignedNess::Signed;
94 break;
95 case 'h': // unsigned char
96 case 't': // unsigned short
97 case 'j': // unsigned int
98 case 'm': // unsigned long
99 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
100 break;
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000101 case 'f':
102 ti.signedness = ArgTypeInfo::SignedNess::None;
103 break;
Kévin Petit8a560882019-03-21 15:24:34 +0000104 case 'S':
105 ti = prev_ti;
106 if (!name.consume_front("_")) {
107 return false;
108 }
109 break;
110 default:
111 return false;
112 }
113
114 finfo->argTypeInfos.push_back(ti);
115
116 prev_ti = ti;
117 }
118
119 return true;
120};
121
122void getFunctionInfoFromMangledName(StringRef name, FunctionInfo *finfo) {
123 if (!getFunctionInfoFromMangledNameCheck(name, finfo)) {
124 llvm_unreachable("Can't parse mangled function name!");
125 }
126}
127
// Returns the zero-based index of the highest set bit of v, i.e.
// floor(log2(v)), and 0 when v is 0.
//
// NOTE: despite its name this is not a count of leading zeros. Callers in
// this file subtract two results to obtain the distance between two
// single-bit masks, which is what this definition provides.
uint32_t clz(uint32_t v) {
  uint32_t highBit = 0;

  // Binary search over the bit positions: whenever anything is set above the
  // current step, discard the low `step` bits and account for them.
  for (uint32_t step = 16; step != 0; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      highBit += step;
    }
  }

  return highBit;
}
147
148Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
149 if (1 == elements) {
150 return Type::getInt1Ty(C);
151 } else {
152 return VectorType::get(Type::getInt1Ty(C), elements);
153 }
154}
155
156struct ReplaceOpenCLBuiltinPass final : public ModulePass {
157 static char ID;
158 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
159
160 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000161 bool replaceAbs(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100162 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400163 bool replaceRecip(Module &M);
164 bool replaceDivide(Module &M);
165 bool replaceExp10(Module &M);
166 bool replaceLog10(Module &M);
167 bool replaceBarrier(Module &M);
168 bool replaceMemFence(Module &M);
169 bool replaceRelational(Module &M);
170 bool replaceIsInfAndIsNan(Module &M);
171 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000172 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000173 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000174 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000175 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000176 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000177 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000178 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400179 bool replaceSignbit(Module &M);
180 bool replaceMadandMad24andMul24(Module &M);
181 bool replaceVloadHalf(Module &M);
182 bool replaceVloadHalf2(Module &M);
183 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700184 bool replaceClspvVloadaHalf2(Module &M);
185 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceVstoreHalf(Module &M);
187 bool replaceVstoreHalf2(Module &M);
188 bool replaceVstoreHalf4(Module &M);
189 bool replaceReadImageF(Module &M);
190 bool replaceAtomics(Module &M);
191 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400192 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700193 bool replaceVload(Module &M);
194 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400195};
196}
197
198char ReplaceOpenCLBuiltinPass::ID = 0;
199static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
200 "Replace OpenCL Builtins Pass");
201
202namespace clspv {
203ModulePass *createReplaceOpenCLBuiltinPass() {
204 return new ReplaceOpenCLBuiltinPass();
205}
206}
207
208bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
209 bool Changed = false;
210
Kévin Petit2444e9b2018-11-09 14:14:37 +0000211 Changed |= replaceAbs(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100212 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400213 Changed |= replaceRecip(M);
214 Changed |= replaceDivide(M);
215 Changed |= replaceExp10(M);
216 Changed |= replaceLog10(M);
217 Changed |= replaceBarrier(M);
218 Changed |= replaceMemFence(M);
219 Changed |= replaceRelational(M);
220 Changed |= replaceIsInfAndIsNan(M);
221 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000222 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000223 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000224 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000225 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000226 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000227 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000228 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400229 Changed |= replaceSignbit(M);
230 Changed |= replaceMadandMad24andMul24(M);
231 Changed |= replaceVloadHalf(M);
232 Changed |= replaceVloadHalf2(M);
233 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700234 Changed |= replaceClspvVloadaHalf2(M);
235 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400236 Changed |= replaceVstoreHalf(M);
237 Changed |= replaceVstoreHalf2(M);
238 Changed |= replaceVstoreHalf4(M);
239 Changed |= replaceReadImageF(M);
240 Changed |= replaceAtomics(M);
241 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400242 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700243 Changed |= replaceVload(M);
244 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400245
246 return Changed;
247}
248
Kévin Petit2444e9b2018-11-09 14:14:37 +0000249bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
250 bool Changed = false;
251
252 const char *Names[] = {
Kévin Petit5ace14c2019-04-01 16:29:53 +0100253 "_Z3absh",
254 "_Z3absDv2_h",
255 "_Z3absDv3_h",
256 "_Z3absDv4_h",
Kévin Petit2444e9b2018-11-09 14:14:37 +0000257 "_Z3abst",
258 "_Z3absDv2_t",
259 "_Z3absDv3_t",
260 "_Z3absDv4_t",
261 "_Z3absj",
262 "_Z3absDv2_j",
263 "_Z3absDv3_j",
264 "_Z3absDv4_j",
265 "_Z3absm",
266 "_Z3absDv2_m",
267 "_Z3absDv3_m",
268 "_Z3absDv4_m",
269 };
270
271 for (auto Name : Names) {
272 // If we find a function with the matching name.
273 if (auto F = M.getFunction(Name)) {
274 SmallVector<Instruction *, 4> ToRemoves;
275
276 // Walk the users of the function.
277 for (auto &U : F->uses()) {
278 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
279 // Abs has one arg.
280 auto Arg = CI->getOperand(0);
281
282 // Use the argument unchanged, we know it's unsigned
283 CI->replaceAllUsesWith(Arg);
284
285 // Lastly, remember to remove the user.
286 ToRemoves.push_back(CI);
287 }
288 }
289
290 Changed = !ToRemoves.empty();
291
292 // And cleanup the calls we don't use anymore.
293 for (auto V : ToRemoves) {
294 V->eraseFromParent();
295 }
296
297 // And remove the function we don't need either too.
298 F->eraseFromParent();
299 }
300 }
301
302 return Changed;
303}
304
Kévin Petit8c1be282019-04-02 19:34:25 +0100305bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
306 bool Changed = false;
307
308 const char *Names[] = {
309 "_Z8copysignff",
310 "_Z8copysignDv2_fS_",
311 "_Z8copysignDv3_fS_",
312 "_Z8copysignDv4_fS_",
313 };
314
315 for (auto Name : Names) {
316 // If we find a function with the matching name.
317 if (auto F = M.getFunction(Name)) {
318 SmallVector<Instruction *, 4> ToRemoves;
319
320 // Walk the users of the function.
321 for (auto &U : F->uses()) {
322 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
323
324 auto XValue = CI->getOperand(0);
325 auto YValue = CI->getOperand(1);
326
327 auto Ty = XValue->getType();
328
329 Type* IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
330 if (Ty->isVectorTy()) {
331 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
332 }
333
334 // Return X with the sign of Y
335
336 // Sign bit masks
337 auto SignBit = IntTy->getScalarSizeInBits() - 1;
338 auto SignBitMask = 1 << SignBit;
339 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
340 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
341
342 IRBuilder<> Builder(CI);
343
344 // Extract sign of Y
345 auto YInt = Builder.CreateBitCast(YValue, IntTy);
346 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
347
348 // Clear sign bit in X
349 auto XInt = Builder.CreateBitCast(XValue, IntTy);
350 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
351
352 // Insert sign bit of Y into X
353 auto NewXInt = Builder.CreateOr(XInt, YSign);
354
355 // And cast back to floating-point
356 auto NewX = Builder.CreateBitCast(NewXInt, Ty);
357
358 CI->replaceAllUsesWith(NewX);
359
360 // Lastly, remember to remove the user.
361 ToRemoves.push_back(CI);
362 }
363 }
364
365 Changed = !ToRemoves.empty();
366
367 // And cleanup the calls we don't use anymore.
368 for (auto V : ToRemoves) {
369 V->eraseFromParent();
370 }
371
372 // And remove the function we don't need either too.
373 F->eraseFromParent();
374 }
375 }
376
377 return Changed;
378}
379
David Neto22f144c2017-06-12 14:26:21 -0400380bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
381 bool Changed = false;
382
383 const char *Names[] = {
384 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
385 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
386 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
387 };
388
389 for (auto Name : Names) {
390 // If we find a function with the matching name.
391 if (auto F = M.getFunction(Name)) {
392 SmallVector<Instruction *, 4> ToRemoves;
393
394 // Walk the users of the function.
395 for (auto &U : F->uses()) {
396 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
397 // Recip has one arg.
398 auto Arg = CI->getOperand(0);
399
400 auto Div = BinaryOperator::Create(
401 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
402 CI);
403
404 CI->replaceAllUsesWith(Div);
405
406 // Lastly, remember to remove the user.
407 ToRemoves.push_back(CI);
408 }
409 }
410
411 Changed = !ToRemoves.empty();
412
413 // And cleanup the calls we don't use anymore.
414 for (auto V : ToRemoves) {
415 V->eraseFromParent();
416 }
417
418 // And remove the function we don't need either too.
419 F->eraseFromParent();
420 }
421 }
422
423 return Changed;
424}
425
426bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
427 bool Changed = false;
428
429 const char *Names[] = {
430 "_Z11half_divideff", "_Z13native_divideff",
431 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
432 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
433 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
434 };
435
436 for (auto Name : Names) {
437 // If we find a function with the matching name.
438 if (auto F = M.getFunction(Name)) {
439 SmallVector<Instruction *, 4> ToRemoves;
440
441 // Walk the users of the function.
442 for (auto &U : F->uses()) {
443 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
444 auto Div = BinaryOperator::Create(
445 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
446
447 CI->replaceAllUsesWith(Div);
448
449 // Lastly, remember to remove the user.
450 ToRemoves.push_back(CI);
451 }
452 }
453
454 Changed = !ToRemoves.empty();
455
456 // And cleanup the calls we don't use anymore.
457 for (auto V : ToRemoves) {
458 V->eraseFromParent();
459 }
460
461 // And remove the function we don't need either too.
462 F->eraseFromParent();
463 }
464 }
465
466 return Changed;
467}
468
469bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
470 bool Changed = false;
471
472 const std::map<const char *, const char *> Map = {
473 {"_Z5exp10f", "_Z3expf"},
474 {"_Z10half_exp10f", "_Z8half_expf"},
475 {"_Z12native_exp10f", "_Z10native_expf"},
476 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
477 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
478 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
479 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
480 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
481 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
482 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
483 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
484 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
485
486 for (auto Pair : Map) {
487 // If we find a function with the matching name.
488 if (auto F = M.getFunction(Pair.first)) {
489 SmallVector<Instruction *, 4> ToRemoves;
490
491 // Walk the users of the function.
492 for (auto &U : F->uses()) {
493 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
494 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
495
496 auto Arg = CI->getOperand(0);
497
498 // Constant of the natural log of 10 (ln(10)).
499 const double Ln10 =
500 2.302585092994045684017991454684364207601101488628772976033;
501
502 auto Mul = BinaryOperator::Create(
503 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
504 CI);
505
506 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
507
508 CI->replaceAllUsesWith(NewCI);
509
510 // Lastly, remember to remove the user.
511 ToRemoves.push_back(CI);
512 }
513 }
514
515 Changed = !ToRemoves.empty();
516
517 // And cleanup the calls we don't use anymore.
518 for (auto V : ToRemoves) {
519 V->eraseFromParent();
520 }
521
522 // And remove the function we don't need either too.
523 F->eraseFromParent();
524 }
525 }
526
527 return Changed;
528}
529
530bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
531 bool Changed = false;
532
533 const std::map<const char *, const char *> Map = {
534 {"_Z5log10f", "_Z3logf"},
535 {"_Z10half_log10f", "_Z8half_logf"},
536 {"_Z12native_log10f", "_Z10native_logf"},
537 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
538 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
539 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
540 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
541 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
542 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
543 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
544 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
545 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
546
547 for (auto Pair : Map) {
548 // If we find a function with the matching name.
549 if (auto F = M.getFunction(Pair.first)) {
550 SmallVector<Instruction *, 4> ToRemoves;
551
552 // Walk the users of the function.
553 for (auto &U : F->uses()) {
554 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
555 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
556
557 auto Arg = CI->getOperand(0);
558
559 // Constant of the reciprocal of the natural log of 10 (ln(10)).
560 const double Ln10 =
561 0.434294481903251827651128918916605082294397005803666566114;
562
563 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
564
565 auto Mul = BinaryOperator::Create(
566 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
567 "", CI);
568
569 CI->replaceAllUsesWith(Mul);
570
571 // Lastly, remember to remove the user.
572 ToRemoves.push_back(CI);
573 }
574 }
575
576 Changed = !ToRemoves.empty();
577
578 // And cleanup the calls we don't use anymore.
579 for (auto V : ToRemoves) {
580 V->eraseFromParent();
581 }
582
583 // And remove the function we don't need either too.
584 F->eraseFromParent();
585 }
586 }
587
588 return Changed;
589}
590
591bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
592 bool Changed = false;
593
594 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
595
596 const std::map<const char *, const char *> Map = {
597 {"_Z7barrierj", "__spirv_control_barrier"}};
598
599 for (auto Pair : Map) {
600 // If we find a function with the matching name.
601 if (auto F = M.getFunction(Pair.first)) {
602 SmallVector<Instruction *, 4> ToRemoves;
603
604 // Walk the users of the function.
605 for (auto &U : F->uses()) {
606 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
607 auto FType = F->getFunctionType();
608 SmallVector<Type *, 3> Params;
609 for (unsigned i = 0; i < 3; i++) {
610 Params.push_back(FType->getParamType(0));
611 }
612 auto NewFType =
613 FunctionType::get(FType->getReturnType(), Params, false);
614 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
615
616 auto Arg = CI->getOperand(0);
617
618 // We need to map the OpenCL constants to the SPIR-V equivalents.
619 const auto LocalMemFence =
620 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
621 const auto GlobalMemFence =
622 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
623 const auto ConstantSequentiallyConsistent = ConstantInt::get(
624 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
625 const auto ConstantScopeDevice =
626 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
627 const auto ConstantScopeWorkgroup =
628 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
629
630 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
631 const auto LocalMemFenceMask = BinaryOperator::Create(
632 Instruction::And, LocalMemFence, Arg, "", CI);
633 const auto WorkgroupShiftAmount =
634 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
635 clz(CLK_LOCAL_MEM_FENCE);
636 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
637 Instruction::Shl, LocalMemFenceMask,
638 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
639
640 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
641 const auto GlobalMemFenceMask = BinaryOperator::Create(
642 Instruction::And, GlobalMemFence, Arg, "", CI);
643 const auto UniformShiftAmount =
644 clz(spv::MemorySemanticsUniformMemoryMask) -
645 clz(CLK_GLOBAL_MEM_FENCE);
646 const auto MemorySemanticsUniform = BinaryOperator::Create(
647 Instruction::Shl, GlobalMemFenceMask,
648 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
649
650 // And combine the above together, also adding in
651 // MemorySemanticsSequentiallyConsistentMask.
652 auto MemorySemantics =
653 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
654 ConstantSequentiallyConsistent, "", CI);
655 MemorySemantics = BinaryOperator::Create(
656 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
657
658 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
659 // Device Scope, otherwise Workgroup Scope.
660 const auto Cmp =
661 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
662 GlobalMemFenceMask, GlobalMemFence, "", CI);
663 const auto MemoryScope = SelectInst::Create(
664 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
665
666 // Lastly, the Execution Scope is always Workgroup Scope.
667 const auto ExecutionScope = ConstantScopeWorkgroup;
668
669 auto NewCI = CallInst::Create(
670 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
671
672 CI->replaceAllUsesWith(NewCI);
673
674 // Lastly, remember to remove the user.
675 ToRemoves.push_back(CI);
676 }
677 }
678
679 Changed = !ToRemoves.empty();
680
681 // And cleanup the calls we don't use anymore.
682 for (auto V : ToRemoves) {
683 V->eraseFromParent();
684 }
685
686 // And remove the function we don't need either too.
687 F->eraseFromParent();
688 }
689 }
690
691 return Changed;
692}
693
694bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
695 bool Changed = false;
696
697 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
698
Neil Henning39672102017-09-29 14:33:13 +0100699 using Tuple = std::tuple<const char *, unsigned>;
700 const std::map<const char *, Tuple> Map = {
701 {"_Z9mem_fencej",
702 Tuple("__spirv_memory_barrier",
703 spv::MemorySemanticsSequentiallyConsistentMask)},
704 {"_Z14read_mem_fencej",
705 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
706 {"_Z15write_mem_fencej",
707 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400708
709 for (auto Pair : Map) {
710 // If we find a function with the matching name.
711 if (auto F = M.getFunction(Pair.first)) {
712 SmallVector<Instruction *, 4> ToRemoves;
713
714 // Walk the users of the function.
715 for (auto &U : F->uses()) {
716 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
717 auto FType = F->getFunctionType();
718 SmallVector<Type *, 2> Params;
719 for (unsigned i = 0; i < 2; i++) {
720 Params.push_back(FType->getParamType(0));
721 }
722 auto NewFType =
723 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100724 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400725
726 auto Arg = CI->getOperand(0);
727
728 // We need to map the OpenCL constants to the SPIR-V equivalents.
729 const auto LocalMemFence =
730 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
731 const auto GlobalMemFence =
732 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
733 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100734 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400735 const auto ConstantScopeDevice =
736 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
737
738 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
739 const auto LocalMemFenceMask = BinaryOperator::Create(
740 Instruction::And, LocalMemFence, Arg, "", CI);
741 const auto WorkgroupShiftAmount =
742 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
743 clz(CLK_LOCAL_MEM_FENCE);
744 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
745 Instruction::Shl, LocalMemFenceMask,
746 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
747
748 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
749 const auto GlobalMemFenceMask = BinaryOperator::Create(
750 Instruction::And, GlobalMemFence, Arg, "", CI);
751 const auto UniformShiftAmount =
752 clz(spv::MemorySemanticsUniformMemoryMask) -
753 clz(CLK_GLOBAL_MEM_FENCE);
754 const auto MemorySemanticsUniform = BinaryOperator::Create(
755 Instruction::Shl, GlobalMemFenceMask,
756 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
757
758 // And combine the above together, also adding in
759 // MemorySemanticsSequentiallyConsistentMask.
760 auto MemorySemantics =
761 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
762 ConstantMemorySemantics, "", CI);
763 MemorySemantics = BinaryOperator::Create(
764 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
765
766 // Memory Scope is always device.
767 const auto MemoryScope = ConstantScopeDevice;
768
769 auto NewCI =
770 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
771
772 CI->replaceAllUsesWith(NewCI);
773
774 // Lastly, remember to remove the user.
775 ToRemoves.push_back(CI);
776 }
777 }
778
779 Changed = !ToRemoves.empty();
780
781 // And cleanup the calls we don't use anymore.
782 for (auto V : ToRemoves) {
783 V->eraseFromParent();
784 }
785
786 // And remove the function we don't need either too.
787 F->eraseFromParent();
788 }
789 }
790
791 return Changed;
792}
793
794bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
795 bool Changed = false;
796
797 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
798 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
799 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
800 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
801 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
802 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
803 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
804 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
805 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
806 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
807 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
808 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
809 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
810 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
811 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
812 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
813 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
814 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
815 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
816 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
817 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
818 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
819 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
820 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
821 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
822 };
823
824 for (auto Pair : Map) {
825 // If we find a function with the matching name.
826 if (auto F = M.getFunction(Pair.first)) {
827 SmallVector<Instruction *, 4> ToRemoves;
828
829 // Walk the users of the function.
830 for (auto &U : F->uses()) {
831 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
832 // The predicate to use in the CmpInst.
833 auto Predicate = Pair.second.first;
834
835 // The value to return for true.
836 auto TrueValue =
837 ConstantInt::getSigned(CI->getType(), Pair.second.second);
838
839 // The value to return for false.
840 auto FalseValue = Constant::getNullValue(CI->getType());
841
842 auto Arg1 = CI->getOperand(0);
843 auto Arg2 = CI->getOperand(1);
844
845 const auto Cmp =
846 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
847
848 const auto Select =
849 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
850
851 CI->replaceAllUsesWith(Select);
852
853 // Lastly, remember to remove the user.
854 ToRemoves.push_back(CI);
855 }
856 }
857
858 Changed = !ToRemoves.empty();
859
860 // And cleanup the calls we don't use anymore.
861 for (auto V : ToRemoves) {
862 V->eraseFromParent();
863 }
864
865 // And remove the function we don't need either too.
866 F->eraseFromParent();
867 }
868 }
869
870 return Changed;
871}
872
873bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
874 bool Changed = false;
875
876 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
877 {"_Z5isinff", {"__spirv_isinff", 1}},
878 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
879 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
880 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
881 {"_Z5isnanf", {"__spirv_isnanf", 1}},
882 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
883 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
884 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
885 };
886
887 for (auto Pair : Map) {
888 // If we find a function with the matching name.
889 if (auto F = M.getFunction(Pair.first)) {
890 SmallVector<Instruction *, 4> ToRemoves;
891
892 // Walk the users of the function.
893 for (auto &U : F->uses()) {
894 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
895 const auto CITy = CI->getType();
896
897 // The fake SPIR-V intrinsic to generate.
898 auto SPIRVIntrinsic = Pair.second.first;
899
900 // The value to return for true.
901 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
902
903 // The value to return for false.
904 auto FalseValue = Constant::getNullValue(CITy);
905
906 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
907 M.getContext(),
908 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
909
910 auto NewFType =
911 FunctionType::get(CorrespondingBoolTy,
912 F->getFunctionType()->getParamType(0), false);
913
914 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
915
916 auto Arg = CI->getOperand(0);
917
918 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
919
920 const auto Select =
921 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
922
923 CI->replaceAllUsesWith(Select);
924
925 // Lastly, remember to remove the user.
926 ToRemoves.push_back(CI);
927 }
928 }
929
930 Changed = !ToRemoves.empty();
931
932 // And cleanup the calls we don't use anymore.
933 for (auto V : ToRemoves) {
934 V->eraseFromParent();
935 }
936
937 // And remove the function we don't need either too.
938 F->eraseFromParent();
939 }
940 }
941
942 return Changed;
943}
944
945bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
946 bool Changed = false;
947
948 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000949 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500950 {"_Z3allc", ""},
951 {"_Z3allDv2_c", "__spirv_allDv2_c"},
952 {"_Z3allDv3_c", "__spirv_allDv3_c"},
953 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000954 {"_Z3alls", ""},
955 {"_Z3allDv2_s", "__spirv_allDv2_s"},
956 {"_Z3allDv3_s", "__spirv_allDv3_s"},
957 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400958 {"_Z3alli", ""},
959 {"_Z3allDv2_i", "__spirv_allDv2_i"},
960 {"_Z3allDv3_i", "__spirv_allDv3_i"},
961 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000962 {"_Z3alll", ""},
963 {"_Z3allDv2_l", "__spirv_allDv2_l"},
964 {"_Z3allDv3_l", "__spirv_allDv3_l"},
965 {"_Z3allDv4_l", "__spirv_allDv4_l"},
966
967 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500968 {"_Z3anyc", ""},
969 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
970 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
971 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000972 {"_Z3anys", ""},
973 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
974 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
975 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400976 {"_Z3anyi", ""},
977 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
978 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
979 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000980 {"_Z3anyl", ""},
981 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
982 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
983 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400984 };
985
986 for (auto Pair : Map) {
987 // If we find a function with the matching name.
988 if (auto F = M.getFunction(Pair.first)) {
989 SmallVector<Instruction *, 4> ToRemoves;
990
991 // Walk the users of the function.
992 for (auto &U : F->uses()) {
993 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
994 // The fake SPIR-V intrinsic to generate.
995 auto SPIRVIntrinsic = Pair.second;
996
997 auto Arg = CI->getOperand(0);
998
999 Value *V;
1000
Kévin Petitfd27cca2018-10-31 13:00:17 +00001001 // If the argument is a 32-bit int, just use a shift
1002 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1003 V = BinaryOperator::Create(Instruction::LShr, Arg,
1004 ConstantInt::get(Arg->getType(), 31), "",
1005 CI);
1006 } else {
David Neto22f144c2017-06-12 14:26:21 -04001007 // The value for zero to compare against.
1008 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1009
David Neto22f144c2017-06-12 14:26:21 -04001010 // The value to return for true.
1011 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1012
1013 // The value to return for false.
1014 const auto FalseValue = Constant::getNullValue(CI->getType());
1015
Kévin Petitfd27cca2018-10-31 13:00:17 +00001016 const auto Cmp = CmpInst::Create(
1017 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1018
1019 Value* SelectSource;
1020
1021 // If we have a function to call, call it!
1022 if (0 < strlen(SPIRVIntrinsic)) {
1023
1024 const auto NewFType = FunctionType::get(
1025 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
1026
1027 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1028
1029 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
1030
1031 SelectSource = NewCI;
1032
1033 } else {
1034 SelectSource = Cmp;
1035 }
1036
1037 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001038 }
1039
1040 CI->replaceAllUsesWith(V);
1041
1042 // Lastly, remember to remove the user.
1043 ToRemoves.push_back(CI);
1044 }
1045 }
1046
1047 Changed = !ToRemoves.empty();
1048
1049 // And cleanup the calls we don't use anymore.
1050 for (auto V : ToRemoves) {
1051 V->eraseFromParent();
1052 }
1053
1054 // And remove the function we don't need either too.
1055 F->eraseFromParent();
1056 }
1057 }
1058
1059 return Changed;
1060}
1061
Kévin Petitbf0036c2019-03-06 13:57:10 +00001062bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1063 bool Changed = false;
1064
1065 for (auto const &SymVal : M.getValueSymbolTable()) {
1066 // Skip symbols whose name doesn't match
1067 if (!SymVal.getKey().startswith("_Z8upsample")) {
1068 continue;
1069 }
1070 // Is there a function going by that name?
1071 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1072
1073 SmallVector<Instruction *, 4> ToRemoves;
1074
1075 // Walk the users of the function.
1076 for (auto &U : F->uses()) {
1077 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1078
1079 // Get arguments
1080 auto HiValue = CI->getOperand(0);
1081 auto LoValue = CI->getOperand(1);
1082
1083 // Don't touch overloads that aren't in OpenCL C
1084 auto HiType = HiValue->getType();
1085 auto LoType = LoValue->getType();
1086
1087 if (HiType != LoType) {
1088 continue;
1089 }
1090
1091 if (!HiType->isIntOrIntVectorTy()) {
1092 continue;
1093 }
1094
1095 if (HiType->getScalarSizeInBits() * 2 !=
1096 CI->getType()->getScalarSizeInBits()) {
1097 continue;
1098 }
1099
1100 if ((HiType->getScalarSizeInBits() != 8) &&
1101 (HiType->getScalarSizeInBits() != 16) &&
1102 (HiType->getScalarSizeInBits() != 32)) {
1103 continue;
1104 }
1105
1106 if (HiType->isVectorTy()) {
1107 if ((HiType->getVectorNumElements() != 2) &&
1108 (HiType->getVectorNumElements() != 3) &&
1109 (HiType->getVectorNumElements() != 4) &&
1110 (HiType->getVectorNumElements() != 8) &&
1111 (HiType->getVectorNumElements() != 16)) {
1112 continue;
1113 }
1114 }
1115
1116 // Convert both operands to the result type
1117 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(),
1118 "", CI);
1119 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(),
1120 "", CI);
1121
1122 // Shift high operand
1123 auto ShiftAmount = ConstantInt::get(CI->getType(),
1124 HiType->getScalarSizeInBits());
1125 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1126 ShiftAmount, "", CI);
1127
1128 // OR both results
1129 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1130 "", CI);
1131
1132 // Replace call with the expression
1133 CI->replaceAllUsesWith(V);
1134
1135 // Lastly, remember to remove the user.
1136 ToRemoves.push_back(CI);
1137 }
1138 }
1139
1140 Changed = !ToRemoves.empty();
1141
1142 // And cleanup the calls we don't use anymore.
1143 for (auto V : ToRemoves) {
1144 V->eraseFromParent();
1145 }
1146
1147 // And remove the function we don't need either too.
1148 F->eraseFromParent();
1149 }
1150 }
1151
1152 return Changed;
1153}
1154
Kévin Petitd44eef52019-03-08 13:22:14 +00001155bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1156 bool Changed = false;
1157
1158 for (auto const &SymVal : M.getValueSymbolTable()) {
1159 // Skip symbols whose name doesn't match
1160 if (!SymVal.getKey().startswith("_Z6rotate")) {
1161 continue;
1162 }
1163 // Is there a function going by that name?
1164 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1165
1166 SmallVector<Instruction *, 4> ToRemoves;
1167
1168 // Walk the users of the function.
1169 for (auto &U : F->uses()) {
1170 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1171
1172 // Get arguments
1173 auto SrcValue = CI->getOperand(0);
1174 auto RotAmount = CI->getOperand(1);
1175
1176 // Don't touch overloads that aren't in OpenCL C
1177 auto SrcType = SrcValue->getType();
1178 auto RotType = RotAmount->getType();
1179
1180 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1181 continue;
1182 }
1183
1184 if (!SrcType->isIntOrIntVectorTy()) {
1185 continue;
1186 }
1187
1188 if ((SrcType->getScalarSizeInBits() != 8) &&
1189 (SrcType->getScalarSizeInBits() != 16) &&
1190 (SrcType->getScalarSizeInBits() != 32) &&
1191 (SrcType->getScalarSizeInBits() != 64)) {
1192 continue;
1193 }
1194
1195 if (SrcType->isVectorTy()) {
1196 if ((SrcType->getVectorNumElements() != 2) &&
1197 (SrcType->getVectorNumElements() != 3) &&
1198 (SrcType->getVectorNumElements() != 4) &&
1199 (SrcType->getVectorNumElements() != 8) &&
1200 (SrcType->getVectorNumElements() != 16)) {
1201 continue;
1202 }
1203 }
1204
1205 // The approach used is to shift the top bits down, the bottom bits up
1206 // and OR the two shifted values.
1207
1208 // The rotation amount is to be treated modulo the element size.
1209 // Since SPIR-V shift ops don't support this, let's apply the
1210 // modulo ahead of shifting. The element size is always a power of
1211 // two so we can just AND with a mask.
1212 auto ModMask = ConstantInt::get(SrcType,
1213 SrcType->getScalarSizeInBits() - 1);
1214 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1215 ModMask, "", CI);
1216
1217 // Let's calc the amount by which to shift top bits down
1218 auto ScalarSize = ConstantInt::get(SrcType,
1219 SrcType->getScalarSizeInBits());
1220 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1221 RotAmount, "", CI);
1222
1223 // Now shift the bottom bits up and the top bits down
1224 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1225 RotAmount, "", CI);
1226 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1227 DownAmount, "", CI);
1228
1229 // Finally OR the two shifted values
1230 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1231 HiRotated, "", CI);
1232
1233 // Replace call with the expression
1234 CI->replaceAllUsesWith(V);
1235
1236 // Lastly, remember to remove the user.
1237 ToRemoves.push_back(CI);
1238 }
1239 }
1240
1241 Changed = !ToRemoves.empty();
1242
1243 // And cleanup the calls we don't use anymore.
1244 for (auto V : ToRemoves) {
1245 V->eraseFromParent();
1246 }
1247
1248 // And remove the function we don't need either too.
1249 F->eraseFromParent();
1250 }
1251 }
1252
1253 return Changed;
1254}
1255
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001256bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1257 bool Changed = false;
1258
1259 for (auto const &SymVal : M.getValueSymbolTable()) {
1260
1261 // Skip symbols whose name obviously doesn't match
1262 if (!SymVal.getKey().contains("convert_")) {
1263 continue;
1264 }
1265
1266 // Is there a function going by that name?
1267 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1268
1269 // Get info from the mangled name
1270 FunctionInfo finfo;
1271 bool parsed = getFunctionInfoFromMangledNameCheck(F->getName(), &finfo);
1272
1273 // All functions of interest are handled by our mangled name parser
1274 if (!parsed) {
1275 continue;
1276 }
1277
1278 // Move on if this isn't a call to convert_
1279 if (!finfo.name.startswith("convert_")) {
1280 continue;
1281 }
1282
1283 // Extract the destination type from the function name
1284 StringRef DstTypeName = finfo.name;
1285 DstTypeName.consume_front("convert_");
1286
1287 auto DstSignedNess = StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1288 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1289 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1290 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1291 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1292 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1293 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1294 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1295 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1296 .Default(ArgTypeInfo::SignedNess::None);
1297
1298 auto SrcSignedNess = finfo.argTypeInfos[0].signedness;
1299
1300 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
1301 bool SrcIsSigned = SrcSignedNess == ArgTypeInfo::SignedNess::Signed;
1302
1303 SmallVector<Instruction *, 4> ToRemoves;
1304
1305 // Walk the users of the function.
1306 for (auto &U : F->uses()) {
1307 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1308
1309 // Get arguments
1310 auto SrcValue = CI->getOperand(0);
1311
1312 // Don't touch overloads that aren't in OpenCL C
1313 auto SrcType = SrcValue->getType();
1314 auto DstType = CI->getType();
1315
1316 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1317 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1318 continue;
1319 }
1320
1321 if (SrcType->isVectorTy()) {
1322
1323 if (SrcType->getVectorNumElements() !=
1324 DstType->getVectorNumElements()) {
1325 continue;
1326 }
1327
1328 if ((SrcType->getVectorNumElements() != 2) &&
1329 (SrcType->getVectorNumElements() != 3) &&
1330 (SrcType->getVectorNumElements() != 4) &&
1331 (SrcType->getVectorNumElements() != 8) &&
1332 (SrcType->getVectorNumElements() != 16)) {
1333 continue;
1334 }
1335 }
1336
1337 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1338 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1339
1340 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1341 bool DstIsInt = DstType->isIntOrIntVectorTy();
1342
1343 Value *V;
1344 if (SrcIsFloat && DstIsFloat) {
1345 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1346 } else if (SrcIsFloat && DstIsInt) {
1347 if (DstIsSigned) {
1348 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1349 } else {
1350 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1351 }
1352 } else if (SrcIsInt && DstIsFloat) {
1353 if (SrcIsSigned) {
1354 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1355 } else {
1356 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1357 }
1358 } else if (SrcIsInt && DstIsInt) {
1359 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1360 } else {
1361 // Not something we're supposed to handle, just move on
1362 continue;
1363 }
1364
1365 // Replace call with the expression
1366 CI->replaceAllUsesWith(V);
1367
1368 // Lastly, remember to remove the user.
1369 ToRemoves.push_back(CI);
1370 }
1371 }
1372
1373 Changed = !ToRemoves.empty();
1374
1375 // And cleanup the calls we don't use anymore.
1376 for (auto V : ToRemoves) {
1377 V->eraseFromParent();
1378 }
1379
1380 // And remove the function we don't need either too.
1381 F->eraseFromParent();
1382 }
1383 }
1384
1385 return Changed;
1386}
1387
Kévin Petit8a560882019-03-21 15:24:34 +00001388bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1389 bool Changed = false;
1390
1391 for (auto const &SymVal : M.getValueSymbolTable()) {
1392
1393 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1394 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1395
1396 // Skip symbols whose name doesn't match
1397 if (!isMad && !isMul) {
1398 continue;
1399 }
1400
1401 // Is there a function going by that name?
1402 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1403
1404 SmallVector<Instruction *, 4> ToRemoves;
1405
1406 // Walk the users of the function.
1407 for (auto &U : F->uses()) {
1408 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1409
1410 // Get arguments
1411 auto AValue = CI->getOperand(0);
1412 auto BValue = CI->getOperand(1);
1413 auto CValue = CI->getOperand(2);
1414
1415 // Don't touch overloads that aren't in OpenCL C
1416 auto AType = AValue->getType();
1417 auto BType = BValue->getType();
1418 auto CType = CValue->getType();
1419
1420 if ((AType != BType) || (CI->getType() != AType) ||
1421 (isMad && (AType != CType))) {
1422 continue;
1423 }
1424
1425 if (!AType->isIntOrIntVectorTy()) {
1426 continue;
1427 }
1428
1429 if ((AType->getScalarSizeInBits() != 8) &&
1430 (AType->getScalarSizeInBits() != 16) &&
1431 (AType->getScalarSizeInBits() != 32) &&
1432 (AType->getScalarSizeInBits() != 64)) {
1433 continue;
1434 }
1435
1436 if (AType->isVectorTy()) {
1437 if ((AType->getVectorNumElements() != 2) &&
1438 (AType->getVectorNumElements() != 3) &&
1439 (AType->getVectorNumElements() != 4) &&
1440 (AType->getVectorNumElements() != 8) &&
1441 (AType->getVectorNumElements() != 16)) {
1442 continue;
1443 }
1444 }
1445
1446 // Create struct type for the return type of our SPIR-V intrinsic
1447 SmallVector<Type*, 2> TwoValueType = {
1448 AType,
1449 AType
1450 };
1451
1452 auto ExMulRetType = StructType::create(TwoValueType);
1453
1454 // And a function type
1455 auto NewFType = FunctionType::get(ExMulRetType, TwoValueType, false);
1456
1457 // Get infos from the mangled OpenCL built-in function name
1458 FunctionInfo finfo;
1459 getFunctionInfoFromMangledName(F->getName(), &finfo);
1460
1461 // Use it to select the appropriate signed/unsigned SPIR-V intrinsic
1462 StringRef intrinsic;
1463 if (finfo.argTypeInfos[0].signedness == ArgTypeInfo::SignedNess::Signed) {
1464 intrinsic = "spirv.smul_extended";
1465 } else {
1466 intrinsic = "spirv.umul_extended";
1467 }
1468
1469 // Add the intrinsic function to the module
1470 auto NewF = M.getOrInsertFunction(intrinsic, NewFType);
1471
1472 // Call it
1473 SmallVector<Value*, 4> NewFArgs = {
1474 AValue,
1475 BValue,
1476 };
1477
1478 auto Call = CallInst::Create(NewF, NewFArgs, "", CI);
1479
1480 // Get the high part of the result
1481 unsigned Idxs[] = {1};
1482 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1483
1484 // If we're handling a mad_hi, add the third argument to the result
1485 if (isMad) {
1486 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1487 }
1488
1489 // Replace call with the expression
1490 CI->replaceAllUsesWith(V);
1491
1492 // Lastly, remember to remove the user.
1493 ToRemoves.push_back(CI);
1494 }
1495 }
1496
1497 Changed = !ToRemoves.empty();
1498
1499 // And cleanup the calls we don't use anymore.
1500 for (auto V : ToRemoves) {
1501 V->eraseFromParent();
1502 }
1503
1504 // And remove the function we don't need either too.
1505 F->eraseFromParent();
1506 }
1507 }
1508
1509 return Changed;
1510}
1511
Kévin Petitf5b78a22018-10-25 14:32:17 +00001512bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1513 bool Changed = false;
1514
1515 for (auto const &SymVal : M.getValueSymbolTable()) {
1516 // Skip symbols whose name doesn't match
1517 if (!SymVal.getKey().startswith("_Z6select")) {
1518 continue;
1519 }
1520 // Is there a function going by that name?
1521 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1522
1523 SmallVector<Instruction *, 4> ToRemoves;
1524
1525 // Walk the users of the function.
1526 for (auto &U : F->uses()) {
1527 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1528
1529 // Get arguments
1530 auto FalseValue = CI->getOperand(0);
1531 auto TrueValue = CI->getOperand(1);
1532 auto PredicateValue = CI->getOperand(2);
1533
1534 // Don't touch overloads that aren't in OpenCL C
1535 auto FalseType = FalseValue->getType();
1536 auto TrueType = TrueValue->getType();
1537 auto PredicateType = PredicateValue->getType();
1538
1539 if (FalseType != TrueType) {
1540 continue;
1541 }
1542
1543 if (!PredicateType->isIntOrIntVectorTy()) {
1544 continue;
1545 }
1546
1547 if (!FalseType->isIntOrIntVectorTy() &&
1548 !FalseType->getScalarType()->isFloatingPointTy()) {
1549 continue;
1550 }
1551
1552 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1553 continue;
1554 }
1555
1556 if (FalseType->getScalarSizeInBits() !=
1557 PredicateType->getScalarSizeInBits()) {
1558 continue;
1559 }
1560
1561 if (FalseType->isVectorTy()) {
1562 if (FalseType->getVectorNumElements() !=
1563 PredicateType->getVectorNumElements()) {
1564 continue;
1565 }
1566
1567 if ((FalseType->getVectorNumElements() != 2) &&
1568 (FalseType->getVectorNumElements() != 3) &&
1569 (FalseType->getVectorNumElements() != 4) &&
1570 (FalseType->getVectorNumElements() != 8) &&
1571 (FalseType->getVectorNumElements() != 16)) {
1572 continue;
1573 }
1574 }
1575
1576 // Create constant
1577 const auto ZeroValue = Constant::getNullValue(PredicateType);
1578
1579 // Scalar and vector are to be treated differently
1580 CmpInst::Predicate Pred;
1581 if (PredicateType->isVectorTy()) {
1582 Pred = CmpInst::ICMP_SLT;
1583 } else {
1584 Pred = CmpInst::ICMP_NE;
1585 }
1586
1587 // Create comparison instruction
1588 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1589 ZeroValue, "", CI);
1590
1591 // Create select
1592 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1593
1594 // Replace call with the selection
1595 CI->replaceAllUsesWith(V);
1596
1597 // Lastly, remember to remove the user.
1598 ToRemoves.push_back(CI);
1599 }
1600 }
1601
1602 Changed = !ToRemoves.empty();
1603
1604 // And cleanup the calls we don't use anymore.
1605 for (auto V : ToRemoves) {
1606 V->eraseFromParent();
1607 }
1608
1609 // And remove the function we don't need either too.
1610 F->eraseFromParent();
1611 }
1612 }
1613
1614 return Changed;
1615}
1616
Kévin Petite7d0cce2018-10-31 12:38:56 +00001617bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1618 bool Changed = false;
1619
1620 for (auto const &SymVal : M.getValueSymbolTable()) {
1621 // Skip symbols whose name doesn't match
1622 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1623 continue;
1624 }
1625 // Is there a function going by that name?
1626 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1627
1628 SmallVector<Instruction *, 4> ToRemoves;
1629
1630 // Walk the users of the function.
1631 for (auto &U : F->uses()) {
1632 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1633
1634 if (CI->getNumOperands() != 4) {
1635 continue;
1636 }
1637
1638 // Get arguments
1639 auto FalseValue = CI->getOperand(0);
1640 auto TrueValue = CI->getOperand(1);
1641 auto PredicateValue = CI->getOperand(2);
1642
1643 // Don't touch overloads that aren't in OpenCL C
1644 auto FalseType = FalseValue->getType();
1645 auto TrueType = TrueValue->getType();
1646 auto PredicateType = PredicateValue->getType();
1647
1648 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1649 continue;
1650 }
1651
1652 if (TrueType->isVectorTy()) {
1653 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1654 !TrueType->getScalarType()->isIntegerTy()) {
1655 continue;
1656 }
1657 if ((TrueType->getVectorNumElements() != 2) &&
1658 (TrueType->getVectorNumElements() != 3) &&
1659 (TrueType->getVectorNumElements() != 4) &&
1660 (TrueType->getVectorNumElements() != 8) &&
1661 (TrueType->getVectorNumElements() != 16)) {
1662 continue;
1663 }
1664 }
1665
1666 // Remember the type of the operands
1667 auto OpType = TrueType;
1668
1669 // The actual bit selection will always be done on an integer type,
1670 // declare it here
1671 Type *BitType;
1672
1673 // If the operands are float, then bitcast them to int
1674 if (OpType->getScalarType()->isFloatingPointTy()) {
1675
1676 // First create the new type
1677 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1678 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1679 if (OpType->isVectorTy()) {
1680 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
1681 }
1682
1683 // Then bitcast all operands
1684 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
1685 BitType, "", CI);
1686 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
1687 BitType, "", CI);
1688 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1689
1690 } else {
1691 // The operands have an integer type, use it directly
1692 BitType = OpType;
1693 }
1694
1695 // All the operands are now always integers
1696 // implement as (c & b) | (~c & a)
1697
1698 // Create our negated predicate value
1699 auto AllOnes = Constant::getAllOnesValue(BitType);
1700 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1701 PredicateValue,
1702 AllOnes, "", CI);
1703
1704 // Then put everything together
1705 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1706 NotPredicateValue,
1707 FalseValue, "", CI);
1708 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1709 PredicateValue,
1710 TrueValue, "", CI);
1711
1712 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1713 BitsTrue, "", CI);
1714
1715 // If we were dealing with a floating point type, we must bitcast
1716 // the result back to that
1717 if (OpType->getScalarType()->isFloatingPointTy()) {
1718 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1719 }
1720
1721 // Replace call with our new code
1722 CI->replaceAllUsesWith(V);
1723
1724 // Lastly, remember to remove the user.
1725 ToRemoves.push_back(CI);
1726 }
1727 }
1728
1729 Changed = !ToRemoves.empty();
1730
1731 // And cleanup the calls we don't use anymore.
1732 for (auto V : ToRemoves) {
1733 V->eraseFromParent();
1734 }
1735
1736 // And remove the function we don't need either too.
1737 F->eraseFromParent();
1738 }
1739 }
1740
1741 return Changed;
1742}
1743
Kévin Petit6b0a9532018-10-30 20:00:39 +00001744bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1745 bool Changed = false;
1746
1747 const std::map<const char *, const char *> Map = {
1748 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1749 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1750 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1751 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1752 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1753 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1754 };
1755
1756 for (auto Pair : Map) {
1757 // If we find a function with the matching name.
1758 if (auto F = M.getFunction(Pair.first)) {
1759 SmallVector<Instruction *, 4> ToRemoves;
1760
1761 // Walk the users of the function.
1762 for (auto &U : F->uses()) {
1763 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1764
1765 auto ReplacementFn = Pair.second;
1766
1767 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1768 Value *VectorArg;
1769
1770 // First figure out which function we're dealing with
1771 if (F->getName().startswith("_Z10smoothstep")) {
1772 ArgsToSplat.push_back(CI->getOperand(1));
1773 VectorArg = CI->getOperand(2);
1774 } else {
1775 VectorArg = CI->getOperand(1);
1776 }
1777
1778 // Splat arguments that need to be
1779 SmallVector<Value*, 2> SplatArgs;
1780 auto VecType = VectorArg->getType();
1781
1782 for (auto arg : ArgsToSplat) {
1783 Value* NewVectorArg = UndefValue::get(VecType);
1784 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1785 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1786 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1787 }
1788 SplatArgs.push_back(NewVectorArg);
1789 }
1790
1791 // Replace the call with the vector/vector flavour
1792 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1793 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1794
1795 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1796
1797 SmallVector<Value*, 3> NewArgs;
1798 for (auto arg : SplatArgs) {
1799 NewArgs.push_back(arg);
1800 }
1801 NewArgs.push_back(VectorArg);
1802
1803 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1804
1805 CI->replaceAllUsesWith(NewCI);
1806
1807 // Lastly, remember to remove the user.
1808 ToRemoves.push_back(CI);
1809 }
1810 }
1811
1812 Changed = !ToRemoves.empty();
1813
1814 // And cleanup the calls we don't use anymore.
1815 for (auto V : ToRemoves) {
1816 V->eraseFromParent();
1817 }
1818
1819 // And remove the function we don't need either too.
1820 F->eraseFromParent();
1821 }
1822 }
1823
1824 return Changed;
1825}
1826
David Neto22f144c2017-06-12 14:26:21 -04001827bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1828 bool Changed = false;
1829
1830 const std::map<const char *, Instruction::BinaryOps> Map = {
1831 {"_Z7signbitf", Instruction::LShr},
1832 {"_Z7signbitDv2_f", Instruction::AShr},
1833 {"_Z7signbitDv3_f", Instruction::AShr},
1834 {"_Z7signbitDv4_f", Instruction::AShr},
1835 };
1836
1837 for (auto Pair : Map) {
1838 // If we find a function with the matching name.
1839 if (auto F = M.getFunction(Pair.first)) {
1840 SmallVector<Instruction *, 4> ToRemoves;
1841
1842 // Walk the users of the function.
1843 for (auto &U : F->uses()) {
1844 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1845 auto Arg = CI->getOperand(0);
1846
1847 auto Bitcast =
1848 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1849
1850 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1851 ConstantInt::get(CI->getType(), 31),
1852 "", CI);
1853
1854 CI->replaceAllUsesWith(Shr);
1855
1856 // Lastly, remember to remove the user.
1857 ToRemoves.push_back(CI);
1858 }
1859 }
1860
1861 Changed = !ToRemoves.empty();
1862
1863 // And cleanup the calls we don't use anymore.
1864 for (auto V : ToRemoves) {
1865 V->eraseFromParent();
1866 }
1867
1868 // And remove the function we don't need either too.
1869 F->eraseFromParent();
1870 }
1871 }
1872
1873 return Changed;
1874}
1875
1876bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1877 bool Changed = false;
1878
1879 const std::map<const char *,
1880 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1881 Map = {
1882 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1883 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1884 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1885 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1886 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1887 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1888 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1889 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1890 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1891 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1892 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1893 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1894 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1895 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1896 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1897 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1898 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1899 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1900 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1901 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1902 };
1903
1904 for (auto Pair : Map) {
1905 // If we find a function with the matching name.
1906 if (auto F = M.getFunction(Pair.first)) {
1907 SmallVector<Instruction *, 4> ToRemoves;
1908
1909 // Walk the users of the function.
1910 for (auto &U : F->uses()) {
1911 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1912 // The multiply instruction to use.
1913 auto MulInst = Pair.second.first;
1914
1915 // The add instruction to use.
1916 auto AddInst = Pair.second.second;
1917
1918 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1919
1920 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1921 CI->getArgOperand(1), "", CI);
1922
1923 if (Instruction::BinaryOpsEnd != AddInst) {
1924 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1925 CI);
1926 }
1927
1928 CI->replaceAllUsesWith(I);
1929
1930 // Lastly, remember to remove the user.
1931 ToRemoves.push_back(CI);
1932 }
1933 }
1934
1935 Changed = !ToRemoves.empty();
1936
1937 // And cleanup the calls we don't use anymore.
1938 for (auto V : ToRemoves) {
1939 V->eraseFromParent();
1940 }
1941
1942 // And remove the function we don't need either too.
1943 F->eraseFromParent();
1944 }
1945 }
1946
1947 return Changed;
1948}
1949
Derek Chowcfd368b2017-10-19 20:58:45 -07001950bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1951 bool Changed = false;
1952
1953 struct VectorStoreOps {
1954 const char* name;
1955 int n;
1956 Type* (*get_scalar_type_function)(LLVMContext&);
1957 } vector_store_ops[] = {
1958 // TODO(derekjchow): Expand this list.
1959 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1960 };
1961
David Neto544fffc2017-11-16 18:35:14 -05001962 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001963 auto Name = Op.name;
1964 auto N = Op.n;
1965 auto TypeFn = Op.get_scalar_type_function;
1966 if (auto F = M.getFunction(Name)) {
1967 SmallVector<Instruction *, 4> ToRemoves;
1968
1969 // Walk the users of the function.
1970 for (auto &U : F->uses()) {
1971 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1972 // The value argument from vstoren.
1973 auto Arg0 = CI->getOperand(0);
1974
1975 // The index argument from vstoren.
1976 auto Arg1 = CI->getOperand(1);
1977
1978 // The pointer argument from vstoren.
1979 auto Arg2 = CI->getOperand(2);
1980
1981 // Get types.
1982 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1983 auto ScalarNPointerTy = PointerType::get(
1984 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1985
1986 // Cast to scalarn
1987 auto Cast = CastInst::CreatePointerCast(
1988 Arg2, ScalarNPointerTy, "", CI);
1989 // Index to correct address
1990 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1991 // Store
1992 auto Store = new StoreInst(Arg0, Index, CI);
1993
1994 CI->replaceAllUsesWith(Store);
1995 ToRemoves.push_back(CI);
1996 }
1997 }
1998
1999 Changed = !ToRemoves.empty();
2000
2001 // And cleanup the calls we don't use anymore.
2002 for (auto V : ToRemoves) {
2003 V->eraseFromParent();
2004 }
2005
2006 // And remove the function we don't need either too.
2007 F->eraseFromParent();
2008 }
2009 }
2010
2011 return Changed;
2012}
2013
2014bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2015 bool Changed = false;
2016
2017 struct VectorLoadOps {
2018 const char* name;
2019 int n;
2020 Type* (*get_scalar_type_function)(LLVMContext&);
2021 } vector_load_ops[] = {
2022 // TODO(derekjchow): Expand this list.
2023 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
2024 };
2025
David Neto544fffc2017-11-16 18:35:14 -05002026 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002027 auto Name = Op.name;
2028 auto N = Op.n;
2029 auto TypeFn = Op.get_scalar_type_function;
2030 // If we find a function with the matching name.
2031 if (auto F = M.getFunction(Name)) {
2032 SmallVector<Instruction *, 4> ToRemoves;
2033
2034 // Walk the users of the function.
2035 for (auto &U : F->uses()) {
2036 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2037 // The index argument from vloadn.
2038 auto Arg0 = CI->getOperand(0);
2039
2040 // The pointer argument from vloadn.
2041 auto Arg1 = CI->getOperand(1);
2042
2043 // Get types.
2044 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
2045 auto ScalarNPointerTy = PointerType::get(
2046 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
2047
2048 // Cast to scalarn
2049 auto Cast = CastInst::CreatePointerCast(
2050 Arg1, ScalarNPointerTy, "", CI);
2051 // Index to correct address
2052 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2053 // Load
2054 auto Load = new LoadInst(Index, "", CI);
2055
2056 CI->replaceAllUsesWith(Load);
2057 ToRemoves.push_back(CI);
2058 }
2059 }
2060
2061 Changed = !ToRemoves.empty();
2062
2063 // And cleanup the calls we don't use anymore.
2064 for (auto V : ToRemoves) {
2065 V->eraseFromParent();
2066 }
2067
2068 // And remove the function we don't need either too.
2069 F->eraseFromParent();
2070
2071 }
2072 }
2073
2074 return Changed;
2075}
2076
David Neto22f144c2017-06-12 14:26:21 -04002077bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2078 bool Changed = false;
2079
2080 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2081 "_Z10vload_halfjPU3AS2KDh"};
2082
2083 for (auto Name : Map) {
2084 // If we find a function with the matching name.
2085 if (auto F = M.getFunction(Name)) {
2086 SmallVector<Instruction *, 4> ToRemoves;
2087
2088 // Walk the users of the function.
2089 for (auto &U : F->uses()) {
2090 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2091 // The index argument from vload_half.
2092 auto Arg0 = CI->getOperand(0);
2093
2094 // The pointer argument from vload_half.
2095 auto Arg1 = CI->getOperand(1);
2096
David Neto22f144c2017-06-12 14:26:21 -04002097 auto IntTy = Type::getInt32Ty(M.getContext());
2098 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002099 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2100
David Neto22f144c2017-06-12 14:26:21 -04002101 // Our intrinsic to unpack a float2 from an int.
2102 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2103
2104 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2105
David Neto482550a2018-03-24 05:21:07 -07002106 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002107 auto ShortTy = Type::getInt16Ty(M.getContext());
2108 auto ShortPointerTy = PointerType::get(
2109 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002110
David Netoac825b82017-05-30 12:49:01 -04002111 // Cast the half* pointer to short*.
2112 auto Cast =
2113 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002114
David Netoac825b82017-05-30 12:49:01 -04002115 // Index into the correct address of the casted pointer.
2116 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2117
2118 // Load from the short* we casted to.
2119 auto Load = new LoadInst(Index, "", CI);
2120
2121 // ZExt the short -> int.
2122 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2123
2124 // Get our float2.
2125 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2126
2127 // Extract out the bottom element which is our float result.
2128 auto Extract = ExtractElementInst::Create(
2129 Call, ConstantInt::get(IntTy, 0), "", CI);
2130
2131 CI->replaceAllUsesWith(Extract);
2132 } else {
2133 // Assume the pointer argument points to storage aligned to 32bits
2134 // or more.
2135 // TODO(dneto): Do more analysis to make sure this is true?
2136 //
2137 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2138 // with:
2139 //
2140 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2141 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2142 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2143 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2144 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2145 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2146 // x float> %converted, %index_is_odd32
2147
2148 auto IntPointerTy = PointerType::get(
2149 IntTy, Arg1->getType()->getPointerAddressSpace());
2150
David Neto973e6a82017-05-30 13:48:18 -04002151 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002152 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002153 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002154 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2155
2156 auto One = ConstantInt::get(IntTy, 1);
2157 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2158 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2159
2160 // Index into the correct address of the casted pointer.
2161 auto Ptr =
2162 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2163
2164 // Load from the int* we casted to.
2165 auto Load = new LoadInst(Ptr, "", CI);
2166
2167 // Get our float2.
2168 auto Call = CallInst::Create(NewF, Load, "", CI);
2169
2170 // Extract out the float result, where the element number is
2171 // determined by whether the original index was even or odd.
2172 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2173
2174 CI->replaceAllUsesWith(Extract);
2175 }
David Neto22f144c2017-06-12 14:26:21 -04002176
2177 // Lastly, remember to remove the user.
2178 ToRemoves.push_back(CI);
2179 }
2180 }
2181
2182 Changed = !ToRemoves.empty();
2183
2184 // And cleanup the calls we don't use anymore.
2185 for (auto V : ToRemoves) {
2186 V->eraseFromParent();
2187 }
2188
2189 // And remove the function we don't need either too.
2190 F->eraseFromParent();
2191 }
2192 }
2193
2194 return Changed;
2195}
2196
2197bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
2198 bool Changed = false;
2199
David Neto556c7e62018-06-08 13:45:55 -07002200 const std::vector<const char *> Map = {
2201 "_Z11vload_half2jPU3AS1KDh",
2202 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2203 "_Z11vload_half2jPU3AS2KDh",
2204 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2205 };
David Neto22f144c2017-06-12 14:26:21 -04002206
2207 for (auto Name : Map) {
2208 // If we find a function with the matching name.
2209 if (auto F = M.getFunction(Name)) {
2210 SmallVector<Instruction *, 4> ToRemoves;
2211
2212 // Walk the users of the function.
2213 for (auto &U : F->uses()) {
2214 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2215 // The index argument from vload_half.
2216 auto Arg0 = CI->getOperand(0);
2217
2218 // The pointer argument from vload_half.
2219 auto Arg1 = CI->getOperand(1);
2220
2221 auto IntTy = Type::getInt32Ty(M.getContext());
2222 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2223 auto NewPointerTy = PointerType::get(
2224 IntTy, Arg1->getType()->getPointerAddressSpace());
2225 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2226
2227 // Cast the half* pointer to int*.
2228 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2229
2230 // Index into the correct address of the casted pointer.
2231 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
2232
2233 // Load from the int* we casted to.
2234 auto Load = new LoadInst(Index, "", CI);
2235
2236 // Our intrinsic to unpack a float2 from an int.
2237 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2238
2239 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2240
2241 // Get our float2.
2242 auto Call = CallInst::Create(NewF, Load, "", CI);
2243
2244 CI->replaceAllUsesWith(Call);
2245
2246 // Lastly, remember to remove the user.
2247 ToRemoves.push_back(CI);
2248 }
2249 }
2250
2251 Changed = !ToRemoves.empty();
2252
2253 // And cleanup the calls we don't use anymore.
2254 for (auto V : ToRemoves) {
2255 V->eraseFromParent();
2256 }
2257
2258 // And remove the function we don't need either too.
2259 F->eraseFromParent();
2260 }
2261 }
2262
2263 return Changed;
2264}
2265
2266bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
2267 bool Changed = false;
2268
David Neto556c7e62018-06-08 13:45:55 -07002269 const std::vector<const char *> Map = {
2270 "_Z11vload_half4jPU3AS1KDh",
2271 "_Z12vloada_half4jPU3AS1KDh",
2272 "_Z11vload_half4jPU3AS2KDh",
2273 "_Z12vloada_half4jPU3AS2KDh",
2274 };
David Neto22f144c2017-06-12 14:26:21 -04002275
2276 for (auto Name : Map) {
2277 // If we find a function with the matching name.
2278 if (auto F = M.getFunction(Name)) {
2279 SmallVector<Instruction *, 4> ToRemoves;
2280
2281 // Walk the users of the function.
2282 for (auto &U : F->uses()) {
2283 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2284 // The index argument from vload_half.
2285 auto Arg0 = CI->getOperand(0);
2286
2287 // The pointer argument from vload_half.
2288 auto Arg1 = CI->getOperand(1);
2289
2290 auto IntTy = Type::getInt32Ty(M.getContext());
2291 auto Int2Ty = VectorType::get(IntTy, 2);
2292 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2293 auto NewPointerTy = PointerType::get(
2294 Int2Ty, Arg1->getType()->getPointerAddressSpace());
2295 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2296
2297 // Cast the half* pointer to int2*.
2298 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
2299
2300 // Index into the correct address of the casted pointer.
2301 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
2302
2303 // Load from the int2* we casted to.
2304 auto Load = new LoadInst(Index, "", CI);
2305
2306 // Extract each element from the loaded int2.
2307 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2308 "", CI);
2309 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2310 "", CI);
2311
2312 // Our intrinsic to unpack a float2 from an int.
2313 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2314
2315 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2316
2317 // Get the lower (x & y) components of our final float4.
2318 auto Lo = CallInst::Create(NewF, X, "", CI);
2319
2320 // Get the higher (z & w) components of our final float4.
2321 auto Hi = CallInst::Create(NewF, Y, "", CI);
2322
2323 Constant *ShuffleMask[4] = {
2324 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2325 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2326
2327 // Combine our two float2's into one float4.
2328 auto Combine = new ShuffleVectorInst(
2329 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2330
2331 CI->replaceAllUsesWith(Combine);
2332
2333 // Lastly, remember to remove the user.
2334 ToRemoves.push_back(CI);
2335 }
2336 }
2337
2338 Changed = !ToRemoves.empty();
2339
2340 // And cleanup the calls we don't use anymore.
2341 for (auto V : ToRemoves) {
2342 V->eraseFromParent();
2343 }
2344
2345 // And remove the function we don't need either too.
2346 F->eraseFromParent();
2347 }
2348 }
2349
2350 return Changed;
2351}
2352
David Neto6ad93232018-06-07 15:42:58 -07002353bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
2354 bool Changed = false;
2355
2356 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2357 //
2358 // %u = load i32 %ptr
2359 // %fxy = call <2 x float> Unpack2xHalf(u)
2360 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2361 const std::vector<const char *> Map = {
2362 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2363 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2364 "_Z20__clspv_vloada_half2jPKj", // private
2365 };
2366
2367 for (auto Name : Map) {
2368 // If we find a function with the matching name.
2369 if (auto F = M.getFunction(Name)) {
2370 SmallVector<Instruction *, 4> ToRemoves;
2371
2372 // Walk the users of the function.
2373 for (auto &U : F->uses()) {
2374 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
2375 auto Index = CI->getOperand(0);
2376 auto Ptr = CI->getOperand(1);
2377
2378 auto IntTy = Type::getInt32Ty(M.getContext());
2379 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2380 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2381
2382 auto IndexedPtr =
2383 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
2384 auto Load = new LoadInst(IndexedPtr, "", CI);
2385
2386 // Our intrinsic to unpack a float2 from an int.
2387 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2388
2389 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2390
2391 // Get our final float2.
2392 auto Result = CallInst::Create(NewF, Load, "", CI);
2393
2394 CI->replaceAllUsesWith(Result);
2395
2396 // Lastly, remember to remove the user.
2397 ToRemoves.push_back(CI);
2398 }
2399 }
2400
2401 Changed = true;
2402
2403 // And cleanup the calls we don't use anymore.
2404 for (auto V : ToRemoves) {
2405 V->eraseFromParent();
2406 }
2407
2408 // And remove the function we don't need either too.
2409 F->eraseFromParent();
2410 }
2411 }
2412
2413 return Changed;
2414}
2415
2416bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
2417 bool Changed = false;
2418
2419 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2420 //
2421 // %u2 = load <2 x i32> %ptr
2422 // %u2xy = extractelement %u2, 0
2423 // %u2zw = extractelement %u2, 1
2424 // %fxy = call <2 x float> Unpack2xHalf(uint)
2425 // %fzw = call <2 x float> Unpack2xHalf(uint)
2426 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
2427 const std::vector<const char *> Map = {
2428 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2429 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2430 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2431 };
2432
2433 for (auto Name : Map) {
2434 // If we find a function with the matching name.
2435 if (auto F = M.getFunction(Name)) {
2436 SmallVector<Instruction *, 4> ToRemoves;
2437
2438 // Walk the users of the function.
2439 for (auto &U : F->uses()) {
2440 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2441 auto Index = CI->getOperand(0);
2442 auto Ptr = CI->getOperand(1);
2443
2444 auto IntTy = Type::getInt32Ty(M.getContext());
2445 auto Int2Ty = VectorType::get(IntTy, 2);
2446 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2447 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2448
2449 auto IndexedPtr =
2450 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
2451 auto Load = new LoadInst(IndexedPtr, "", CI);
2452
2453 // Extract each element from the loaded int2.
2454 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2455 "", CI);
2456 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2457 "", CI);
2458
2459 // Our intrinsic to unpack a float2 from an int.
2460 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2461
2462 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2463
2464 // Get the lower (x & y) components of our final float4.
2465 auto Lo = CallInst::Create(NewF, X, "", CI);
2466
2467 // Get the higher (z & w) components of our final float4.
2468 auto Hi = CallInst::Create(NewF, Y, "", CI);
2469
2470 Constant *ShuffleMask[4] = {
2471 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2472 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2473
2474 // Combine our two float2's into one float4.
2475 auto Combine = new ShuffleVectorInst(
2476 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
2477
2478 CI->replaceAllUsesWith(Combine);
2479
2480 // Lastly, remember to remove the user.
2481 ToRemoves.push_back(CI);
2482 }
2483 }
2484
2485 Changed = true;
2486
2487 // And cleanup the calls we don't use anymore.
2488 for (auto V : ToRemoves) {
2489 V->eraseFromParent();
2490 }
2491
2492 // And remove the function we don't need either too.
2493 F->eraseFromParent();
2494 }
2495 }
2496
2497 return Changed;
2498}
2499
David Neto22f144c2017-06-12 14:26:21 -04002500bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
2501 bool Changed = false;
2502
2503 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
2504 "_Z15vstore_half_rtefjPU3AS1Dh",
2505 "_Z15vstore_half_rtzfjPU3AS1Dh"};
2506
2507 for (auto Name : Map) {
2508 // If we find a function with the matching name.
2509 if (auto F = M.getFunction(Name)) {
2510 SmallVector<Instruction *, 4> ToRemoves;
2511
2512 // Walk the users of the function.
2513 for (auto &U : F->uses()) {
2514 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2515 // The value to store.
2516 auto Arg0 = CI->getOperand(0);
2517
2518 // The index argument from vstore_half.
2519 auto Arg1 = CI->getOperand(1);
2520
2521 // The pointer argument from vstore_half.
2522 auto Arg2 = CI->getOperand(2);
2523
David Neto22f144c2017-06-12 14:26:21 -04002524 auto IntTy = Type::getInt32Ty(M.getContext());
2525 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002526 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04002527 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002528
2529 // Our intrinsic to pack a float2 to an int.
2530 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2531
2532 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2533
2534 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04002535 auto TempVec =
2536 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
2537 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002538
2539 // Pack the float2 -> half2 (in an int).
2540 auto X = CallInst::Create(NewF, TempVec, "", CI);
2541
David Neto482550a2018-03-24 05:21:07 -07002542 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04002543 auto ShortTy = Type::getInt16Ty(M.getContext());
2544 auto ShortPointerTy = PointerType::get(
2545 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002546
David Neto17852de2017-05-29 17:29:31 -04002547 // Truncate our i32 to an i16.
2548 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002549
David Neto17852de2017-05-29 17:29:31 -04002550 // Cast the half* pointer to short*.
2551 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002552
David Neto17852de2017-05-29 17:29:31 -04002553 // Index into the correct address of the casted pointer.
2554 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002555
David Neto17852de2017-05-29 17:29:31 -04002556 // Store to the int* we casted to.
2557 auto Store = new StoreInst(Trunc, Index, CI);
2558
2559 CI->replaceAllUsesWith(Store);
2560 } else {
2561 // We can only write to 32-bit aligned words.
2562 //
2563 // Assuming base is aligned to 32-bits, replace the equivalent of
2564 // vstore_half(value, index, base)
2565 // with:
2566 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2567 // uint32_t write_to_upper_half = index & 1u;
2568 // uint32_t shift = write_to_upper_half << 4;
2569 //
2570 // // Pack the float value as a half number in bottom 16 bits
2571 // // of an i32.
2572 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2573 //
2574 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2575 // ^ ((packed & 0xffff) << shift)
2576 // // We only need relaxed consistency, but OpenCL 1.2 only has
2577 // // sequentially consistent atomics.
2578 // // TODO(dneto): Use relaxed consistency.
2579 // atomic_xor(target_ptr, xor_value)
2580 auto IntPointerTy = PointerType::get(
2581 IntTy, Arg2->getType()->getPointerAddressSpace());
2582
2583 auto Four = ConstantInt::get(IntTy, 4);
2584 auto FFFF = ConstantInt::get(IntTy, 0xffff);
2585
2586 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
2587 // Compute index / 2
2588 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2589 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2590 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
2591 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2592 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
2593 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2594 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
2595
2596 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2597 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2598 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
2599
2600 // Generate the call to atomi_xor.
2601 SmallVector<Type *, 5> ParamTypes;
2602 // The pointer type.
2603 ParamTypes.push_back(IntPointerTy);
2604 // The Types for memory scope, semantics, and value.
2605 ParamTypes.push_back(IntTy);
2606 ParamTypes.push_back(IntTy);
2607 ParamTypes.push_back(IntTy);
2608 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2609 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
2610
2611 const auto ConstantScopeDevice =
2612 ConstantInt::get(IntTy, spv::ScopeDevice);
2613 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2614 // (SPIR-V Workgroup).
2615 const auto AddrSpaceSemanticsBits =
2616 IntPointerTy->getPointerAddressSpace() == 1
2617 ? spv::MemorySemanticsUniformMemoryMask
2618 : spv::MemorySemanticsWorkgroupMemoryMask;
2619
2620 // We're using relaxed consistency here.
2621 const auto ConstantMemorySemantics =
2622 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2623 AddrSpaceSemanticsBits);
2624
2625 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2626 ConstantMemorySemantics, ValueToXor};
2627 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2628 }
David Neto22f144c2017-06-12 14:26:21 -04002629
2630 // Lastly, remember to remove the user.
2631 ToRemoves.push_back(CI);
2632 }
2633 }
2634
2635 Changed = !ToRemoves.empty();
2636
2637 // And cleanup the calls we don't use anymore.
2638 for (auto V : ToRemoves) {
2639 V->eraseFromParent();
2640 }
2641
2642 // And remove the function we don't need either too.
2643 F->eraseFromParent();
2644 }
2645 }
2646
2647 return Changed;
2648}
2649
2650bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
2651 bool Changed = false;
2652
David Netoe2871522018-06-08 11:09:54 -07002653 const std::vector<const char *> Map = {
2654 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2655 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2656 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2657 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2658 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2659 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2660 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2661 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2662 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2663 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2664 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2665 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2666 };
David Neto22f144c2017-06-12 14:26:21 -04002667
2668 for (auto Name : Map) {
2669 // If we find a function with the matching name.
2670 if (auto F = M.getFunction(Name)) {
2671 SmallVector<Instruction *, 4> ToRemoves;
2672
2673 // Walk the users of the function.
2674 for (auto &U : F->uses()) {
2675 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2676 // The value to store.
2677 auto Arg0 = CI->getOperand(0);
2678
2679 // The index argument from vstore_half.
2680 auto Arg1 = CI->getOperand(1);
2681
2682 // The pointer argument from vstore_half.
2683 auto Arg2 = CI->getOperand(2);
2684
2685 auto IntTy = Type::getInt32Ty(M.getContext());
2686 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2687 auto NewPointerTy = PointerType::get(
2688 IntTy, Arg2->getType()->getPointerAddressSpace());
2689 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2690
2691 // Our intrinsic to pack a float2 to an int.
2692 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2693
2694 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2695
2696 // Turn the packed x & y into the final packing.
2697 auto X = CallInst::Create(NewF, Arg0, "", CI);
2698
2699 // Cast the half* pointer to int*.
2700 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2701
2702 // Index into the correct address of the casted pointer.
2703 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
2704
2705 // Store to the int* we casted to.
2706 auto Store = new StoreInst(X, Index, CI);
2707
2708 CI->replaceAllUsesWith(Store);
2709
2710 // Lastly, remember to remove the user.
2711 ToRemoves.push_back(CI);
2712 }
2713 }
2714
2715 Changed = !ToRemoves.empty();
2716
2717 // And cleanup the calls we don't use anymore.
2718 for (auto V : ToRemoves) {
2719 V->eraseFromParent();
2720 }
2721
2722 // And remove the function we don't need either too.
2723 F->eraseFromParent();
2724 }
2725 }
2726
2727 return Changed;
2728}
2729
2730bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
2731 bool Changed = false;
2732
David Netoe2871522018-06-08 11:09:54 -07002733 const std::vector<const char *> Map = {
2734 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2735 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2736 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2737 "_Z13vstorea_half4Dv4_fjPDh", // private
2738 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2739 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2740 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2741 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2742 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2743 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2744 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2745 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2746 };
David Neto22f144c2017-06-12 14:26:21 -04002747
2748 for (auto Name : Map) {
2749 // If we find a function with the matching name.
2750 if (auto F = M.getFunction(Name)) {
2751 SmallVector<Instruction *, 4> ToRemoves;
2752
2753 // Walk the users of the function.
2754 for (auto &U : F->uses()) {
2755 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2756 // The value to store.
2757 auto Arg0 = CI->getOperand(0);
2758
2759 // The index argument from vstore_half.
2760 auto Arg1 = CI->getOperand(1);
2761
2762 // The pointer argument from vstore_half.
2763 auto Arg2 = CI->getOperand(2);
2764
2765 auto IntTy = Type::getInt32Ty(M.getContext());
2766 auto Int2Ty = VectorType::get(IntTy, 2);
2767 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2768 auto NewPointerTy = PointerType::get(
2769 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2770 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2771
2772 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2773 ConstantInt::get(IntTy, 1)};
2774
2775 // Extract out the x & y components of our to store value.
2776 auto Lo =
2777 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2778 ConstantVector::get(LoShuffleMask), "", CI);
2779
2780 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2781 ConstantInt::get(IntTy, 3)};
2782
2783 // Extract out the z & w components of our to store value.
2784 auto Hi =
2785 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2786 ConstantVector::get(HiShuffleMask), "", CI);
2787
2788 // Our intrinsic to pack a float2 to an int.
2789 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2790
2791 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2792
2793 // Turn the packed x & y into the final component of our int2.
2794 auto X = CallInst::Create(NewF, Lo, "", CI);
2795
2796 // Turn the packed z & w into the final component of our int2.
2797 auto Y = CallInst::Create(NewF, Hi, "", CI);
2798
2799 auto Combine = InsertElementInst::Create(
2800 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2801 Combine = InsertElementInst::Create(
2802 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
2803
2804 // Cast the half* pointer to int2*.
2805 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2806
2807 // Index into the correct address of the casted pointer.
2808 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
2809
2810 // Store to the int2* we casted to.
2811 auto Store = new StoreInst(Combine, Index, CI);
2812
2813 CI->replaceAllUsesWith(Store);
2814
2815 // Lastly, remember to remove the user.
2816 ToRemoves.push_back(CI);
2817 }
2818 }
2819
2820 Changed = !ToRemoves.empty();
2821
2822 // And cleanup the calls we don't use anymore.
2823 for (auto V : ToRemoves) {
2824 V->eraseFromParent();
2825 }
2826
2827 // And remove the function we don't need either too.
2828 F->eraseFromParent();
2829 }
2830 }
2831
2832 return Changed;
2833}
2834
2835bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2836 bool Changed = false;
2837
2838 const std::map<const char *, const char*> Map = {
2839 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2840 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2841 };
2842
2843 for (auto Pair : Map) {
2844 // If we find a function with the matching name.
2845 if (auto F = M.getFunction(Pair.first)) {
2846 SmallVector<Instruction *, 4> ToRemoves;
2847
2848 // Walk the users of the function.
2849 for (auto &U : F->uses()) {
2850 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2851 // The image.
2852 auto Arg0 = CI->getOperand(0);
2853
2854 // The sampler.
2855 auto Arg1 = CI->getOperand(1);
2856
2857 // The coordinate (integer type that we can't handle).
2858 auto Arg2 = CI->getOperand(2);
2859
2860 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2861
2862 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2863
2864 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2865
2866 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2867
2868 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2869
2870 CI->replaceAllUsesWith(NewCI);
2871
2872 // Lastly, remember to remove the user.
2873 ToRemoves.push_back(CI);
2874 }
2875 }
2876
2877 Changed = !ToRemoves.empty();
2878
2879 // And cleanup the calls we don't use anymore.
2880 for (auto V : ToRemoves) {
2881 V->eraseFromParent();
2882 }
2883
2884 // And remove the function we don't need either too.
2885 F->eraseFromParent();
2886 }
2887 }
2888
2889 return Changed;
2890}
2891
2892bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2893 bool Changed = false;
2894
2895 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002896 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002897 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002898 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002899 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002900 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002901 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002902 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002903 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002904 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002905 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002906 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002907 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002908 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002909 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002910 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002911 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002912 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002913 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002914 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002915 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002916 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002917 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2918 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2919 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002920
2921 for (auto Pair : Map) {
2922 // If we find a function with the matching name.
2923 if (auto F = M.getFunction(Pair.first)) {
2924 SmallVector<Instruction *, 4> ToRemoves;
2925
2926 // Walk the users of the function.
2927 for (auto &U : F->uses()) {
2928 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2929 auto FType = F->getFunctionType();
2930 SmallVector<Type *, 5> ParamTypes;
2931
2932 // The pointer type.
2933 ParamTypes.push_back(FType->getParamType(0));
2934
2935 auto IntTy = Type::getInt32Ty(M.getContext());
2936
2937 // The memory scope type.
2938 ParamTypes.push_back(IntTy);
2939
2940 // The memory semantics type.
2941 ParamTypes.push_back(IntTy);
2942
2943 if (2 < CI->getNumArgOperands()) {
2944 // The unequal memory semantics type.
2945 ParamTypes.push_back(IntTy);
2946
2947 // The value type.
2948 ParamTypes.push_back(FType->getParamType(2));
2949
2950 // The comparator type.
2951 ParamTypes.push_back(FType->getParamType(1));
2952 } else if (1 < CI->getNumArgOperands()) {
2953 // The value type.
2954 ParamTypes.push_back(FType->getParamType(1));
2955 }
2956
2957 auto NewFType =
2958 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2959 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2960
2961 // We need to map the OpenCL constants to the SPIR-V equivalents.
2962 const auto ConstantScopeDevice =
2963 ConstantInt::get(IntTy, spv::ScopeDevice);
2964 const auto ConstantMemorySemantics = ConstantInt::get(
2965 IntTy, spv::MemorySemanticsUniformMemoryMask |
2966 spv::MemorySemanticsSequentiallyConsistentMask);
2967
2968 SmallVector<Value *, 5> Params;
2969
2970 // The pointer.
2971 Params.push_back(CI->getArgOperand(0));
2972
2973 // The memory scope.
2974 Params.push_back(ConstantScopeDevice);
2975
2976 // The memory semantics.
2977 Params.push_back(ConstantMemorySemantics);
2978
2979 if (2 < CI->getNumArgOperands()) {
2980 // The unequal memory semantics.
2981 Params.push_back(ConstantMemorySemantics);
2982
2983 // The value.
2984 Params.push_back(CI->getArgOperand(2));
2985
2986 // The comparator.
2987 Params.push_back(CI->getArgOperand(1));
2988 } else if (1 < CI->getNumArgOperands()) {
2989 // The value.
2990 Params.push_back(CI->getArgOperand(1));
2991 }
2992
2993 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2994
2995 CI->replaceAllUsesWith(NewCI);
2996
2997 // Lastly, remember to remove the user.
2998 ToRemoves.push_back(CI);
2999 }
3000 }
3001
3002 Changed = !ToRemoves.empty();
3003
3004 // And cleanup the calls we don't use anymore.
3005 for (auto V : ToRemoves) {
3006 V->eraseFromParent();
3007 }
3008
3009 // And remove the function we don't need either too.
3010 F->eraseFromParent();
3011 }
3012 }
3013
Neil Henning39672102017-09-29 14:33:13 +01003014 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003015 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003016 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003017 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003018 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003019 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003020 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003021 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003022 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003023 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003024 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003025 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003026 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003027 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003028 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003029 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003030 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003031 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003032 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003033 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003034 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003035 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003036 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003037 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003038 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003039 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003040 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003041 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003042 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003043 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003044 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003045 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003046 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003047 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003048 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003049 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003050 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003051 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003052 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003053 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003054 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003055 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003056 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003057 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003058 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003059 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003060 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003061 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003062 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003063 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003064 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003065 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003066 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003067 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003068 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003069 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003070 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003071 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003072 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003073 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003074 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003075 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003076 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3077 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3078 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003079
3080 for (auto Pair : Map2) {
3081 // If we find a function with the matching name.
3082 if (auto F = M.getFunction(Pair.first)) {
3083 SmallVector<Instruction *, 4> ToRemoves;
3084
3085 // Walk the users of the function.
3086 for (auto &U : F->uses()) {
3087 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3088 auto AtomicOp = new AtomicRMWInst(
3089 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3090 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3091
3092 CI->replaceAllUsesWith(AtomicOp);
3093
3094 // Lastly, remember to remove the user.
3095 ToRemoves.push_back(CI);
3096 }
3097 }
3098
3099 Changed = !ToRemoves.empty();
3100
3101 // And cleanup the calls we don't use anymore.
3102 for (auto V : ToRemoves) {
3103 V->eraseFromParent();
3104 }
3105
3106 // And remove the function we don't need either too.
3107 F->eraseFromParent();
3108 }
3109 }
3110
David Neto22f144c2017-06-12 14:26:21 -04003111 return Changed;
3112}
3113
3114bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
3115 bool Changed = false;
3116
3117 // If we find a function with the matching name.
3118 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
3119 SmallVector<Instruction *, 4> ToRemoves;
3120
3121 auto IntTy = Type::getInt32Ty(M.getContext());
3122 auto FloatTy = Type::getFloatTy(M.getContext());
3123
3124 Constant *DownShuffleMask[3] = {
3125 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3126 ConstantInt::get(IntTy, 2)};
3127
3128 Constant *UpShuffleMask[4] = {
3129 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3130 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3131
3132 Constant *FloatVec[3] = {
3133 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
3134 };
3135
3136 // Walk the users of the function.
3137 for (auto &U : F->uses()) {
3138 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3139 auto Vec4Ty = CI->getArgOperand(0)->getType();
3140 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3141 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
3142 auto Vec3Ty = Arg0->getType();
3143
3144 auto NewFType =
3145 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
3146
3147 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
3148
3149 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
3150
3151 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
3152
3153 CI->replaceAllUsesWith(Result);
3154
3155 // Lastly, remember to remove the user.
3156 ToRemoves.push_back(CI);
3157 }
3158 }
3159
3160 Changed = !ToRemoves.empty();
3161
3162 // And cleanup the calls we don't use anymore.
3163 for (auto V : ToRemoves) {
3164 V->eraseFromParent();
3165 }
3166
3167 // And remove the function we don't need either too.
3168 F->eraseFromParent();
3169 }
3170
3171 return Changed;
3172}
David Neto62653202017-10-16 19:05:18 -04003173
3174bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3175 bool Changed = false;
3176
3177 // OpenCL's float result = fract(float x, float* ptr)
3178 //
3179 // In the LLVM domain:
3180 //
3181 // %floor_result = call spir_func float @floor(float %x)
3182 // store float %floor_result, float * %ptr
3183 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3184 // %result = call spir_func float
3185 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3186 //
3187 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3188 // and clspv.fract occur in the SPIR-V generator pass:
3189 //
3190 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3191 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3192 // ...
3193 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3194 // OpStore %ptr %floor_result
3195 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3196 // %fract_result = OpExtInst %float
3197 // %glsl_ext Fmin %fract_intermediate %just_under_1
3198
3199
3200 using std::string;
3201
3202 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3203 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
3204 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
3205 auto make_quad = [](const char *a, const char *b, const char *c,
3206 const char *d) {
3207 return std::tuple<const char *, const char *, const char *, const char *>(
3208 a, b, c, d);
3209 };
3210 const std::vector<QuadType> Functions = {
3211 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
3212 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
3213 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
3214 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
3215 };
3216
3217 for (auto& quad : Functions) {
3218 const StringRef fract_name(std::get<0>(quad));
3219
3220 // If we find a function with the matching name.
3221 if (auto F = M.getFunction(fract_name)) {
3222 if (F->use_begin() == F->use_end())
3223 continue;
3224
3225 // We have some uses.
3226 Changed = true;
3227
3228 auto& Context = M.getContext();
3229
3230 const StringRef floor_name(std::get<1>(quad));
3231 const StringRef fmin_name(std::get<2>(quad));
3232 const StringRef clspv_fract_name(std::get<3>(quad));
3233
3234 // This is either float or a float vector. All the float-like
3235 // types are this type.
3236 auto result_ty = F->getReturnType();
3237
3238 Function* fmin_fn = M.getFunction(fmin_name);
3239 if (!fmin_fn) {
3240 // Make the fmin function.
3241 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003242 fmin_fn =
3243 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003244 fmin_fn->addFnAttr(Attribute::ReadNone);
3245 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3246 }
3247
3248 Function* floor_fn = M.getFunction(floor_name);
3249 if (!floor_fn) {
3250 // Make the floor function.
3251 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003252 floor_fn = cast<Function>(
3253 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003254 floor_fn->addFnAttr(Attribute::ReadNone);
3255 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3256 }
3257
3258 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
3259 if (!clspv_fract_fn) {
3260 // Make the clspv_fract function.
3261 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003262 clspv_fract_fn = cast<Function>(
3263 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003264 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3265 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3266 }
3267
3268 // Number of significant significand bits, whether represented or not.
3269 unsigned num_significand_bits;
3270 switch (result_ty->getScalarType()->getTypeID()) {
3271 case Type::HalfTyID:
3272 num_significand_bits = 11;
3273 break;
3274 case Type::FloatTyID:
3275 num_significand_bits = 24;
3276 break;
3277 case Type::DoubleTyID:
3278 num_significand_bits = 53;
3279 break;
3280 default:
3281 assert(false && "Unhandled float type when processing fract builtin");
3282 break;
3283 }
3284 // Beware that the disassembler displays this value as
3285 // OpConstant %float 1
3286 // which is not quite right.
3287 const double kJustUnderOneScalar =
3288 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3289
3290 Constant *just_under_one =
3291 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3292 if (result_ty->isVectorTy()) {
3293 just_under_one = ConstantVector::getSplat(
3294 result_ty->getVectorNumElements(), just_under_one);
3295 }
3296
3297 IRBuilder<> Builder(Context);
3298
3299 SmallVector<Instruction *, 4> ToRemoves;
3300
3301 // Walk the users of the function.
3302 for (auto &U : F->uses()) {
3303 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3304
3305 Builder.SetInsertPoint(CI);
3306 auto arg = CI->getArgOperand(0);
3307 auto ptr = CI->getArgOperand(1);
3308
3309 // Compute floor result and store it.
3310 auto floor = Builder.CreateCall(floor_fn, {arg});
3311 Builder.CreateStore(floor, ptr);
3312
3313 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3314 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3315
3316 CI->replaceAllUsesWith(fract_result);
3317
3318 // Lastly, remember to remove the user.
3319 ToRemoves.push_back(CI);
3320 }
3321 }
3322
3323 // And cleanup the calls we don't use anymore.
3324 for (auto V : ToRemoves) {
3325 V->eraseFromParent();
3326 }
3327
3328 // And remove the function we don't need either too.
3329 F->eraseFromParent();
3330 }
3331 }
3332
3333 return Changed;
3334}