blob: 88b35892b33bccf58619e44d6a90ed3e54c74372 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include <llvm/IR/Constants.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/Module.h>
18#include <llvm/Pass.h>
19#include <llvm/Support/raw_ostream.h>
20#include <llvm/Transforms/Utils/Cloning.h>
21
22#include <spirv/1.0/spirv.hpp>
23
24using namespace llvm;
25
26#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
27
28namespace {
// Returns the zero-based position of the most significant set bit of |v|,
// i.e. floor(log2(v)). Both 0 and 1 yield 0.
//
// Note that, despite its name, this is not a count of leading zeros. Callers
// in this file subtract two results to obtain the shift distance between two
// single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t position = 0;

  // Each iteration discards the lowest bit; the number of shifts needed
  // before the value becomes zero is the index of the highest set bit.
  while ((v >>= 1) != 0) {
    ++position;
  }

  return position;
}
48
49Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
50 if (1 == elements) {
51 return Type::getInt1Ty(C);
52 } else {
53 return VectorType::get(Type::getInt1Ty(C), elements);
54 }
55}
56
57struct ReplaceOpenCLBuiltinPass final : public ModulePass {
58 static char ID;
59 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
60
61 bool runOnModule(Module &M) override;
62 bool replaceRecip(Module &M);
63 bool replaceDivide(Module &M);
64 bool replaceExp10(Module &M);
65 bool replaceLog10(Module &M);
66 bool replaceBarrier(Module &M);
67 bool replaceMemFence(Module &M);
68 bool replaceRelational(Module &M);
69 bool replaceIsInfAndIsNan(Module &M);
70 bool replaceAllAndAny(Module &M);
71 bool replaceSignbit(Module &M);
72 bool replaceMadandMad24andMul24(Module &M);
73 bool replaceVloadHalf(Module &M);
74 bool replaceVloadHalf2(Module &M);
75 bool replaceVloadHalf4(Module &M);
76 bool replaceVstoreHalf(Module &M);
77 bool replaceVstoreHalf2(Module &M);
78 bool replaceVstoreHalf4(Module &M);
79 bool replaceReadImageF(Module &M);
80 bool replaceAtomics(Module &M);
81 bool replaceCross(Module &M);
82};
83}
84
85char ReplaceOpenCLBuiltinPass::ID = 0;
86static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
87 "Replace OpenCL Builtins Pass");
88
89namespace clspv {
90ModulePass *createReplaceOpenCLBuiltinPass() {
91 return new ReplaceOpenCLBuiltinPass();
92}
93}
94
95bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
96 bool Changed = false;
97
98 Changed |= replaceRecip(M);
99 Changed |= replaceDivide(M);
100 Changed |= replaceExp10(M);
101 Changed |= replaceLog10(M);
102 Changed |= replaceBarrier(M);
103 Changed |= replaceMemFence(M);
104 Changed |= replaceRelational(M);
105 Changed |= replaceIsInfAndIsNan(M);
106 Changed |= replaceAllAndAny(M);
107 Changed |= replaceSignbit(M);
108 Changed |= replaceMadandMad24andMul24(M);
109 Changed |= replaceVloadHalf(M);
110 Changed |= replaceVloadHalf2(M);
111 Changed |= replaceVloadHalf4(M);
112 Changed |= replaceVstoreHalf(M);
113 Changed |= replaceVstoreHalf2(M);
114 Changed |= replaceVstoreHalf4(M);
115 Changed |= replaceReadImageF(M);
116 Changed |= replaceAtomics(M);
117 Changed |= replaceCross(M);
118
119 return Changed;
120}
121
122bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
123 bool Changed = false;
124
125 const char *Names[] = {
126 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
127 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
128 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
129 };
130
131 for (auto Name : Names) {
132 // If we find a function with the matching name.
133 if (auto F = M.getFunction(Name)) {
134 SmallVector<Instruction *, 4> ToRemoves;
135
136 // Walk the users of the function.
137 for (auto &U : F->uses()) {
138 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
139 // Recip has one arg.
140 auto Arg = CI->getOperand(0);
141
142 auto Div = BinaryOperator::Create(
143 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
144 CI);
145
146 CI->replaceAllUsesWith(Div);
147
148 // Lastly, remember to remove the user.
149 ToRemoves.push_back(CI);
150 }
151 }
152
153 Changed = !ToRemoves.empty();
154
155 // And cleanup the calls we don't use anymore.
156 for (auto V : ToRemoves) {
157 V->eraseFromParent();
158 }
159
160 // And remove the function we don't need either too.
161 F->eraseFromParent();
162 }
163 }
164
165 return Changed;
166}
167
168bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
169 bool Changed = false;
170
171 const char *Names[] = {
172 "_Z11half_divideff", "_Z13native_divideff",
173 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
174 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
175 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
176 };
177
178 for (auto Name : Names) {
179 // If we find a function with the matching name.
180 if (auto F = M.getFunction(Name)) {
181 SmallVector<Instruction *, 4> ToRemoves;
182
183 // Walk the users of the function.
184 for (auto &U : F->uses()) {
185 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
186 auto Div = BinaryOperator::Create(
187 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
188
189 CI->replaceAllUsesWith(Div);
190
191 // Lastly, remember to remove the user.
192 ToRemoves.push_back(CI);
193 }
194 }
195
196 Changed = !ToRemoves.empty();
197
198 // And cleanup the calls we don't use anymore.
199 for (auto V : ToRemoves) {
200 V->eraseFromParent();
201 }
202
203 // And remove the function we don't need either too.
204 F->eraseFromParent();
205 }
206 }
207
208 return Changed;
209}
210
211bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
212 bool Changed = false;
213
214 const std::map<const char *, const char *> Map = {
215 {"_Z5exp10f", "_Z3expf"},
216 {"_Z10half_exp10f", "_Z8half_expf"},
217 {"_Z12native_exp10f", "_Z10native_expf"},
218 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
219 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
220 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
221 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
222 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
223 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
224 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
225 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
226 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
227
228 for (auto Pair : Map) {
229 // If we find a function with the matching name.
230 if (auto F = M.getFunction(Pair.first)) {
231 SmallVector<Instruction *, 4> ToRemoves;
232
233 // Walk the users of the function.
234 for (auto &U : F->uses()) {
235 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
236 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
237
238 auto Arg = CI->getOperand(0);
239
240 // Constant of the natural log of 10 (ln(10)).
241 const double Ln10 =
242 2.302585092994045684017991454684364207601101488628772976033;
243
244 auto Mul = BinaryOperator::Create(
245 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
246 CI);
247
248 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
249
250 CI->replaceAllUsesWith(NewCI);
251
252 // Lastly, remember to remove the user.
253 ToRemoves.push_back(CI);
254 }
255 }
256
257 Changed = !ToRemoves.empty();
258
259 // And cleanup the calls we don't use anymore.
260 for (auto V : ToRemoves) {
261 V->eraseFromParent();
262 }
263
264 // And remove the function we don't need either too.
265 F->eraseFromParent();
266 }
267 }
268
269 return Changed;
270}
271
272bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
273 bool Changed = false;
274
275 const std::map<const char *, const char *> Map = {
276 {"_Z5log10f", "_Z3logf"},
277 {"_Z10half_log10f", "_Z8half_logf"},
278 {"_Z12native_log10f", "_Z10native_logf"},
279 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
280 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
281 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
282 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
283 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
284 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
285 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
286 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
287 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
288
289 for (auto Pair : Map) {
290 // If we find a function with the matching name.
291 if (auto F = M.getFunction(Pair.first)) {
292 SmallVector<Instruction *, 4> ToRemoves;
293
294 // Walk the users of the function.
295 for (auto &U : F->uses()) {
296 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
297 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
298
299 auto Arg = CI->getOperand(0);
300
301 // Constant of the reciprocal of the natural log of 10 (ln(10)).
302 const double Ln10 =
303 0.434294481903251827651128918916605082294397005803666566114;
304
305 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
306
307 auto Mul = BinaryOperator::Create(
308 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
309 "", CI);
310
311 CI->replaceAllUsesWith(Mul);
312
313 // Lastly, remember to remove the user.
314 ToRemoves.push_back(CI);
315 }
316 }
317
318 Changed = !ToRemoves.empty();
319
320 // And cleanup the calls we don't use anymore.
321 for (auto V : ToRemoves) {
322 V->eraseFromParent();
323 }
324
325 // And remove the function we don't need either too.
326 F->eraseFromParent();
327 }
328 }
329
330 return Changed;
331}
332
333bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
334 bool Changed = false;
335
336 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
337
338 const std::map<const char *, const char *> Map = {
339 {"_Z7barrierj", "__spirv_control_barrier"}};
340
341 for (auto Pair : Map) {
342 // If we find a function with the matching name.
343 if (auto F = M.getFunction(Pair.first)) {
344 SmallVector<Instruction *, 4> ToRemoves;
345
346 // Walk the users of the function.
347 for (auto &U : F->uses()) {
348 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
349 auto FType = F->getFunctionType();
350 SmallVector<Type *, 3> Params;
351 for (unsigned i = 0; i < 3; i++) {
352 Params.push_back(FType->getParamType(0));
353 }
354 auto NewFType =
355 FunctionType::get(FType->getReturnType(), Params, false);
356 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
357
358 auto Arg = CI->getOperand(0);
359
360 // We need to map the OpenCL constants to the SPIR-V equivalents.
361 const auto LocalMemFence =
362 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
363 const auto GlobalMemFence =
364 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
365 const auto ConstantSequentiallyConsistent = ConstantInt::get(
366 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
367 const auto ConstantScopeDevice =
368 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
369 const auto ConstantScopeWorkgroup =
370 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
371
372 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
373 const auto LocalMemFenceMask = BinaryOperator::Create(
374 Instruction::And, LocalMemFence, Arg, "", CI);
375 const auto WorkgroupShiftAmount =
376 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
377 clz(CLK_LOCAL_MEM_FENCE);
378 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
379 Instruction::Shl, LocalMemFenceMask,
380 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
381
382 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
383 const auto GlobalMemFenceMask = BinaryOperator::Create(
384 Instruction::And, GlobalMemFence, Arg, "", CI);
385 const auto UniformShiftAmount =
386 clz(spv::MemorySemanticsUniformMemoryMask) -
387 clz(CLK_GLOBAL_MEM_FENCE);
388 const auto MemorySemanticsUniform = BinaryOperator::Create(
389 Instruction::Shl, GlobalMemFenceMask,
390 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
391
392 // And combine the above together, also adding in
393 // MemorySemanticsSequentiallyConsistentMask.
394 auto MemorySemantics =
395 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
396 ConstantSequentiallyConsistent, "", CI);
397 MemorySemantics = BinaryOperator::Create(
398 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
399
400 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
401 // Device Scope, otherwise Workgroup Scope.
402 const auto Cmp =
403 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
404 GlobalMemFenceMask, GlobalMemFence, "", CI);
405 const auto MemoryScope = SelectInst::Create(
406 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
407
408 // Lastly, the Execution Scope is always Workgroup Scope.
409 const auto ExecutionScope = ConstantScopeWorkgroup;
410
411 auto NewCI = CallInst::Create(
412 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
413
414 CI->replaceAllUsesWith(NewCI);
415
416 // Lastly, remember to remove the user.
417 ToRemoves.push_back(CI);
418 }
419 }
420
421 Changed = !ToRemoves.empty();
422
423 // And cleanup the calls we don't use anymore.
424 for (auto V : ToRemoves) {
425 V->eraseFromParent();
426 }
427
428 // And remove the function we don't need either too.
429 F->eraseFromParent();
430 }
431 }
432
433 return Changed;
434}
435
436bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
437 bool Changed = false;
438
439 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
440
441 const std::map<const char *, const char *> Map = {
442 {"_Z9mem_fencej", "__spirv_memory_barrier"},
443 {"_Z14read_mem_fencej", "__spirv_memory_barrier"},
444 {"_Z15write_mem_fencej", "__spirv_memory_barrier"}};
445
446 std::map<const char *, uint32_t> MemorySemanticsMap = {
447 {"_Z9mem_fencej", spv::MemorySemanticsSequentiallyConsistentMask},
448 {"_Z14read_mem_fencej", spv::MemorySemanticsAcquireMask},
449 {"_Z15write_mem_fencej", spv::MemorySemanticsReleaseMask}};
450
451 for (auto Pair : Map) {
452 // If we find a function with the matching name.
453 if (auto F = M.getFunction(Pair.first)) {
454 SmallVector<Instruction *, 4> ToRemoves;
455
456 // Walk the users of the function.
457 for (auto &U : F->uses()) {
458 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
459 auto FType = F->getFunctionType();
460 SmallVector<Type *, 2> Params;
461 for (unsigned i = 0; i < 2; i++) {
462 Params.push_back(FType->getParamType(0));
463 }
464 auto NewFType =
465 FunctionType::get(FType->getReturnType(), Params, false);
466 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
467
468 auto Arg = CI->getOperand(0);
469
470 // We need to map the OpenCL constants to the SPIR-V equivalents.
471 const auto LocalMemFence =
472 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
473 const auto GlobalMemFence =
474 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
475 const auto ConstantMemorySemantics =
476 ConstantInt::get(Arg->getType(), MemorySemanticsMap[Pair.first]);
477 const auto ConstantScopeDevice =
478 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
479
480 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
481 const auto LocalMemFenceMask = BinaryOperator::Create(
482 Instruction::And, LocalMemFence, Arg, "", CI);
483 const auto WorkgroupShiftAmount =
484 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
485 clz(CLK_LOCAL_MEM_FENCE);
486 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
487 Instruction::Shl, LocalMemFenceMask,
488 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
489
490 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
491 const auto GlobalMemFenceMask = BinaryOperator::Create(
492 Instruction::And, GlobalMemFence, Arg, "", CI);
493 const auto UniformShiftAmount =
494 clz(spv::MemorySemanticsUniformMemoryMask) -
495 clz(CLK_GLOBAL_MEM_FENCE);
496 const auto MemorySemanticsUniform = BinaryOperator::Create(
497 Instruction::Shl, GlobalMemFenceMask,
498 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
499
500 // And combine the above together, also adding in
501 // MemorySemanticsSequentiallyConsistentMask.
502 auto MemorySemantics =
503 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
504 ConstantMemorySemantics, "", CI);
505 MemorySemantics = BinaryOperator::Create(
506 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
507
508 // Memory Scope is always device.
509 const auto MemoryScope = ConstantScopeDevice;
510
511 auto NewCI =
512 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
513
514 CI->replaceAllUsesWith(NewCI);
515
516 // Lastly, remember to remove the user.
517 ToRemoves.push_back(CI);
518 }
519 }
520
521 Changed = !ToRemoves.empty();
522
523 // And cleanup the calls we don't use anymore.
524 for (auto V : ToRemoves) {
525 V->eraseFromParent();
526 }
527
528 // And remove the function we don't need either too.
529 F->eraseFromParent();
530 }
531 }
532
533 return Changed;
534}
535
536bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
537 bool Changed = false;
538
539 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
540 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
541 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
542 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
543 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
544 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
545 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
546 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
547 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
548 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
549 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
550 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
551 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
552 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
553 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
554 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
555 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
556 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
557 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
558 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
559 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
560 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
561 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
562 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
563 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
564 };
565
566 for (auto Pair : Map) {
567 // If we find a function with the matching name.
568 if (auto F = M.getFunction(Pair.first)) {
569 SmallVector<Instruction *, 4> ToRemoves;
570
571 // Walk the users of the function.
572 for (auto &U : F->uses()) {
573 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
574 // The predicate to use in the CmpInst.
575 auto Predicate = Pair.second.first;
576
577 // The value to return for true.
578 auto TrueValue =
579 ConstantInt::getSigned(CI->getType(), Pair.second.second);
580
581 // The value to return for false.
582 auto FalseValue = Constant::getNullValue(CI->getType());
583
584 auto Arg1 = CI->getOperand(0);
585 auto Arg2 = CI->getOperand(1);
586
587 const auto Cmp =
588 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
589
590 const auto Select =
591 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
592
593 CI->replaceAllUsesWith(Select);
594
595 // Lastly, remember to remove the user.
596 ToRemoves.push_back(CI);
597 }
598 }
599
600 Changed = !ToRemoves.empty();
601
602 // And cleanup the calls we don't use anymore.
603 for (auto V : ToRemoves) {
604 V->eraseFromParent();
605 }
606
607 // And remove the function we don't need either too.
608 F->eraseFromParent();
609 }
610 }
611
612 return Changed;
613}
614
615bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
616 bool Changed = false;
617
618 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
619 {"_Z5isinff", {"__spirv_isinff", 1}},
620 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
621 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
622 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
623 {"_Z5isnanf", {"__spirv_isnanf", 1}},
624 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
625 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
626 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
627 };
628
629 for (auto Pair : Map) {
630 // If we find a function with the matching name.
631 if (auto F = M.getFunction(Pair.first)) {
632 SmallVector<Instruction *, 4> ToRemoves;
633
634 // Walk the users of the function.
635 for (auto &U : F->uses()) {
636 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
637 const auto CITy = CI->getType();
638
639 // The fake SPIR-V intrinsic to generate.
640 auto SPIRVIntrinsic = Pair.second.first;
641
642 // The value to return for true.
643 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
644
645 // The value to return for false.
646 auto FalseValue = Constant::getNullValue(CITy);
647
648 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
649 M.getContext(),
650 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
651
652 auto NewFType =
653 FunctionType::get(CorrespondingBoolTy,
654 F->getFunctionType()->getParamType(0), false);
655
656 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
657
658 auto Arg = CI->getOperand(0);
659
660 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
661
662 const auto Select =
663 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
664
665 CI->replaceAllUsesWith(Select);
666
667 // Lastly, remember to remove the user.
668 ToRemoves.push_back(CI);
669 }
670 }
671
672 Changed = !ToRemoves.empty();
673
674 // And cleanup the calls we don't use anymore.
675 for (auto V : ToRemoves) {
676 V->eraseFromParent();
677 }
678
679 // And remove the function we don't need either too.
680 F->eraseFromParent();
681 }
682 }
683
684 return Changed;
685}
686
687bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
688 bool Changed = false;
689
690 const std::map<const char *, const char *> Map = {
691 {"_Z3alli", ""},
692 {"_Z3allDv2_i", "__spirv_allDv2_i"},
693 {"_Z3allDv3_i", "__spirv_allDv3_i"},
694 {"_Z3allDv4_i", "__spirv_allDv4_i"},
695 {"_Z3anyi", ""},
696 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
697 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
698 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
699 };
700
701 for (auto Pair : Map) {
702 // If we find a function with the matching name.
703 if (auto F = M.getFunction(Pair.first)) {
704 SmallVector<Instruction *, 4> ToRemoves;
705
706 // Walk the users of the function.
707 for (auto &U : F->uses()) {
708 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
709 // The fake SPIR-V intrinsic to generate.
710 auto SPIRVIntrinsic = Pair.second;
711
712 auto Arg = CI->getOperand(0);
713
714 Value *V;
715
716 // If we have a function to call, call it!
717 if (0 < strlen(SPIRVIntrinsic)) {
718 // The value for zero to compare against.
719 const auto ZeroValue = Constant::getNullValue(Arg->getType());
720
721 const auto Cmp = CmpInst::Create(
722 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
723 const auto NewFType = FunctionType::get(
724 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
725
726 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
727
728 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
729
730 // The value to return for true.
731 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
732
733 // The value to return for false.
734 const auto FalseValue = Constant::getNullValue(CI->getType());
735
736 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
737 } else {
738 V = BinaryOperator::Create(Instruction::LShr, Arg,
739 ConstantInt::get(CI->getType(), 31), "",
740 CI);
741 }
742
743 CI->replaceAllUsesWith(V);
744
745 // Lastly, remember to remove the user.
746 ToRemoves.push_back(CI);
747 }
748 }
749
750 Changed = !ToRemoves.empty();
751
752 // And cleanup the calls we don't use anymore.
753 for (auto V : ToRemoves) {
754 V->eraseFromParent();
755 }
756
757 // And remove the function we don't need either too.
758 F->eraseFromParent();
759 }
760 }
761
762 return Changed;
763}
764
765bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
766 bool Changed = false;
767
768 const std::map<const char *, Instruction::BinaryOps> Map = {
769 {"_Z7signbitf", Instruction::LShr},
770 {"_Z7signbitDv2_f", Instruction::AShr},
771 {"_Z7signbitDv3_f", Instruction::AShr},
772 {"_Z7signbitDv4_f", Instruction::AShr},
773 };
774
775 for (auto Pair : Map) {
776 // If we find a function with the matching name.
777 if (auto F = M.getFunction(Pair.first)) {
778 SmallVector<Instruction *, 4> ToRemoves;
779
780 // Walk the users of the function.
781 for (auto &U : F->uses()) {
782 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
783 auto Arg = CI->getOperand(0);
784
785 auto Bitcast =
786 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
787
788 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
789 ConstantInt::get(CI->getType(), 31),
790 "", CI);
791
792 CI->replaceAllUsesWith(Shr);
793
794 // Lastly, remember to remove the user.
795 ToRemoves.push_back(CI);
796 }
797 }
798
799 Changed = !ToRemoves.empty();
800
801 // And cleanup the calls we don't use anymore.
802 for (auto V : ToRemoves) {
803 V->eraseFromParent();
804 }
805
806 // And remove the function we don't need either too.
807 F->eraseFromParent();
808 }
809 }
810
811 return Changed;
812}
813
814bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
815 bool Changed = false;
816
817 const std::map<const char *,
818 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
819 Map = {
820 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
821 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
822 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
823 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
824 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
825 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
826 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
827 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
828 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
829 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
830 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
831 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
832 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
833 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
834 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
835 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
836 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
837 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
838 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
839 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
840 };
841
842 for (auto Pair : Map) {
843 // If we find a function with the matching name.
844 if (auto F = M.getFunction(Pair.first)) {
845 SmallVector<Instruction *, 4> ToRemoves;
846
847 // Walk the users of the function.
848 for (auto &U : F->uses()) {
849 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
850 // The multiply instruction to use.
851 auto MulInst = Pair.second.first;
852
853 // The add instruction to use.
854 auto AddInst = Pair.second.second;
855
856 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
857
858 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
859 CI->getArgOperand(1), "", CI);
860
861 if (Instruction::BinaryOpsEnd != AddInst) {
862 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
863 CI);
864 }
865
866 CI->replaceAllUsesWith(I);
867
868 // Lastly, remember to remove the user.
869 ToRemoves.push_back(CI);
870 }
871 }
872
873 Changed = !ToRemoves.empty();
874
875 // And cleanup the calls we don't use anymore.
876 for (auto V : ToRemoves) {
877 V->eraseFromParent();
878 }
879
880 // And remove the function we don't need either too.
881 F->eraseFromParent();
882 }
883 }
884
885 return Changed;
886}
887
888bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
889 bool Changed = false;
890
891 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
892 "_Z10vload_halfjPU3AS2KDh"};
893
894 for (auto Name : Map) {
895 // If we find a function with the matching name.
896 if (auto F = M.getFunction(Name)) {
897 SmallVector<Instruction *, 4> ToRemoves;
898
899 // Walk the users of the function.
900 for (auto &U : F->uses()) {
901 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
902 // The index argument from vload_half.
903 auto Arg0 = CI->getOperand(0);
904
905 // The pointer argument from vload_half.
906 auto Arg1 = CI->getOperand(1);
907
908 auto ShortTy = Type::getInt16Ty(M.getContext());
909 auto IntTy = Type::getInt32Ty(M.getContext());
910 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
911 auto NewPointerTy = PointerType::get(
912 ShortTy, Arg1->getType()->getPointerAddressSpace());
913 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
914
915 // Cast the half* pointer to short*.
916 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
917
918 // Index into the correct address of the casted pointer.
919 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
920
921 // Load from the short* we casted to.
922 auto Load = new LoadInst(Index, "", CI);
923
924 // ZExt the short -> int.
925 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
926
927 // Our intrinsic to unpack a float2 from an int.
928 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
929
930 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
931
932 // Get our float2.
933 auto Call = CallInst::Create(NewF, ZExt, "", CI);
934
935 // Extract out the bottom element which is our float result.
936 auto Extract = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
937
938 CI->replaceAllUsesWith(Extract);
939
940 // Lastly, remember to remove the user.
941 ToRemoves.push_back(CI);
942 }
943 }
944
945 Changed = !ToRemoves.empty();
946
947 // And cleanup the calls we don't use anymore.
948 for (auto V : ToRemoves) {
949 V->eraseFromParent();
950 }
951
952 // And remove the function we don't need either too.
953 F->eraseFromParent();
954 }
955 }
956
957 return Changed;
958}
959
960bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
961 bool Changed = false;
962
963 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
964 "_Z11vload_half2jPU3AS2KDh"};
965
966 for (auto Name : Map) {
967 // If we find a function with the matching name.
968 if (auto F = M.getFunction(Name)) {
969 SmallVector<Instruction *, 4> ToRemoves;
970
971 // Walk the users of the function.
972 for (auto &U : F->uses()) {
973 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
974 // The index argument from vload_half.
975 auto Arg0 = CI->getOperand(0);
976
977 // The pointer argument from vload_half.
978 auto Arg1 = CI->getOperand(1);
979
980 auto IntTy = Type::getInt32Ty(M.getContext());
981 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
982 auto NewPointerTy = PointerType::get(
983 IntTy, Arg1->getType()->getPointerAddressSpace());
984 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
985
986 // Cast the half* pointer to int*.
987 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
988
989 // Index into the correct address of the casted pointer.
990 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
991
992 // Load from the int* we casted to.
993 auto Load = new LoadInst(Index, "", CI);
994
995 // Our intrinsic to unpack a float2 from an int.
996 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
997
998 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
999
1000 // Get our float2.
1001 auto Call = CallInst::Create(NewF, Load, "", CI);
1002
1003 CI->replaceAllUsesWith(Call);
1004
1005 // Lastly, remember to remove the user.
1006 ToRemoves.push_back(CI);
1007 }
1008 }
1009
1010 Changed = !ToRemoves.empty();
1011
1012 // And cleanup the calls we don't use anymore.
1013 for (auto V : ToRemoves) {
1014 V->eraseFromParent();
1015 }
1016
1017 // And remove the function we don't need either too.
1018 F->eraseFromParent();
1019 }
1020 }
1021
1022 return Changed;
1023}
1024
1025bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1026 bool Changed = false;
1027
1028 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1029 "_Z11vload_half4jPU3AS2KDh"};
1030
1031 for (auto Name : Map) {
1032 // If we find a function with the matching name.
1033 if (auto F = M.getFunction(Name)) {
1034 SmallVector<Instruction *, 4> ToRemoves;
1035
1036 // Walk the users of the function.
1037 for (auto &U : F->uses()) {
1038 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1039 // The index argument from vload_half.
1040 auto Arg0 = CI->getOperand(0);
1041
1042 // The pointer argument from vload_half.
1043 auto Arg1 = CI->getOperand(1);
1044
1045 auto IntTy = Type::getInt32Ty(M.getContext());
1046 auto Int2Ty = VectorType::get(IntTy, 2);
1047 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1048 auto NewPointerTy = PointerType::get(
1049 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1050 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1051
1052 // Cast the half* pointer to int2*.
1053 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1054
1055 // Index into the correct address of the casted pointer.
1056 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1057
1058 // Load from the int2* we casted to.
1059 auto Load = new LoadInst(Index, "", CI);
1060
1061 // Extract each element from the loaded int2.
1062 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1063 "", CI);
1064 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1065 "", CI);
1066
1067 // Our intrinsic to unpack a float2 from an int.
1068 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1069
1070 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1071
1072 // Get the lower (x & y) components of our final float4.
1073 auto Lo = CallInst::Create(NewF, X, "", CI);
1074
1075 // Get the higher (z & w) components of our final float4.
1076 auto Hi = CallInst::Create(NewF, Y, "", CI);
1077
1078 Constant *ShuffleMask[4] = {
1079 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1080 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1081
1082 // Combine our two float2's into one float4.
1083 auto Combine = new ShuffleVectorInst(
1084 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1085
1086 CI->replaceAllUsesWith(Combine);
1087
1088 // Lastly, remember to remove the user.
1089 ToRemoves.push_back(CI);
1090 }
1091 }
1092
1093 Changed = !ToRemoves.empty();
1094
1095 // And cleanup the calls we don't use anymore.
1096 for (auto V : ToRemoves) {
1097 V->eraseFromParent();
1098 }
1099
1100 // And remove the function we don't need either too.
1101 F->eraseFromParent();
1102 }
1103 }
1104
1105 return Changed;
1106}
1107
1108bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1109 bool Changed = false;
1110
1111 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1112 "_Z15vstore_half_rtefjPU3AS1Dh",
1113 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1114
1115 for (auto Name : Map) {
1116 // If we find a function with the matching name.
1117 if (auto F = M.getFunction(Name)) {
1118 SmallVector<Instruction *, 4> ToRemoves;
1119
1120 // Walk the users of the function.
1121 for (auto &U : F->uses()) {
1122 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1123 // The value to store.
1124 auto Arg0 = CI->getOperand(0);
1125
1126 // The index argument from vstore_half.
1127 auto Arg1 = CI->getOperand(1);
1128
1129 // The pointer argument from vstore_half.
1130 auto Arg2 = CI->getOperand(2);
1131
1132 auto ShortTy = Type::getInt16Ty(M.getContext());
1133 auto IntTy = Type::getInt32Ty(M.getContext());
1134 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1135 auto NewPointerTy = PointerType::get(
1136 ShortTy, Arg2->getType()->getPointerAddressSpace());
1137 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1138
1139 // Our intrinsic to pack a float2 to an int.
1140 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1141
1142 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1143
1144 // Insert our value into a float2 so that we can pack it.
1145 auto TempVec = InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
1146
1147 // Pack the float2 -> half2 (in an int).
1148 auto X = CallInst::Create(NewF, TempVec, "", CI);
1149
1150 // Truncate our i32 to an i16.
1151 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
1152
1153 // Cast the half* pointer to short*.
1154 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1155
1156 // Index into the correct address of the casted pointer.
1157 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
1158
1159 // Store to the int* we casted to.
1160 auto Store = new StoreInst(Trunc, Index, CI);
1161
1162 CI->replaceAllUsesWith(Store);
1163
1164 // Lastly, remember to remove the user.
1165 ToRemoves.push_back(CI);
1166 }
1167 }
1168
1169 Changed = !ToRemoves.empty();
1170
1171 // And cleanup the calls we don't use anymore.
1172 for (auto V : ToRemoves) {
1173 V->eraseFromParent();
1174 }
1175
1176 // And remove the function we don't need either too.
1177 F->eraseFromParent();
1178 }
1179 }
1180
1181 return Changed;
1182}
1183
1184bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1185 bool Changed = false;
1186
1187 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1188 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1189 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1190
1191 for (auto Name : Map) {
1192 // If we find a function with the matching name.
1193 if (auto F = M.getFunction(Name)) {
1194 SmallVector<Instruction *, 4> ToRemoves;
1195
1196 // Walk the users of the function.
1197 for (auto &U : F->uses()) {
1198 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1199 // The value to store.
1200 auto Arg0 = CI->getOperand(0);
1201
1202 // The index argument from vstore_half.
1203 auto Arg1 = CI->getOperand(1);
1204
1205 // The pointer argument from vstore_half.
1206 auto Arg2 = CI->getOperand(2);
1207
1208 auto IntTy = Type::getInt32Ty(M.getContext());
1209 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1210 auto NewPointerTy = PointerType::get(
1211 IntTy, Arg2->getType()->getPointerAddressSpace());
1212 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1213
1214 // Our intrinsic to pack a float2 to an int.
1215 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1216
1217 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1218
1219 // Turn the packed x & y into the final packing.
1220 auto X = CallInst::Create(NewF, Arg0, "", CI);
1221
1222 // Cast the half* pointer to int*.
1223 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1224
1225 // Index into the correct address of the casted pointer.
1226 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1227
1228 // Store to the int* we casted to.
1229 auto Store = new StoreInst(X, Index, CI);
1230
1231 CI->replaceAllUsesWith(Store);
1232
1233 // Lastly, remember to remove the user.
1234 ToRemoves.push_back(CI);
1235 }
1236 }
1237
1238 Changed = !ToRemoves.empty();
1239
1240 // And cleanup the calls we don't use anymore.
1241 for (auto V : ToRemoves) {
1242 V->eraseFromParent();
1243 }
1244
1245 // And remove the function we don't need either too.
1246 F->eraseFromParent();
1247 }
1248 }
1249
1250 return Changed;
1251}
1252
1253bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1254 bool Changed = false;
1255
1256 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1257 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1258 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1259
1260 for (auto Name : Map) {
1261 // If we find a function with the matching name.
1262 if (auto F = M.getFunction(Name)) {
1263 SmallVector<Instruction *, 4> ToRemoves;
1264
1265 // Walk the users of the function.
1266 for (auto &U : F->uses()) {
1267 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1268 // The value to store.
1269 auto Arg0 = CI->getOperand(0);
1270
1271 // The index argument from vstore_half.
1272 auto Arg1 = CI->getOperand(1);
1273
1274 // The pointer argument from vstore_half.
1275 auto Arg2 = CI->getOperand(2);
1276
1277 auto IntTy = Type::getInt32Ty(M.getContext());
1278 auto Int2Ty = VectorType::get(IntTy, 2);
1279 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1280 auto NewPointerTy = PointerType::get(
1281 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1282 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1283
1284 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1285 ConstantInt::get(IntTy, 1)};
1286
1287 // Extract out the x & y components of our to store value.
1288 auto Lo =
1289 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1290 ConstantVector::get(LoShuffleMask), "", CI);
1291
1292 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1293 ConstantInt::get(IntTy, 3)};
1294
1295 // Extract out the z & w components of our to store value.
1296 auto Hi =
1297 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1298 ConstantVector::get(HiShuffleMask), "", CI);
1299
1300 // Our intrinsic to pack a float2 to an int.
1301 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1302
1303 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1304
1305 // Turn the packed x & y into the final component of our int2.
1306 auto X = CallInst::Create(NewF, Lo, "", CI);
1307
1308 // Turn the packed z & w into the final component of our int2.
1309 auto Y = CallInst::Create(NewF, Hi, "", CI);
1310
1311 auto Combine = InsertElementInst::Create(
1312 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1313 Combine = InsertElementInst::Create(
1314 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1315
1316 // Cast the half* pointer to int2*.
1317 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1318
1319 // Index into the correct address of the casted pointer.
1320 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1321
1322 // Store to the int2* we casted to.
1323 auto Store = new StoreInst(Combine, Index, CI);
1324
1325 CI->replaceAllUsesWith(Store);
1326
1327 // Lastly, remember to remove the user.
1328 ToRemoves.push_back(CI);
1329 }
1330 }
1331
1332 Changed = !ToRemoves.empty();
1333
1334 // And cleanup the calls we don't use anymore.
1335 for (auto V : ToRemoves) {
1336 V->eraseFromParent();
1337 }
1338
1339 // And remove the function we don't need either too.
1340 F->eraseFromParent();
1341 }
1342 }
1343
1344 return Changed;
1345}
1346
1347bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1348 bool Changed = false;
1349
1350 const std::map<const char *, const char*> Map = {
1351 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1352 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1353 };
1354
1355 for (auto Pair : Map) {
1356 // If we find a function with the matching name.
1357 if (auto F = M.getFunction(Pair.first)) {
1358 SmallVector<Instruction *, 4> ToRemoves;
1359
1360 // Walk the users of the function.
1361 for (auto &U : F->uses()) {
1362 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1363 // The image.
1364 auto Arg0 = CI->getOperand(0);
1365
1366 // The sampler.
1367 auto Arg1 = CI->getOperand(1);
1368
1369 // The coordinate (integer type that we can't handle).
1370 auto Arg2 = CI->getOperand(2);
1371
1372 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1373
1374 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1375
1376 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1377
1378 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1379
1380 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1381
1382 CI->replaceAllUsesWith(NewCI);
1383
1384 // Lastly, remember to remove the user.
1385 ToRemoves.push_back(CI);
1386 }
1387 }
1388
1389 Changed = !ToRemoves.empty();
1390
1391 // And cleanup the calls we don't use anymore.
1392 for (auto V : ToRemoves) {
1393 V->eraseFromParent();
1394 }
1395
1396 // And remove the function we don't need either too.
1397 F->eraseFromParent();
1398 }
1399 }
1400
1401 return Changed;
1402}
1403
1404bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1405 bool Changed = false;
1406
1407 const std::map<const char *, const char *> Map = {
1408 {"_Z10atomic_addPU3AS1Vii", "spirv.atomic_add"},
1409 {"_Z10atomic_addPU3AS1Vjj", "spirv.atomic_add"},
1410 {"_Z10atomic_subPU3AS1Vii", "spirv.atomic_sub"},
1411 {"_Z10atomic_subPU3AS1Vjj", "spirv.atomic_sub"},
1412 {"_Z11atomic_xchgPU3AS1Vii", "spirv.atomic_exchange"},
1413 {"_Z11atomic_xchgPU3AS1Vjj", "spirv.atomic_exchange"},
1414 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1415 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1416 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1417 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1418 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
1419 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
1420 {"_Z10atomic_minPU3AS1Vii", "spirv.atomic_smin"},
1421 {"_Z10atomic_minPU3AS1Vjj", "spirv.atomic_umin"},
1422 {"_Z10atomic_maxPU3AS1Vii", "spirv.atomic_smax"},
1423 {"_Z10atomic_maxPU3AS1Vjj", "spirv.atomic_umax"},
1424 {"_Z10atomic_andPU3AS1Vii", "spirv.atomic_and"},
1425 {"_Z10atomic_andPU3AS1Vjj", "spirv.atomic_and"},
1426 {"_Z9atomic_orPU3AS1Vii", "spirv.atomic_or"},
1427 {"_Z9atomic_orPU3AS1Vjj", "spirv.atomic_or"},
1428 {"_Z10atomic_xorPU3AS1Vii", "spirv.atomic_xor"},
1429 {"_Z10atomic_xorPU3AS1Vjj", "spirv.atomic_xor"}};
1430
1431 for (auto Pair : Map) {
1432 // If we find a function with the matching name.
1433 if (auto F = M.getFunction(Pair.first)) {
1434 SmallVector<Instruction *, 4> ToRemoves;
1435
1436 // Walk the users of the function.
1437 for (auto &U : F->uses()) {
1438 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1439 auto FType = F->getFunctionType();
1440 SmallVector<Type *, 5> ParamTypes;
1441
1442 // The pointer type.
1443 ParamTypes.push_back(FType->getParamType(0));
1444
1445 auto IntTy = Type::getInt32Ty(M.getContext());
1446
1447 // The memory scope type.
1448 ParamTypes.push_back(IntTy);
1449
1450 // The memory semantics type.
1451 ParamTypes.push_back(IntTy);
1452
1453 if (2 < CI->getNumArgOperands()) {
1454 // The unequal memory semantics type.
1455 ParamTypes.push_back(IntTy);
1456
1457 // The value type.
1458 ParamTypes.push_back(FType->getParamType(2));
1459
1460 // The comparator type.
1461 ParamTypes.push_back(FType->getParamType(1));
1462 } else if (1 < CI->getNumArgOperands()) {
1463 // The value type.
1464 ParamTypes.push_back(FType->getParamType(1));
1465 }
1466
1467 auto NewFType =
1468 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1469 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1470
1471 // We need to map the OpenCL constants to the SPIR-V equivalents.
1472 const auto ConstantScopeDevice =
1473 ConstantInt::get(IntTy, spv::ScopeDevice);
1474 const auto ConstantMemorySemantics = ConstantInt::get(
1475 IntTy, spv::MemorySemanticsUniformMemoryMask |
1476 spv::MemorySemanticsSequentiallyConsistentMask);
1477
1478 SmallVector<Value *, 5> Params;
1479
1480 // The pointer.
1481 Params.push_back(CI->getArgOperand(0));
1482
1483 // The memory scope.
1484 Params.push_back(ConstantScopeDevice);
1485
1486 // The memory semantics.
1487 Params.push_back(ConstantMemorySemantics);
1488
1489 if (2 < CI->getNumArgOperands()) {
1490 // The unequal memory semantics.
1491 Params.push_back(ConstantMemorySemantics);
1492
1493 // The value.
1494 Params.push_back(CI->getArgOperand(2));
1495
1496 // The comparator.
1497 Params.push_back(CI->getArgOperand(1));
1498 } else if (1 < CI->getNumArgOperands()) {
1499 // The value.
1500 Params.push_back(CI->getArgOperand(1));
1501 }
1502
1503 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1504
1505 CI->replaceAllUsesWith(NewCI);
1506
1507 // Lastly, remember to remove the user.
1508 ToRemoves.push_back(CI);
1509 }
1510 }
1511
1512 Changed = !ToRemoves.empty();
1513
1514 // And cleanup the calls we don't use anymore.
1515 for (auto V : ToRemoves) {
1516 V->eraseFromParent();
1517 }
1518
1519 // And remove the function we don't need either too.
1520 F->eraseFromParent();
1521 }
1522 }
1523
1524 return Changed;
1525}
1526
1527bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1528 bool Changed = false;
1529
1530 // If we find a function with the matching name.
1531 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1532 SmallVector<Instruction *, 4> ToRemoves;
1533
1534 auto IntTy = Type::getInt32Ty(M.getContext());
1535 auto FloatTy = Type::getFloatTy(M.getContext());
1536
1537 Constant *DownShuffleMask[3] = {
1538 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1539 ConstantInt::get(IntTy, 2)};
1540
1541 Constant *UpShuffleMask[4] = {
1542 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1543 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1544
1545 Constant *FloatVec[3] = {
1546 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1547 };
1548
1549 // Walk the users of the function.
1550 for (auto &U : F->uses()) {
1551 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1552 auto Vec4Ty = CI->getArgOperand(0)->getType();
1553 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1554 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1555 auto Vec3Ty = Arg0->getType();
1556
1557 auto NewFType =
1558 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1559
1560 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1561
1562 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1563
1564 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1565
1566 CI->replaceAllUsesWith(Result);
1567
1568 // Lastly, remember to remove the user.
1569 ToRemoves.push_back(CI);
1570 }
1571 }
1572
1573 Changed = !ToRemoves.empty();
1574
1575 // And cleanup the calls we don't use anymore.
1576 for (auto V : ToRemoves) {
1577 V->eraseFromParent();
1578 }
1579
1580 // And remove the function we don't need either too.
1581 F->eraseFromParent();
1582 }
1583
1584 return Changed;
1585}