blob: 5a27e81ea91dc3b83e7ad019b0f14eb0f7d16c15 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include <llvm/IR/Constants.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/Module.h>
18#include <llvm/Pass.h>
David Neto17852de2017-05-29 17:29:31 -040019#include <llvm/Support/CommandLine.h>
David Neto22f144c2017-06-12 14:26:21 -040020#include <llvm/Support/raw_ostream.h>
21#include <llvm/Transforms/Utils/Cloning.h>
22
23#include <spirv/1.0/spirv.hpp>
24
25using namespace llvm;
26
27#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
28
David Neto17852de2017-05-29 17:29:31 -040029static llvm::cl::opt<bool> f16bit_storage(
30 "f16bit_storage", llvm::cl::init(false),
31 llvm::cl::desc("Assume the target supports SPV_KHR_16bit_storage"));
32
David Neto22f144c2017-06-12 14:26:21 -040033namespace {
// Returns the zero-based index of the highest set bit of |v|, i.e.
// floor(log2(v)) for non-zero |v|, and 0 when |v| is 0.
//
// NOTE: despite its name this does not count leading zeros. Callers in this
// file subtract two results to obtain the distance between two single-bit
// masks.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search over the bit position: test the upper half of the remaining
  // window, and if anything is set there, discard the lower half.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      index |= width;
    }
  }

  return index;
}
53
54Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
55 if (1 == elements) {
56 return Type::getInt1Ty(C);
57 } else {
58 return VectorType::get(Type::getInt1Ty(C), elements);
59 }
60}
61
62struct ReplaceOpenCLBuiltinPass final : public ModulePass {
63 static char ID;
64 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
65
66 bool runOnModule(Module &M) override;
67 bool replaceRecip(Module &M);
68 bool replaceDivide(Module &M);
69 bool replaceExp10(Module &M);
70 bool replaceLog10(Module &M);
71 bool replaceBarrier(Module &M);
72 bool replaceMemFence(Module &M);
73 bool replaceRelational(Module &M);
74 bool replaceIsInfAndIsNan(Module &M);
75 bool replaceAllAndAny(Module &M);
76 bool replaceSignbit(Module &M);
77 bool replaceMadandMad24andMul24(Module &M);
78 bool replaceVloadHalf(Module &M);
79 bool replaceVloadHalf2(Module &M);
80 bool replaceVloadHalf4(Module &M);
81 bool replaceVstoreHalf(Module &M);
82 bool replaceVstoreHalf2(Module &M);
83 bool replaceVstoreHalf4(Module &M);
84 bool replaceReadImageF(Module &M);
85 bool replaceAtomics(Module &M);
86 bool replaceCross(Module &M);
87};
88}
89
90char ReplaceOpenCLBuiltinPass::ID = 0;
91static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
92 "Replace OpenCL Builtins Pass");
93
94namespace clspv {
95ModulePass *createReplaceOpenCLBuiltinPass() {
96 return new ReplaceOpenCLBuiltinPass();
97}
98}
99
100bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
101 bool Changed = false;
102
103 Changed |= replaceRecip(M);
104 Changed |= replaceDivide(M);
105 Changed |= replaceExp10(M);
106 Changed |= replaceLog10(M);
107 Changed |= replaceBarrier(M);
108 Changed |= replaceMemFence(M);
109 Changed |= replaceRelational(M);
110 Changed |= replaceIsInfAndIsNan(M);
111 Changed |= replaceAllAndAny(M);
112 Changed |= replaceSignbit(M);
113 Changed |= replaceMadandMad24andMul24(M);
114 Changed |= replaceVloadHalf(M);
115 Changed |= replaceVloadHalf2(M);
116 Changed |= replaceVloadHalf4(M);
117 Changed |= replaceVstoreHalf(M);
118 Changed |= replaceVstoreHalf2(M);
119 Changed |= replaceVstoreHalf4(M);
120 Changed |= replaceReadImageF(M);
121 Changed |= replaceAtomics(M);
122 Changed |= replaceCross(M);
123
124 return Changed;
125}
126
127bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
128 bool Changed = false;
129
130 const char *Names[] = {
131 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
132 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
133 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
134 };
135
136 for (auto Name : Names) {
137 // If we find a function with the matching name.
138 if (auto F = M.getFunction(Name)) {
139 SmallVector<Instruction *, 4> ToRemoves;
140
141 // Walk the users of the function.
142 for (auto &U : F->uses()) {
143 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
144 // Recip has one arg.
145 auto Arg = CI->getOperand(0);
146
147 auto Div = BinaryOperator::Create(
148 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
149 CI);
150
151 CI->replaceAllUsesWith(Div);
152
153 // Lastly, remember to remove the user.
154 ToRemoves.push_back(CI);
155 }
156 }
157
158 Changed = !ToRemoves.empty();
159
160 // And cleanup the calls we don't use anymore.
161 for (auto V : ToRemoves) {
162 V->eraseFromParent();
163 }
164
165 // And remove the function we don't need either too.
166 F->eraseFromParent();
167 }
168 }
169
170 return Changed;
171}
172
173bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
174 bool Changed = false;
175
176 const char *Names[] = {
177 "_Z11half_divideff", "_Z13native_divideff",
178 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
179 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
180 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
181 };
182
183 for (auto Name : Names) {
184 // If we find a function with the matching name.
185 if (auto F = M.getFunction(Name)) {
186 SmallVector<Instruction *, 4> ToRemoves;
187
188 // Walk the users of the function.
189 for (auto &U : F->uses()) {
190 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
191 auto Div = BinaryOperator::Create(
192 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
193
194 CI->replaceAllUsesWith(Div);
195
196 // Lastly, remember to remove the user.
197 ToRemoves.push_back(CI);
198 }
199 }
200
201 Changed = !ToRemoves.empty();
202
203 // And cleanup the calls we don't use anymore.
204 for (auto V : ToRemoves) {
205 V->eraseFromParent();
206 }
207
208 // And remove the function we don't need either too.
209 F->eraseFromParent();
210 }
211 }
212
213 return Changed;
214}
215
216bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
217 bool Changed = false;
218
219 const std::map<const char *, const char *> Map = {
220 {"_Z5exp10f", "_Z3expf"},
221 {"_Z10half_exp10f", "_Z8half_expf"},
222 {"_Z12native_exp10f", "_Z10native_expf"},
223 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
224 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
225 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
226 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
227 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
228 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
229 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
230 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
231 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
232
233 for (auto Pair : Map) {
234 // If we find a function with the matching name.
235 if (auto F = M.getFunction(Pair.first)) {
236 SmallVector<Instruction *, 4> ToRemoves;
237
238 // Walk the users of the function.
239 for (auto &U : F->uses()) {
240 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
241 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
242
243 auto Arg = CI->getOperand(0);
244
245 // Constant of the natural log of 10 (ln(10)).
246 const double Ln10 =
247 2.302585092994045684017991454684364207601101488628772976033;
248
249 auto Mul = BinaryOperator::Create(
250 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
251 CI);
252
253 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
254
255 CI->replaceAllUsesWith(NewCI);
256
257 // Lastly, remember to remove the user.
258 ToRemoves.push_back(CI);
259 }
260 }
261
262 Changed = !ToRemoves.empty();
263
264 // And cleanup the calls we don't use anymore.
265 for (auto V : ToRemoves) {
266 V->eraseFromParent();
267 }
268
269 // And remove the function we don't need either too.
270 F->eraseFromParent();
271 }
272 }
273
274 return Changed;
275}
276
277bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
278 bool Changed = false;
279
280 const std::map<const char *, const char *> Map = {
281 {"_Z5log10f", "_Z3logf"},
282 {"_Z10half_log10f", "_Z8half_logf"},
283 {"_Z12native_log10f", "_Z10native_logf"},
284 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
285 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
286 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
287 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
288 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
289 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
290 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
291 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
292 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
293
294 for (auto Pair : Map) {
295 // If we find a function with the matching name.
296 if (auto F = M.getFunction(Pair.first)) {
297 SmallVector<Instruction *, 4> ToRemoves;
298
299 // Walk the users of the function.
300 for (auto &U : F->uses()) {
301 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
302 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
303
304 auto Arg = CI->getOperand(0);
305
306 // Constant of the reciprocal of the natural log of 10 (ln(10)).
307 const double Ln10 =
308 0.434294481903251827651128918916605082294397005803666566114;
309
310 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
311
312 auto Mul = BinaryOperator::Create(
313 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
314 "", CI);
315
316 CI->replaceAllUsesWith(Mul);
317
318 // Lastly, remember to remove the user.
319 ToRemoves.push_back(CI);
320 }
321 }
322
323 Changed = !ToRemoves.empty();
324
325 // And cleanup the calls we don't use anymore.
326 for (auto V : ToRemoves) {
327 V->eraseFromParent();
328 }
329
330 // And remove the function we don't need either too.
331 F->eraseFromParent();
332 }
333 }
334
335 return Changed;
336}
337
338bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
339 bool Changed = false;
340
341 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
342
343 const std::map<const char *, const char *> Map = {
344 {"_Z7barrierj", "__spirv_control_barrier"}};
345
346 for (auto Pair : Map) {
347 // If we find a function with the matching name.
348 if (auto F = M.getFunction(Pair.first)) {
349 SmallVector<Instruction *, 4> ToRemoves;
350
351 // Walk the users of the function.
352 for (auto &U : F->uses()) {
353 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
354 auto FType = F->getFunctionType();
355 SmallVector<Type *, 3> Params;
356 for (unsigned i = 0; i < 3; i++) {
357 Params.push_back(FType->getParamType(0));
358 }
359 auto NewFType =
360 FunctionType::get(FType->getReturnType(), Params, false);
361 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
362
363 auto Arg = CI->getOperand(0);
364
365 // We need to map the OpenCL constants to the SPIR-V equivalents.
366 const auto LocalMemFence =
367 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
368 const auto GlobalMemFence =
369 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
370 const auto ConstantSequentiallyConsistent = ConstantInt::get(
371 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
372 const auto ConstantScopeDevice =
373 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
374 const auto ConstantScopeWorkgroup =
375 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
376
377 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
378 const auto LocalMemFenceMask = BinaryOperator::Create(
379 Instruction::And, LocalMemFence, Arg, "", CI);
380 const auto WorkgroupShiftAmount =
381 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
382 clz(CLK_LOCAL_MEM_FENCE);
383 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
384 Instruction::Shl, LocalMemFenceMask,
385 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
386
387 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
388 const auto GlobalMemFenceMask = BinaryOperator::Create(
389 Instruction::And, GlobalMemFence, Arg, "", CI);
390 const auto UniformShiftAmount =
391 clz(spv::MemorySemanticsUniformMemoryMask) -
392 clz(CLK_GLOBAL_MEM_FENCE);
393 const auto MemorySemanticsUniform = BinaryOperator::Create(
394 Instruction::Shl, GlobalMemFenceMask,
395 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
396
397 // And combine the above together, also adding in
398 // MemorySemanticsSequentiallyConsistentMask.
399 auto MemorySemantics =
400 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
401 ConstantSequentiallyConsistent, "", CI);
402 MemorySemantics = BinaryOperator::Create(
403 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
404
405 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
406 // Device Scope, otherwise Workgroup Scope.
407 const auto Cmp =
408 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
409 GlobalMemFenceMask, GlobalMemFence, "", CI);
410 const auto MemoryScope = SelectInst::Create(
411 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
412
413 // Lastly, the Execution Scope is always Workgroup Scope.
414 const auto ExecutionScope = ConstantScopeWorkgroup;
415
416 auto NewCI = CallInst::Create(
417 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
418
419 CI->replaceAllUsesWith(NewCI);
420
421 // Lastly, remember to remove the user.
422 ToRemoves.push_back(CI);
423 }
424 }
425
426 Changed = !ToRemoves.empty();
427
428 // And cleanup the calls we don't use anymore.
429 for (auto V : ToRemoves) {
430 V->eraseFromParent();
431 }
432
433 // And remove the function we don't need either too.
434 F->eraseFromParent();
435 }
436 }
437
438 return Changed;
439}
440
441bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
442 bool Changed = false;
443
444 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
445
446 const std::map<const char *, const char *> Map = {
447 {"_Z9mem_fencej", "__spirv_memory_barrier"},
448 {"_Z14read_mem_fencej", "__spirv_memory_barrier"},
449 {"_Z15write_mem_fencej", "__spirv_memory_barrier"}};
450
451 std::map<const char *, uint32_t> MemorySemanticsMap = {
452 {"_Z9mem_fencej", spv::MemorySemanticsSequentiallyConsistentMask},
453 {"_Z14read_mem_fencej", spv::MemorySemanticsAcquireMask},
454 {"_Z15write_mem_fencej", spv::MemorySemanticsReleaseMask}};
455
456 for (auto Pair : Map) {
457 // If we find a function with the matching name.
458 if (auto F = M.getFunction(Pair.first)) {
459 SmallVector<Instruction *, 4> ToRemoves;
460
461 // Walk the users of the function.
462 for (auto &U : F->uses()) {
463 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
464 auto FType = F->getFunctionType();
465 SmallVector<Type *, 2> Params;
466 for (unsigned i = 0; i < 2; i++) {
467 Params.push_back(FType->getParamType(0));
468 }
469 auto NewFType =
470 FunctionType::get(FType->getReturnType(), Params, false);
471 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
472
473 auto Arg = CI->getOperand(0);
474
475 // We need to map the OpenCL constants to the SPIR-V equivalents.
476 const auto LocalMemFence =
477 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
478 const auto GlobalMemFence =
479 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
480 const auto ConstantMemorySemantics =
481 ConstantInt::get(Arg->getType(), MemorySemanticsMap[Pair.first]);
482 const auto ConstantScopeDevice =
483 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
484
485 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
486 const auto LocalMemFenceMask = BinaryOperator::Create(
487 Instruction::And, LocalMemFence, Arg, "", CI);
488 const auto WorkgroupShiftAmount =
489 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
490 clz(CLK_LOCAL_MEM_FENCE);
491 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
492 Instruction::Shl, LocalMemFenceMask,
493 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
494
495 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
496 const auto GlobalMemFenceMask = BinaryOperator::Create(
497 Instruction::And, GlobalMemFence, Arg, "", CI);
498 const auto UniformShiftAmount =
499 clz(spv::MemorySemanticsUniformMemoryMask) -
500 clz(CLK_GLOBAL_MEM_FENCE);
501 const auto MemorySemanticsUniform = BinaryOperator::Create(
502 Instruction::Shl, GlobalMemFenceMask,
503 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
504
505 // And combine the above together, also adding in
506 // MemorySemanticsSequentiallyConsistentMask.
507 auto MemorySemantics =
508 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
509 ConstantMemorySemantics, "", CI);
510 MemorySemantics = BinaryOperator::Create(
511 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
512
513 // Memory Scope is always device.
514 const auto MemoryScope = ConstantScopeDevice;
515
516 auto NewCI =
517 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
518
519 CI->replaceAllUsesWith(NewCI);
520
521 // Lastly, remember to remove the user.
522 ToRemoves.push_back(CI);
523 }
524 }
525
526 Changed = !ToRemoves.empty();
527
528 // And cleanup the calls we don't use anymore.
529 for (auto V : ToRemoves) {
530 V->eraseFromParent();
531 }
532
533 // And remove the function we don't need either too.
534 F->eraseFromParent();
535 }
536 }
537
538 return Changed;
539}
540
541bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
542 bool Changed = false;
543
544 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
545 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
546 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
547 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
548 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
549 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
550 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
551 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
552 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
553 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
554 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
555 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
556 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
557 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
558 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
559 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
560 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
561 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
562 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
563 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
564 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
565 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
566 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
567 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
568 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
569 };
570
571 for (auto Pair : Map) {
572 // If we find a function with the matching name.
573 if (auto F = M.getFunction(Pair.first)) {
574 SmallVector<Instruction *, 4> ToRemoves;
575
576 // Walk the users of the function.
577 for (auto &U : F->uses()) {
578 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
579 // The predicate to use in the CmpInst.
580 auto Predicate = Pair.second.first;
581
582 // The value to return for true.
583 auto TrueValue =
584 ConstantInt::getSigned(CI->getType(), Pair.second.second);
585
586 // The value to return for false.
587 auto FalseValue = Constant::getNullValue(CI->getType());
588
589 auto Arg1 = CI->getOperand(0);
590 auto Arg2 = CI->getOperand(1);
591
592 const auto Cmp =
593 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
594
595 const auto Select =
596 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
597
598 CI->replaceAllUsesWith(Select);
599
600 // Lastly, remember to remove the user.
601 ToRemoves.push_back(CI);
602 }
603 }
604
605 Changed = !ToRemoves.empty();
606
607 // And cleanup the calls we don't use anymore.
608 for (auto V : ToRemoves) {
609 V->eraseFromParent();
610 }
611
612 // And remove the function we don't need either too.
613 F->eraseFromParent();
614 }
615 }
616
617 return Changed;
618}
619
620bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
621 bool Changed = false;
622
623 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
624 {"_Z5isinff", {"__spirv_isinff", 1}},
625 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
626 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
627 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
628 {"_Z5isnanf", {"__spirv_isnanf", 1}},
629 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
630 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
631 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
632 };
633
634 for (auto Pair : Map) {
635 // If we find a function with the matching name.
636 if (auto F = M.getFunction(Pair.first)) {
637 SmallVector<Instruction *, 4> ToRemoves;
638
639 // Walk the users of the function.
640 for (auto &U : F->uses()) {
641 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
642 const auto CITy = CI->getType();
643
644 // The fake SPIR-V intrinsic to generate.
645 auto SPIRVIntrinsic = Pair.second.first;
646
647 // The value to return for true.
648 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
649
650 // The value to return for false.
651 auto FalseValue = Constant::getNullValue(CITy);
652
653 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
654 M.getContext(),
655 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
656
657 auto NewFType =
658 FunctionType::get(CorrespondingBoolTy,
659 F->getFunctionType()->getParamType(0), false);
660
661 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
662
663 auto Arg = CI->getOperand(0);
664
665 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
666
667 const auto Select =
668 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
669
670 CI->replaceAllUsesWith(Select);
671
672 // Lastly, remember to remove the user.
673 ToRemoves.push_back(CI);
674 }
675 }
676
677 Changed = !ToRemoves.empty();
678
679 // And cleanup the calls we don't use anymore.
680 for (auto V : ToRemoves) {
681 V->eraseFromParent();
682 }
683
684 // And remove the function we don't need either too.
685 F->eraseFromParent();
686 }
687 }
688
689 return Changed;
690}
691
692bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
693 bool Changed = false;
694
695 const std::map<const char *, const char *> Map = {
696 {"_Z3alli", ""},
697 {"_Z3allDv2_i", "__spirv_allDv2_i"},
698 {"_Z3allDv3_i", "__spirv_allDv3_i"},
699 {"_Z3allDv4_i", "__spirv_allDv4_i"},
700 {"_Z3anyi", ""},
701 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
702 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
703 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
704 };
705
706 for (auto Pair : Map) {
707 // If we find a function with the matching name.
708 if (auto F = M.getFunction(Pair.first)) {
709 SmallVector<Instruction *, 4> ToRemoves;
710
711 // Walk the users of the function.
712 for (auto &U : F->uses()) {
713 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
714 // The fake SPIR-V intrinsic to generate.
715 auto SPIRVIntrinsic = Pair.second;
716
717 auto Arg = CI->getOperand(0);
718
719 Value *V;
720
721 // If we have a function to call, call it!
722 if (0 < strlen(SPIRVIntrinsic)) {
723 // The value for zero to compare against.
724 const auto ZeroValue = Constant::getNullValue(Arg->getType());
725
726 const auto Cmp = CmpInst::Create(
727 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
728 const auto NewFType = FunctionType::get(
729 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
730
731 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
732
733 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
734
735 // The value to return for true.
736 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
737
738 // The value to return for false.
739 const auto FalseValue = Constant::getNullValue(CI->getType());
740
741 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
742 } else {
743 V = BinaryOperator::Create(Instruction::LShr, Arg,
744 ConstantInt::get(CI->getType(), 31), "",
745 CI);
746 }
747
748 CI->replaceAllUsesWith(V);
749
750 // Lastly, remember to remove the user.
751 ToRemoves.push_back(CI);
752 }
753 }
754
755 Changed = !ToRemoves.empty();
756
757 // And cleanup the calls we don't use anymore.
758 for (auto V : ToRemoves) {
759 V->eraseFromParent();
760 }
761
762 // And remove the function we don't need either too.
763 F->eraseFromParent();
764 }
765 }
766
767 return Changed;
768}
769
770bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
771 bool Changed = false;
772
773 const std::map<const char *, Instruction::BinaryOps> Map = {
774 {"_Z7signbitf", Instruction::LShr},
775 {"_Z7signbitDv2_f", Instruction::AShr},
776 {"_Z7signbitDv3_f", Instruction::AShr},
777 {"_Z7signbitDv4_f", Instruction::AShr},
778 };
779
780 for (auto Pair : Map) {
781 // If we find a function with the matching name.
782 if (auto F = M.getFunction(Pair.first)) {
783 SmallVector<Instruction *, 4> ToRemoves;
784
785 // Walk the users of the function.
786 for (auto &U : F->uses()) {
787 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
788 auto Arg = CI->getOperand(0);
789
790 auto Bitcast =
791 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
792
793 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
794 ConstantInt::get(CI->getType(), 31),
795 "", CI);
796
797 CI->replaceAllUsesWith(Shr);
798
799 // Lastly, remember to remove the user.
800 ToRemoves.push_back(CI);
801 }
802 }
803
804 Changed = !ToRemoves.empty();
805
806 // And cleanup the calls we don't use anymore.
807 for (auto V : ToRemoves) {
808 V->eraseFromParent();
809 }
810
811 // And remove the function we don't need either too.
812 F->eraseFromParent();
813 }
814 }
815
816 return Changed;
817}
818
819bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
820 bool Changed = false;
821
822 const std::map<const char *,
823 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
824 Map = {
825 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
826 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
827 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
828 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
829 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
830 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
831 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
832 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
833 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
834 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
835 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
836 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
837 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
838 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
839 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
840 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
841 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
842 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
843 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
844 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
845 };
846
847 for (auto Pair : Map) {
848 // If we find a function with the matching name.
849 if (auto F = M.getFunction(Pair.first)) {
850 SmallVector<Instruction *, 4> ToRemoves;
851
852 // Walk the users of the function.
853 for (auto &U : F->uses()) {
854 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
855 // The multiply instruction to use.
856 auto MulInst = Pair.second.first;
857
858 // The add instruction to use.
859 auto AddInst = Pair.second.second;
860
861 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
862
863 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
864 CI->getArgOperand(1), "", CI);
865
866 if (Instruction::BinaryOpsEnd != AddInst) {
867 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
868 CI);
869 }
870
871 CI->replaceAllUsesWith(I);
872
873 // Lastly, remember to remove the user.
874 ToRemoves.push_back(CI);
875 }
876 }
877
878 Changed = !ToRemoves.empty();
879
880 // And cleanup the calls we don't use anymore.
881 for (auto V : ToRemoves) {
882 V->eraseFromParent();
883 }
884
885 // And remove the function we don't need either too.
886 F->eraseFromParent();
887 }
888 }
889
890 return Changed;
891}
892
893bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
894 bool Changed = false;
895
896 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
897 "_Z10vload_halfjPU3AS2KDh"};
898
899 for (auto Name : Map) {
900 // If we find a function with the matching name.
901 if (auto F = M.getFunction(Name)) {
902 SmallVector<Instruction *, 4> ToRemoves;
903
904 // Walk the users of the function.
905 for (auto &U : F->uses()) {
906 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
907 // The index argument from vload_half.
908 auto Arg0 = CI->getOperand(0);
909
910 // The pointer argument from vload_half.
911 auto Arg1 = CI->getOperand(1);
912
913 auto ShortTy = Type::getInt16Ty(M.getContext());
914 auto IntTy = Type::getInt32Ty(M.getContext());
915 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
916 auto NewPointerTy = PointerType::get(
917 ShortTy, Arg1->getType()->getPointerAddressSpace());
918 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
919
920 // Cast the half* pointer to short*.
921 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
922
923 // Index into the correct address of the casted pointer.
924 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
925
926 // Load from the short* we casted to.
927 auto Load = new LoadInst(Index, "", CI);
928
929 // ZExt the short -> int.
930 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
931
932 // Our intrinsic to unpack a float2 from an int.
933 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
934
935 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
936
937 // Get our float2.
938 auto Call = CallInst::Create(NewF, ZExt, "", CI);
939
940 // Extract out the bottom element which is our float result.
941 auto Extract = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
942
943 CI->replaceAllUsesWith(Extract);
944
945 // Lastly, remember to remove the user.
946 ToRemoves.push_back(CI);
947 }
948 }
949
950 Changed = !ToRemoves.empty();
951
952 // And cleanup the calls we don't use anymore.
953 for (auto V : ToRemoves) {
954 V->eraseFromParent();
955 }
956
957 // And remove the function we don't need either too.
958 F->eraseFromParent();
959 }
960 }
961
962 return Changed;
963}
964
965bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
966 bool Changed = false;
967
968 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
969 "_Z11vload_half2jPU3AS2KDh"};
970
971 for (auto Name : Map) {
972 // If we find a function with the matching name.
973 if (auto F = M.getFunction(Name)) {
974 SmallVector<Instruction *, 4> ToRemoves;
975
976 // Walk the users of the function.
977 for (auto &U : F->uses()) {
978 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
979 // The index argument from vload_half.
980 auto Arg0 = CI->getOperand(0);
981
982 // The pointer argument from vload_half.
983 auto Arg1 = CI->getOperand(1);
984
985 auto IntTy = Type::getInt32Ty(M.getContext());
986 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
987 auto NewPointerTy = PointerType::get(
988 IntTy, Arg1->getType()->getPointerAddressSpace());
989 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
990
991 // Cast the half* pointer to int*.
992 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
993
994 // Index into the correct address of the casted pointer.
995 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
996
997 // Load from the int* we casted to.
998 auto Load = new LoadInst(Index, "", CI);
999
1000 // Our intrinsic to unpack a float2 from an int.
1001 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1002
1003 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1004
1005 // Get our float2.
1006 auto Call = CallInst::Create(NewF, Load, "", CI);
1007
1008 CI->replaceAllUsesWith(Call);
1009
1010 // Lastly, remember to remove the user.
1011 ToRemoves.push_back(CI);
1012 }
1013 }
1014
1015 Changed = !ToRemoves.empty();
1016
1017 // And cleanup the calls we don't use anymore.
1018 for (auto V : ToRemoves) {
1019 V->eraseFromParent();
1020 }
1021
1022 // And remove the function we don't need either too.
1023 F->eraseFromParent();
1024 }
1025 }
1026
1027 return Changed;
1028}
1029
1030bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1031 bool Changed = false;
1032
1033 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1034 "_Z11vload_half4jPU3AS2KDh"};
1035
1036 for (auto Name : Map) {
1037 // If we find a function with the matching name.
1038 if (auto F = M.getFunction(Name)) {
1039 SmallVector<Instruction *, 4> ToRemoves;
1040
1041 // Walk the users of the function.
1042 for (auto &U : F->uses()) {
1043 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1044 // The index argument from vload_half.
1045 auto Arg0 = CI->getOperand(0);
1046
1047 // The pointer argument from vload_half.
1048 auto Arg1 = CI->getOperand(1);
1049
1050 auto IntTy = Type::getInt32Ty(M.getContext());
1051 auto Int2Ty = VectorType::get(IntTy, 2);
1052 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1053 auto NewPointerTy = PointerType::get(
1054 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1055 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1056
1057 // Cast the half* pointer to int2*.
1058 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1059
1060 // Index into the correct address of the casted pointer.
1061 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1062
1063 // Load from the int2* we casted to.
1064 auto Load = new LoadInst(Index, "", CI);
1065
1066 // Extract each element from the loaded int2.
1067 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1068 "", CI);
1069 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1070 "", CI);
1071
1072 // Our intrinsic to unpack a float2 from an int.
1073 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1074
1075 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1076
1077 // Get the lower (x & y) components of our final float4.
1078 auto Lo = CallInst::Create(NewF, X, "", CI);
1079
1080 // Get the higher (z & w) components of our final float4.
1081 auto Hi = CallInst::Create(NewF, Y, "", CI);
1082
1083 Constant *ShuffleMask[4] = {
1084 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1085 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1086
1087 // Combine our two float2's into one float4.
1088 auto Combine = new ShuffleVectorInst(
1089 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1090
1091 CI->replaceAllUsesWith(Combine);
1092
1093 // Lastly, remember to remove the user.
1094 ToRemoves.push_back(CI);
1095 }
1096 }
1097
1098 Changed = !ToRemoves.empty();
1099
1100 // And cleanup the calls we don't use anymore.
1101 for (auto V : ToRemoves) {
1102 V->eraseFromParent();
1103 }
1104
1105 // And remove the function we don't need either too.
1106 F->eraseFromParent();
1107 }
1108 }
1109
1110 return Changed;
1111}
1112
1113bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1114 bool Changed = false;
1115
1116 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1117 "_Z15vstore_half_rtefjPU3AS1Dh",
1118 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1119
1120 for (auto Name : Map) {
1121 // If we find a function with the matching name.
1122 if (auto F = M.getFunction(Name)) {
1123 SmallVector<Instruction *, 4> ToRemoves;
1124
1125 // Walk the users of the function.
1126 for (auto &U : F->uses()) {
1127 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1128 // The value to store.
1129 auto Arg0 = CI->getOperand(0);
1130
1131 // The index argument from vstore_half.
1132 auto Arg1 = CI->getOperand(1);
1133
1134 // The pointer argument from vstore_half.
1135 auto Arg2 = CI->getOperand(2);
1136
David Neto22f144c2017-06-12 14:26:21 -04001137 auto IntTy = Type::getInt32Ty(M.getContext());
1138 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001139 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001140 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001141
1142 // Our intrinsic to pack a float2 to an int.
1143 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1144
1145 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1146
1147 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001148 auto TempVec =
1149 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1150 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001151
1152 // Pack the float2 -> half2 (in an int).
1153 auto X = CallInst::Create(NewF, TempVec, "", CI);
1154
David Neto17852de2017-05-29 17:29:31 -04001155 if (f16bit_storage) {
1156 auto ShortTy = Type::getInt16Ty(M.getContext());
1157 auto ShortPointerTy = PointerType::get(
1158 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001159
David Neto17852de2017-05-29 17:29:31 -04001160 // Truncate our i32 to an i16.
1161 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001162
David Neto17852de2017-05-29 17:29:31 -04001163 // Cast the half* pointer to short*.
1164 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001165
David Neto17852de2017-05-29 17:29:31 -04001166 // Index into the correct address of the casted pointer.
1167 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001168
David Neto17852de2017-05-29 17:29:31 -04001169 // Store to the int* we casted to.
1170 auto Store = new StoreInst(Trunc, Index, CI);
1171
1172 CI->replaceAllUsesWith(Store);
1173 } else {
1174 // We can only write to 32-bit aligned words.
1175 //
1176 // Assuming base is aligned to 32-bits, replace the equivalent of
1177 // vstore_half(value, index, base)
1178 // with:
1179 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1180 // uint32_t write_to_upper_half = index & 1u;
1181 // uint32_t shift = write_to_upper_half << 4;
1182 //
1183 // // Pack the float value as a half number in bottom 16 bits
1184 // // of an i32.
1185 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1186 //
1187 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1188 // ^ ((packed & 0xffff) << shift)
1189 // // We only need relaxed consistency, but OpenCL 1.2 only has
1190 // // sequentially consistent atomics.
1191 // // TODO(dneto): Use relaxed consistency.
1192 // atomic_xor(target_ptr, xor_value)
1193 auto IntPointerTy = PointerType::get(
1194 IntTy, Arg2->getType()->getPointerAddressSpace());
1195
1196 auto Four = ConstantInt::get(IntTy, 4);
1197 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1198
1199 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1200 // Compute index / 2
1201 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1202 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1203 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1204 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1205 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1206 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1207 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1208
1209 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1210 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1211 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1212
1213 // Generate the call to atomi_xor.
1214 SmallVector<Type *, 5> ParamTypes;
1215 // The pointer type.
1216 ParamTypes.push_back(IntPointerTy);
1217 // The Types for memory scope, semantics, and value.
1218 ParamTypes.push_back(IntTy);
1219 ParamTypes.push_back(IntTy);
1220 ParamTypes.push_back(IntTy);
1221 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1222 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1223
1224 const auto ConstantScopeDevice =
1225 ConstantInt::get(IntTy, spv::ScopeDevice);
1226 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1227 // (SPIR-V Workgroup).
1228 const auto AddrSpaceSemanticsBits =
1229 IntPointerTy->getPointerAddressSpace() == 1
1230 ? spv::MemorySemanticsUniformMemoryMask
1231 : spv::MemorySemanticsWorkgroupMemoryMask;
1232
1233 // We're using relaxed consistency here.
1234 const auto ConstantMemorySemantics =
1235 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1236 AddrSpaceSemanticsBits);
1237
1238 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1239 ConstantMemorySemantics, ValueToXor};
1240 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1241 }
David Neto22f144c2017-06-12 14:26:21 -04001242
1243 // Lastly, remember to remove the user.
1244 ToRemoves.push_back(CI);
1245 }
1246 }
1247
1248 Changed = !ToRemoves.empty();
1249
1250 // And cleanup the calls we don't use anymore.
1251 for (auto V : ToRemoves) {
1252 V->eraseFromParent();
1253 }
1254
1255 // And remove the function we don't need either too.
1256 F->eraseFromParent();
1257 }
1258 }
1259
1260 return Changed;
1261}
1262
1263bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1264 bool Changed = false;
1265
1266 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1267 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1268 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1269
1270 for (auto Name : Map) {
1271 // If we find a function with the matching name.
1272 if (auto F = M.getFunction(Name)) {
1273 SmallVector<Instruction *, 4> ToRemoves;
1274
1275 // Walk the users of the function.
1276 for (auto &U : F->uses()) {
1277 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1278 // The value to store.
1279 auto Arg0 = CI->getOperand(0);
1280
1281 // The index argument from vstore_half.
1282 auto Arg1 = CI->getOperand(1);
1283
1284 // The pointer argument from vstore_half.
1285 auto Arg2 = CI->getOperand(2);
1286
1287 auto IntTy = Type::getInt32Ty(M.getContext());
1288 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1289 auto NewPointerTy = PointerType::get(
1290 IntTy, Arg2->getType()->getPointerAddressSpace());
1291 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1292
1293 // Our intrinsic to pack a float2 to an int.
1294 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1295
1296 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1297
1298 // Turn the packed x & y into the final packing.
1299 auto X = CallInst::Create(NewF, Arg0, "", CI);
1300
1301 // Cast the half* pointer to int*.
1302 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1303
1304 // Index into the correct address of the casted pointer.
1305 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1306
1307 // Store to the int* we casted to.
1308 auto Store = new StoreInst(X, Index, CI);
1309
1310 CI->replaceAllUsesWith(Store);
1311
1312 // Lastly, remember to remove the user.
1313 ToRemoves.push_back(CI);
1314 }
1315 }
1316
1317 Changed = !ToRemoves.empty();
1318
1319 // And cleanup the calls we don't use anymore.
1320 for (auto V : ToRemoves) {
1321 V->eraseFromParent();
1322 }
1323
1324 // And remove the function we don't need either too.
1325 F->eraseFromParent();
1326 }
1327 }
1328
1329 return Changed;
1330}
1331
1332bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1333 bool Changed = false;
1334
1335 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1336 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1337 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1338
1339 for (auto Name : Map) {
1340 // If we find a function with the matching name.
1341 if (auto F = M.getFunction(Name)) {
1342 SmallVector<Instruction *, 4> ToRemoves;
1343
1344 // Walk the users of the function.
1345 for (auto &U : F->uses()) {
1346 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1347 // The value to store.
1348 auto Arg0 = CI->getOperand(0);
1349
1350 // The index argument from vstore_half.
1351 auto Arg1 = CI->getOperand(1);
1352
1353 // The pointer argument from vstore_half.
1354 auto Arg2 = CI->getOperand(2);
1355
1356 auto IntTy = Type::getInt32Ty(M.getContext());
1357 auto Int2Ty = VectorType::get(IntTy, 2);
1358 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1359 auto NewPointerTy = PointerType::get(
1360 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1361 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1362
1363 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1364 ConstantInt::get(IntTy, 1)};
1365
1366 // Extract out the x & y components of our to store value.
1367 auto Lo =
1368 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1369 ConstantVector::get(LoShuffleMask), "", CI);
1370
1371 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1372 ConstantInt::get(IntTy, 3)};
1373
1374 // Extract out the z & w components of our to store value.
1375 auto Hi =
1376 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1377 ConstantVector::get(HiShuffleMask), "", CI);
1378
1379 // Our intrinsic to pack a float2 to an int.
1380 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1381
1382 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1383
1384 // Turn the packed x & y into the final component of our int2.
1385 auto X = CallInst::Create(NewF, Lo, "", CI);
1386
1387 // Turn the packed z & w into the final component of our int2.
1388 auto Y = CallInst::Create(NewF, Hi, "", CI);
1389
1390 auto Combine = InsertElementInst::Create(
1391 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1392 Combine = InsertElementInst::Create(
1393 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1394
1395 // Cast the half* pointer to int2*.
1396 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1397
1398 // Index into the correct address of the casted pointer.
1399 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1400
1401 // Store to the int2* we casted to.
1402 auto Store = new StoreInst(Combine, Index, CI);
1403
1404 CI->replaceAllUsesWith(Store);
1405
1406 // Lastly, remember to remove the user.
1407 ToRemoves.push_back(CI);
1408 }
1409 }
1410
1411 Changed = !ToRemoves.empty();
1412
1413 // And cleanup the calls we don't use anymore.
1414 for (auto V : ToRemoves) {
1415 V->eraseFromParent();
1416 }
1417
1418 // And remove the function we don't need either too.
1419 F->eraseFromParent();
1420 }
1421 }
1422
1423 return Changed;
1424}
1425
1426bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1427 bool Changed = false;
1428
1429 const std::map<const char *, const char*> Map = {
1430 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1431 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1432 };
1433
1434 for (auto Pair : Map) {
1435 // If we find a function with the matching name.
1436 if (auto F = M.getFunction(Pair.first)) {
1437 SmallVector<Instruction *, 4> ToRemoves;
1438
1439 // Walk the users of the function.
1440 for (auto &U : F->uses()) {
1441 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1442 // The image.
1443 auto Arg0 = CI->getOperand(0);
1444
1445 // The sampler.
1446 auto Arg1 = CI->getOperand(1);
1447
1448 // The coordinate (integer type that we can't handle).
1449 auto Arg2 = CI->getOperand(2);
1450
1451 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1452
1453 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1454
1455 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1456
1457 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1458
1459 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1460
1461 CI->replaceAllUsesWith(NewCI);
1462
1463 // Lastly, remember to remove the user.
1464 ToRemoves.push_back(CI);
1465 }
1466 }
1467
1468 Changed = !ToRemoves.empty();
1469
1470 // And cleanup the calls we don't use anymore.
1471 for (auto V : ToRemoves) {
1472 V->eraseFromParent();
1473 }
1474
1475 // And remove the function we don't need either too.
1476 F->eraseFromParent();
1477 }
1478 }
1479
1480 return Changed;
1481}
1482
1483bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1484 bool Changed = false;
1485
1486 const std::map<const char *, const char *> Map = {
1487 {"_Z10atomic_addPU3AS1Vii", "spirv.atomic_add"},
1488 {"_Z10atomic_addPU3AS1Vjj", "spirv.atomic_add"},
1489 {"_Z10atomic_subPU3AS1Vii", "spirv.atomic_sub"},
1490 {"_Z10atomic_subPU3AS1Vjj", "spirv.atomic_sub"},
1491 {"_Z11atomic_xchgPU3AS1Vii", "spirv.atomic_exchange"},
1492 {"_Z11atomic_xchgPU3AS1Vjj", "spirv.atomic_exchange"},
1493 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1494 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1495 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1496 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1497 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
1498 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
1499 {"_Z10atomic_minPU3AS1Vii", "spirv.atomic_smin"},
1500 {"_Z10atomic_minPU3AS1Vjj", "spirv.atomic_umin"},
1501 {"_Z10atomic_maxPU3AS1Vii", "spirv.atomic_smax"},
1502 {"_Z10atomic_maxPU3AS1Vjj", "spirv.atomic_umax"},
1503 {"_Z10atomic_andPU3AS1Vii", "spirv.atomic_and"},
1504 {"_Z10atomic_andPU3AS1Vjj", "spirv.atomic_and"},
1505 {"_Z9atomic_orPU3AS1Vii", "spirv.atomic_or"},
1506 {"_Z9atomic_orPU3AS1Vjj", "spirv.atomic_or"},
1507 {"_Z10atomic_xorPU3AS1Vii", "spirv.atomic_xor"},
1508 {"_Z10atomic_xorPU3AS1Vjj", "spirv.atomic_xor"}};
1509
1510 for (auto Pair : Map) {
1511 // If we find a function with the matching name.
1512 if (auto F = M.getFunction(Pair.first)) {
1513 SmallVector<Instruction *, 4> ToRemoves;
1514
1515 // Walk the users of the function.
1516 for (auto &U : F->uses()) {
1517 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1518 auto FType = F->getFunctionType();
1519 SmallVector<Type *, 5> ParamTypes;
1520
1521 // The pointer type.
1522 ParamTypes.push_back(FType->getParamType(0));
1523
1524 auto IntTy = Type::getInt32Ty(M.getContext());
1525
1526 // The memory scope type.
1527 ParamTypes.push_back(IntTy);
1528
1529 // The memory semantics type.
1530 ParamTypes.push_back(IntTy);
1531
1532 if (2 < CI->getNumArgOperands()) {
1533 // The unequal memory semantics type.
1534 ParamTypes.push_back(IntTy);
1535
1536 // The value type.
1537 ParamTypes.push_back(FType->getParamType(2));
1538
1539 // The comparator type.
1540 ParamTypes.push_back(FType->getParamType(1));
1541 } else if (1 < CI->getNumArgOperands()) {
1542 // The value type.
1543 ParamTypes.push_back(FType->getParamType(1));
1544 }
1545
1546 auto NewFType =
1547 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1548 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1549
1550 // We need to map the OpenCL constants to the SPIR-V equivalents.
1551 const auto ConstantScopeDevice =
1552 ConstantInt::get(IntTy, spv::ScopeDevice);
1553 const auto ConstantMemorySemantics = ConstantInt::get(
1554 IntTy, spv::MemorySemanticsUniformMemoryMask |
1555 spv::MemorySemanticsSequentiallyConsistentMask);
1556
1557 SmallVector<Value *, 5> Params;
1558
1559 // The pointer.
1560 Params.push_back(CI->getArgOperand(0));
1561
1562 // The memory scope.
1563 Params.push_back(ConstantScopeDevice);
1564
1565 // The memory semantics.
1566 Params.push_back(ConstantMemorySemantics);
1567
1568 if (2 < CI->getNumArgOperands()) {
1569 // The unequal memory semantics.
1570 Params.push_back(ConstantMemorySemantics);
1571
1572 // The value.
1573 Params.push_back(CI->getArgOperand(2));
1574
1575 // The comparator.
1576 Params.push_back(CI->getArgOperand(1));
1577 } else if (1 < CI->getNumArgOperands()) {
1578 // The value.
1579 Params.push_back(CI->getArgOperand(1));
1580 }
1581
1582 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1583
1584 CI->replaceAllUsesWith(NewCI);
1585
1586 // Lastly, remember to remove the user.
1587 ToRemoves.push_back(CI);
1588 }
1589 }
1590
1591 Changed = !ToRemoves.empty();
1592
1593 // And cleanup the calls we don't use anymore.
1594 for (auto V : ToRemoves) {
1595 V->eraseFromParent();
1596 }
1597
1598 // And remove the function we don't need either too.
1599 F->eraseFromParent();
1600 }
1601 }
1602
1603 return Changed;
1604}
1605
1606bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1607 bool Changed = false;
1608
1609 // If we find a function with the matching name.
1610 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1611 SmallVector<Instruction *, 4> ToRemoves;
1612
1613 auto IntTy = Type::getInt32Ty(M.getContext());
1614 auto FloatTy = Type::getFloatTy(M.getContext());
1615
1616 Constant *DownShuffleMask[3] = {
1617 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1618 ConstantInt::get(IntTy, 2)};
1619
1620 Constant *UpShuffleMask[4] = {
1621 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1622 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1623
1624 Constant *FloatVec[3] = {
1625 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1626 };
1627
1628 // Walk the users of the function.
1629 for (auto &U : F->uses()) {
1630 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1631 auto Vec4Ty = CI->getArgOperand(0)->getType();
1632 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1633 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1634 auto Vec3Ty = Arg0->getType();
1635
1636 auto NewFType =
1637 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1638
1639 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1640
1641 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1642
1643 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1644
1645 CI->replaceAllUsesWith(Result);
1646
1647 // Lastly, remember to remove the user.
1648 ToRemoves.push_back(CI);
1649 }
1650 }
1651
1652 Changed = !ToRemoves.empty();
1653
1654 // And cleanup the calls we don't use anymore.
1655 for (auto V : ToRemoves) {
1656 V->eraseFromParent();
1657 }
1658
1659 // And remove the function we don't need either too.
1660 F->eraseFromParent();
1661 }
1662
1663 return Changed;
1664}