blob: 8363c9dae3ef0223654b3ae7abc1e7fa6cdd6453 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto22f144c2017-06-12 14:26:21 -040019#include <llvm/IR/Constants.h>
20#include <llvm/IR/Instructions.h>
David Neto62653202017-10-16 19:05:18 -040021#include <llvm/IR/IRBuilder.h>
David Neto22f144c2017-06-12 14:26:21 -040022#include <llvm/IR/Module.h>
23#include <llvm/Pass.h>
David Neto17852de2017-05-29 17:29:31 -040024#include <llvm/Support/CommandLine.h>
David Neto22f144c2017-06-12 14:26:21 -040025#include <llvm/Support/raw_ostream.h>
26#include <llvm/Transforms/Utils/Cloning.h>
27
28#include <spirv/1.0/spirv.hpp>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
33
David Netoac825b82017-05-30 12:49:01 -040034// TODO(dneto): As per Neil's suggestion, might not need this if you can
35// trace the pointer back far enough to see that it's 32-bit aligned.
36// However, even in the vstore_half case, you'll probably get better
37// performance if you can rely on SPV_KHR_16bit_storage since in the
38// alternate case you're using a (relaxed) atomic, and therefore have
39// to write through to the cache.
David Neto17852de2017-05-29 17:29:31 -040040static llvm::cl::opt<bool> f16bit_storage(
41 "f16bit_storage", llvm::cl::init(false),
42 llvm::cl::desc("Assume the target supports SPV_KHR_16bit_storage"));
43
David Neto22f144c2017-06-12 14:26:21 -040044namespace {
// NOTE: despite its name, this does NOT count leading zeros.  It returns the
// zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)), and returns 0 when |v| is 0.  Callers subtract two results
// to obtain the shift distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the top set bit: whenever the upper |width| bits hold
  // anything, discard the lower |width| bits and record |width| in the index.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      index |= width;
    }
  }

  return index;
}
64
65Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
66 if (1 == elements) {
67 return Type::getInt1Ty(C);
68 } else {
69 return VectorType::get(Type::getInt1Ty(C), elements);
70 }
71}
72
73struct ReplaceOpenCLBuiltinPass final : public ModulePass {
74 static char ID;
75 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
76
77 bool runOnModule(Module &M) override;
78 bool replaceRecip(Module &M);
79 bool replaceDivide(Module &M);
80 bool replaceExp10(Module &M);
81 bool replaceLog10(Module &M);
82 bool replaceBarrier(Module &M);
83 bool replaceMemFence(Module &M);
84 bool replaceRelational(Module &M);
85 bool replaceIsInfAndIsNan(Module &M);
86 bool replaceAllAndAny(Module &M);
87 bool replaceSignbit(Module &M);
88 bool replaceMadandMad24andMul24(Module &M);
89 bool replaceVloadHalf(Module &M);
90 bool replaceVloadHalf2(Module &M);
91 bool replaceVloadHalf4(Module &M);
92 bool replaceVstoreHalf(Module &M);
93 bool replaceVstoreHalf2(Module &M);
94 bool replaceVstoreHalf4(Module &M);
95 bool replaceReadImageF(Module &M);
96 bool replaceAtomics(Module &M);
97 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040098 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070099 bool replaceVload(Module &M);
100 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400101};
102}
103
104char ReplaceOpenCLBuiltinPass::ID = 0;
105static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
106 "Replace OpenCL Builtins Pass");
107
108namespace clspv {
109ModulePass *createReplaceOpenCLBuiltinPass() {
110 return new ReplaceOpenCLBuiltinPass();
111}
112}
113
114bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
115 bool Changed = false;
116
117 Changed |= replaceRecip(M);
118 Changed |= replaceDivide(M);
119 Changed |= replaceExp10(M);
120 Changed |= replaceLog10(M);
121 Changed |= replaceBarrier(M);
122 Changed |= replaceMemFence(M);
123 Changed |= replaceRelational(M);
124 Changed |= replaceIsInfAndIsNan(M);
125 Changed |= replaceAllAndAny(M);
126 Changed |= replaceSignbit(M);
127 Changed |= replaceMadandMad24andMul24(M);
128 Changed |= replaceVloadHalf(M);
129 Changed |= replaceVloadHalf2(M);
130 Changed |= replaceVloadHalf4(M);
131 Changed |= replaceVstoreHalf(M);
132 Changed |= replaceVstoreHalf2(M);
133 Changed |= replaceVstoreHalf4(M);
134 Changed |= replaceReadImageF(M);
135 Changed |= replaceAtomics(M);
136 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400137 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700138 Changed |= replaceVload(M);
139 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400140
141 return Changed;
142}
143
144bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
145 bool Changed = false;
146
147 const char *Names[] = {
148 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
149 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
150 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
151 };
152
153 for (auto Name : Names) {
154 // If we find a function with the matching name.
155 if (auto F = M.getFunction(Name)) {
156 SmallVector<Instruction *, 4> ToRemoves;
157
158 // Walk the users of the function.
159 for (auto &U : F->uses()) {
160 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
161 // Recip has one arg.
162 auto Arg = CI->getOperand(0);
163
164 auto Div = BinaryOperator::Create(
165 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
166 CI);
167
168 CI->replaceAllUsesWith(Div);
169
170 // Lastly, remember to remove the user.
171 ToRemoves.push_back(CI);
172 }
173 }
174
175 Changed = !ToRemoves.empty();
176
177 // And cleanup the calls we don't use anymore.
178 for (auto V : ToRemoves) {
179 V->eraseFromParent();
180 }
181
182 // And remove the function we don't need either too.
183 F->eraseFromParent();
184 }
185 }
186
187 return Changed;
188}
189
190bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
191 bool Changed = false;
192
193 const char *Names[] = {
194 "_Z11half_divideff", "_Z13native_divideff",
195 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
196 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
197 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
198 };
199
200 for (auto Name : Names) {
201 // If we find a function with the matching name.
202 if (auto F = M.getFunction(Name)) {
203 SmallVector<Instruction *, 4> ToRemoves;
204
205 // Walk the users of the function.
206 for (auto &U : F->uses()) {
207 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
208 auto Div = BinaryOperator::Create(
209 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
210
211 CI->replaceAllUsesWith(Div);
212
213 // Lastly, remember to remove the user.
214 ToRemoves.push_back(CI);
215 }
216 }
217
218 Changed = !ToRemoves.empty();
219
220 // And cleanup the calls we don't use anymore.
221 for (auto V : ToRemoves) {
222 V->eraseFromParent();
223 }
224
225 // And remove the function we don't need either too.
226 F->eraseFromParent();
227 }
228 }
229
230 return Changed;
231}
232
233bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
234 bool Changed = false;
235
236 const std::map<const char *, const char *> Map = {
237 {"_Z5exp10f", "_Z3expf"},
238 {"_Z10half_exp10f", "_Z8half_expf"},
239 {"_Z12native_exp10f", "_Z10native_expf"},
240 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
241 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
242 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
243 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
244 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
245 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
246 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
247 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
248 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
249
250 for (auto Pair : Map) {
251 // If we find a function with the matching name.
252 if (auto F = M.getFunction(Pair.first)) {
253 SmallVector<Instruction *, 4> ToRemoves;
254
255 // Walk the users of the function.
256 for (auto &U : F->uses()) {
257 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
258 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
259
260 auto Arg = CI->getOperand(0);
261
262 // Constant of the natural log of 10 (ln(10)).
263 const double Ln10 =
264 2.302585092994045684017991454684364207601101488628772976033;
265
266 auto Mul = BinaryOperator::Create(
267 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
268 CI);
269
270 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
271
272 CI->replaceAllUsesWith(NewCI);
273
274 // Lastly, remember to remove the user.
275 ToRemoves.push_back(CI);
276 }
277 }
278
279 Changed = !ToRemoves.empty();
280
281 // And cleanup the calls we don't use anymore.
282 for (auto V : ToRemoves) {
283 V->eraseFromParent();
284 }
285
286 // And remove the function we don't need either too.
287 F->eraseFromParent();
288 }
289 }
290
291 return Changed;
292}
293
294bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
295 bool Changed = false;
296
297 const std::map<const char *, const char *> Map = {
298 {"_Z5log10f", "_Z3logf"},
299 {"_Z10half_log10f", "_Z8half_logf"},
300 {"_Z12native_log10f", "_Z10native_logf"},
301 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
302 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
303 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
304 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
305 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
306 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
307 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
308 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
309 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
310
311 for (auto Pair : Map) {
312 // If we find a function with the matching name.
313 if (auto F = M.getFunction(Pair.first)) {
314 SmallVector<Instruction *, 4> ToRemoves;
315
316 // Walk the users of the function.
317 for (auto &U : F->uses()) {
318 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
319 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
320
321 auto Arg = CI->getOperand(0);
322
323 // Constant of the reciprocal of the natural log of 10 (ln(10)).
324 const double Ln10 =
325 0.434294481903251827651128918916605082294397005803666566114;
326
327 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
328
329 auto Mul = BinaryOperator::Create(
330 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
331 "", CI);
332
333 CI->replaceAllUsesWith(Mul);
334
335 // Lastly, remember to remove the user.
336 ToRemoves.push_back(CI);
337 }
338 }
339
340 Changed = !ToRemoves.empty();
341
342 // And cleanup the calls we don't use anymore.
343 for (auto V : ToRemoves) {
344 V->eraseFromParent();
345 }
346
347 // And remove the function we don't need either too.
348 F->eraseFromParent();
349 }
350 }
351
352 return Changed;
353}
354
355bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
356 bool Changed = false;
357
358 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
359
360 const std::map<const char *, const char *> Map = {
361 {"_Z7barrierj", "__spirv_control_barrier"}};
362
363 for (auto Pair : Map) {
364 // If we find a function with the matching name.
365 if (auto F = M.getFunction(Pair.first)) {
366 SmallVector<Instruction *, 4> ToRemoves;
367
368 // Walk the users of the function.
369 for (auto &U : F->uses()) {
370 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
371 auto FType = F->getFunctionType();
372 SmallVector<Type *, 3> Params;
373 for (unsigned i = 0; i < 3; i++) {
374 Params.push_back(FType->getParamType(0));
375 }
376 auto NewFType =
377 FunctionType::get(FType->getReturnType(), Params, false);
378 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
379
380 auto Arg = CI->getOperand(0);
381
382 // We need to map the OpenCL constants to the SPIR-V equivalents.
383 const auto LocalMemFence =
384 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
385 const auto GlobalMemFence =
386 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
387 const auto ConstantSequentiallyConsistent = ConstantInt::get(
388 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
389 const auto ConstantScopeDevice =
390 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
391 const auto ConstantScopeWorkgroup =
392 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
393
394 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
395 const auto LocalMemFenceMask = BinaryOperator::Create(
396 Instruction::And, LocalMemFence, Arg, "", CI);
397 const auto WorkgroupShiftAmount =
398 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
399 clz(CLK_LOCAL_MEM_FENCE);
400 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
401 Instruction::Shl, LocalMemFenceMask,
402 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
403
404 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
405 const auto GlobalMemFenceMask = BinaryOperator::Create(
406 Instruction::And, GlobalMemFence, Arg, "", CI);
407 const auto UniformShiftAmount =
408 clz(spv::MemorySemanticsUniformMemoryMask) -
409 clz(CLK_GLOBAL_MEM_FENCE);
410 const auto MemorySemanticsUniform = BinaryOperator::Create(
411 Instruction::Shl, GlobalMemFenceMask,
412 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
413
414 // And combine the above together, also adding in
415 // MemorySemanticsSequentiallyConsistentMask.
416 auto MemorySemantics =
417 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
418 ConstantSequentiallyConsistent, "", CI);
419 MemorySemantics = BinaryOperator::Create(
420 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
421
422 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
423 // Device Scope, otherwise Workgroup Scope.
424 const auto Cmp =
425 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
426 GlobalMemFenceMask, GlobalMemFence, "", CI);
427 const auto MemoryScope = SelectInst::Create(
428 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
429
430 // Lastly, the Execution Scope is always Workgroup Scope.
431 const auto ExecutionScope = ConstantScopeWorkgroup;
432
433 auto NewCI = CallInst::Create(
434 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
435
436 CI->replaceAllUsesWith(NewCI);
437
438 // Lastly, remember to remove the user.
439 ToRemoves.push_back(CI);
440 }
441 }
442
443 Changed = !ToRemoves.empty();
444
445 // And cleanup the calls we don't use anymore.
446 for (auto V : ToRemoves) {
447 V->eraseFromParent();
448 }
449
450 // And remove the function we don't need either too.
451 F->eraseFromParent();
452 }
453 }
454
455 return Changed;
456}
457
458bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
459 bool Changed = false;
460
461 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
462
Neil Henning39672102017-09-29 14:33:13 +0100463 using Tuple = std::tuple<const char *, unsigned>;
464 const std::map<const char *, Tuple> Map = {
465 {"_Z9mem_fencej",
466 Tuple("__spirv_memory_barrier",
467 spv::MemorySemanticsSequentiallyConsistentMask)},
468 {"_Z14read_mem_fencej",
469 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
470 {"_Z15write_mem_fencej",
471 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400472
473 for (auto Pair : Map) {
474 // If we find a function with the matching name.
475 if (auto F = M.getFunction(Pair.first)) {
476 SmallVector<Instruction *, 4> ToRemoves;
477
478 // Walk the users of the function.
479 for (auto &U : F->uses()) {
480 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
481 auto FType = F->getFunctionType();
482 SmallVector<Type *, 2> Params;
483 for (unsigned i = 0; i < 2; i++) {
484 Params.push_back(FType->getParamType(0));
485 }
486 auto NewFType =
487 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100488 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400489
490 auto Arg = CI->getOperand(0);
491
492 // We need to map the OpenCL constants to the SPIR-V equivalents.
493 const auto LocalMemFence =
494 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
495 const auto GlobalMemFence =
496 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
497 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100498 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400499 const auto ConstantScopeDevice =
500 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
501
502 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
503 const auto LocalMemFenceMask = BinaryOperator::Create(
504 Instruction::And, LocalMemFence, Arg, "", CI);
505 const auto WorkgroupShiftAmount =
506 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
507 clz(CLK_LOCAL_MEM_FENCE);
508 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
509 Instruction::Shl, LocalMemFenceMask,
510 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
511
512 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
513 const auto GlobalMemFenceMask = BinaryOperator::Create(
514 Instruction::And, GlobalMemFence, Arg, "", CI);
515 const auto UniformShiftAmount =
516 clz(spv::MemorySemanticsUniformMemoryMask) -
517 clz(CLK_GLOBAL_MEM_FENCE);
518 const auto MemorySemanticsUniform = BinaryOperator::Create(
519 Instruction::Shl, GlobalMemFenceMask,
520 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
521
522 // And combine the above together, also adding in
523 // MemorySemanticsSequentiallyConsistentMask.
524 auto MemorySemantics =
525 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
526 ConstantMemorySemantics, "", CI);
527 MemorySemantics = BinaryOperator::Create(
528 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
529
530 // Memory Scope is always device.
531 const auto MemoryScope = ConstantScopeDevice;
532
533 auto NewCI =
534 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
535
536 CI->replaceAllUsesWith(NewCI);
537
538 // Lastly, remember to remove the user.
539 ToRemoves.push_back(CI);
540 }
541 }
542
543 Changed = !ToRemoves.empty();
544
545 // And cleanup the calls we don't use anymore.
546 for (auto V : ToRemoves) {
547 V->eraseFromParent();
548 }
549
550 // And remove the function we don't need either too.
551 F->eraseFromParent();
552 }
553 }
554
555 return Changed;
556}
557
558bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
559 bool Changed = false;
560
561 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
562 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
563 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
564 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
565 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
566 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
567 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
568 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
569 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
570 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
571 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
572 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
573 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
574 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
575 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
576 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
577 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
578 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
579 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
580 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
581 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
582 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
583 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
584 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
585 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
586 };
587
588 for (auto Pair : Map) {
589 // If we find a function with the matching name.
590 if (auto F = M.getFunction(Pair.first)) {
591 SmallVector<Instruction *, 4> ToRemoves;
592
593 // Walk the users of the function.
594 for (auto &U : F->uses()) {
595 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
596 // The predicate to use in the CmpInst.
597 auto Predicate = Pair.second.first;
598
599 // The value to return for true.
600 auto TrueValue =
601 ConstantInt::getSigned(CI->getType(), Pair.second.second);
602
603 // The value to return for false.
604 auto FalseValue = Constant::getNullValue(CI->getType());
605
606 auto Arg1 = CI->getOperand(0);
607 auto Arg2 = CI->getOperand(1);
608
609 const auto Cmp =
610 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
611
612 const auto Select =
613 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
614
615 CI->replaceAllUsesWith(Select);
616
617 // Lastly, remember to remove the user.
618 ToRemoves.push_back(CI);
619 }
620 }
621
622 Changed = !ToRemoves.empty();
623
624 // And cleanup the calls we don't use anymore.
625 for (auto V : ToRemoves) {
626 V->eraseFromParent();
627 }
628
629 // And remove the function we don't need either too.
630 F->eraseFromParent();
631 }
632 }
633
634 return Changed;
635}
636
637bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
638 bool Changed = false;
639
640 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
641 {"_Z5isinff", {"__spirv_isinff", 1}},
642 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
643 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
644 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
645 {"_Z5isnanf", {"__spirv_isnanf", 1}},
646 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
647 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
648 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
649 };
650
651 for (auto Pair : Map) {
652 // If we find a function with the matching name.
653 if (auto F = M.getFunction(Pair.first)) {
654 SmallVector<Instruction *, 4> ToRemoves;
655
656 // Walk the users of the function.
657 for (auto &U : F->uses()) {
658 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
659 const auto CITy = CI->getType();
660
661 // The fake SPIR-V intrinsic to generate.
662 auto SPIRVIntrinsic = Pair.second.first;
663
664 // The value to return for true.
665 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
666
667 // The value to return for false.
668 auto FalseValue = Constant::getNullValue(CITy);
669
670 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
671 M.getContext(),
672 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
673
674 auto NewFType =
675 FunctionType::get(CorrespondingBoolTy,
676 F->getFunctionType()->getParamType(0), false);
677
678 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
679
680 auto Arg = CI->getOperand(0);
681
682 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
683
684 const auto Select =
685 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
686
687 CI->replaceAllUsesWith(Select);
688
689 // Lastly, remember to remove the user.
690 ToRemoves.push_back(CI);
691 }
692 }
693
694 Changed = !ToRemoves.empty();
695
696 // And cleanup the calls we don't use anymore.
697 for (auto V : ToRemoves) {
698 V->eraseFromParent();
699 }
700
701 // And remove the function we don't need either too.
702 F->eraseFromParent();
703 }
704 }
705
706 return Changed;
707}
708
709bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
710 bool Changed = false;
711
712 const std::map<const char *, const char *> Map = {
713 {"_Z3alli", ""},
714 {"_Z3allDv2_i", "__spirv_allDv2_i"},
715 {"_Z3allDv3_i", "__spirv_allDv3_i"},
716 {"_Z3allDv4_i", "__spirv_allDv4_i"},
717 {"_Z3anyi", ""},
718 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
719 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
720 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
721 };
722
723 for (auto Pair : Map) {
724 // If we find a function with the matching name.
725 if (auto F = M.getFunction(Pair.first)) {
726 SmallVector<Instruction *, 4> ToRemoves;
727
728 // Walk the users of the function.
729 for (auto &U : F->uses()) {
730 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
731 // The fake SPIR-V intrinsic to generate.
732 auto SPIRVIntrinsic = Pair.second;
733
734 auto Arg = CI->getOperand(0);
735
736 Value *V;
737
738 // If we have a function to call, call it!
739 if (0 < strlen(SPIRVIntrinsic)) {
740 // The value for zero to compare against.
741 const auto ZeroValue = Constant::getNullValue(Arg->getType());
742
743 const auto Cmp = CmpInst::Create(
744 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
745 const auto NewFType = FunctionType::get(
746 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
747
748 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
749
750 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
751
752 // The value to return for true.
753 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
754
755 // The value to return for false.
756 const auto FalseValue = Constant::getNullValue(CI->getType());
757
758 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
759 } else {
760 V = BinaryOperator::Create(Instruction::LShr, Arg,
761 ConstantInt::get(CI->getType(), 31), "",
762 CI);
763 }
764
765 CI->replaceAllUsesWith(V);
766
767 // Lastly, remember to remove the user.
768 ToRemoves.push_back(CI);
769 }
770 }
771
772 Changed = !ToRemoves.empty();
773
774 // And cleanup the calls we don't use anymore.
775 for (auto V : ToRemoves) {
776 V->eraseFromParent();
777 }
778
779 // And remove the function we don't need either too.
780 F->eraseFromParent();
781 }
782 }
783
784 return Changed;
785}
786
787bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
788 bool Changed = false;
789
790 const std::map<const char *, Instruction::BinaryOps> Map = {
791 {"_Z7signbitf", Instruction::LShr},
792 {"_Z7signbitDv2_f", Instruction::AShr},
793 {"_Z7signbitDv3_f", Instruction::AShr},
794 {"_Z7signbitDv4_f", Instruction::AShr},
795 };
796
797 for (auto Pair : Map) {
798 // If we find a function with the matching name.
799 if (auto F = M.getFunction(Pair.first)) {
800 SmallVector<Instruction *, 4> ToRemoves;
801
802 // Walk the users of the function.
803 for (auto &U : F->uses()) {
804 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
805 auto Arg = CI->getOperand(0);
806
807 auto Bitcast =
808 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
809
810 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
811 ConstantInt::get(CI->getType(), 31),
812 "", CI);
813
814 CI->replaceAllUsesWith(Shr);
815
816 // Lastly, remember to remove the user.
817 ToRemoves.push_back(CI);
818 }
819 }
820
821 Changed = !ToRemoves.empty();
822
823 // And cleanup the calls we don't use anymore.
824 for (auto V : ToRemoves) {
825 V->eraseFromParent();
826 }
827
828 // And remove the function we don't need either too.
829 F->eraseFromParent();
830 }
831 }
832
833 return Changed;
834}
835
836bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
837 bool Changed = false;
838
839 const std::map<const char *,
840 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
841 Map = {
842 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
843 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
844 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
845 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
846 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
847 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
848 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
849 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
850 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
851 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
852 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
853 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
854 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
855 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
856 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
857 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
858 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
859 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
860 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
861 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
862 };
863
864 for (auto Pair : Map) {
865 // If we find a function with the matching name.
866 if (auto F = M.getFunction(Pair.first)) {
867 SmallVector<Instruction *, 4> ToRemoves;
868
869 // Walk the users of the function.
870 for (auto &U : F->uses()) {
871 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
872 // The multiply instruction to use.
873 auto MulInst = Pair.second.first;
874
875 // The add instruction to use.
876 auto AddInst = Pair.second.second;
877
878 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
879
880 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
881 CI->getArgOperand(1), "", CI);
882
883 if (Instruction::BinaryOpsEnd != AddInst) {
884 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
885 CI);
886 }
887
888 CI->replaceAllUsesWith(I);
889
890 // Lastly, remember to remove the user.
891 ToRemoves.push_back(CI);
892 }
893 }
894
895 Changed = !ToRemoves.empty();
896
897 // And cleanup the calls we don't use anymore.
898 for (auto V : ToRemoves) {
899 V->eraseFromParent();
900 }
901
902 // And remove the function we don't need either too.
903 F->eraseFromParent();
904 }
905 }
906
907 return Changed;
908}
909
Derek Chowcfd368b2017-10-19 20:58:45 -0700910bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
911 bool Changed = false;
912
913 struct VectorStoreOps {
914 const char* name;
915 int n;
916 Type* (*get_scalar_type_function)(LLVMContext&);
917 } vector_store_ops[] = {
918 // TODO(derekjchow): Expand this list.
919 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
920 };
921
David Neto544fffc2017-11-16 18:35:14 -0500922 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700923 auto Name = Op.name;
924 auto N = Op.n;
925 auto TypeFn = Op.get_scalar_type_function;
926 if (auto F = M.getFunction(Name)) {
927 SmallVector<Instruction *, 4> ToRemoves;
928
929 // Walk the users of the function.
930 for (auto &U : F->uses()) {
931 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
932 // The value argument from vstoren.
933 auto Arg0 = CI->getOperand(0);
934
935 // The index argument from vstoren.
936 auto Arg1 = CI->getOperand(1);
937
938 // The pointer argument from vstoren.
939 auto Arg2 = CI->getOperand(2);
940
941 // Get types.
942 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
943 auto ScalarNPointerTy = PointerType::get(
944 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
945
946 // Cast to scalarn
947 auto Cast = CastInst::CreatePointerCast(
948 Arg2, ScalarNPointerTy, "", CI);
949 // Index to correct address
950 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
951 // Store
952 auto Store = new StoreInst(Arg0, Index, CI);
953
954 CI->replaceAllUsesWith(Store);
955 ToRemoves.push_back(CI);
956 }
957 }
958
959 Changed = !ToRemoves.empty();
960
961 // And cleanup the calls we don't use anymore.
962 for (auto V : ToRemoves) {
963 V->eraseFromParent();
964 }
965
966 // And remove the function we don't need either too.
967 F->eraseFromParent();
968 }
969 }
970
971 return Changed;
972}
973
974bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
975 bool Changed = false;
976
977 struct VectorLoadOps {
978 const char* name;
979 int n;
980 Type* (*get_scalar_type_function)(LLVMContext&);
981 } vector_load_ops[] = {
982 // TODO(derekjchow): Expand this list.
983 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
984 };
985
David Neto544fffc2017-11-16 18:35:14 -0500986 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700987 auto Name = Op.name;
988 auto N = Op.n;
989 auto TypeFn = Op.get_scalar_type_function;
990 // If we find a function with the matching name.
991 if (auto F = M.getFunction(Name)) {
992 SmallVector<Instruction *, 4> ToRemoves;
993
994 // Walk the users of the function.
995 for (auto &U : F->uses()) {
996 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
997 // The index argument from vloadn.
998 auto Arg0 = CI->getOperand(0);
999
1000 // The pointer argument from vloadn.
1001 auto Arg1 = CI->getOperand(1);
1002
1003 // Get types.
1004 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1005 auto ScalarNPointerTy = PointerType::get(
1006 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1007
1008 // Cast to scalarn
1009 auto Cast = CastInst::CreatePointerCast(
1010 Arg1, ScalarNPointerTy, "", CI);
1011 // Index to correct address
1012 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1013 // Load
1014 auto Load = new LoadInst(Index, "", CI);
1015
1016 CI->replaceAllUsesWith(Load);
1017 ToRemoves.push_back(CI);
1018 }
1019 }
1020
1021 Changed = !ToRemoves.empty();
1022
1023 // And cleanup the calls we don't use anymore.
1024 for (auto V : ToRemoves) {
1025 V->eraseFromParent();
1026 }
1027
1028 // And remove the function we don't need either too.
1029 F->eraseFromParent();
1030
1031 }
1032 }
1033
1034 return Changed;
1035}
1036
David Neto22f144c2017-06-12 14:26:21 -04001037bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1038 bool Changed = false;
1039
1040 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1041 "_Z10vload_halfjPU3AS2KDh"};
1042
1043 for (auto Name : Map) {
1044 // If we find a function with the matching name.
1045 if (auto F = M.getFunction(Name)) {
1046 SmallVector<Instruction *, 4> ToRemoves;
1047
1048 // Walk the users of the function.
1049 for (auto &U : F->uses()) {
1050 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1051 // The index argument from vload_half.
1052 auto Arg0 = CI->getOperand(0);
1053
1054 // The pointer argument from vload_half.
1055 auto Arg1 = CI->getOperand(1);
1056
David Neto22f144c2017-06-12 14:26:21 -04001057 auto IntTy = Type::getInt32Ty(M.getContext());
1058 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001059 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1060
David Neto22f144c2017-06-12 14:26:21 -04001061 // Our intrinsic to unpack a float2 from an int.
1062 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1063
1064 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1065
David Netoac825b82017-05-30 12:49:01 -04001066 if (f16bit_storage) {
1067 auto ShortTy = Type::getInt16Ty(M.getContext());
1068 auto ShortPointerTy = PointerType::get(
1069 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001070
David Netoac825b82017-05-30 12:49:01 -04001071 // Cast the half* pointer to short*.
1072 auto Cast =
1073 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001074
David Netoac825b82017-05-30 12:49:01 -04001075 // Index into the correct address of the casted pointer.
1076 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1077
1078 // Load from the short* we casted to.
1079 auto Load = new LoadInst(Index, "", CI);
1080
1081 // ZExt the short -> int.
1082 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1083
1084 // Get our float2.
1085 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1086
1087 // Extract out the bottom element which is our float result.
1088 auto Extract = ExtractElementInst::Create(
1089 Call, ConstantInt::get(IntTy, 0), "", CI);
1090
1091 CI->replaceAllUsesWith(Extract);
1092 } else {
1093 // Assume the pointer argument points to storage aligned to 32bits
1094 // or more.
1095 // TODO(dneto): Do more analysis to make sure this is true?
1096 //
1097 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1098 // with:
1099 //
1100 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1101 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1102 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1103 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1104 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1105 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1106 // x float> %converted, %index_is_odd32
1107
1108 auto IntPointerTy = PointerType::get(
1109 IntTy, Arg1->getType()->getPointerAddressSpace());
1110
David Neto973e6a82017-05-30 13:48:18 -04001111 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001112 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001113 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001114 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1115
1116 auto One = ConstantInt::get(IntTy, 1);
1117 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1118 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1119
1120 // Index into the correct address of the casted pointer.
1121 auto Ptr =
1122 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1123
1124 // Load from the int* we casted to.
1125 auto Load = new LoadInst(Ptr, "", CI);
1126
1127 // Get our float2.
1128 auto Call = CallInst::Create(NewF, Load, "", CI);
1129
1130 // Extract out the float result, where the element number is
1131 // determined by whether the original index was even or odd.
1132 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1133
1134 CI->replaceAllUsesWith(Extract);
1135 }
David Neto22f144c2017-06-12 14:26:21 -04001136
1137 // Lastly, remember to remove the user.
1138 ToRemoves.push_back(CI);
1139 }
1140 }
1141
1142 Changed = !ToRemoves.empty();
1143
1144 // And cleanup the calls we don't use anymore.
1145 for (auto V : ToRemoves) {
1146 V->eraseFromParent();
1147 }
1148
1149 // And remove the function we don't need either too.
1150 F->eraseFromParent();
1151 }
1152 }
1153
1154 return Changed;
1155}
1156
1157bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1158 bool Changed = false;
1159
1160 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
1161 "_Z11vload_half2jPU3AS2KDh"};
1162
1163 for (auto Name : Map) {
1164 // If we find a function with the matching name.
1165 if (auto F = M.getFunction(Name)) {
1166 SmallVector<Instruction *, 4> ToRemoves;
1167
1168 // Walk the users of the function.
1169 for (auto &U : F->uses()) {
1170 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1171 // The index argument from vload_half.
1172 auto Arg0 = CI->getOperand(0);
1173
1174 // The pointer argument from vload_half.
1175 auto Arg1 = CI->getOperand(1);
1176
1177 auto IntTy = Type::getInt32Ty(M.getContext());
1178 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1179 auto NewPointerTy = PointerType::get(
1180 IntTy, Arg1->getType()->getPointerAddressSpace());
1181 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1182
1183 // Cast the half* pointer to int*.
1184 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1185
1186 // Index into the correct address of the casted pointer.
1187 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1188
1189 // Load from the int* we casted to.
1190 auto Load = new LoadInst(Index, "", CI);
1191
1192 // Our intrinsic to unpack a float2 from an int.
1193 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1194
1195 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1196
1197 // Get our float2.
1198 auto Call = CallInst::Create(NewF, Load, "", CI);
1199
1200 CI->replaceAllUsesWith(Call);
1201
1202 // Lastly, remember to remove the user.
1203 ToRemoves.push_back(CI);
1204 }
1205 }
1206
1207 Changed = !ToRemoves.empty();
1208
1209 // And cleanup the calls we don't use anymore.
1210 for (auto V : ToRemoves) {
1211 V->eraseFromParent();
1212 }
1213
1214 // And remove the function we don't need either too.
1215 F->eraseFromParent();
1216 }
1217 }
1218
1219 return Changed;
1220}
1221
1222bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1223 bool Changed = false;
1224
1225 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1226 "_Z11vload_half4jPU3AS2KDh"};
1227
1228 for (auto Name : Map) {
1229 // If we find a function with the matching name.
1230 if (auto F = M.getFunction(Name)) {
1231 SmallVector<Instruction *, 4> ToRemoves;
1232
1233 // Walk the users of the function.
1234 for (auto &U : F->uses()) {
1235 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1236 // The index argument from vload_half.
1237 auto Arg0 = CI->getOperand(0);
1238
1239 // The pointer argument from vload_half.
1240 auto Arg1 = CI->getOperand(1);
1241
1242 auto IntTy = Type::getInt32Ty(M.getContext());
1243 auto Int2Ty = VectorType::get(IntTy, 2);
1244 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1245 auto NewPointerTy = PointerType::get(
1246 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1247 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1248
1249 // Cast the half* pointer to int2*.
1250 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1251
1252 // Index into the correct address of the casted pointer.
1253 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1254
1255 // Load from the int2* we casted to.
1256 auto Load = new LoadInst(Index, "", CI);
1257
1258 // Extract each element from the loaded int2.
1259 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1260 "", CI);
1261 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1262 "", CI);
1263
1264 // Our intrinsic to unpack a float2 from an int.
1265 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1266
1267 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1268
1269 // Get the lower (x & y) components of our final float4.
1270 auto Lo = CallInst::Create(NewF, X, "", CI);
1271
1272 // Get the higher (z & w) components of our final float4.
1273 auto Hi = CallInst::Create(NewF, Y, "", CI);
1274
1275 Constant *ShuffleMask[4] = {
1276 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1277 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1278
1279 // Combine our two float2's into one float4.
1280 auto Combine = new ShuffleVectorInst(
1281 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1282
1283 CI->replaceAllUsesWith(Combine);
1284
1285 // Lastly, remember to remove the user.
1286 ToRemoves.push_back(CI);
1287 }
1288 }
1289
1290 Changed = !ToRemoves.empty();
1291
1292 // And cleanup the calls we don't use anymore.
1293 for (auto V : ToRemoves) {
1294 V->eraseFromParent();
1295 }
1296
1297 // And remove the function we don't need either too.
1298 F->eraseFromParent();
1299 }
1300 }
1301
1302 return Changed;
1303}
1304
1305bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1306 bool Changed = false;
1307
1308 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1309 "_Z15vstore_half_rtefjPU3AS1Dh",
1310 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1311
1312 for (auto Name : Map) {
1313 // If we find a function with the matching name.
1314 if (auto F = M.getFunction(Name)) {
1315 SmallVector<Instruction *, 4> ToRemoves;
1316
1317 // Walk the users of the function.
1318 for (auto &U : F->uses()) {
1319 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1320 // The value to store.
1321 auto Arg0 = CI->getOperand(0);
1322
1323 // The index argument from vstore_half.
1324 auto Arg1 = CI->getOperand(1);
1325
1326 // The pointer argument from vstore_half.
1327 auto Arg2 = CI->getOperand(2);
1328
David Neto22f144c2017-06-12 14:26:21 -04001329 auto IntTy = Type::getInt32Ty(M.getContext());
1330 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001331 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001332 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001333
1334 // Our intrinsic to pack a float2 to an int.
1335 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1336
1337 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1338
1339 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001340 auto TempVec =
1341 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1342 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001343
1344 // Pack the float2 -> half2 (in an int).
1345 auto X = CallInst::Create(NewF, TempVec, "", CI);
1346
David Neto17852de2017-05-29 17:29:31 -04001347 if (f16bit_storage) {
1348 auto ShortTy = Type::getInt16Ty(M.getContext());
1349 auto ShortPointerTy = PointerType::get(
1350 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001351
David Neto17852de2017-05-29 17:29:31 -04001352 // Truncate our i32 to an i16.
1353 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001354
David Neto17852de2017-05-29 17:29:31 -04001355 // Cast the half* pointer to short*.
1356 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001357
David Neto17852de2017-05-29 17:29:31 -04001358 // Index into the correct address of the casted pointer.
1359 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001360
David Neto17852de2017-05-29 17:29:31 -04001361 // Store to the int* we casted to.
1362 auto Store = new StoreInst(Trunc, Index, CI);
1363
1364 CI->replaceAllUsesWith(Store);
1365 } else {
1366 // We can only write to 32-bit aligned words.
1367 //
1368 // Assuming base is aligned to 32-bits, replace the equivalent of
1369 // vstore_half(value, index, base)
1370 // with:
1371 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1372 // uint32_t write_to_upper_half = index & 1u;
1373 // uint32_t shift = write_to_upper_half << 4;
1374 //
1375 // // Pack the float value as a half number in bottom 16 bits
1376 // // of an i32.
1377 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1378 //
1379 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1380 // ^ ((packed & 0xffff) << shift)
1381 // // We only need relaxed consistency, but OpenCL 1.2 only has
1382 // // sequentially consistent atomics.
1383 // // TODO(dneto): Use relaxed consistency.
1384 // atomic_xor(target_ptr, xor_value)
1385 auto IntPointerTy = PointerType::get(
1386 IntTy, Arg2->getType()->getPointerAddressSpace());
1387
1388 auto Four = ConstantInt::get(IntTy, 4);
1389 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1390
1391 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1392 // Compute index / 2
1393 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1394 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1395 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1396 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1397 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1398 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1399 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1400
1401 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1402 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1403 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1404
1405 // Generate the call to atomi_xor.
1406 SmallVector<Type *, 5> ParamTypes;
1407 // The pointer type.
1408 ParamTypes.push_back(IntPointerTy);
1409 // The Types for memory scope, semantics, and value.
1410 ParamTypes.push_back(IntTy);
1411 ParamTypes.push_back(IntTy);
1412 ParamTypes.push_back(IntTy);
1413 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1414 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1415
1416 const auto ConstantScopeDevice =
1417 ConstantInt::get(IntTy, spv::ScopeDevice);
1418 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1419 // (SPIR-V Workgroup).
1420 const auto AddrSpaceSemanticsBits =
1421 IntPointerTy->getPointerAddressSpace() == 1
1422 ? spv::MemorySemanticsUniformMemoryMask
1423 : spv::MemorySemanticsWorkgroupMemoryMask;
1424
1425 // We're using relaxed consistency here.
1426 const auto ConstantMemorySemantics =
1427 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1428 AddrSpaceSemanticsBits);
1429
1430 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1431 ConstantMemorySemantics, ValueToXor};
1432 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1433 }
David Neto22f144c2017-06-12 14:26:21 -04001434
1435 // Lastly, remember to remove the user.
1436 ToRemoves.push_back(CI);
1437 }
1438 }
1439
1440 Changed = !ToRemoves.empty();
1441
1442 // And cleanup the calls we don't use anymore.
1443 for (auto V : ToRemoves) {
1444 V->eraseFromParent();
1445 }
1446
1447 // And remove the function we don't need either too.
1448 F->eraseFromParent();
1449 }
1450 }
1451
1452 return Changed;
1453}
1454
1455bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1456 bool Changed = false;
1457
1458 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1459 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1460 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1461
1462 for (auto Name : Map) {
1463 // If we find a function with the matching name.
1464 if (auto F = M.getFunction(Name)) {
1465 SmallVector<Instruction *, 4> ToRemoves;
1466
1467 // Walk the users of the function.
1468 for (auto &U : F->uses()) {
1469 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1470 // The value to store.
1471 auto Arg0 = CI->getOperand(0);
1472
1473 // The index argument from vstore_half.
1474 auto Arg1 = CI->getOperand(1);
1475
1476 // The pointer argument from vstore_half.
1477 auto Arg2 = CI->getOperand(2);
1478
1479 auto IntTy = Type::getInt32Ty(M.getContext());
1480 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1481 auto NewPointerTy = PointerType::get(
1482 IntTy, Arg2->getType()->getPointerAddressSpace());
1483 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1484
1485 // Our intrinsic to pack a float2 to an int.
1486 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1487
1488 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1489
1490 // Turn the packed x & y into the final packing.
1491 auto X = CallInst::Create(NewF, Arg0, "", CI);
1492
1493 // Cast the half* pointer to int*.
1494 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1495
1496 // Index into the correct address of the casted pointer.
1497 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1498
1499 // Store to the int* we casted to.
1500 auto Store = new StoreInst(X, Index, CI);
1501
1502 CI->replaceAllUsesWith(Store);
1503
1504 // Lastly, remember to remove the user.
1505 ToRemoves.push_back(CI);
1506 }
1507 }
1508
1509 Changed = !ToRemoves.empty();
1510
1511 // And cleanup the calls we don't use anymore.
1512 for (auto V : ToRemoves) {
1513 V->eraseFromParent();
1514 }
1515
1516 // And remove the function we don't need either too.
1517 F->eraseFromParent();
1518 }
1519 }
1520
1521 return Changed;
1522}
1523
1524bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1525 bool Changed = false;
1526
1527 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1528 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1529 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1530
1531 for (auto Name : Map) {
1532 // If we find a function with the matching name.
1533 if (auto F = M.getFunction(Name)) {
1534 SmallVector<Instruction *, 4> ToRemoves;
1535
1536 // Walk the users of the function.
1537 for (auto &U : F->uses()) {
1538 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1539 // The value to store.
1540 auto Arg0 = CI->getOperand(0);
1541
1542 // The index argument from vstore_half.
1543 auto Arg1 = CI->getOperand(1);
1544
1545 // The pointer argument from vstore_half.
1546 auto Arg2 = CI->getOperand(2);
1547
1548 auto IntTy = Type::getInt32Ty(M.getContext());
1549 auto Int2Ty = VectorType::get(IntTy, 2);
1550 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1551 auto NewPointerTy = PointerType::get(
1552 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1553 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1554
1555 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1556 ConstantInt::get(IntTy, 1)};
1557
1558 // Extract out the x & y components of our to store value.
1559 auto Lo =
1560 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1561 ConstantVector::get(LoShuffleMask), "", CI);
1562
1563 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1564 ConstantInt::get(IntTy, 3)};
1565
1566 // Extract out the z & w components of our to store value.
1567 auto Hi =
1568 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1569 ConstantVector::get(HiShuffleMask), "", CI);
1570
1571 // Our intrinsic to pack a float2 to an int.
1572 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1573
1574 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1575
1576 // Turn the packed x & y into the final component of our int2.
1577 auto X = CallInst::Create(NewF, Lo, "", CI);
1578
1579 // Turn the packed z & w into the final component of our int2.
1580 auto Y = CallInst::Create(NewF, Hi, "", CI);
1581
1582 auto Combine = InsertElementInst::Create(
1583 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1584 Combine = InsertElementInst::Create(
1585 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1586
1587 // Cast the half* pointer to int2*.
1588 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1589
1590 // Index into the correct address of the casted pointer.
1591 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1592
1593 // Store to the int2* we casted to.
1594 auto Store = new StoreInst(Combine, Index, CI);
1595
1596 CI->replaceAllUsesWith(Store);
1597
1598 // Lastly, remember to remove the user.
1599 ToRemoves.push_back(CI);
1600 }
1601 }
1602
1603 Changed = !ToRemoves.empty();
1604
1605 // And cleanup the calls we don't use anymore.
1606 for (auto V : ToRemoves) {
1607 V->eraseFromParent();
1608 }
1609
1610 // And remove the function we don't need either too.
1611 F->eraseFromParent();
1612 }
1613 }
1614
1615 return Changed;
1616}
1617
1618bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1619 bool Changed = false;
1620
1621 const std::map<const char *, const char*> Map = {
1622 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1623 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1624 };
1625
1626 for (auto Pair : Map) {
1627 // If we find a function with the matching name.
1628 if (auto F = M.getFunction(Pair.first)) {
1629 SmallVector<Instruction *, 4> ToRemoves;
1630
1631 // Walk the users of the function.
1632 for (auto &U : F->uses()) {
1633 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1634 // The image.
1635 auto Arg0 = CI->getOperand(0);
1636
1637 // The sampler.
1638 auto Arg1 = CI->getOperand(1);
1639
1640 // The coordinate (integer type that we can't handle).
1641 auto Arg2 = CI->getOperand(2);
1642
1643 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1644
1645 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1646
1647 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1648
1649 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1650
1651 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1652
1653 CI->replaceAllUsesWith(NewCI);
1654
1655 // Lastly, remember to remove the user.
1656 ToRemoves.push_back(CI);
1657 }
1658 }
1659
1660 Changed = !ToRemoves.empty();
1661
1662 // And cleanup the calls we don't use anymore.
1663 for (auto V : ToRemoves) {
1664 V->eraseFromParent();
1665 }
1666
1667 // And remove the function we don't need either too.
1668 F->eraseFromParent();
1669 }
1670 }
1671
1672 return Changed;
1673}
1674
1675bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1676 bool Changed = false;
1677
1678 const std::map<const char *, const char *> Map = {
David Neto22f144c2017-06-12 14:26:21 -04001679 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1680 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1681 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1682 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1683 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01001684 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04001685
1686 for (auto Pair : Map) {
1687 // If we find a function with the matching name.
1688 if (auto F = M.getFunction(Pair.first)) {
1689 SmallVector<Instruction *, 4> ToRemoves;
1690
1691 // Walk the users of the function.
1692 for (auto &U : F->uses()) {
1693 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1694 auto FType = F->getFunctionType();
1695 SmallVector<Type *, 5> ParamTypes;
1696
1697 // The pointer type.
1698 ParamTypes.push_back(FType->getParamType(0));
1699
1700 auto IntTy = Type::getInt32Ty(M.getContext());
1701
1702 // The memory scope type.
1703 ParamTypes.push_back(IntTy);
1704
1705 // The memory semantics type.
1706 ParamTypes.push_back(IntTy);
1707
1708 if (2 < CI->getNumArgOperands()) {
1709 // The unequal memory semantics type.
1710 ParamTypes.push_back(IntTy);
1711
1712 // The value type.
1713 ParamTypes.push_back(FType->getParamType(2));
1714
1715 // The comparator type.
1716 ParamTypes.push_back(FType->getParamType(1));
1717 } else if (1 < CI->getNumArgOperands()) {
1718 // The value type.
1719 ParamTypes.push_back(FType->getParamType(1));
1720 }
1721
1722 auto NewFType =
1723 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1724 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1725
1726 // We need to map the OpenCL constants to the SPIR-V equivalents.
1727 const auto ConstantScopeDevice =
1728 ConstantInt::get(IntTy, spv::ScopeDevice);
1729 const auto ConstantMemorySemantics = ConstantInt::get(
1730 IntTy, spv::MemorySemanticsUniformMemoryMask |
1731 spv::MemorySemanticsSequentiallyConsistentMask);
1732
1733 SmallVector<Value *, 5> Params;
1734
1735 // The pointer.
1736 Params.push_back(CI->getArgOperand(0));
1737
1738 // The memory scope.
1739 Params.push_back(ConstantScopeDevice);
1740
1741 // The memory semantics.
1742 Params.push_back(ConstantMemorySemantics);
1743
1744 if (2 < CI->getNumArgOperands()) {
1745 // The unequal memory semantics.
1746 Params.push_back(ConstantMemorySemantics);
1747
1748 // The value.
1749 Params.push_back(CI->getArgOperand(2));
1750
1751 // The comparator.
1752 Params.push_back(CI->getArgOperand(1));
1753 } else if (1 < CI->getNumArgOperands()) {
1754 // The value.
1755 Params.push_back(CI->getArgOperand(1));
1756 }
1757
1758 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1759
1760 CI->replaceAllUsesWith(NewCI);
1761
1762 // Lastly, remember to remove the user.
1763 ToRemoves.push_back(CI);
1764 }
1765 }
1766
1767 Changed = !ToRemoves.empty();
1768
1769 // And cleanup the calls we don't use anymore.
1770 for (auto V : ToRemoves) {
1771 V->eraseFromParent();
1772 }
1773
1774 // And remove the function we don't need either too.
1775 F->eraseFromParent();
1776 }
1777 }
1778
Neil Henning39672102017-09-29 14:33:13 +01001779 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
1780 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
1781 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
1782 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
1783 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
1784 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
1785 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
1786 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
1787 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
1788 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
1789 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
1790 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
1791 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
1792 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
1793 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
1794 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
1795 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
1796
1797 for (auto Pair : Map2) {
1798 // If we find a function with the matching name.
1799 if (auto F = M.getFunction(Pair.first)) {
1800 SmallVector<Instruction *, 4> ToRemoves;
1801
1802 // Walk the users of the function.
1803 for (auto &U : F->uses()) {
1804 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1805 auto AtomicOp = new AtomicRMWInst(
1806 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
1807 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
1808
1809 CI->replaceAllUsesWith(AtomicOp);
1810
1811 // Lastly, remember to remove the user.
1812 ToRemoves.push_back(CI);
1813 }
1814 }
1815
1816 Changed = !ToRemoves.empty();
1817
1818 // And cleanup the calls we don't use anymore.
1819 for (auto V : ToRemoves) {
1820 V->eraseFromParent();
1821 }
1822
1823 // And remove the function we don't need either too.
1824 F->eraseFromParent();
1825 }
1826 }
1827
David Neto22f144c2017-06-12 14:26:21 -04001828 return Changed;
1829}
1830
1831bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1832 bool Changed = false;
1833
1834 // If we find a function with the matching name.
1835 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1836 SmallVector<Instruction *, 4> ToRemoves;
1837
1838 auto IntTy = Type::getInt32Ty(M.getContext());
1839 auto FloatTy = Type::getFloatTy(M.getContext());
1840
1841 Constant *DownShuffleMask[3] = {
1842 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1843 ConstantInt::get(IntTy, 2)};
1844
1845 Constant *UpShuffleMask[4] = {
1846 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1847 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1848
1849 Constant *FloatVec[3] = {
1850 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1851 };
1852
1853 // Walk the users of the function.
1854 for (auto &U : F->uses()) {
1855 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1856 auto Vec4Ty = CI->getArgOperand(0)->getType();
1857 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1858 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1859 auto Vec3Ty = Arg0->getType();
1860
1861 auto NewFType =
1862 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1863
1864 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1865
1866 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1867
1868 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1869
1870 CI->replaceAllUsesWith(Result);
1871
1872 // Lastly, remember to remove the user.
1873 ToRemoves.push_back(CI);
1874 }
1875 }
1876
1877 Changed = !ToRemoves.empty();
1878
1879 // And cleanup the calls we don't use anymore.
1880 for (auto V : ToRemoves) {
1881 V->eraseFromParent();
1882 }
1883
1884 // And remove the function we don't need either too.
1885 F->eraseFromParent();
1886 }
1887
1888 return Changed;
1889}
David Neto62653202017-10-16 19:05:18 -04001890
1891bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
1892 bool Changed = false;
1893
1894 // OpenCL's float result = fract(float x, float* ptr)
1895 //
1896 // In the LLVM domain:
1897 //
1898 // %floor_result = call spir_func float @floor(float %x)
1899 // store float %floor_result, float * %ptr
1900 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
1901 // %result = call spir_func float
1902 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
1903 //
1904 // Becomes in the SPIR-V domain, where translations of floor, fmin,
1905 // and clspv.fract occur in the SPIR-V generator pass:
1906 //
1907 // %glsl_ext = OpExtInstImport "GLSL.std.450"
1908 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
1909 // ...
1910 // %floor_result = OpExtInst %float %glsl_ext Floor %x
1911 // OpStore %ptr %floor_result
1912 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
1913 // %fract_result = OpExtInst %float
1914 // %glsl_ext Fmin %fract_intermediate %just_under_1
1915
1916
1917 using std::string;
1918
1919 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
1920 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
1921 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
1922 auto make_quad = [](const char *a, const char *b, const char *c,
1923 const char *d) {
1924 return std::tuple<const char *, const char *, const char *, const char *>(
1925 a, b, c, d);
1926 };
1927 const std::vector<QuadType> Functions = {
1928 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
1929 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
1930 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
1931 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
1932 };
1933
1934 for (auto& quad : Functions) {
1935 const StringRef fract_name(std::get<0>(quad));
1936
1937 // If we find a function with the matching name.
1938 if (auto F = M.getFunction(fract_name)) {
1939 if (F->use_begin() == F->use_end())
1940 continue;
1941
1942 // We have some uses.
1943 Changed = true;
1944
1945 auto& Context = M.getContext();
1946
1947 const StringRef floor_name(std::get<1>(quad));
1948 const StringRef fmin_name(std::get<2>(quad));
1949 const StringRef clspv_fract_name(std::get<3>(quad));
1950
1951 // This is either float or a float vector. All the float-like
1952 // types are this type.
1953 auto result_ty = F->getReturnType();
1954
1955 Function* fmin_fn = M.getFunction(fmin_name);
1956 if (!fmin_fn) {
1957 // Make the fmin function.
1958 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
1959 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
1960 fmin_fn->addFnAttr(Attribute::ReadOnly);
1961 fmin_fn->addFnAttr(Attribute::ReadNone);
1962 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
1963 }
1964
1965 Function* floor_fn = M.getFunction(floor_name);
1966 if (!floor_fn) {
1967 // Make the floor function.
1968 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1969 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
1970 floor_fn->addFnAttr(Attribute::ReadOnly);
1971 floor_fn->addFnAttr(Attribute::ReadNone);
1972 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
1973 }
1974
1975 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
1976 if (!clspv_fract_fn) {
1977 // Make the clspv_fract function.
1978 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1979 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
1980 clspv_fract_fn->addFnAttr(Attribute::ReadOnly);
1981 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
1982 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
1983 }
1984
1985 // Number of significant significand bits, whether represented or not.
1986 unsigned num_significand_bits;
1987 switch (result_ty->getScalarType()->getTypeID()) {
1988 case Type::HalfTyID:
1989 num_significand_bits = 11;
1990 break;
1991 case Type::FloatTyID:
1992 num_significand_bits = 24;
1993 break;
1994 case Type::DoubleTyID:
1995 num_significand_bits = 53;
1996 break;
1997 default:
1998 assert(false && "Unhandled float type when processing fract builtin");
1999 break;
2000 }
2001 // Beware that the disassembler displays this value as
2002 // OpConstant %float 1
2003 // which is not quite right.
2004 const double kJustUnderOneScalar =
2005 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2006
2007 Constant *just_under_one =
2008 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2009 if (result_ty->isVectorTy()) {
2010 just_under_one = ConstantVector::getSplat(
2011 result_ty->getVectorNumElements(), just_under_one);
2012 }
2013
2014 IRBuilder<> Builder(Context);
2015
2016 SmallVector<Instruction *, 4> ToRemoves;
2017
2018 // Walk the users of the function.
2019 for (auto &U : F->uses()) {
2020 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2021
2022 Builder.SetInsertPoint(CI);
2023 auto arg = CI->getArgOperand(0);
2024 auto ptr = CI->getArgOperand(1);
2025
2026 // Compute floor result and store it.
2027 auto floor = Builder.CreateCall(floor_fn, {arg});
2028 Builder.CreateStore(floor, ptr);
2029
2030 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2031 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2032
2033 CI->replaceAllUsesWith(fract_result);
2034
2035 // Lastly, remember to remove the user.
2036 ToRemoves.push_back(CI);
2037 }
2038 }
2039
2040 // And cleanup the calls we don't use anymore.
2041 for (auto V : ToRemoves) {
2042 V->eraseFromParent();
2043 }
2044
2045 // And remove the function we don't need either too.
2046 F->eraseFromParent();
2047 }
2048 }
2049
2050 return Changed;
2051}