blob: b688a2782d8ce00805d0dfd49d22a3ed4f0347f2 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto22f144c2017-06-12 14:26:21 -040019#include <llvm/IR/Constants.h>
20#include <llvm/IR/Instructions.h>
David Neto62653202017-10-16 19:05:18 -040021#include <llvm/IR/IRBuilder.h>
David Neto22f144c2017-06-12 14:26:21 -040022#include <llvm/IR/Module.h>
23#include <llvm/Pass.h>
David Neto17852de2017-05-29 17:29:31 -040024#include <llvm/Support/CommandLine.h>
David Neto22f144c2017-06-12 14:26:21 -040025#include <llvm/Support/raw_ostream.h>
26#include <llvm/Transforms/Utils/Cloning.h>
27
28#include <spirv/1.0/spirv.hpp>
29
30using namespace llvm;
31
32#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
33
David Netoac825b82017-05-30 12:49:01 -040034// TODO(dneto): As per Neil's suggestion, might not need this if you can
35// trace the pointer back far enough to see that it's 32-bit aligned.
36// However, even in the vstore_half case, you'll probably get better
37// performance if you can rely on SPV_KHR_16bit_storage since in the
38// alternate case you're using a (relaxed) atomic, and therefore have
39// to write through to the cache.
David Neto17852de2017-05-29 17:29:31 -040040static llvm::cl::opt<bool> f16bit_storage(
41 "f16bit_storage", llvm::cl::init(false),
42 llvm::cl::desc("Assume the target supports SPV_KHR_16bit_storage"));
43
David Neto22f144c2017-06-12 14:26:21 -040044namespace {
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)); returns 0 when |v| is 0 or 1.
//
// NOTE: despite its name this is not a count of leading zeros. Callers in
// this file subtract two results to get the distance between two single-bit
// masks, which works with bit indices.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Each iteration drops the lowest bit; the number of shifts needed to
  // reduce |v| to a single bit is the index of its highest set bit.
  while (v > 1) {
    v >>= 1;
    ++index;
  }

  return index;
}
64
65Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
66 if (1 == elements) {
67 return Type::getInt1Ty(C);
68 } else {
69 return VectorType::get(Type::getInt1Ty(C), elements);
70 }
71}
72
73struct ReplaceOpenCLBuiltinPass final : public ModulePass {
74 static char ID;
75 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
76
77 bool runOnModule(Module &M) override;
78 bool replaceRecip(Module &M);
79 bool replaceDivide(Module &M);
80 bool replaceExp10(Module &M);
81 bool replaceLog10(Module &M);
82 bool replaceBarrier(Module &M);
83 bool replaceMemFence(Module &M);
84 bool replaceRelational(Module &M);
85 bool replaceIsInfAndIsNan(Module &M);
86 bool replaceAllAndAny(Module &M);
87 bool replaceSignbit(Module &M);
88 bool replaceMadandMad24andMul24(Module &M);
89 bool replaceVloadHalf(Module &M);
90 bool replaceVloadHalf2(Module &M);
91 bool replaceVloadHalf4(Module &M);
92 bool replaceVstoreHalf(Module &M);
93 bool replaceVstoreHalf2(Module &M);
94 bool replaceVstoreHalf4(Module &M);
95 bool replaceReadImageF(Module &M);
96 bool replaceAtomics(Module &M);
97 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040098 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070099 bool replaceVload(Module &M);
100 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400101};
102}
103
104char ReplaceOpenCLBuiltinPass::ID = 0;
105static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
106 "Replace OpenCL Builtins Pass");
107
108namespace clspv {
109ModulePass *createReplaceOpenCLBuiltinPass() {
110 return new ReplaceOpenCLBuiltinPass();
111}
112}
113
114bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
115 bool Changed = false;
116
117 Changed |= replaceRecip(M);
118 Changed |= replaceDivide(M);
119 Changed |= replaceExp10(M);
120 Changed |= replaceLog10(M);
121 Changed |= replaceBarrier(M);
122 Changed |= replaceMemFence(M);
123 Changed |= replaceRelational(M);
124 Changed |= replaceIsInfAndIsNan(M);
125 Changed |= replaceAllAndAny(M);
126 Changed |= replaceSignbit(M);
127 Changed |= replaceMadandMad24andMul24(M);
128 Changed |= replaceVloadHalf(M);
129 Changed |= replaceVloadHalf2(M);
130 Changed |= replaceVloadHalf4(M);
131 Changed |= replaceVstoreHalf(M);
132 Changed |= replaceVstoreHalf2(M);
133 Changed |= replaceVstoreHalf4(M);
134 Changed |= replaceReadImageF(M);
135 Changed |= replaceAtomics(M);
136 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400137 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700138 Changed |= replaceVload(M);
139 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400140
141 return Changed;
142}
143
144bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
145 bool Changed = false;
146
147 const char *Names[] = {
148 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
149 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
150 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
151 };
152
153 for (auto Name : Names) {
154 // If we find a function with the matching name.
155 if (auto F = M.getFunction(Name)) {
156 SmallVector<Instruction *, 4> ToRemoves;
157
158 // Walk the users of the function.
159 for (auto &U : F->uses()) {
160 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
161 // Recip has one arg.
162 auto Arg = CI->getOperand(0);
163
164 auto Div = BinaryOperator::Create(
165 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
166 CI);
167
168 CI->replaceAllUsesWith(Div);
169
170 // Lastly, remember to remove the user.
171 ToRemoves.push_back(CI);
172 }
173 }
174
175 Changed = !ToRemoves.empty();
176
177 // And cleanup the calls we don't use anymore.
178 for (auto V : ToRemoves) {
179 V->eraseFromParent();
180 }
181
182 // And remove the function we don't need either too.
183 F->eraseFromParent();
184 }
185 }
186
187 return Changed;
188}
189
190bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
191 bool Changed = false;
192
193 const char *Names[] = {
194 "_Z11half_divideff", "_Z13native_divideff",
195 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
196 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
197 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
198 };
199
200 for (auto Name : Names) {
201 // If we find a function with the matching name.
202 if (auto F = M.getFunction(Name)) {
203 SmallVector<Instruction *, 4> ToRemoves;
204
205 // Walk the users of the function.
206 for (auto &U : F->uses()) {
207 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
208 auto Div = BinaryOperator::Create(
209 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
210
211 CI->replaceAllUsesWith(Div);
212
213 // Lastly, remember to remove the user.
214 ToRemoves.push_back(CI);
215 }
216 }
217
218 Changed = !ToRemoves.empty();
219
220 // And cleanup the calls we don't use anymore.
221 for (auto V : ToRemoves) {
222 V->eraseFromParent();
223 }
224
225 // And remove the function we don't need either too.
226 F->eraseFromParent();
227 }
228 }
229
230 return Changed;
231}
232
233bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
234 bool Changed = false;
235
236 const std::map<const char *, const char *> Map = {
237 {"_Z5exp10f", "_Z3expf"},
238 {"_Z10half_exp10f", "_Z8half_expf"},
239 {"_Z12native_exp10f", "_Z10native_expf"},
240 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
241 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
242 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
243 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
244 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
245 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
246 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
247 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
248 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
249
250 for (auto Pair : Map) {
251 // If we find a function with the matching name.
252 if (auto F = M.getFunction(Pair.first)) {
253 SmallVector<Instruction *, 4> ToRemoves;
254
255 // Walk the users of the function.
256 for (auto &U : F->uses()) {
257 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
258 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
259
260 auto Arg = CI->getOperand(0);
261
262 // Constant of the natural log of 10 (ln(10)).
263 const double Ln10 =
264 2.302585092994045684017991454684364207601101488628772976033;
265
266 auto Mul = BinaryOperator::Create(
267 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
268 CI);
269
270 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
271
272 CI->replaceAllUsesWith(NewCI);
273
274 // Lastly, remember to remove the user.
275 ToRemoves.push_back(CI);
276 }
277 }
278
279 Changed = !ToRemoves.empty();
280
281 // And cleanup the calls we don't use anymore.
282 for (auto V : ToRemoves) {
283 V->eraseFromParent();
284 }
285
286 // And remove the function we don't need either too.
287 F->eraseFromParent();
288 }
289 }
290
291 return Changed;
292}
293
294bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
295 bool Changed = false;
296
297 const std::map<const char *, const char *> Map = {
298 {"_Z5log10f", "_Z3logf"},
299 {"_Z10half_log10f", "_Z8half_logf"},
300 {"_Z12native_log10f", "_Z10native_logf"},
301 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
302 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
303 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
304 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
305 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
306 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
307 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
308 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
309 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
310
311 for (auto Pair : Map) {
312 // If we find a function with the matching name.
313 if (auto F = M.getFunction(Pair.first)) {
314 SmallVector<Instruction *, 4> ToRemoves;
315
316 // Walk the users of the function.
317 for (auto &U : F->uses()) {
318 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
319 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
320
321 auto Arg = CI->getOperand(0);
322
323 // Constant of the reciprocal of the natural log of 10 (ln(10)).
324 const double Ln10 =
325 0.434294481903251827651128918916605082294397005803666566114;
326
327 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
328
329 auto Mul = BinaryOperator::Create(
330 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
331 "", CI);
332
333 CI->replaceAllUsesWith(Mul);
334
335 // Lastly, remember to remove the user.
336 ToRemoves.push_back(CI);
337 }
338 }
339
340 Changed = !ToRemoves.empty();
341
342 // And cleanup the calls we don't use anymore.
343 for (auto V : ToRemoves) {
344 V->eraseFromParent();
345 }
346
347 // And remove the function we don't need either too.
348 F->eraseFromParent();
349 }
350 }
351
352 return Changed;
353}
354
355bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
356 bool Changed = false;
357
358 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
359
360 const std::map<const char *, const char *> Map = {
361 {"_Z7barrierj", "__spirv_control_barrier"}};
362
363 for (auto Pair : Map) {
364 // If we find a function with the matching name.
365 if (auto F = M.getFunction(Pair.first)) {
366 SmallVector<Instruction *, 4> ToRemoves;
367
368 // Walk the users of the function.
369 for (auto &U : F->uses()) {
370 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
371 auto FType = F->getFunctionType();
372 SmallVector<Type *, 3> Params;
373 for (unsigned i = 0; i < 3; i++) {
374 Params.push_back(FType->getParamType(0));
375 }
376 auto NewFType =
377 FunctionType::get(FType->getReturnType(), Params, false);
378 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
379
380 auto Arg = CI->getOperand(0);
381
382 // We need to map the OpenCL constants to the SPIR-V equivalents.
383 const auto LocalMemFence =
384 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
385 const auto GlobalMemFence =
386 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
387 const auto ConstantSequentiallyConsistent = ConstantInt::get(
388 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
389 const auto ConstantScopeDevice =
390 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
391 const auto ConstantScopeWorkgroup =
392 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
393
394 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
395 const auto LocalMemFenceMask = BinaryOperator::Create(
396 Instruction::And, LocalMemFence, Arg, "", CI);
397 const auto WorkgroupShiftAmount =
398 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
399 clz(CLK_LOCAL_MEM_FENCE);
400 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
401 Instruction::Shl, LocalMemFenceMask,
402 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
403
404 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
405 const auto GlobalMemFenceMask = BinaryOperator::Create(
406 Instruction::And, GlobalMemFence, Arg, "", CI);
407 const auto UniformShiftAmount =
408 clz(spv::MemorySemanticsUniformMemoryMask) -
409 clz(CLK_GLOBAL_MEM_FENCE);
410 const auto MemorySemanticsUniform = BinaryOperator::Create(
411 Instruction::Shl, GlobalMemFenceMask,
412 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
413
414 // And combine the above together, also adding in
415 // MemorySemanticsSequentiallyConsistentMask.
416 auto MemorySemantics =
417 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
418 ConstantSequentiallyConsistent, "", CI);
419 MemorySemantics = BinaryOperator::Create(
420 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
421
422 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
423 // Device Scope, otherwise Workgroup Scope.
424 const auto Cmp =
425 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
426 GlobalMemFenceMask, GlobalMemFence, "", CI);
427 const auto MemoryScope = SelectInst::Create(
428 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
429
430 // Lastly, the Execution Scope is always Workgroup Scope.
431 const auto ExecutionScope = ConstantScopeWorkgroup;
432
433 auto NewCI = CallInst::Create(
434 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
435
436 CI->replaceAllUsesWith(NewCI);
437
438 // Lastly, remember to remove the user.
439 ToRemoves.push_back(CI);
440 }
441 }
442
443 Changed = !ToRemoves.empty();
444
445 // And cleanup the calls we don't use anymore.
446 for (auto V : ToRemoves) {
447 V->eraseFromParent();
448 }
449
450 // And remove the function we don't need either too.
451 F->eraseFromParent();
452 }
453 }
454
455 return Changed;
456}
457
458bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
459 bool Changed = false;
460
461 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
462
Neil Henning39672102017-09-29 14:33:13 +0100463 using Tuple = std::tuple<const char *, unsigned>;
464 const std::map<const char *, Tuple> Map = {
465 {"_Z9mem_fencej",
466 Tuple("__spirv_memory_barrier",
467 spv::MemorySemanticsSequentiallyConsistentMask)},
468 {"_Z14read_mem_fencej",
469 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
470 {"_Z15write_mem_fencej",
471 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400472
473 for (auto Pair : Map) {
474 // If we find a function with the matching name.
475 if (auto F = M.getFunction(Pair.first)) {
476 SmallVector<Instruction *, 4> ToRemoves;
477
478 // Walk the users of the function.
479 for (auto &U : F->uses()) {
480 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
481 auto FType = F->getFunctionType();
482 SmallVector<Type *, 2> Params;
483 for (unsigned i = 0; i < 2; i++) {
484 Params.push_back(FType->getParamType(0));
485 }
486 auto NewFType =
487 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100488 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400489
490 auto Arg = CI->getOperand(0);
491
492 // We need to map the OpenCL constants to the SPIR-V equivalents.
493 const auto LocalMemFence =
494 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
495 const auto GlobalMemFence =
496 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
497 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100498 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400499 const auto ConstantScopeDevice =
500 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
501
502 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
503 const auto LocalMemFenceMask = BinaryOperator::Create(
504 Instruction::And, LocalMemFence, Arg, "", CI);
505 const auto WorkgroupShiftAmount =
506 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
507 clz(CLK_LOCAL_MEM_FENCE);
508 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
509 Instruction::Shl, LocalMemFenceMask,
510 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
511
512 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
513 const auto GlobalMemFenceMask = BinaryOperator::Create(
514 Instruction::And, GlobalMemFence, Arg, "", CI);
515 const auto UniformShiftAmount =
516 clz(spv::MemorySemanticsUniformMemoryMask) -
517 clz(CLK_GLOBAL_MEM_FENCE);
518 const auto MemorySemanticsUniform = BinaryOperator::Create(
519 Instruction::Shl, GlobalMemFenceMask,
520 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
521
522 // And combine the above together, also adding in
523 // MemorySemanticsSequentiallyConsistentMask.
524 auto MemorySemantics =
525 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
526 ConstantMemorySemantics, "", CI);
527 MemorySemantics = BinaryOperator::Create(
528 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
529
530 // Memory Scope is always device.
531 const auto MemoryScope = ConstantScopeDevice;
532
533 auto NewCI =
534 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
535
536 CI->replaceAllUsesWith(NewCI);
537
538 // Lastly, remember to remove the user.
539 ToRemoves.push_back(CI);
540 }
541 }
542
543 Changed = !ToRemoves.empty();
544
545 // And cleanup the calls we don't use anymore.
546 for (auto V : ToRemoves) {
547 V->eraseFromParent();
548 }
549
550 // And remove the function we don't need either too.
551 F->eraseFromParent();
552 }
553 }
554
555 return Changed;
556}
557
558bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
559 bool Changed = false;
560
561 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
562 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
563 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
564 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
565 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
566 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
567 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
568 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
569 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
570 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
571 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
572 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
573 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
574 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
575 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
576 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
577 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
578 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
579 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
580 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
581 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
582 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
583 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
584 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
585 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
586 };
587
588 for (auto Pair : Map) {
589 // If we find a function with the matching name.
590 if (auto F = M.getFunction(Pair.first)) {
591 SmallVector<Instruction *, 4> ToRemoves;
592
593 // Walk the users of the function.
594 for (auto &U : F->uses()) {
595 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
596 // The predicate to use in the CmpInst.
597 auto Predicate = Pair.second.first;
598
599 // The value to return for true.
600 auto TrueValue =
601 ConstantInt::getSigned(CI->getType(), Pair.second.second);
602
603 // The value to return for false.
604 auto FalseValue = Constant::getNullValue(CI->getType());
605
606 auto Arg1 = CI->getOperand(0);
607 auto Arg2 = CI->getOperand(1);
608
609 const auto Cmp =
610 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
611
612 const auto Select =
613 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
614
615 CI->replaceAllUsesWith(Select);
616
617 // Lastly, remember to remove the user.
618 ToRemoves.push_back(CI);
619 }
620 }
621
622 Changed = !ToRemoves.empty();
623
624 // And cleanup the calls we don't use anymore.
625 for (auto V : ToRemoves) {
626 V->eraseFromParent();
627 }
628
629 // And remove the function we don't need either too.
630 F->eraseFromParent();
631 }
632 }
633
634 return Changed;
635}
636
637bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
638 bool Changed = false;
639
640 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
641 {"_Z5isinff", {"__spirv_isinff", 1}},
642 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
643 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
644 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
645 {"_Z5isnanf", {"__spirv_isnanf", 1}},
646 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
647 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
648 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
649 };
650
651 for (auto Pair : Map) {
652 // If we find a function with the matching name.
653 if (auto F = M.getFunction(Pair.first)) {
654 SmallVector<Instruction *, 4> ToRemoves;
655
656 // Walk the users of the function.
657 for (auto &U : F->uses()) {
658 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
659 const auto CITy = CI->getType();
660
661 // The fake SPIR-V intrinsic to generate.
662 auto SPIRVIntrinsic = Pair.second.first;
663
664 // The value to return for true.
665 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
666
667 // The value to return for false.
668 auto FalseValue = Constant::getNullValue(CITy);
669
670 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
671 M.getContext(),
672 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
673
674 auto NewFType =
675 FunctionType::get(CorrespondingBoolTy,
676 F->getFunctionType()->getParamType(0), false);
677
678 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
679
680 auto Arg = CI->getOperand(0);
681
682 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
683
684 const auto Select =
685 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
686
687 CI->replaceAllUsesWith(Select);
688
689 // Lastly, remember to remove the user.
690 ToRemoves.push_back(CI);
691 }
692 }
693
694 Changed = !ToRemoves.empty();
695
696 // And cleanup the calls we don't use anymore.
697 for (auto V : ToRemoves) {
698 V->eraseFromParent();
699 }
700
701 // And remove the function we don't need either too.
702 F->eraseFromParent();
703 }
704 }
705
706 return Changed;
707}
708
709bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
710 bool Changed = false;
711
712 const std::map<const char *, const char *> Map = {
713 {"_Z3alli", ""},
714 {"_Z3allDv2_i", "__spirv_allDv2_i"},
715 {"_Z3allDv3_i", "__spirv_allDv3_i"},
716 {"_Z3allDv4_i", "__spirv_allDv4_i"},
717 {"_Z3anyi", ""},
718 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
719 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
720 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
721 };
722
723 for (auto Pair : Map) {
724 // If we find a function with the matching name.
725 if (auto F = M.getFunction(Pair.first)) {
726 SmallVector<Instruction *, 4> ToRemoves;
727
728 // Walk the users of the function.
729 for (auto &U : F->uses()) {
730 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
731 // The fake SPIR-V intrinsic to generate.
732 auto SPIRVIntrinsic = Pair.second;
733
734 auto Arg = CI->getOperand(0);
735
736 Value *V;
737
738 // If we have a function to call, call it!
739 if (0 < strlen(SPIRVIntrinsic)) {
740 // The value for zero to compare against.
741 const auto ZeroValue = Constant::getNullValue(Arg->getType());
742
743 const auto Cmp = CmpInst::Create(
744 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
745 const auto NewFType = FunctionType::get(
746 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
747
748 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
749
750 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
751
752 // The value to return for true.
753 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
754
755 // The value to return for false.
756 const auto FalseValue = Constant::getNullValue(CI->getType());
757
758 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
759 } else {
760 V = BinaryOperator::Create(Instruction::LShr, Arg,
761 ConstantInt::get(CI->getType(), 31), "",
762 CI);
763 }
764
765 CI->replaceAllUsesWith(V);
766
767 // Lastly, remember to remove the user.
768 ToRemoves.push_back(CI);
769 }
770 }
771
772 Changed = !ToRemoves.empty();
773
774 // And cleanup the calls we don't use anymore.
775 for (auto V : ToRemoves) {
776 V->eraseFromParent();
777 }
778
779 // And remove the function we don't need either too.
780 F->eraseFromParent();
781 }
782 }
783
784 return Changed;
785}
786
787bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
788 bool Changed = false;
789
790 const std::map<const char *, Instruction::BinaryOps> Map = {
791 {"_Z7signbitf", Instruction::LShr},
792 {"_Z7signbitDv2_f", Instruction::AShr},
793 {"_Z7signbitDv3_f", Instruction::AShr},
794 {"_Z7signbitDv4_f", Instruction::AShr},
795 };
796
797 for (auto Pair : Map) {
798 // If we find a function with the matching name.
799 if (auto F = M.getFunction(Pair.first)) {
800 SmallVector<Instruction *, 4> ToRemoves;
801
802 // Walk the users of the function.
803 for (auto &U : F->uses()) {
804 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
805 auto Arg = CI->getOperand(0);
806
807 auto Bitcast =
808 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
809
810 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
811 ConstantInt::get(CI->getType(), 31),
812 "", CI);
813
814 CI->replaceAllUsesWith(Shr);
815
816 // Lastly, remember to remove the user.
817 ToRemoves.push_back(CI);
818 }
819 }
820
821 Changed = !ToRemoves.empty();
822
823 // And cleanup the calls we don't use anymore.
824 for (auto V : ToRemoves) {
825 V->eraseFromParent();
826 }
827
828 // And remove the function we don't need either too.
829 F->eraseFromParent();
830 }
831 }
832
833 return Changed;
834}
835
836bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
837 bool Changed = false;
838
839 const std::map<const char *,
840 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
841 Map = {
842 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
843 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
844 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
845 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
846 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
847 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
848 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
849 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
850 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
851 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
852 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
853 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
854 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
855 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
856 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
857 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
858 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
859 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
860 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
861 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
862 };
863
864 for (auto Pair : Map) {
865 // If we find a function with the matching name.
866 if (auto F = M.getFunction(Pair.first)) {
867 SmallVector<Instruction *, 4> ToRemoves;
868
869 // Walk the users of the function.
870 for (auto &U : F->uses()) {
871 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
872 // The multiply instruction to use.
873 auto MulInst = Pair.second.first;
874
875 // The add instruction to use.
876 auto AddInst = Pair.second.second;
877
878 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
879
880 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
881 CI->getArgOperand(1), "", CI);
882
883 if (Instruction::BinaryOpsEnd != AddInst) {
884 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
885 CI);
886 }
887
888 CI->replaceAllUsesWith(I);
889
890 // Lastly, remember to remove the user.
891 ToRemoves.push_back(CI);
892 }
893 }
894
895 Changed = !ToRemoves.empty();
896
897 // And cleanup the calls we don't use anymore.
898 for (auto V : ToRemoves) {
899 V->eraseFromParent();
900 }
901
902 // And remove the function we don't need either too.
903 F->eraseFromParent();
904 }
905 }
906
907 return Changed;
908}
909
Derek Chowcfd368b2017-10-19 20:58:45 -0700910bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
911 bool Changed = false;
912
913 struct VectorStoreOps {
914 const char* name;
915 int n;
916 Type* (*get_scalar_type_function)(LLVMContext&);
917 } vector_store_ops[] = {
918 // TODO(derekjchow): Expand this list.
919 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
920 };
921
922 for (int i = 0; i < sizeof(vector_store_ops) / sizeof(*vector_store_ops);
923 ++i) {
924 const auto& Op = vector_store_ops[i];
925 auto Name = Op.name;
926 auto N = Op.n;
927 auto TypeFn = Op.get_scalar_type_function;
928 if (auto F = M.getFunction(Name)) {
929 SmallVector<Instruction *, 4> ToRemoves;
930
931 // Walk the users of the function.
932 for (auto &U : F->uses()) {
933 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
934 // The value argument from vstoren.
935 auto Arg0 = CI->getOperand(0);
936
937 // The index argument from vstoren.
938 auto Arg1 = CI->getOperand(1);
939
940 // The pointer argument from vstoren.
941 auto Arg2 = CI->getOperand(2);
942
943 // Get types.
944 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
945 auto ScalarNPointerTy = PointerType::get(
946 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
947
948 // Cast to scalarn
949 auto Cast = CastInst::CreatePointerCast(
950 Arg2, ScalarNPointerTy, "", CI);
951 // Index to correct address
952 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
953 // Store
954 auto Store = new StoreInst(Arg0, Index, CI);
955
956 CI->replaceAllUsesWith(Store);
957 ToRemoves.push_back(CI);
958 }
959 }
960
961 Changed = !ToRemoves.empty();
962
963 // And cleanup the calls we don't use anymore.
964 for (auto V : ToRemoves) {
965 V->eraseFromParent();
966 }
967
968 // And remove the function we don't need either too.
969 F->eraseFromParent();
970 }
971 }
972
973 return Changed;
974}
975
976bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
977 bool Changed = false;
978
979 struct VectorLoadOps {
980 const char* name;
981 int n;
982 Type* (*get_scalar_type_function)(LLVMContext&);
983 } vector_load_ops[] = {
984 // TODO(derekjchow): Expand this list.
985 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
986 };
987
988 for (int i = 0; i < sizeof(vector_load_ops) / sizeof(*vector_load_ops); ++i) {
989 const auto& Op = vector_load_ops[i];
990 auto Name = Op.name;
991 auto N = Op.n;
992 auto TypeFn = Op.get_scalar_type_function;
993 // If we find a function with the matching name.
994 if (auto F = M.getFunction(Name)) {
995 SmallVector<Instruction *, 4> ToRemoves;
996
997 // Walk the users of the function.
998 for (auto &U : F->uses()) {
999 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1000 // The index argument from vloadn.
1001 auto Arg0 = CI->getOperand(0);
1002
1003 // The pointer argument from vloadn.
1004 auto Arg1 = CI->getOperand(1);
1005
1006 // Get types.
1007 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1008 auto ScalarNPointerTy = PointerType::get(
1009 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1010
1011 // Cast to scalarn
1012 auto Cast = CastInst::CreatePointerCast(
1013 Arg1, ScalarNPointerTy, "", CI);
1014 // Index to correct address
1015 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1016 // Load
1017 auto Load = new LoadInst(Index, "", CI);
1018
1019 CI->replaceAllUsesWith(Load);
1020 ToRemoves.push_back(CI);
1021 }
1022 }
1023
1024 Changed = !ToRemoves.empty();
1025
1026 // And cleanup the calls we don't use anymore.
1027 for (auto V : ToRemoves) {
1028 V->eraseFromParent();
1029 }
1030
1031 // And remove the function we don't need either too.
1032 F->eraseFromParent();
1033
1034 }
1035 }
1036
1037 return Changed;
1038}
1039
David Neto22f144c2017-06-12 14:26:21 -04001040bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1041 bool Changed = false;
1042
1043 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1044 "_Z10vload_halfjPU3AS2KDh"};
1045
1046 for (auto Name : Map) {
1047 // If we find a function with the matching name.
1048 if (auto F = M.getFunction(Name)) {
1049 SmallVector<Instruction *, 4> ToRemoves;
1050
1051 // Walk the users of the function.
1052 for (auto &U : F->uses()) {
1053 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1054 // The index argument from vload_half.
1055 auto Arg0 = CI->getOperand(0);
1056
1057 // The pointer argument from vload_half.
1058 auto Arg1 = CI->getOperand(1);
1059
David Neto22f144c2017-06-12 14:26:21 -04001060 auto IntTy = Type::getInt32Ty(M.getContext());
1061 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001062 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1063
David Neto22f144c2017-06-12 14:26:21 -04001064 // Our intrinsic to unpack a float2 from an int.
1065 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1066
1067 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1068
David Netoac825b82017-05-30 12:49:01 -04001069 if (f16bit_storage) {
1070 auto ShortTy = Type::getInt16Ty(M.getContext());
1071 auto ShortPointerTy = PointerType::get(
1072 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001073
David Netoac825b82017-05-30 12:49:01 -04001074 // Cast the half* pointer to short*.
1075 auto Cast =
1076 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001077
David Netoac825b82017-05-30 12:49:01 -04001078 // Index into the correct address of the casted pointer.
1079 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1080
1081 // Load from the short* we casted to.
1082 auto Load = new LoadInst(Index, "", CI);
1083
1084 // ZExt the short -> int.
1085 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1086
1087 // Get our float2.
1088 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1089
1090 // Extract out the bottom element which is our float result.
1091 auto Extract = ExtractElementInst::Create(
1092 Call, ConstantInt::get(IntTy, 0), "", CI);
1093
1094 CI->replaceAllUsesWith(Extract);
1095 } else {
1096 // Assume the pointer argument points to storage aligned to 32bits
1097 // or more.
1098 // TODO(dneto): Do more analysis to make sure this is true?
1099 //
1100 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1101 // with:
1102 //
1103 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1104 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1105 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1106 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1107 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1108 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1109 // x float> %converted, %index_is_odd32
1110
1111 auto IntPointerTy = PointerType::get(
1112 IntTy, Arg1->getType()->getPointerAddressSpace());
1113
David Neto973e6a82017-05-30 13:48:18 -04001114 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001115 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001116 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001117 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1118
1119 auto One = ConstantInt::get(IntTy, 1);
1120 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1121 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1122
1123 // Index into the correct address of the casted pointer.
1124 auto Ptr =
1125 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1126
1127 // Load from the int* we casted to.
1128 auto Load = new LoadInst(Ptr, "", CI);
1129
1130 // Get our float2.
1131 auto Call = CallInst::Create(NewF, Load, "", CI);
1132
1133 // Extract out the float result, where the element number is
1134 // determined by whether the original index was even or odd.
1135 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1136
1137 CI->replaceAllUsesWith(Extract);
1138 }
David Neto22f144c2017-06-12 14:26:21 -04001139
1140 // Lastly, remember to remove the user.
1141 ToRemoves.push_back(CI);
1142 }
1143 }
1144
1145 Changed = !ToRemoves.empty();
1146
1147 // And cleanup the calls we don't use anymore.
1148 for (auto V : ToRemoves) {
1149 V->eraseFromParent();
1150 }
1151
1152 // And remove the function we don't need either too.
1153 F->eraseFromParent();
1154 }
1155 }
1156
1157 return Changed;
1158}
1159
1160bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1161 bool Changed = false;
1162
1163 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
1164 "_Z11vload_half2jPU3AS2KDh"};
1165
1166 for (auto Name : Map) {
1167 // If we find a function with the matching name.
1168 if (auto F = M.getFunction(Name)) {
1169 SmallVector<Instruction *, 4> ToRemoves;
1170
1171 // Walk the users of the function.
1172 for (auto &U : F->uses()) {
1173 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1174 // The index argument from vload_half.
1175 auto Arg0 = CI->getOperand(0);
1176
1177 // The pointer argument from vload_half.
1178 auto Arg1 = CI->getOperand(1);
1179
1180 auto IntTy = Type::getInt32Ty(M.getContext());
1181 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1182 auto NewPointerTy = PointerType::get(
1183 IntTy, Arg1->getType()->getPointerAddressSpace());
1184 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1185
1186 // Cast the half* pointer to int*.
1187 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1188
1189 // Index into the correct address of the casted pointer.
1190 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1191
1192 // Load from the int* we casted to.
1193 auto Load = new LoadInst(Index, "", CI);
1194
1195 // Our intrinsic to unpack a float2 from an int.
1196 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1197
1198 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1199
1200 // Get our float2.
1201 auto Call = CallInst::Create(NewF, Load, "", CI);
1202
1203 CI->replaceAllUsesWith(Call);
1204
1205 // Lastly, remember to remove the user.
1206 ToRemoves.push_back(CI);
1207 }
1208 }
1209
1210 Changed = !ToRemoves.empty();
1211
1212 // And cleanup the calls we don't use anymore.
1213 for (auto V : ToRemoves) {
1214 V->eraseFromParent();
1215 }
1216
1217 // And remove the function we don't need either too.
1218 F->eraseFromParent();
1219 }
1220 }
1221
1222 return Changed;
1223}
1224
1225bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1226 bool Changed = false;
1227
1228 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1229 "_Z11vload_half4jPU3AS2KDh"};
1230
1231 for (auto Name : Map) {
1232 // If we find a function with the matching name.
1233 if (auto F = M.getFunction(Name)) {
1234 SmallVector<Instruction *, 4> ToRemoves;
1235
1236 // Walk the users of the function.
1237 for (auto &U : F->uses()) {
1238 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1239 // The index argument from vload_half.
1240 auto Arg0 = CI->getOperand(0);
1241
1242 // The pointer argument from vload_half.
1243 auto Arg1 = CI->getOperand(1);
1244
1245 auto IntTy = Type::getInt32Ty(M.getContext());
1246 auto Int2Ty = VectorType::get(IntTy, 2);
1247 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1248 auto NewPointerTy = PointerType::get(
1249 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1250 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1251
1252 // Cast the half* pointer to int2*.
1253 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1254
1255 // Index into the correct address of the casted pointer.
1256 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1257
1258 // Load from the int2* we casted to.
1259 auto Load = new LoadInst(Index, "", CI);
1260
1261 // Extract each element from the loaded int2.
1262 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1263 "", CI);
1264 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1265 "", CI);
1266
1267 // Our intrinsic to unpack a float2 from an int.
1268 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1269
1270 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1271
1272 // Get the lower (x & y) components of our final float4.
1273 auto Lo = CallInst::Create(NewF, X, "", CI);
1274
1275 // Get the higher (z & w) components of our final float4.
1276 auto Hi = CallInst::Create(NewF, Y, "", CI);
1277
1278 Constant *ShuffleMask[4] = {
1279 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1280 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1281
1282 // Combine our two float2's into one float4.
1283 auto Combine = new ShuffleVectorInst(
1284 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1285
1286 CI->replaceAllUsesWith(Combine);
1287
1288 // Lastly, remember to remove the user.
1289 ToRemoves.push_back(CI);
1290 }
1291 }
1292
1293 Changed = !ToRemoves.empty();
1294
1295 // And cleanup the calls we don't use anymore.
1296 for (auto V : ToRemoves) {
1297 V->eraseFromParent();
1298 }
1299
1300 // And remove the function we don't need either too.
1301 F->eraseFromParent();
1302 }
1303 }
1304
1305 return Changed;
1306}
1307
1308bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1309 bool Changed = false;
1310
1311 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1312 "_Z15vstore_half_rtefjPU3AS1Dh",
1313 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1314
1315 for (auto Name : Map) {
1316 // If we find a function with the matching name.
1317 if (auto F = M.getFunction(Name)) {
1318 SmallVector<Instruction *, 4> ToRemoves;
1319
1320 // Walk the users of the function.
1321 for (auto &U : F->uses()) {
1322 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1323 // The value to store.
1324 auto Arg0 = CI->getOperand(0);
1325
1326 // The index argument from vstore_half.
1327 auto Arg1 = CI->getOperand(1);
1328
1329 // The pointer argument from vstore_half.
1330 auto Arg2 = CI->getOperand(2);
1331
David Neto22f144c2017-06-12 14:26:21 -04001332 auto IntTy = Type::getInt32Ty(M.getContext());
1333 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001334 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001335 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001336
1337 // Our intrinsic to pack a float2 to an int.
1338 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1339
1340 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1341
1342 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001343 auto TempVec =
1344 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1345 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001346
1347 // Pack the float2 -> half2 (in an int).
1348 auto X = CallInst::Create(NewF, TempVec, "", CI);
1349
David Neto17852de2017-05-29 17:29:31 -04001350 if (f16bit_storage) {
1351 auto ShortTy = Type::getInt16Ty(M.getContext());
1352 auto ShortPointerTy = PointerType::get(
1353 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001354
David Neto17852de2017-05-29 17:29:31 -04001355 // Truncate our i32 to an i16.
1356 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001357
David Neto17852de2017-05-29 17:29:31 -04001358 // Cast the half* pointer to short*.
1359 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001360
David Neto17852de2017-05-29 17:29:31 -04001361 // Index into the correct address of the casted pointer.
1362 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001363
David Neto17852de2017-05-29 17:29:31 -04001364 // Store to the int* we casted to.
1365 auto Store = new StoreInst(Trunc, Index, CI);
1366
1367 CI->replaceAllUsesWith(Store);
1368 } else {
1369 // We can only write to 32-bit aligned words.
1370 //
1371 // Assuming base is aligned to 32-bits, replace the equivalent of
1372 // vstore_half(value, index, base)
1373 // with:
1374 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1375 // uint32_t write_to_upper_half = index & 1u;
1376 // uint32_t shift = write_to_upper_half << 4;
1377 //
1378 // // Pack the float value as a half number in bottom 16 bits
1379 // // of an i32.
1380 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1381 //
1382 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1383 // ^ ((packed & 0xffff) << shift)
1384 // // We only need relaxed consistency, but OpenCL 1.2 only has
1385 // // sequentially consistent atomics.
1386 // // TODO(dneto): Use relaxed consistency.
1387 // atomic_xor(target_ptr, xor_value)
1388 auto IntPointerTy = PointerType::get(
1389 IntTy, Arg2->getType()->getPointerAddressSpace());
1390
1391 auto Four = ConstantInt::get(IntTy, 4);
1392 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1393
1394 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1395 // Compute index / 2
1396 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1397 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1398 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1399 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1400 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1401 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1402 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1403
1404 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1405 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1406 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1407
1408 // Generate the call to atomi_xor.
1409 SmallVector<Type *, 5> ParamTypes;
1410 // The pointer type.
1411 ParamTypes.push_back(IntPointerTy);
1412 // The Types for memory scope, semantics, and value.
1413 ParamTypes.push_back(IntTy);
1414 ParamTypes.push_back(IntTy);
1415 ParamTypes.push_back(IntTy);
1416 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1417 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1418
1419 const auto ConstantScopeDevice =
1420 ConstantInt::get(IntTy, spv::ScopeDevice);
1421 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1422 // (SPIR-V Workgroup).
1423 const auto AddrSpaceSemanticsBits =
1424 IntPointerTy->getPointerAddressSpace() == 1
1425 ? spv::MemorySemanticsUniformMemoryMask
1426 : spv::MemorySemanticsWorkgroupMemoryMask;
1427
1428 // We're using relaxed consistency here.
1429 const auto ConstantMemorySemantics =
1430 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1431 AddrSpaceSemanticsBits);
1432
1433 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1434 ConstantMemorySemantics, ValueToXor};
1435 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1436 }
David Neto22f144c2017-06-12 14:26:21 -04001437
1438 // Lastly, remember to remove the user.
1439 ToRemoves.push_back(CI);
1440 }
1441 }
1442
1443 Changed = !ToRemoves.empty();
1444
1445 // And cleanup the calls we don't use anymore.
1446 for (auto V : ToRemoves) {
1447 V->eraseFromParent();
1448 }
1449
1450 // And remove the function we don't need either too.
1451 F->eraseFromParent();
1452 }
1453 }
1454
1455 return Changed;
1456}
1457
1458bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1459 bool Changed = false;
1460
1461 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1462 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1463 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1464
1465 for (auto Name : Map) {
1466 // If we find a function with the matching name.
1467 if (auto F = M.getFunction(Name)) {
1468 SmallVector<Instruction *, 4> ToRemoves;
1469
1470 // Walk the users of the function.
1471 for (auto &U : F->uses()) {
1472 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1473 // The value to store.
1474 auto Arg0 = CI->getOperand(0);
1475
1476 // The index argument from vstore_half.
1477 auto Arg1 = CI->getOperand(1);
1478
1479 // The pointer argument from vstore_half.
1480 auto Arg2 = CI->getOperand(2);
1481
1482 auto IntTy = Type::getInt32Ty(M.getContext());
1483 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1484 auto NewPointerTy = PointerType::get(
1485 IntTy, Arg2->getType()->getPointerAddressSpace());
1486 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1487
1488 // Our intrinsic to pack a float2 to an int.
1489 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1490
1491 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1492
1493 // Turn the packed x & y into the final packing.
1494 auto X = CallInst::Create(NewF, Arg0, "", CI);
1495
1496 // Cast the half* pointer to int*.
1497 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1498
1499 // Index into the correct address of the casted pointer.
1500 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1501
1502 // Store to the int* we casted to.
1503 auto Store = new StoreInst(X, Index, CI);
1504
1505 CI->replaceAllUsesWith(Store);
1506
1507 // Lastly, remember to remove the user.
1508 ToRemoves.push_back(CI);
1509 }
1510 }
1511
1512 Changed = !ToRemoves.empty();
1513
1514 // And cleanup the calls we don't use anymore.
1515 for (auto V : ToRemoves) {
1516 V->eraseFromParent();
1517 }
1518
1519 // And remove the function we don't need either too.
1520 F->eraseFromParent();
1521 }
1522 }
1523
1524 return Changed;
1525}
1526
1527bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1528 bool Changed = false;
1529
1530 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1531 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1532 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1533
1534 for (auto Name : Map) {
1535 // If we find a function with the matching name.
1536 if (auto F = M.getFunction(Name)) {
1537 SmallVector<Instruction *, 4> ToRemoves;
1538
1539 // Walk the users of the function.
1540 for (auto &U : F->uses()) {
1541 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1542 // The value to store.
1543 auto Arg0 = CI->getOperand(0);
1544
1545 // The index argument from vstore_half.
1546 auto Arg1 = CI->getOperand(1);
1547
1548 // The pointer argument from vstore_half.
1549 auto Arg2 = CI->getOperand(2);
1550
1551 auto IntTy = Type::getInt32Ty(M.getContext());
1552 auto Int2Ty = VectorType::get(IntTy, 2);
1553 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1554 auto NewPointerTy = PointerType::get(
1555 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1556 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1557
1558 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1559 ConstantInt::get(IntTy, 1)};
1560
1561 // Extract out the x & y components of our to store value.
1562 auto Lo =
1563 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1564 ConstantVector::get(LoShuffleMask), "", CI);
1565
1566 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1567 ConstantInt::get(IntTy, 3)};
1568
1569 // Extract out the z & w components of our to store value.
1570 auto Hi =
1571 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1572 ConstantVector::get(HiShuffleMask), "", CI);
1573
1574 // Our intrinsic to pack a float2 to an int.
1575 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1576
1577 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1578
1579 // Turn the packed x & y into the final component of our int2.
1580 auto X = CallInst::Create(NewF, Lo, "", CI);
1581
1582 // Turn the packed z & w into the final component of our int2.
1583 auto Y = CallInst::Create(NewF, Hi, "", CI);
1584
1585 auto Combine = InsertElementInst::Create(
1586 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1587 Combine = InsertElementInst::Create(
1588 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1589
1590 // Cast the half* pointer to int2*.
1591 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1592
1593 // Index into the correct address of the casted pointer.
1594 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1595
1596 // Store to the int2* we casted to.
1597 auto Store = new StoreInst(Combine, Index, CI);
1598
1599 CI->replaceAllUsesWith(Store);
1600
1601 // Lastly, remember to remove the user.
1602 ToRemoves.push_back(CI);
1603 }
1604 }
1605
1606 Changed = !ToRemoves.empty();
1607
1608 // And cleanup the calls we don't use anymore.
1609 for (auto V : ToRemoves) {
1610 V->eraseFromParent();
1611 }
1612
1613 // And remove the function we don't need either too.
1614 F->eraseFromParent();
1615 }
1616 }
1617
1618 return Changed;
1619}
1620
1621bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1622 bool Changed = false;
1623
1624 const std::map<const char *, const char*> Map = {
1625 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1626 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1627 };
1628
1629 for (auto Pair : Map) {
1630 // If we find a function with the matching name.
1631 if (auto F = M.getFunction(Pair.first)) {
1632 SmallVector<Instruction *, 4> ToRemoves;
1633
1634 // Walk the users of the function.
1635 for (auto &U : F->uses()) {
1636 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1637 // The image.
1638 auto Arg0 = CI->getOperand(0);
1639
1640 // The sampler.
1641 auto Arg1 = CI->getOperand(1);
1642
1643 // The coordinate (integer type that we can't handle).
1644 auto Arg2 = CI->getOperand(2);
1645
1646 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1647
1648 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1649
1650 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1651
1652 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1653
1654 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1655
1656 CI->replaceAllUsesWith(NewCI);
1657
1658 // Lastly, remember to remove the user.
1659 ToRemoves.push_back(CI);
1660 }
1661 }
1662
1663 Changed = !ToRemoves.empty();
1664
1665 // And cleanup the calls we don't use anymore.
1666 for (auto V : ToRemoves) {
1667 V->eraseFromParent();
1668 }
1669
1670 // And remove the function we don't need either too.
1671 F->eraseFromParent();
1672 }
1673 }
1674
1675 return Changed;
1676}
1677
1678bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1679 bool Changed = false;
1680
1681 const std::map<const char *, const char *> Map = {
David Neto22f144c2017-06-12 14:26:21 -04001682 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1683 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1684 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1685 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1686 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01001687 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04001688
1689 for (auto Pair : Map) {
1690 // If we find a function with the matching name.
1691 if (auto F = M.getFunction(Pair.first)) {
1692 SmallVector<Instruction *, 4> ToRemoves;
1693
1694 // Walk the users of the function.
1695 for (auto &U : F->uses()) {
1696 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1697 auto FType = F->getFunctionType();
1698 SmallVector<Type *, 5> ParamTypes;
1699
1700 // The pointer type.
1701 ParamTypes.push_back(FType->getParamType(0));
1702
1703 auto IntTy = Type::getInt32Ty(M.getContext());
1704
1705 // The memory scope type.
1706 ParamTypes.push_back(IntTy);
1707
1708 // The memory semantics type.
1709 ParamTypes.push_back(IntTy);
1710
1711 if (2 < CI->getNumArgOperands()) {
1712 // The unequal memory semantics type.
1713 ParamTypes.push_back(IntTy);
1714
1715 // The value type.
1716 ParamTypes.push_back(FType->getParamType(2));
1717
1718 // The comparator type.
1719 ParamTypes.push_back(FType->getParamType(1));
1720 } else if (1 < CI->getNumArgOperands()) {
1721 // The value type.
1722 ParamTypes.push_back(FType->getParamType(1));
1723 }
1724
1725 auto NewFType =
1726 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1727 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1728
1729 // We need to map the OpenCL constants to the SPIR-V equivalents.
1730 const auto ConstantScopeDevice =
1731 ConstantInt::get(IntTy, spv::ScopeDevice);
1732 const auto ConstantMemorySemantics = ConstantInt::get(
1733 IntTy, spv::MemorySemanticsUniformMemoryMask |
1734 spv::MemorySemanticsSequentiallyConsistentMask);
1735
1736 SmallVector<Value *, 5> Params;
1737
1738 // The pointer.
1739 Params.push_back(CI->getArgOperand(0));
1740
1741 // The memory scope.
1742 Params.push_back(ConstantScopeDevice);
1743
1744 // The memory semantics.
1745 Params.push_back(ConstantMemorySemantics);
1746
1747 if (2 < CI->getNumArgOperands()) {
1748 // The unequal memory semantics.
1749 Params.push_back(ConstantMemorySemantics);
1750
1751 // The value.
1752 Params.push_back(CI->getArgOperand(2));
1753
1754 // The comparator.
1755 Params.push_back(CI->getArgOperand(1));
1756 } else if (1 < CI->getNumArgOperands()) {
1757 // The value.
1758 Params.push_back(CI->getArgOperand(1));
1759 }
1760
1761 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1762
1763 CI->replaceAllUsesWith(NewCI);
1764
1765 // Lastly, remember to remove the user.
1766 ToRemoves.push_back(CI);
1767 }
1768 }
1769
1770 Changed = !ToRemoves.empty();
1771
1772 // And cleanup the calls we don't use anymore.
1773 for (auto V : ToRemoves) {
1774 V->eraseFromParent();
1775 }
1776
1777 // And remove the function we don't need either too.
1778 F->eraseFromParent();
1779 }
1780 }
1781
Neil Henning39672102017-09-29 14:33:13 +01001782 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
1783 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
1784 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
1785 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
1786 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
1787 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
1788 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
1789 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
1790 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
1791 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
1792 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
1793 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
1794 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
1795 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
1796 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
1797 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
1798 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
1799
1800 for (auto Pair : Map2) {
1801 // If we find a function with the matching name.
1802 if (auto F = M.getFunction(Pair.first)) {
1803 SmallVector<Instruction *, 4> ToRemoves;
1804
1805 // Walk the users of the function.
1806 for (auto &U : F->uses()) {
1807 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1808 auto AtomicOp = new AtomicRMWInst(
1809 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
1810 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
1811
1812 CI->replaceAllUsesWith(AtomicOp);
1813
1814 // Lastly, remember to remove the user.
1815 ToRemoves.push_back(CI);
1816 }
1817 }
1818
1819 Changed = !ToRemoves.empty();
1820
1821 // And cleanup the calls we don't use anymore.
1822 for (auto V : ToRemoves) {
1823 V->eraseFromParent();
1824 }
1825
1826 // And remove the function we don't need either too.
1827 F->eraseFromParent();
1828 }
1829 }
1830
David Neto22f144c2017-06-12 14:26:21 -04001831 return Changed;
1832}
1833
1834bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1835 bool Changed = false;
1836
1837 // If we find a function with the matching name.
1838 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1839 SmallVector<Instruction *, 4> ToRemoves;
1840
1841 auto IntTy = Type::getInt32Ty(M.getContext());
1842 auto FloatTy = Type::getFloatTy(M.getContext());
1843
1844 Constant *DownShuffleMask[3] = {
1845 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1846 ConstantInt::get(IntTy, 2)};
1847
1848 Constant *UpShuffleMask[4] = {
1849 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1850 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1851
1852 Constant *FloatVec[3] = {
1853 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1854 };
1855
1856 // Walk the users of the function.
1857 for (auto &U : F->uses()) {
1858 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1859 auto Vec4Ty = CI->getArgOperand(0)->getType();
1860 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1861 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1862 auto Vec3Ty = Arg0->getType();
1863
1864 auto NewFType =
1865 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1866
1867 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1868
1869 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1870
1871 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1872
1873 CI->replaceAllUsesWith(Result);
1874
1875 // Lastly, remember to remove the user.
1876 ToRemoves.push_back(CI);
1877 }
1878 }
1879
1880 Changed = !ToRemoves.empty();
1881
1882 // And cleanup the calls we don't use anymore.
1883 for (auto V : ToRemoves) {
1884 V->eraseFromParent();
1885 }
1886
1887 // And remove the function we don't need either too.
1888 F->eraseFromParent();
1889 }
1890
1891 return Changed;
1892}
David Neto62653202017-10-16 19:05:18 -04001893
1894bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
1895 bool Changed = false;
1896
1897 // OpenCL's float result = fract(float x, float* ptr)
1898 //
1899 // In the LLVM domain:
1900 //
1901 // %floor_result = call spir_func float @floor(float %x)
1902 // store float %floor_result, float * %ptr
1903 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
1904 // %result = call spir_func float
1905 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
1906 //
1907 // Becomes in the SPIR-V domain, where translations of floor, fmin,
1908 // and clspv.fract occur in the SPIR-V generator pass:
1909 //
1910 // %glsl_ext = OpExtInstImport "GLSL.std.450"
1911 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
1912 // ...
1913 // %floor_result = OpExtInst %float %glsl_ext Floor %x
1914 // OpStore %ptr %floor_result
1915 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
1916 // %fract_result = OpExtInst %float
1917 // %glsl_ext Fmin %fract_intermediate %just_under_1
1918
1919
1920 using std::string;
1921
1922 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
1923 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
1924 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
1925 auto make_quad = [](const char *a, const char *b, const char *c,
1926 const char *d) {
1927 return std::tuple<const char *, const char *, const char *, const char *>(
1928 a, b, c, d);
1929 };
1930 const std::vector<QuadType> Functions = {
1931 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
1932 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
1933 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
1934 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
1935 };
1936
1937 for (auto& quad : Functions) {
1938 const StringRef fract_name(std::get<0>(quad));
1939
1940 // If we find a function with the matching name.
1941 if (auto F = M.getFunction(fract_name)) {
1942 if (F->use_begin() == F->use_end())
1943 continue;
1944
1945 // We have some uses.
1946 Changed = true;
1947
1948 auto& Context = M.getContext();
1949
1950 const StringRef floor_name(std::get<1>(quad));
1951 const StringRef fmin_name(std::get<2>(quad));
1952 const StringRef clspv_fract_name(std::get<3>(quad));
1953
1954 // This is either float or a float vector. All the float-like
1955 // types are this type.
1956 auto result_ty = F->getReturnType();
1957
1958 Function* fmin_fn = M.getFunction(fmin_name);
1959 if (!fmin_fn) {
1960 // Make the fmin function.
1961 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
1962 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
1963 fmin_fn->addFnAttr(Attribute::ReadOnly);
1964 fmin_fn->addFnAttr(Attribute::ReadNone);
1965 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
1966 }
1967
1968 Function* floor_fn = M.getFunction(floor_name);
1969 if (!floor_fn) {
1970 // Make the floor function.
1971 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1972 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
1973 floor_fn->addFnAttr(Attribute::ReadOnly);
1974 floor_fn->addFnAttr(Attribute::ReadNone);
1975 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
1976 }
1977
1978 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
1979 if (!clspv_fract_fn) {
1980 // Make the clspv_fract function.
1981 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1982 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
1983 clspv_fract_fn->addFnAttr(Attribute::ReadOnly);
1984 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
1985 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
1986 }
1987
1988 // Number of significant significand bits, whether represented or not.
1989 unsigned num_significand_bits;
1990 switch (result_ty->getScalarType()->getTypeID()) {
1991 case Type::HalfTyID:
1992 num_significand_bits = 11;
1993 break;
1994 case Type::FloatTyID:
1995 num_significand_bits = 24;
1996 break;
1997 case Type::DoubleTyID:
1998 num_significand_bits = 53;
1999 break;
2000 default:
2001 assert(false && "Unhandled float type when processing fract builtin");
2002 break;
2003 }
2004 // Beware that the disassembler displays this value as
2005 // OpConstant %float 1
2006 // which is not quite right.
2007 const double kJustUnderOneScalar =
2008 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2009
2010 Constant *just_under_one =
2011 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2012 if (result_ty->isVectorTy()) {
2013 just_under_one = ConstantVector::getSplat(
2014 result_ty->getVectorNumElements(), just_under_one);
2015 }
2016
2017 IRBuilder<> Builder(Context);
2018
2019 SmallVector<Instruction *, 4> ToRemoves;
2020
2021 // Walk the users of the function.
2022 for (auto &U : F->uses()) {
2023 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2024
2025 Builder.SetInsertPoint(CI);
2026 auto arg = CI->getArgOperand(0);
2027 auto ptr = CI->getArgOperand(1);
2028
2029 // Compute floor result and store it.
2030 auto floor = Builder.CreateCall(floor_fn, {arg});
2031 Builder.CreateStore(floor, ptr);
2032
2033 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2034 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2035
2036 CI->replaceAllUsesWith(fract_result);
2037
2038 // Lastly, remember to remove the user.
2039 ToRemoves.push_back(CI);
2040 }
2041 }
2042
2043 // And cleanup the calls we don't use anymore.
2044 for (auto V : ToRemoves) {
2045 V->eraseFromParent();
2046 }
2047
2048 // And remove the function we don't need either too.
2049 F->eraseFromParent();
2050 }
2051 }
2052
2053 return Changed;
2054}