blob: db2542489d40e7552f826c01ca7d746197fde713 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto22f144c2017-06-12 14:26:21 -040019#include <llvm/IR/Constants.h>
20#include <llvm/IR/Instructions.h>
David Neto62653202017-10-16 19:05:18 -040021#include <llvm/IR/IRBuilder.h>
David Neto22f144c2017-06-12 14:26:21 -040022#include <llvm/IR/Module.h>
23#include <llvm/Pass.h>
David Neto17852de2017-05-29 17:29:31 -040024#include <llvm/Support/CommandLine.h>
David Neto22f144c2017-06-12 14:26:21 -040025#include <llvm/Support/raw_ostream.h>
26#include <llvm/Transforms/Utils/Cloning.h>
27
28#include <spirv/1.0/spirv.hpp>
29
David Neto482550a2018-03-24 05:21:07 -070030#include "clspv/Option.h"
31
David Neto22f144c2017-06-12 14:26:21 -040032using namespace llvm;
33
34#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
35
36namespace {
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)); returns 0 when |v| is 0 or 1.
//
// NOTE: despite the name this is not a count-leading-zeros. The barrier and
// fence lowerings in this file subtract two results to get the left-shift
// that moves one single-bit mask onto another.
uint32_t clz(uint32_t v) {
  uint32_t msb = 0;

  // Binary search for the top bit: halve the window at every step.
  if (v > 0xFFFF) {
    v >>= 16;
    msb |= 16;
  }
  if (v > 0xFF) {
    v >>= 8;
    msb |= 8;
  }
  if (v > 0xF) {
    v >>= 4;
    msb |= 4;
  }
  if (v > 0x3) {
    v >>= 2;
    msb |= 2;
  }
  // |v| is now in [0, 3]; only 2 and 3 contribute a final bit.
  if (v > 0x1) {
    msb |= 1;
  }

  return msb;
}
56
57Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
58 if (1 == elements) {
59 return Type::getInt1Ty(C);
60 } else {
61 return VectorType::get(Type::getInt1Ty(C), elements);
62 }
63}
64
65struct ReplaceOpenCLBuiltinPass final : public ModulePass {
66 static char ID;
67 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
68
69 bool runOnModule(Module &M) override;
70 bool replaceRecip(Module &M);
71 bool replaceDivide(Module &M);
72 bool replaceExp10(Module &M);
73 bool replaceLog10(Module &M);
74 bool replaceBarrier(Module &M);
75 bool replaceMemFence(Module &M);
76 bool replaceRelational(Module &M);
77 bool replaceIsInfAndIsNan(Module &M);
78 bool replaceAllAndAny(Module &M);
79 bool replaceSignbit(Module &M);
80 bool replaceMadandMad24andMul24(Module &M);
81 bool replaceVloadHalf(Module &M);
82 bool replaceVloadHalf2(Module &M);
83 bool replaceVloadHalf4(Module &M);
84 bool replaceVstoreHalf(Module &M);
85 bool replaceVstoreHalf2(Module &M);
86 bool replaceVstoreHalf4(Module &M);
87 bool replaceReadImageF(Module &M);
88 bool replaceAtomics(Module &M);
89 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040090 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070091 bool replaceVload(Module &M);
92 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040093};
94}
95
96char ReplaceOpenCLBuiltinPass::ID = 0;
97static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
98 "Replace OpenCL Builtins Pass");
99
100namespace clspv {
101ModulePass *createReplaceOpenCLBuiltinPass() {
102 return new ReplaceOpenCLBuiltinPass();
103}
104}
105
106bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
107 bool Changed = false;
108
109 Changed |= replaceRecip(M);
110 Changed |= replaceDivide(M);
111 Changed |= replaceExp10(M);
112 Changed |= replaceLog10(M);
113 Changed |= replaceBarrier(M);
114 Changed |= replaceMemFence(M);
115 Changed |= replaceRelational(M);
116 Changed |= replaceIsInfAndIsNan(M);
117 Changed |= replaceAllAndAny(M);
118 Changed |= replaceSignbit(M);
119 Changed |= replaceMadandMad24andMul24(M);
120 Changed |= replaceVloadHalf(M);
121 Changed |= replaceVloadHalf2(M);
122 Changed |= replaceVloadHalf4(M);
123 Changed |= replaceVstoreHalf(M);
124 Changed |= replaceVstoreHalf2(M);
125 Changed |= replaceVstoreHalf4(M);
126 Changed |= replaceReadImageF(M);
127 Changed |= replaceAtomics(M);
128 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400129 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700130 Changed |= replaceVload(M);
131 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400132
133 return Changed;
134}
135
136bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
137 bool Changed = false;
138
139 const char *Names[] = {
140 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
141 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
142 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
143 };
144
145 for (auto Name : Names) {
146 // If we find a function with the matching name.
147 if (auto F = M.getFunction(Name)) {
148 SmallVector<Instruction *, 4> ToRemoves;
149
150 // Walk the users of the function.
151 for (auto &U : F->uses()) {
152 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
153 // Recip has one arg.
154 auto Arg = CI->getOperand(0);
155
156 auto Div = BinaryOperator::Create(
157 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
158 CI);
159
160 CI->replaceAllUsesWith(Div);
161
162 // Lastly, remember to remove the user.
163 ToRemoves.push_back(CI);
164 }
165 }
166
167 Changed = !ToRemoves.empty();
168
169 // And cleanup the calls we don't use anymore.
170 for (auto V : ToRemoves) {
171 V->eraseFromParent();
172 }
173
174 // And remove the function we don't need either too.
175 F->eraseFromParent();
176 }
177 }
178
179 return Changed;
180}
181
182bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
183 bool Changed = false;
184
185 const char *Names[] = {
186 "_Z11half_divideff", "_Z13native_divideff",
187 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
188 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
189 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
190 };
191
192 for (auto Name : Names) {
193 // If we find a function with the matching name.
194 if (auto F = M.getFunction(Name)) {
195 SmallVector<Instruction *, 4> ToRemoves;
196
197 // Walk the users of the function.
198 for (auto &U : F->uses()) {
199 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
200 auto Div = BinaryOperator::Create(
201 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
202
203 CI->replaceAllUsesWith(Div);
204
205 // Lastly, remember to remove the user.
206 ToRemoves.push_back(CI);
207 }
208 }
209
210 Changed = !ToRemoves.empty();
211
212 // And cleanup the calls we don't use anymore.
213 for (auto V : ToRemoves) {
214 V->eraseFromParent();
215 }
216
217 // And remove the function we don't need either too.
218 F->eraseFromParent();
219 }
220 }
221
222 return Changed;
223}
224
225bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
226 bool Changed = false;
227
228 const std::map<const char *, const char *> Map = {
229 {"_Z5exp10f", "_Z3expf"},
230 {"_Z10half_exp10f", "_Z8half_expf"},
231 {"_Z12native_exp10f", "_Z10native_expf"},
232 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
233 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
234 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
235 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
236 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
237 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
238 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
239 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
240 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
241
242 for (auto Pair : Map) {
243 // If we find a function with the matching name.
244 if (auto F = M.getFunction(Pair.first)) {
245 SmallVector<Instruction *, 4> ToRemoves;
246
247 // Walk the users of the function.
248 for (auto &U : F->uses()) {
249 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
250 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
251
252 auto Arg = CI->getOperand(0);
253
254 // Constant of the natural log of 10 (ln(10)).
255 const double Ln10 =
256 2.302585092994045684017991454684364207601101488628772976033;
257
258 auto Mul = BinaryOperator::Create(
259 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
260 CI);
261
262 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
263
264 CI->replaceAllUsesWith(NewCI);
265
266 // Lastly, remember to remove the user.
267 ToRemoves.push_back(CI);
268 }
269 }
270
271 Changed = !ToRemoves.empty();
272
273 // And cleanup the calls we don't use anymore.
274 for (auto V : ToRemoves) {
275 V->eraseFromParent();
276 }
277
278 // And remove the function we don't need either too.
279 F->eraseFromParent();
280 }
281 }
282
283 return Changed;
284}
285
286bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
287 bool Changed = false;
288
289 const std::map<const char *, const char *> Map = {
290 {"_Z5log10f", "_Z3logf"},
291 {"_Z10half_log10f", "_Z8half_logf"},
292 {"_Z12native_log10f", "_Z10native_logf"},
293 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
294 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
295 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
296 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
297 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
298 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
299 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
300 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
301 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
302
303 for (auto Pair : Map) {
304 // If we find a function with the matching name.
305 if (auto F = M.getFunction(Pair.first)) {
306 SmallVector<Instruction *, 4> ToRemoves;
307
308 // Walk the users of the function.
309 for (auto &U : F->uses()) {
310 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
311 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
312
313 auto Arg = CI->getOperand(0);
314
315 // Constant of the reciprocal of the natural log of 10 (ln(10)).
316 const double Ln10 =
317 0.434294481903251827651128918916605082294397005803666566114;
318
319 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
320
321 auto Mul = BinaryOperator::Create(
322 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
323 "", CI);
324
325 CI->replaceAllUsesWith(Mul);
326
327 // Lastly, remember to remove the user.
328 ToRemoves.push_back(CI);
329 }
330 }
331
332 Changed = !ToRemoves.empty();
333
334 // And cleanup the calls we don't use anymore.
335 for (auto V : ToRemoves) {
336 V->eraseFromParent();
337 }
338
339 // And remove the function we don't need either too.
340 F->eraseFromParent();
341 }
342 }
343
344 return Changed;
345}
346
347bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
348 bool Changed = false;
349
350 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
351
352 const std::map<const char *, const char *> Map = {
353 {"_Z7barrierj", "__spirv_control_barrier"}};
354
355 for (auto Pair : Map) {
356 // If we find a function with the matching name.
357 if (auto F = M.getFunction(Pair.first)) {
358 SmallVector<Instruction *, 4> ToRemoves;
359
360 // Walk the users of the function.
361 for (auto &U : F->uses()) {
362 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
363 auto FType = F->getFunctionType();
364 SmallVector<Type *, 3> Params;
365 for (unsigned i = 0; i < 3; i++) {
366 Params.push_back(FType->getParamType(0));
367 }
368 auto NewFType =
369 FunctionType::get(FType->getReturnType(), Params, false);
370 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
371
372 auto Arg = CI->getOperand(0);
373
374 // We need to map the OpenCL constants to the SPIR-V equivalents.
375 const auto LocalMemFence =
376 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
377 const auto GlobalMemFence =
378 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
379 const auto ConstantSequentiallyConsistent = ConstantInt::get(
380 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
381 const auto ConstantScopeDevice =
382 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
383 const auto ConstantScopeWorkgroup =
384 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
385
386 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
387 const auto LocalMemFenceMask = BinaryOperator::Create(
388 Instruction::And, LocalMemFence, Arg, "", CI);
389 const auto WorkgroupShiftAmount =
390 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
391 clz(CLK_LOCAL_MEM_FENCE);
392 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
393 Instruction::Shl, LocalMemFenceMask,
394 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
395
396 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
397 const auto GlobalMemFenceMask = BinaryOperator::Create(
398 Instruction::And, GlobalMemFence, Arg, "", CI);
399 const auto UniformShiftAmount =
400 clz(spv::MemorySemanticsUniformMemoryMask) -
401 clz(CLK_GLOBAL_MEM_FENCE);
402 const auto MemorySemanticsUniform = BinaryOperator::Create(
403 Instruction::Shl, GlobalMemFenceMask,
404 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
405
406 // And combine the above together, also adding in
407 // MemorySemanticsSequentiallyConsistentMask.
408 auto MemorySemantics =
409 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
410 ConstantSequentiallyConsistent, "", CI);
411 MemorySemantics = BinaryOperator::Create(
412 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
413
414 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
415 // Device Scope, otherwise Workgroup Scope.
416 const auto Cmp =
417 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
418 GlobalMemFenceMask, GlobalMemFence, "", CI);
419 const auto MemoryScope = SelectInst::Create(
420 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
421
422 // Lastly, the Execution Scope is always Workgroup Scope.
423 const auto ExecutionScope = ConstantScopeWorkgroup;
424
425 auto NewCI = CallInst::Create(
426 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
427
428 CI->replaceAllUsesWith(NewCI);
429
430 // Lastly, remember to remove the user.
431 ToRemoves.push_back(CI);
432 }
433 }
434
435 Changed = !ToRemoves.empty();
436
437 // And cleanup the calls we don't use anymore.
438 for (auto V : ToRemoves) {
439 V->eraseFromParent();
440 }
441
442 // And remove the function we don't need either too.
443 F->eraseFromParent();
444 }
445 }
446
447 return Changed;
448}
449
450bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
451 bool Changed = false;
452
453 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
454
Neil Henning39672102017-09-29 14:33:13 +0100455 using Tuple = std::tuple<const char *, unsigned>;
456 const std::map<const char *, Tuple> Map = {
457 {"_Z9mem_fencej",
458 Tuple("__spirv_memory_barrier",
459 spv::MemorySemanticsSequentiallyConsistentMask)},
460 {"_Z14read_mem_fencej",
461 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
462 {"_Z15write_mem_fencej",
463 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400464
465 for (auto Pair : Map) {
466 // If we find a function with the matching name.
467 if (auto F = M.getFunction(Pair.first)) {
468 SmallVector<Instruction *, 4> ToRemoves;
469
470 // Walk the users of the function.
471 for (auto &U : F->uses()) {
472 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
473 auto FType = F->getFunctionType();
474 SmallVector<Type *, 2> Params;
475 for (unsigned i = 0; i < 2; i++) {
476 Params.push_back(FType->getParamType(0));
477 }
478 auto NewFType =
479 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100480 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400481
482 auto Arg = CI->getOperand(0);
483
484 // We need to map the OpenCL constants to the SPIR-V equivalents.
485 const auto LocalMemFence =
486 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
487 const auto GlobalMemFence =
488 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
489 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100490 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400491 const auto ConstantScopeDevice =
492 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
493
494 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
495 const auto LocalMemFenceMask = BinaryOperator::Create(
496 Instruction::And, LocalMemFence, Arg, "", CI);
497 const auto WorkgroupShiftAmount =
498 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
499 clz(CLK_LOCAL_MEM_FENCE);
500 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
501 Instruction::Shl, LocalMemFenceMask,
502 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
503
504 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
505 const auto GlobalMemFenceMask = BinaryOperator::Create(
506 Instruction::And, GlobalMemFence, Arg, "", CI);
507 const auto UniformShiftAmount =
508 clz(spv::MemorySemanticsUniformMemoryMask) -
509 clz(CLK_GLOBAL_MEM_FENCE);
510 const auto MemorySemanticsUniform = BinaryOperator::Create(
511 Instruction::Shl, GlobalMemFenceMask,
512 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
513
514 // And combine the above together, also adding in
515 // MemorySemanticsSequentiallyConsistentMask.
516 auto MemorySemantics =
517 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
518 ConstantMemorySemantics, "", CI);
519 MemorySemantics = BinaryOperator::Create(
520 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
521
522 // Memory Scope is always device.
523 const auto MemoryScope = ConstantScopeDevice;
524
525 auto NewCI =
526 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
527
528 CI->replaceAllUsesWith(NewCI);
529
530 // Lastly, remember to remove the user.
531 ToRemoves.push_back(CI);
532 }
533 }
534
535 Changed = !ToRemoves.empty();
536
537 // And cleanup the calls we don't use anymore.
538 for (auto V : ToRemoves) {
539 V->eraseFromParent();
540 }
541
542 // And remove the function we don't need either too.
543 F->eraseFromParent();
544 }
545 }
546
547 return Changed;
548}
549
550bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
551 bool Changed = false;
552
553 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
554 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
555 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
556 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
557 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
558 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
559 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
560 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
561 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
562 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
563 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
564 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
565 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
566 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
567 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
568 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
569 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
570 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
571 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
572 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
573 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
574 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
575 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
576 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
577 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
578 };
579
580 for (auto Pair : Map) {
581 // If we find a function with the matching name.
582 if (auto F = M.getFunction(Pair.first)) {
583 SmallVector<Instruction *, 4> ToRemoves;
584
585 // Walk the users of the function.
586 for (auto &U : F->uses()) {
587 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
588 // The predicate to use in the CmpInst.
589 auto Predicate = Pair.second.first;
590
591 // The value to return for true.
592 auto TrueValue =
593 ConstantInt::getSigned(CI->getType(), Pair.second.second);
594
595 // The value to return for false.
596 auto FalseValue = Constant::getNullValue(CI->getType());
597
598 auto Arg1 = CI->getOperand(0);
599 auto Arg2 = CI->getOperand(1);
600
601 const auto Cmp =
602 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
603
604 const auto Select =
605 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
606
607 CI->replaceAllUsesWith(Select);
608
609 // Lastly, remember to remove the user.
610 ToRemoves.push_back(CI);
611 }
612 }
613
614 Changed = !ToRemoves.empty();
615
616 // And cleanup the calls we don't use anymore.
617 for (auto V : ToRemoves) {
618 V->eraseFromParent();
619 }
620
621 // And remove the function we don't need either too.
622 F->eraseFromParent();
623 }
624 }
625
626 return Changed;
627}
628
629bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
630 bool Changed = false;
631
632 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
633 {"_Z5isinff", {"__spirv_isinff", 1}},
634 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
635 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
636 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
637 {"_Z5isnanf", {"__spirv_isnanf", 1}},
638 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
639 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
640 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
641 };
642
643 for (auto Pair : Map) {
644 // If we find a function with the matching name.
645 if (auto F = M.getFunction(Pair.first)) {
646 SmallVector<Instruction *, 4> ToRemoves;
647
648 // Walk the users of the function.
649 for (auto &U : F->uses()) {
650 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
651 const auto CITy = CI->getType();
652
653 // The fake SPIR-V intrinsic to generate.
654 auto SPIRVIntrinsic = Pair.second.first;
655
656 // The value to return for true.
657 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
658
659 // The value to return for false.
660 auto FalseValue = Constant::getNullValue(CITy);
661
662 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
663 M.getContext(),
664 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
665
666 auto NewFType =
667 FunctionType::get(CorrespondingBoolTy,
668 F->getFunctionType()->getParamType(0), false);
669
670 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
671
672 auto Arg = CI->getOperand(0);
673
674 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
675
676 const auto Select =
677 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
678
679 CI->replaceAllUsesWith(Select);
680
681 // Lastly, remember to remove the user.
682 ToRemoves.push_back(CI);
683 }
684 }
685
686 Changed = !ToRemoves.empty();
687
688 // And cleanup the calls we don't use anymore.
689 for (auto V : ToRemoves) {
690 V->eraseFromParent();
691 }
692
693 // And remove the function we don't need either too.
694 F->eraseFromParent();
695 }
696 }
697
698 return Changed;
699}
700
701bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
702 bool Changed = false;
703
704 const std::map<const char *, const char *> Map = {
705 {"_Z3alli", ""},
706 {"_Z3allDv2_i", "__spirv_allDv2_i"},
707 {"_Z3allDv3_i", "__spirv_allDv3_i"},
708 {"_Z3allDv4_i", "__spirv_allDv4_i"},
709 {"_Z3anyi", ""},
710 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
711 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
712 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
713 };
714
715 for (auto Pair : Map) {
716 // If we find a function with the matching name.
717 if (auto F = M.getFunction(Pair.first)) {
718 SmallVector<Instruction *, 4> ToRemoves;
719
720 // Walk the users of the function.
721 for (auto &U : F->uses()) {
722 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
723 // The fake SPIR-V intrinsic to generate.
724 auto SPIRVIntrinsic = Pair.second;
725
726 auto Arg = CI->getOperand(0);
727
728 Value *V;
729
730 // If we have a function to call, call it!
731 if (0 < strlen(SPIRVIntrinsic)) {
732 // The value for zero to compare against.
733 const auto ZeroValue = Constant::getNullValue(Arg->getType());
734
735 const auto Cmp = CmpInst::Create(
736 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
737 const auto NewFType = FunctionType::get(
738 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
739
740 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
741
742 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
743
744 // The value to return for true.
745 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
746
747 // The value to return for false.
748 const auto FalseValue = Constant::getNullValue(CI->getType());
749
750 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
751 } else {
752 V = BinaryOperator::Create(Instruction::LShr, Arg,
753 ConstantInt::get(CI->getType(), 31), "",
754 CI);
755 }
756
757 CI->replaceAllUsesWith(V);
758
759 // Lastly, remember to remove the user.
760 ToRemoves.push_back(CI);
761 }
762 }
763
764 Changed = !ToRemoves.empty();
765
766 // And cleanup the calls we don't use anymore.
767 for (auto V : ToRemoves) {
768 V->eraseFromParent();
769 }
770
771 // And remove the function we don't need either too.
772 F->eraseFromParent();
773 }
774 }
775
776 return Changed;
777}
778
779bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
780 bool Changed = false;
781
782 const std::map<const char *, Instruction::BinaryOps> Map = {
783 {"_Z7signbitf", Instruction::LShr},
784 {"_Z7signbitDv2_f", Instruction::AShr},
785 {"_Z7signbitDv3_f", Instruction::AShr},
786 {"_Z7signbitDv4_f", Instruction::AShr},
787 };
788
789 for (auto Pair : Map) {
790 // If we find a function with the matching name.
791 if (auto F = M.getFunction(Pair.first)) {
792 SmallVector<Instruction *, 4> ToRemoves;
793
794 // Walk the users of the function.
795 for (auto &U : F->uses()) {
796 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
797 auto Arg = CI->getOperand(0);
798
799 auto Bitcast =
800 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
801
802 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
803 ConstantInt::get(CI->getType(), 31),
804 "", CI);
805
806 CI->replaceAllUsesWith(Shr);
807
808 // Lastly, remember to remove the user.
809 ToRemoves.push_back(CI);
810 }
811 }
812
813 Changed = !ToRemoves.empty();
814
815 // And cleanup the calls we don't use anymore.
816 for (auto V : ToRemoves) {
817 V->eraseFromParent();
818 }
819
820 // And remove the function we don't need either too.
821 F->eraseFromParent();
822 }
823 }
824
825 return Changed;
826}
827
828bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
829 bool Changed = false;
830
831 const std::map<const char *,
832 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
833 Map = {
834 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
835 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
836 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
837 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
838 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
839 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
840 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
841 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
842 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
843 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
844 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
845 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
846 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
847 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
848 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
849 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
850 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
851 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
852 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
853 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
854 };
855
856 for (auto Pair : Map) {
857 // If we find a function with the matching name.
858 if (auto F = M.getFunction(Pair.first)) {
859 SmallVector<Instruction *, 4> ToRemoves;
860
861 // Walk the users of the function.
862 for (auto &U : F->uses()) {
863 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
864 // The multiply instruction to use.
865 auto MulInst = Pair.second.first;
866
867 // The add instruction to use.
868 auto AddInst = Pair.second.second;
869
870 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
871
872 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
873 CI->getArgOperand(1), "", CI);
874
875 if (Instruction::BinaryOpsEnd != AddInst) {
876 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
877 CI);
878 }
879
880 CI->replaceAllUsesWith(I);
881
882 // Lastly, remember to remove the user.
883 ToRemoves.push_back(CI);
884 }
885 }
886
887 Changed = !ToRemoves.empty();
888
889 // And cleanup the calls we don't use anymore.
890 for (auto V : ToRemoves) {
891 V->eraseFromParent();
892 }
893
894 // And remove the function we don't need either too.
895 F->eraseFromParent();
896 }
897 }
898
899 return Changed;
900}
901
Derek Chowcfd368b2017-10-19 20:58:45 -0700902bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
903 bool Changed = false;
904
905 struct VectorStoreOps {
906 const char* name;
907 int n;
908 Type* (*get_scalar_type_function)(LLVMContext&);
909 } vector_store_ops[] = {
910 // TODO(derekjchow): Expand this list.
911 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
912 };
913
David Neto544fffc2017-11-16 18:35:14 -0500914 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700915 auto Name = Op.name;
916 auto N = Op.n;
917 auto TypeFn = Op.get_scalar_type_function;
918 if (auto F = M.getFunction(Name)) {
919 SmallVector<Instruction *, 4> ToRemoves;
920
921 // Walk the users of the function.
922 for (auto &U : F->uses()) {
923 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
924 // The value argument from vstoren.
925 auto Arg0 = CI->getOperand(0);
926
927 // The index argument from vstoren.
928 auto Arg1 = CI->getOperand(1);
929
930 // The pointer argument from vstoren.
931 auto Arg2 = CI->getOperand(2);
932
933 // Get types.
934 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
935 auto ScalarNPointerTy = PointerType::get(
936 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
937
938 // Cast to scalarn
939 auto Cast = CastInst::CreatePointerCast(
940 Arg2, ScalarNPointerTy, "", CI);
941 // Index to correct address
942 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
943 // Store
944 auto Store = new StoreInst(Arg0, Index, CI);
945
946 CI->replaceAllUsesWith(Store);
947 ToRemoves.push_back(CI);
948 }
949 }
950
951 Changed = !ToRemoves.empty();
952
953 // And cleanup the calls we don't use anymore.
954 for (auto V : ToRemoves) {
955 V->eraseFromParent();
956 }
957
958 // And remove the function we don't need either too.
959 F->eraseFromParent();
960 }
961 }
962
963 return Changed;
964}
965
966bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
967 bool Changed = false;
968
969 struct VectorLoadOps {
970 const char* name;
971 int n;
972 Type* (*get_scalar_type_function)(LLVMContext&);
973 } vector_load_ops[] = {
974 // TODO(derekjchow): Expand this list.
975 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
976 };
977
David Neto544fffc2017-11-16 18:35:14 -0500978 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700979 auto Name = Op.name;
980 auto N = Op.n;
981 auto TypeFn = Op.get_scalar_type_function;
982 // If we find a function with the matching name.
983 if (auto F = M.getFunction(Name)) {
984 SmallVector<Instruction *, 4> ToRemoves;
985
986 // Walk the users of the function.
987 for (auto &U : F->uses()) {
988 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
989 // The index argument from vloadn.
990 auto Arg0 = CI->getOperand(0);
991
992 // The pointer argument from vloadn.
993 auto Arg1 = CI->getOperand(1);
994
995 // Get types.
996 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
997 auto ScalarNPointerTy = PointerType::get(
998 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
999
1000 // Cast to scalarn
1001 auto Cast = CastInst::CreatePointerCast(
1002 Arg1, ScalarNPointerTy, "", CI);
1003 // Index to correct address
1004 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1005 // Load
1006 auto Load = new LoadInst(Index, "", CI);
1007
1008 CI->replaceAllUsesWith(Load);
1009 ToRemoves.push_back(CI);
1010 }
1011 }
1012
1013 Changed = !ToRemoves.empty();
1014
1015 // And cleanup the calls we don't use anymore.
1016 for (auto V : ToRemoves) {
1017 V->eraseFromParent();
1018 }
1019
1020 // And remove the function we don't need either too.
1021 F->eraseFromParent();
1022
1023 }
1024 }
1025
1026 return Changed;
1027}
1028
David Neto22f144c2017-06-12 14:26:21 -04001029bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1030 bool Changed = false;
1031
1032 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1033 "_Z10vload_halfjPU3AS2KDh"};
1034
1035 for (auto Name : Map) {
1036 // If we find a function with the matching name.
1037 if (auto F = M.getFunction(Name)) {
1038 SmallVector<Instruction *, 4> ToRemoves;
1039
1040 // Walk the users of the function.
1041 for (auto &U : F->uses()) {
1042 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1043 // The index argument from vload_half.
1044 auto Arg0 = CI->getOperand(0);
1045
1046 // The pointer argument from vload_half.
1047 auto Arg1 = CI->getOperand(1);
1048
David Neto22f144c2017-06-12 14:26:21 -04001049 auto IntTy = Type::getInt32Ty(M.getContext());
1050 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001051 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1052
David Neto22f144c2017-06-12 14:26:21 -04001053 // Our intrinsic to unpack a float2 from an int.
1054 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1055
1056 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1057
David Neto482550a2018-03-24 05:21:07 -07001058 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001059 auto ShortTy = Type::getInt16Ty(M.getContext());
1060 auto ShortPointerTy = PointerType::get(
1061 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001062
David Netoac825b82017-05-30 12:49:01 -04001063 // Cast the half* pointer to short*.
1064 auto Cast =
1065 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001066
David Netoac825b82017-05-30 12:49:01 -04001067 // Index into the correct address of the casted pointer.
1068 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1069
1070 // Load from the short* we casted to.
1071 auto Load = new LoadInst(Index, "", CI);
1072
1073 // ZExt the short -> int.
1074 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1075
1076 // Get our float2.
1077 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1078
1079 // Extract out the bottom element which is our float result.
1080 auto Extract = ExtractElementInst::Create(
1081 Call, ConstantInt::get(IntTy, 0), "", CI);
1082
1083 CI->replaceAllUsesWith(Extract);
1084 } else {
1085 // Assume the pointer argument points to storage aligned to 32bits
1086 // or more.
1087 // TODO(dneto): Do more analysis to make sure this is true?
1088 //
1089 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1090 // with:
1091 //
1092 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1093 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1094 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1095 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1096 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1097 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1098 // x float> %converted, %index_is_odd32
1099
1100 auto IntPointerTy = PointerType::get(
1101 IntTy, Arg1->getType()->getPointerAddressSpace());
1102
David Neto973e6a82017-05-30 13:48:18 -04001103 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001104 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001105 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001106 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1107
1108 auto One = ConstantInt::get(IntTy, 1);
1109 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1110 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1111
1112 // Index into the correct address of the casted pointer.
1113 auto Ptr =
1114 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1115
1116 // Load from the int* we casted to.
1117 auto Load = new LoadInst(Ptr, "", CI);
1118
1119 // Get our float2.
1120 auto Call = CallInst::Create(NewF, Load, "", CI);
1121
1122 // Extract out the float result, where the element number is
1123 // determined by whether the original index was even or odd.
1124 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1125
1126 CI->replaceAllUsesWith(Extract);
1127 }
David Neto22f144c2017-06-12 14:26:21 -04001128
1129 // Lastly, remember to remove the user.
1130 ToRemoves.push_back(CI);
1131 }
1132 }
1133
1134 Changed = !ToRemoves.empty();
1135
1136 // And cleanup the calls we don't use anymore.
1137 for (auto V : ToRemoves) {
1138 V->eraseFromParent();
1139 }
1140
1141 // And remove the function we don't need either too.
1142 F->eraseFromParent();
1143 }
1144 }
1145
1146 return Changed;
1147}
1148
1149bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1150 bool Changed = false;
1151
1152 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
1153 "_Z11vload_half2jPU3AS2KDh"};
1154
1155 for (auto Name : Map) {
1156 // If we find a function with the matching name.
1157 if (auto F = M.getFunction(Name)) {
1158 SmallVector<Instruction *, 4> ToRemoves;
1159
1160 // Walk the users of the function.
1161 for (auto &U : F->uses()) {
1162 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1163 // The index argument from vload_half.
1164 auto Arg0 = CI->getOperand(0);
1165
1166 // The pointer argument from vload_half.
1167 auto Arg1 = CI->getOperand(1);
1168
1169 auto IntTy = Type::getInt32Ty(M.getContext());
1170 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1171 auto NewPointerTy = PointerType::get(
1172 IntTy, Arg1->getType()->getPointerAddressSpace());
1173 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1174
1175 // Cast the half* pointer to int*.
1176 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1177
1178 // Index into the correct address of the casted pointer.
1179 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1180
1181 // Load from the int* we casted to.
1182 auto Load = new LoadInst(Index, "", CI);
1183
1184 // Our intrinsic to unpack a float2 from an int.
1185 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1186
1187 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1188
1189 // Get our float2.
1190 auto Call = CallInst::Create(NewF, Load, "", CI);
1191
1192 CI->replaceAllUsesWith(Call);
1193
1194 // Lastly, remember to remove the user.
1195 ToRemoves.push_back(CI);
1196 }
1197 }
1198
1199 Changed = !ToRemoves.empty();
1200
1201 // And cleanup the calls we don't use anymore.
1202 for (auto V : ToRemoves) {
1203 V->eraseFromParent();
1204 }
1205
1206 // And remove the function we don't need either too.
1207 F->eraseFromParent();
1208 }
1209 }
1210
1211 return Changed;
1212}
1213
1214bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1215 bool Changed = false;
1216
1217 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1218 "_Z11vload_half4jPU3AS2KDh"};
1219
1220 for (auto Name : Map) {
1221 // If we find a function with the matching name.
1222 if (auto F = M.getFunction(Name)) {
1223 SmallVector<Instruction *, 4> ToRemoves;
1224
1225 // Walk the users of the function.
1226 for (auto &U : F->uses()) {
1227 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1228 // The index argument from vload_half.
1229 auto Arg0 = CI->getOperand(0);
1230
1231 // The pointer argument from vload_half.
1232 auto Arg1 = CI->getOperand(1);
1233
1234 auto IntTy = Type::getInt32Ty(M.getContext());
1235 auto Int2Ty = VectorType::get(IntTy, 2);
1236 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1237 auto NewPointerTy = PointerType::get(
1238 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1239 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1240
1241 // Cast the half* pointer to int2*.
1242 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1243
1244 // Index into the correct address of the casted pointer.
1245 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1246
1247 // Load from the int2* we casted to.
1248 auto Load = new LoadInst(Index, "", CI);
1249
1250 // Extract each element from the loaded int2.
1251 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1252 "", CI);
1253 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1254 "", CI);
1255
1256 // Our intrinsic to unpack a float2 from an int.
1257 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1258
1259 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1260
1261 // Get the lower (x & y) components of our final float4.
1262 auto Lo = CallInst::Create(NewF, X, "", CI);
1263
1264 // Get the higher (z & w) components of our final float4.
1265 auto Hi = CallInst::Create(NewF, Y, "", CI);
1266
1267 Constant *ShuffleMask[4] = {
1268 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1269 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1270
1271 // Combine our two float2's into one float4.
1272 auto Combine = new ShuffleVectorInst(
1273 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1274
1275 CI->replaceAllUsesWith(Combine);
1276
1277 // Lastly, remember to remove the user.
1278 ToRemoves.push_back(CI);
1279 }
1280 }
1281
1282 Changed = !ToRemoves.empty();
1283
1284 // And cleanup the calls we don't use anymore.
1285 for (auto V : ToRemoves) {
1286 V->eraseFromParent();
1287 }
1288
1289 // And remove the function we don't need either too.
1290 F->eraseFromParent();
1291 }
1292 }
1293
1294 return Changed;
1295}
1296
1297bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1298 bool Changed = false;
1299
1300 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1301 "_Z15vstore_half_rtefjPU3AS1Dh",
1302 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1303
1304 for (auto Name : Map) {
1305 // If we find a function with the matching name.
1306 if (auto F = M.getFunction(Name)) {
1307 SmallVector<Instruction *, 4> ToRemoves;
1308
1309 // Walk the users of the function.
1310 for (auto &U : F->uses()) {
1311 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1312 // The value to store.
1313 auto Arg0 = CI->getOperand(0);
1314
1315 // The index argument from vstore_half.
1316 auto Arg1 = CI->getOperand(1);
1317
1318 // The pointer argument from vstore_half.
1319 auto Arg2 = CI->getOperand(2);
1320
David Neto22f144c2017-06-12 14:26:21 -04001321 auto IntTy = Type::getInt32Ty(M.getContext());
1322 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001323 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001324 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001325
1326 // Our intrinsic to pack a float2 to an int.
1327 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1328
1329 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1330
1331 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001332 auto TempVec =
1333 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1334 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001335
1336 // Pack the float2 -> half2 (in an int).
1337 auto X = CallInst::Create(NewF, TempVec, "", CI);
1338
David Neto482550a2018-03-24 05:21:07 -07001339 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001340 auto ShortTy = Type::getInt16Ty(M.getContext());
1341 auto ShortPointerTy = PointerType::get(
1342 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001343
David Neto17852de2017-05-29 17:29:31 -04001344 // Truncate our i32 to an i16.
1345 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001346
David Neto17852de2017-05-29 17:29:31 -04001347 // Cast the half* pointer to short*.
1348 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001349
David Neto17852de2017-05-29 17:29:31 -04001350 // Index into the correct address of the casted pointer.
1351 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001352
David Neto17852de2017-05-29 17:29:31 -04001353 // Store to the int* we casted to.
1354 auto Store = new StoreInst(Trunc, Index, CI);
1355
1356 CI->replaceAllUsesWith(Store);
1357 } else {
1358 // We can only write to 32-bit aligned words.
1359 //
1360 // Assuming base is aligned to 32-bits, replace the equivalent of
1361 // vstore_half(value, index, base)
1362 // with:
1363 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1364 // uint32_t write_to_upper_half = index & 1u;
1365 // uint32_t shift = write_to_upper_half << 4;
1366 //
1367 // // Pack the float value as a half number in bottom 16 bits
1368 // // of an i32.
1369 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1370 //
1371 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1372 // ^ ((packed & 0xffff) << shift)
1373 // // We only need relaxed consistency, but OpenCL 1.2 only has
1374 // // sequentially consistent atomics.
1375 // // TODO(dneto): Use relaxed consistency.
1376 // atomic_xor(target_ptr, xor_value)
1377 auto IntPointerTy = PointerType::get(
1378 IntTy, Arg2->getType()->getPointerAddressSpace());
1379
1380 auto Four = ConstantInt::get(IntTy, 4);
1381 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1382
1383 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1384 // Compute index / 2
1385 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1386 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1387 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1388 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1389 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1390 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1391 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1392
1393 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1394 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1395 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1396
1397 // Generate the call to atomi_xor.
1398 SmallVector<Type *, 5> ParamTypes;
1399 // The pointer type.
1400 ParamTypes.push_back(IntPointerTy);
1401 // The Types for memory scope, semantics, and value.
1402 ParamTypes.push_back(IntTy);
1403 ParamTypes.push_back(IntTy);
1404 ParamTypes.push_back(IntTy);
1405 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1406 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1407
1408 const auto ConstantScopeDevice =
1409 ConstantInt::get(IntTy, spv::ScopeDevice);
1410 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1411 // (SPIR-V Workgroup).
1412 const auto AddrSpaceSemanticsBits =
1413 IntPointerTy->getPointerAddressSpace() == 1
1414 ? spv::MemorySemanticsUniformMemoryMask
1415 : spv::MemorySemanticsWorkgroupMemoryMask;
1416
1417 // We're using relaxed consistency here.
1418 const auto ConstantMemorySemantics =
1419 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1420 AddrSpaceSemanticsBits);
1421
1422 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1423 ConstantMemorySemantics, ValueToXor};
1424 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1425 }
David Neto22f144c2017-06-12 14:26:21 -04001426
1427 // Lastly, remember to remove the user.
1428 ToRemoves.push_back(CI);
1429 }
1430 }
1431
1432 Changed = !ToRemoves.empty();
1433
1434 // And cleanup the calls we don't use anymore.
1435 for (auto V : ToRemoves) {
1436 V->eraseFromParent();
1437 }
1438
1439 // And remove the function we don't need either too.
1440 F->eraseFromParent();
1441 }
1442 }
1443
1444 return Changed;
1445}
1446
1447bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1448 bool Changed = false;
1449
1450 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1451 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1452 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1453
1454 for (auto Name : Map) {
1455 // If we find a function with the matching name.
1456 if (auto F = M.getFunction(Name)) {
1457 SmallVector<Instruction *, 4> ToRemoves;
1458
1459 // Walk the users of the function.
1460 for (auto &U : F->uses()) {
1461 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1462 // The value to store.
1463 auto Arg0 = CI->getOperand(0);
1464
1465 // The index argument from vstore_half.
1466 auto Arg1 = CI->getOperand(1);
1467
1468 // The pointer argument from vstore_half.
1469 auto Arg2 = CI->getOperand(2);
1470
1471 auto IntTy = Type::getInt32Ty(M.getContext());
1472 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1473 auto NewPointerTy = PointerType::get(
1474 IntTy, Arg2->getType()->getPointerAddressSpace());
1475 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1476
1477 // Our intrinsic to pack a float2 to an int.
1478 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1479
1480 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1481
1482 // Turn the packed x & y into the final packing.
1483 auto X = CallInst::Create(NewF, Arg0, "", CI);
1484
1485 // Cast the half* pointer to int*.
1486 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1487
1488 // Index into the correct address of the casted pointer.
1489 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1490
1491 // Store to the int* we casted to.
1492 auto Store = new StoreInst(X, Index, CI);
1493
1494 CI->replaceAllUsesWith(Store);
1495
1496 // Lastly, remember to remove the user.
1497 ToRemoves.push_back(CI);
1498 }
1499 }
1500
1501 Changed = !ToRemoves.empty();
1502
1503 // And cleanup the calls we don't use anymore.
1504 for (auto V : ToRemoves) {
1505 V->eraseFromParent();
1506 }
1507
1508 // And remove the function we don't need either too.
1509 F->eraseFromParent();
1510 }
1511 }
1512
1513 return Changed;
1514}
1515
1516bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1517 bool Changed = false;
1518
1519 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1520 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1521 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1522
1523 for (auto Name : Map) {
1524 // If we find a function with the matching name.
1525 if (auto F = M.getFunction(Name)) {
1526 SmallVector<Instruction *, 4> ToRemoves;
1527
1528 // Walk the users of the function.
1529 for (auto &U : F->uses()) {
1530 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1531 // The value to store.
1532 auto Arg0 = CI->getOperand(0);
1533
1534 // The index argument from vstore_half.
1535 auto Arg1 = CI->getOperand(1);
1536
1537 // The pointer argument from vstore_half.
1538 auto Arg2 = CI->getOperand(2);
1539
1540 auto IntTy = Type::getInt32Ty(M.getContext());
1541 auto Int2Ty = VectorType::get(IntTy, 2);
1542 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1543 auto NewPointerTy = PointerType::get(
1544 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1545 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1546
1547 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1548 ConstantInt::get(IntTy, 1)};
1549
1550 // Extract out the x & y components of our to store value.
1551 auto Lo =
1552 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1553 ConstantVector::get(LoShuffleMask), "", CI);
1554
1555 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1556 ConstantInt::get(IntTy, 3)};
1557
1558 // Extract out the z & w components of our to store value.
1559 auto Hi =
1560 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1561 ConstantVector::get(HiShuffleMask), "", CI);
1562
1563 // Our intrinsic to pack a float2 to an int.
1564 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1565
1566 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1567
1568 // Turn the packed x & y into the final component of our int2.
1569 auto X = CallInst::Create(NewF, Lo, "", CI);
1570
1571 // Turn the packed z & w into the final component of our int2.
1572 auto Y = CallInst::Create(NewF, Hi, "", CI);
1573
1574 auto Combine = InsertElementInst::Create(
1575 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1576 Combine = InsertElementInst::Create(
1577 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1578
1579 // Cast the half* pointer to int2*.
1580 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1581
1582 // Index into the correct address of the casted pointer.
1583 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1584
1585 // Store to the int2* we casted to.
1586 auto Store = new StoreInst(Combine, Index, CI);
1587
1588 CI->replaceAllUsesWith(Store);
1589
1590 // Lastly, remember to remove the user.
1591 ToRemoves.push_back(CI);
1592 }
1593 }
1594
1595 Changed = !ToRemoves.empty();
1596
1597 // And cleanup the calls we don't use anymore.
1598 for (auto V : ToRemoves) {
1599 V->eraseFromParent();
1600 }
1601
1602 // And remove the function we don't need either too.
1603 F->eraseFromParent();
1604 }
1605 }
1606
1607 return Changed;
1608}
1609
1610bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1611 bool Changed = false;
1612
1613 const std::map<const char *, const char*> Map = {
1614 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1615 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1616 };
1617
1618 for (auto Pair : Map) {
1619 // If we find a function with the matching name.
1620 if (auto F = M.getFunction(Pair.first)) {
1621 SmallVector<Instruction *, 4> ToRemoves;
1622
1623 // Walk the users of the function.
1624 for (auto &U : F->uses()) {
1625 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1626 // The image.
1627 auto Arg0 = CI->getOperand(0);
1628
1629 // The sampler.
1630 auto Arg1 = CI->getOperand(1);
1631
1632 // The coordinate (integer type that we can't handle).
1633 auto Arg2 = CI->getOperand(2);
1634
1635 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1636
1637 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1638
1639 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1640
1641 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1642
1643 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1644
1645 CI->replaceAllUsesWith(NewCI);
1646
1647 // Lastly, remember to remove the user.
1648 ToRemoves.push_back(CI);
1649 }
1650 }
1651
1652 Changed = !ToRemoves.empty();
1653
1654 // And cleanup the calls we don't use anymore.
1655 for (auto V : ToRemoves) {
1656 V->eraseFromParent();
1657 }
1658
1659 // And remove the function we don't need either too.
1660 F->eraseFromParent();
1661 }
1662 }
1663
1664 return Changed;
1665}
1666
1667bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1668 bool Changed = false;
1669
1670 const std::map<const char *, const char *> Map = {
David Neto22f144c2017-06-12 14:26:21 -04001671 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1672 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1673 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1674 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1675 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01001676 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04001677
1678 for (auto Pair : Map) {
1679 // If we find a function with the matching name.
1680 if (auto F = M.getFunction(Pair.first)) {
1681 SmallVector<Instruction *, 4> ToRemoves;
1682
1683 // Walk the users of the function.
1684 for (auto &U : F->uses()) {
1685 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1686 auto FType = F->getFunctionType();
1687 SmallVector<Type *, 5> ParamTypes;
1688
1689 // The pointer type.
1690 ParamTypes.push_back(FType->getParamType(0));
1691
1692 auto IntTy = Type::getInt32Ty(M.getContext());
1693
1694 // The memory scope type.
1695 ParamTypes.push_back(IntTy);
1696
1697 // The memory semantics type.
1698 ParamTypes.push_back(IntTy);
1699
1700 if (2 < CI->getNumArgOperands()) {
1701 // The unequal memory semantics type.
1702 ParamTypes.push_back(IntTy);
1703
1704 // The value type.
1705 ParamTypes.push_back(FType->getParamType(2));
1706
1707 // The comparator type.
1708 ParamTypes.push_back(FType->getParamType(1));
1709 } else if (1 < CI->getNumArgOperands()) {
1710 // The value type.
1711 ParamTypes.push_back(FType->getParamType(1));
1712 }
1713
1714 auto NewFType =
1715 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1716 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1717
1718 // We need to map the OpenCL constants to the SPIR-V equivalents.
1719 const auto ConstantScopeDevice =
1720 ConstantInt::get(IntTy, spv::ScopeDevice);
1721 const auto ConstantMemorySemantics = ConstantInt::get(
1722 IntTy, spv::MemorySemanticsUniformMemoryMask |
1723 spv::MemorySemanticsSequentiallyConsistentMask);
1724
1725 SmallVector<Value *, 5> Params;
1726
1727 // The pointer.
1728 Params.push_back(CI->getArgOperand(0));
1729
1730 // The memory scope.
1731 Params.push_back(ConstantScopeDevice);
1732
1733 // The memory semantics.
1734 Params.push_back(ConstantMemorySemantics);
1735
1736 if (2 < CI->getNumArgOperands()) {
1737 // The unequal memory semantics.
1738 Params.push_back(ConstantMemorySemantics);
1739
1740 // The value.
1741 Params.push_back(CI->getArgOperand(2));
1742
1743 // The comparator.
1744 Params.push_back(CI->getArgOperand(1));
1745 } else if (1 < CI->getNumArgOperands()) {
1746 // The value.
1747 Params.push_back(CI->getArgOperand(1));
1748 }
1749
1750 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1751
1752 CI->replaceAllUsesWith(NewCI);
1753
1754 // Lastly, remember to remove the user.
1755 ToRemoves.push_back(CI);
1756 }
1757 }
1758
1759 Changed = !ToRemoves.empty();
1760
1761 // And cleanup the calls we don't use anymore.
1762 for (auto V : ToRemoves) {
1763 V->eraseFromParent();
1764 }
1765
1766 // And remove the function we don't need either too.
1767 F->eraseFromParent();
1768 }
1769 }
1770
Neil Henning39672102017-09-29 14:33:13 +01001771 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
1772 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
1773 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
1774 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
1775 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
1776 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
1777 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
1778 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
1779 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
1780 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
1781 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
1782 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
1783 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
1784 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
1785 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
1786 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
1787 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
1788
1789 for (auto Pair : Map2) {
1790 // If we find a function with the matching name.
1791 if (auto F = M.getFunction(Pair.first)) {
1792 SmallVector<Instruction *, 4> ToRemoves;
1793
1794 // Walk the users of the function.
1795 for (auto &U : F->uses()) {
1796 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1797 auto AtomicOp = new AtomicRMWInst(
1798 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
1799 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
1800
1801 CI->replaceAllUsesWith(AtomicOp);
1802
1803 // Lastly, remember to remove the user.
1804 ToRemoves.push_back(CI);
1805 }
1806 }
1807
1808 Changed = !ToRemoves.empty();
1809
1810 // And cleanup the calls we don't use anymore.
1811 for (auto V : ToRemoves) {
1812 V->eraseFromParent();
1813 }
1814
1815 // And remove the function we don't need either too.
1816 F->eraseFromParent();
1817 }
1818 }
1819
David Neto22f144c2017-06-12 14:26:21 -04001820 return Changed;
1821}
1822
1823bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1824 bool Changed = false;
1825
1826 // If we find a function with the matching name.
1827 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1828 SmallVector<Instruction *, 4> ToRemoves;
1829
1830 auto IntTy = Type::getInt32Ty(M.getContext());
1831 auto FloatTy = Type::getFloatTy(M.getContext());
1832
1833 Constant *DownShuffleMask[3] = {
1834 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1835 ConstantInt::get(IntTy, 2)};
1836
1837 Constant *UpShuffleMask[4] = {
1838 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1839 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1840
1841 Constant *FloatVec[3] = {
1842 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1843 };
1844
1845 // Walk the users of the function.
1846 for (auto &U : F->uses()) {
1847 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1848 auto Vec4Ty = CI->getArgOperand(0)->getType();
1849 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1850 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1851 auto Vec3Ty = Arg0->getType();
1852
1853 auto NewFType =
1854 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1855
1856 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1857
1858 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1859
1860 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1861
1862 CI->replaceAllUsesWith(Result);
1863
1864 // Lastly, remember to remove the user.
1865 ToRemoves.push_back(CI);
1866 }
1867 }
1868
1869 Changed = !ToRemoves.empty();
1870
1871 // And cleanup the calls we don't use anymore.
1872 for (auto V : ToRemoves) {
1873 V->eraseFromParent();
1874 }
1875
1876 // And remove the function we don't need either too.
1877 F->eraseFromParent();
1878 }
1879
1880 return Changed;
1881}
David Neto62653202017-10-16 19:05:18 -04001882
1883bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
1884 bool Changed = false;
1885
1886 // OpenCL's float result = fract(float x, float* ptr)
1887 //
1888 // In the LLVM domain:
1889 //
1890 // %floor_result = call spir_func float @floor(float %x)
1891 // store float %floor_result, float * %ptr
1892 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
1893 // %result = call spir_func float
1894 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
1895 //
1896 // Becomes in the SPIR-V domain, where translations of floor, fmin,
1897 // and clspv.fract occur in the SPIR-V generator pass:
1898 //
1899 // %glsl_ext = OpExtInstImport "GLSL.std.450"
1900 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
1901 // ...
1902 // %floor_result = OpExtInst %float %glsl_ext Floor %x
1903 // OpStore %ptr %floor_result
1904 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
1905 // %fract_result = OpExtInst %float
1906 // %glsl_ext Fmin %fract_intermediate %just_under_1
1907
1908
1909 using std::string;
1910
1911 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
1912 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
1913 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
1914 auto make_quad = [](const char *a, const char *b, const char *c,
1915 const char *d) {
1916 return std::tuple<const char *, const char *, const char *, const char *>(
1917 a, b, c, d);
1918 };
1919 const std::vector<QuadType> Functions = {
1920 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
1921 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
1922 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
1923 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
1924 };
1925
1926 for (auto& quad : Functions) {
1927 const StringRef fract_name(std::get<0>(quad));
1928
1929 // If we find a function with the matching name.
1930 if (auto F = M.getFunction(fract_name)) {
1931 if (F->use_begin() == F->use_end())
1932 continue;
1933
1934 // We have some uses.
1935 Changed = true;
1936
1937 auto& Context = M.getContext();
1938
1939 const StringRef floor_name(std::get<1>(quad));
1940 const StringRef fmin_name(std::get<2>(quad));
1941 const StringRef clspv_fract_name(std::get<3>(quad));
1942
1943 // This is either float or a float vector. All the float-like
1944 // types are this type.
1945 auto result_ty = F->getReturnType();
1946
1947 Function* fmin_fn = M.getFunction(fmin_name);
1948 if (!fmin_fn) {
1949 // Make the fmin function.
1950 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
1951 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
1952 fmin_fn->addFnAttr(Attribute::ReadOnly);
1953 fmin_fn->addFnAttr(Attribute::ReadNone);
1954 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
1955 }
1956
1957 Function* floor_fn = M.getFunction(floor_name);
1958 if (!floor_fn) {
1959 // Make the floor function.
1960 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1961 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
1962 floor_fn->addFnAttr(Attribute::ReadOnly);
1963 floor_fn->addFnAttr(Attribute::ReadNone);
1964 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
1965 }
1966
1967 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
1968 if (!clspv_fract_fn) {
1969 // Make the clspv_fract function.
1970 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
1971 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
1972 clspv_fract_fn->addFnAttr(Attribute::ReadOnly);
1973 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
1974 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
1975 }
1976
1977 // Number of significant significand bits, whether represented or not.
1978 unsigned num_significand_bits;
1979 switch (result_ty->getScalarType()->getTypeID()) {
1980 case Type::HalfTyID:
1981 num_significand_bits = 11;
1982 break;
1983 case Type::FloatTyID:
1984 num_significand_bits = 24;
1985 break;
1986 case Type::DoubleTyID:
1987 num_significand_bits = 53;
1988 break;
1989 default:
1990 assert(false && "Unhandled float type when processing fract builtin");
1991 break;
1992 }
1993 // Beware that the disassembler displays this value as
1994 // OpConstant %float 1
1995 // which is not quite right.
1996 const double kJustUnderOneScalar =
1997 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
1998
1999 Constant *just_under_one =
2000 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2001 if (result_ty->isVectorTy()) {
2002 just_under_one = ConstantVector::getSplat(
2003 result_ty->getVectorNumElements(), just_under_one);
2004 }
2005
2006 IRBuilder<> Builder(Context);
2007
2008 SmallVector<Instruction *, 4> ToRemoves;
2009
2010 // Walk the users of the function.
2011 for (auto &U : F->uses()) {
2012 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2013
2014 Builder.SetInsertPoint(CI);
2015 auto arg = CI->getArgOperand(0);
2016 auto ptr = CI->getArgOperand(1);
2017
2018 // Compute floor result and store it.
2019 auto floor = Builder.CreateCall(floor_fn, {arg});
2020 Builder.CreateStore(floor, ptr);
2021
2022 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2023 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2024
2025 CI->replaceAllUsesWith(fract_result);
2026
2027 // Lastly, remember to remove the user.
2028 ToRemoves.push_back(CI);
2029 }
2030 }
2031
2032 // And cleanup the calls we don't use anymore.
2033 for (auto V : ToRemoves) {
2034 V->eraseFromParent();
2035 }
2036
2037 // And remove the function we don't need either too.
2038 F->eraseFromParent();
2039 }
2040 }
2041
2042 return Changed;
2043}