blob: c45e03b2efe68755142a32b83d818ec11a0b3325 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
15#include <llvm/IR/Constants.h>
16#include <llvm/IR/Instructions.h>
17#include <llvm/IR/Module.h>
18#include <llvm/Pass.h>
David Neto17852de2017-05-29 17:29:31 -040019#include <llvm/Support/CommandLine.h>
David Neto22f144c2017-06-12 14:26:21 -040020#include <llvm/Support/raw_ostream.h>
21#include <llvm/Transforms/Utils/Cloning.h>
22
23#include <spirv/1.0/spirv.hpp>
24
25using namespace llvm;
26
27#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
28
David Netoac825b82017-05-30 12:49:01 -040029// TODO(dneto): As per Neil's suggestion, might not need this if you can
30// trace the pointer back far enough to see that it's 32-bit aligned.
31// However, even in the vstore_half case, you'll probably get better
32// performance if you can rely on SPV_KHR_16bit_storage since in the
33// alternate case you're using a (relaxed) atomic, and therefore have
34// to write through to the cache.
David Neto17852de2017-05-29 17:29:31 -040035static llvm::cl::opt<bool> f16bit_storage(
36 "f16bit_storage", llvm::cl::init(false),
37 llvm::cl::desc("Assume the target supports SPV_KHR_16bit_storage"));
38
David Neto22f144c2017-06-12 14:26:21 -040039namespace {
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)). Returns 0 when |v| is 0.
//
// NOTE: despite its name this is NOT a count of leading zeros. Callers in this
// file subtract two results to obtain the left-shift distance between two
// single-bit masks, which relies on the bit-index behaviour.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the top set bit: test the upper half of the remaining
  // range and, when it is non-empty, discard the lower half.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      index += width;
    }
  }

  return index;
}
59
60Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
61 if (1 == elements) {
62 return Type::getInt1Ty(C);
63 } else {
64 return VectorType::get(Type::getInt1Ty(C), elements);
65 }
66}
67
68struct ReplaceOpenCLBuiltinPass final : public ModulePass {
69 static char ID;
70 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
71
72 bool runOnModule(Module &M) override;
73 bool replaceRecip(Module &M);
74 bool replaceDivide(Module &M);
75 bool replaceExp10(Module &M);
76 bool replaceLog10(Module &M);
77 bool replaceBarrier(Module &M);
78 bool replaceMemFence(Module &M);
79 bool replaceRelational(Module &M);
80 bool replaceIsInfAndIsNan(Module &M);
81 bool replaceAllAndAny(Module &M);
82 bool replaceSignbit(Module &M);
83 bool replaceMadandMad24andMul24(Module &M);
84 bool replaceVloadHalf(Module &M);
85 bool replaceVloadHalf2(Module &M);
86 bool replaceVloadHalf4(Module &M);
87 bool replaceVstoreHalf(Module &M);
88 bool replaceVstoreHalf2(Module &M);
89 bool replaceVstoreHalf4(Module &M);
90 bool replaceReadImageF(Module &M);
91 bool replaceAtomics(Module &M);
92 bool replaceCross(Module &M);
93};
94}
95
96char ReplaceOpenCLBuiltinPass::ID = 0;
97static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
98 "Replace OpenCL Builtins Pass");
99
100namespace clspv {
101ModulePass *createReplaceOpenCLBuiltinPass() {
102 return new ReplaceOpenCLBuiltinPass();
103}
104}
105
106bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
107 bool Changed = false;
108
109 Changed |= replaceRecip(M);
110 Changed |= replaceDivide(M);
111 Changed |= replaceExp10(M);
112 Changed |= replaceLog10(M);
113 Changed |= replaceBarrier(M);
114 Changed |= replaceMemFence(M);
115 Changed |= replaceRelational(M);
116 Changed |= replaceIsInfAndIsNan(M);
117 Changed |= replaceAllAndAny(M);
118 Changed |= replaceSignbit(M);
119 Changed |= replaceMadandMad24andMul24(M);
120 Changed |= replaceVloadHalf(M);
121 Changed |= replaceVloadHalf2(M);
122 Changed |= replaceVloadHalf4(M);
123 Changed |= replaceVstoreHalf(M);
124 Changed |= replaceVstoreHalf2(M);
125 Changed |= replaceVstoreHalf4(M);
126 Changed |= replaceReadImageF(M);
127 Changed |= replaceAtomics(M);
128 Changed |= replaceCross(M);
129
130 return Changed;
131}
132
133bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
134 bool Changed = false;
135
136 const char *Names[] = {
137 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
138 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
139 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
140 };
141
142 for (auto Name : Names) {
143 // If we find a function with the matching name.
144 if (auto F = M.getFunction(Name)) {
145 SmallVector<Instruction *, 4> ToRemoves;
146
147 // Walk the users of the function.
148 for (auto &U : F->uses()) {
149 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
150 // Recip has one arg.
151 auto Arg = CI->getOperand(0);
152
153 auto Div = BinaryOperator::Create(
154 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
155 CI);
156
157 CI->replaceAllUsesWith(Div);
158
159 // Lastly, remember to remove the user.
160 ToRemoves.push_back(CI);
161 }
162 }
163
164 Changed = !ToRemoves.empty();
165
166 // And cleanup the calls we don't use anymore.
167 for (auto V : ToRemoves) {
168 V->eraseFromParent();
169 }
170
171 // And remove the function we don't need either too.
172 F->eraseFromParent();
173 }
174 }
175
176 return Changed;
177}
178
179bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
180 bool Changed = false;
181
182 const char *Names[] = {
183 "_Z11half_divideff", "_Z13native_divideff",
184 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
185 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
186 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
187 };
188
189 for (auto Name : Names) {
190 // If we find a function with the matching name.
191 if (auto F = M.getFunction(Name)) {
192 SmallVector<Instruction *, 4> ToRemoves;
193
194 // Walk the users of the function.
195 for (auto &U : F->uses()) {
196 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
197 auto Div = BinaryOperator::Create(
198 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
199
200 CI->replaceAllUsesWith(Div);
201
202 // Lastly, remember to remove the user.
203 ToRemoves.push_back(CI);
204 }
205 }
206
207 Changed = !ToRemoves.empty();
208
209 // And cleanup the calls we don't use anymore.
210 for (auto V : ToRemoves) {
211 V->eraseFromParent();
212 }
213
214 // And remove the function we don't need either too.
215 F->eraseFromParent();
216 }
217 }
218
219 return Changed;
220}
221
222bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
223 bool Changed = false;
224
225 const std::map<const char *, const char *> Map = {
226 {"_Z5exp10f", "_Z3expf"},
227 {"_Z10half_exp10f", "_Z8half_expf"},
228 {"_Z12native_exp10f", "_Z10native_expf"},
229 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
230 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
231 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
232 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
233 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
234 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
235 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
236 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
237 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
238
239 for (auto Pair : Map) {
240 // If we find a function with the matching name.
241 if (auto F = M.getFunction(Pair.first)) {
242 SmallVector<Instruction *, 4> ToRemoves;
243
244 // Walk the users of the function.
245 for (auto &U : F->uses()) {
246 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
247 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
248
249 auto Arg = CI->getOperand(0);
250
251 // Constant of the natural log of 10 (ln(10)).
252 const double Ln10 =
253 2.302585092994045684017991454684364207601101488628772976033;
254
255 auto Mul = BinaryOperator::Create(
256 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
257 CI);
258
259 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
260
261 CI->replaceAllUsesWith(NewCI);
262
263 // Lastly, remember to remove the user.
264 ToRemoves.push_back(CI);
265 }
266 }
267
268 Changed = !ToRemoves.empty();
269
270 // And cleanup the calls we don't use anymore.
271 for (auto V : ToRemoves) {
272 V->eraseFromParent();
273 }
274
275 // And remove the function we don't need either too.
276 F->eraseFromParent();
277 }
278 }
279
280 return Changed;
281}
282
283bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
284 bool Changed = false;
285
286 const std::map<const char *, const char *> Map = {
287 {"_Z5log10f", "_Z3logf"},
288 {"_Z10half_log10f", "_Z8half_logf"},
289 {"_Z12native_log10f", "_Z10native_logf"},
290 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
291 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
292 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
293 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
294 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
295 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
296 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
297 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
298 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
299
300 for (auto Pair : Map) {
301 // If we find a function with the matching name.
302 if (auto F = M.getFunction(Pair.first)) {
303 SmallVector<Instruction *, 4> ToRemoves;
304
305 // Walk the users of the function.
306 for (auto &U : F->uses()) {
307 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
308 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
309
310 auto Arg = CI->getOperand(0);
311
312 // Constant of the reciprocal of the natural log of 10 (ln(10)).
313 const double Ln10 =
314 0.434294481903251827651128918916605082294397005803666566114;
315
316 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
317
318 auto Mul = BinaryOperator::Create(
319 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
320 "", CI);
321
322 CI->replaceAllUsesWith(Mul);
323
324 // Lastly, remember to remove the user.
325 ToRemoves.push_back(CI);
326 }
327 }
328
329 Changed = !ToRemoves.empty();
330
331 // And cleanup the calls we don't use anymore.
332 for (auto V : ToRemoves) {
333 V->eraseFromParent();
334 }
335
336 // And remove the function we don't need either too.
337 F->eraseFromParent();
338 }
339 }
340
341 return Changed;
342}
343
344bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
345 bool Changed = false;
346
347 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
348
349 const std::map<const char *, const char *> Map = {
350 {"_Z7barrierj", "__spirv_control_barrier"}};
351
352 for (auto Pair : Map) {
353 // If we find a function with the matching name.
354 if (auto F = M.getFunction(Pair.first)) {
355 SmallVector<Instruction *, 4> ToRemoves;
356
357 // Walk the users of the function.
358 for (auto &U : F->uses()) {
359 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
360 auto FType = F->getFunctionType();
361 SmallVector<Type *, 3> Params;
362 for (unsigned i = 0; i < 3; i++) {
363 Params.push_back(FType->getParamType(0));
364 }
365 auto NewFType =
366 FunctionType::get(FType->getReturnType(), Params, false);
367 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
368
369 auto Arg = CI->getOperand(0);
370
371 // We need to map the OpenCL constants to the SPIR-V equivalents.
372 const auto LocalMemFence =
373 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
374 const auto GlobalMemFence =
375 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
376 const auto ConstantSequentiallyConsistent = ConstantInt::get(
377 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
378 const auto ConstantScopeDevice =
379 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
380 const auto ConstantScopeWorkgroup =
381 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
382
383 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
384 const auto LocalMemFenceMask = BinaryOperator::Create(
385 Instruction::And, LocalMemFence, Arg, "", CI);
386 const auto WorkgroupShiftAmount =
387 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
388 clz(CLK_LOCAL_MEM_FENCE);
389 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
390 Instruction::Shl, LocalMemFenceMask,
391 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
392
393 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
394 const auto GlobalMemFenceMask = BinaryOperator::Create(
395 Instruction::And, GlobalMemFence, Arg, "", CI);
396 const auto UniformShiftAmount =
397 clz(spv::MemorySemanticsUniformMemoryMask) -
398 clz(CLK_GLOBAL_MEM_FENCE);
399 const auto MemorySemanticsUniform = BinaryOperator::Create(
400 Instruction::Shl, GlobalMemFenceMask,
401 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
402
403 // And combine the above together, also adding in
404 // MemorySemanticsSequentiallyConsistentMask.
405 auto MemorySemantics =
406 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
407 ConstantSequentiallyConsistent, "", CI);
408 MemorySemantics = BinaryOperator::Create(
409 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
410
411 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
412 // Device Scope, otherwise Workgroup Scope.
413 const auto Cmp =
414 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
415 GlobalMemFenceMask, GlobalMemFence, "", CI);
416 const auto MemoryScope = SelectInst::Create(
417 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
418
419 // Lastly, the Execution Scope is always Workgroup Scope.
420 const auto ExecutionScope = ConstantScopeWorkgroup;
421
422 auto NewCI = CallInst::Create(
423 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
424
425 CI->replaceAllUsesWith(NewCI);
426
427 // Lastly, remember to remove the user.
428 ToRemoves.push_back(CI);
429 }
430 }
431
432 Changed = !ToRemoves.empty();
433
434 // And cleanup the calls we don't use anymore.
435 for (auto V : ToRemoves) {
436 V->eraseFromParent();
437 }
438
439 // And remove the function we don't need either too.
440 F->eraseFromParent();
441 }
442 }
443
444 return Changed;
445}
446
447bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
448 bool Changed = false;
449
450 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
451
452 const std::map<const char *, const char *> Map = {
453 {"_Z9mem_fencej", "__spirv_memory_barrier"},
454 {"_Z14read_mem_fencej", "__spirv_memory_barrier"},
455 {"_Z15write_mem_fencej", "__spirv_memory_barrier"}};
456
457 std::map<const char *, uint32_t> MemorySemanticsMap = {
458 {"_Z9mem_fencej", spv::MemorySemanticsSequentiallyConsistentMask},
459 {"_Z14read_mem_fencej", spv::MemorySemanticsAcquireMask},
460 {"_Z15write_mem_fencej", spv::MemorySemanticsReleaseMask}};
461
462 for (auto Pair : Map) {
463 // If we find a function with the matching name.
464 if (auto F = M.getFunction(Pair.first)) {
465 SmallVector<Instruction *, 4> ToRemoves;
466
467 // Walk the users of the function.
468 for (auto &U : F->uses()) {
469 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
470 auto FType = F->getFunctionType();
471 SmallVector<Type *, 2> Params;
472 for (unsigned i = 0; i < 2; i++) {
473 Params.push_back(FType->getParamType(0));
474 }
475 auto NewFType =
476 FunctionType::get(FType->getReturnType(), Params, false);
477 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
478
479 auto Arg = CI->getOperand(0);
480
481 // We need to map the OpenCL constants to the SPIR-V equivalents.
482 const auto LocalMemFence =
483 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
484 const auto GlobalMemFence =
485 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
486 const auto ConstantMemorySemantics =
487 ConstantInt::get(Arg->getType(), MemorySemanticsMap[Pair.first]);
488 const auto ConstantScopeDevice =
489 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
490
491 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
492 const auto LocalMemFenceMask = BinaryOperator::Create(
493 Instruction::And, LocalMemFence, Arg, "", CI);
494 const auto WorkgroupShiftAmount =
495 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
496 clz(CLK_LOCAL_MEM_FENCE);
497 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
498 Instruction::Shl, LocalMemFenceMask,
499 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
500
501 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
502 const auto GlobalMemFenceMask = BinaryOperator::Create(
503 Instruction::And, GlobalMemFence, Arg, "", CI);
504 const auto UniformShiftAmount =
505 clz(spv::MemorySemanticsUniformMemoryMask) -
506 clz(CLK_GLOBAL_MEM_FENCE);
507 const auto MemorySemanticsUniform = BinaryOperator::Create(
508 Instruction::Shl, GlobalMemFenceMask,
509 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
510
511 // And combine the above together, also adding in
512 // MemorySemanticsSequentiallyConsistentMask.
513 auto MemorySemantics =
514 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
515 ConstantMemorySemantics, "", CI);
516 MemorySemantics = BinaryOperator::Create(
517 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
518
519 // Memory Scope is always device.
520 const auto MemoryScope = ConstantScopeDevice;
521
522 auto NewCI =
523 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
524
525 CI->replaceAllUsesWith(NewCI);
526
527 // Lastly, remember to remove the user.
528 ToRemoves.push_back(CI);
529 }
530 }
531
532 Changed = !ToRemoves.empty();
533
534 // And cleanup the calls we don't use anymore.
535 for (auto V : ToRemoves) {
536 V->eraseFromParent();
537 }
538
539 // And remove the function we don't need either too.
540 F->eraseFromParent();
541 }
542 }
543
544 return Changed;
545}
546
547bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
548 bool Changed = false;
549
550 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
551 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
552 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
553 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
554 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
555 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
556 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
557 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
558 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
559 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
560 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
561 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
562 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
563 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
564 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
565 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
566 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
567 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
568 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
569 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
570 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
571 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
572 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
573 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
574 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
575 };
576
577 for (auto Pair : Map) {
578 // If we find a function with the matching name.
579 if (auto F = M.getFunction(Pair.first)) {
580 SmallVector<Instruction *, 4> ToRemoves;
581
582 // Walk the users of the function.
583 for (auto &U : F->uses()) {
584 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
585 // The predicate to use in the CmpInst.
586 auto Predicate = Pair.second.first;
587
588 // The value to return for true.
589 auto TrueValue =
590 ConstantInt::getSigned(CI->getType(), Pair.second.second);
591
592 // The value to return for false.
593 auto FalseValue = Constant::getNullValue(CI->getType());
594
595 auto Arg1 = CI->getOperand(0);
596 auto Arg2 = CI->getOperand(1);
597
598 const auto Cmp =
599 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
600
601 const auto Select =
602 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
603
604 CI->replaceAllUsesWith(Select);
605
606 // Lastly, remember to remove the user.
607 ToRemoves.push_back(CI);
608 }
609 }
610
611 Changed = !ToRemoves.empty();
612
613 // And cleanup the calls we don't use anymore.
614 for (auto V : ToRemoves) {
615 V->eraseFromParent();
616 }
617
618 // And remove the function we don't need either too.
619 F->eraseFromParent();
620 }
621 }
622
623 return Changed;
624}
625
626bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
627 bool Changed = false;
628
629 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
630 {"_Z5isinff", {"__spirv_isinff", 1}},
631 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
632 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
633 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
634 {"_Z5isnanf", {"__spirv_isnanf", 1}},
635 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
636 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
637 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
638 };
639
640 for (auto Pair : Map) {
641 // If we find a function with the matching name.
642 if (auto F = M.getFunction(Pair.first)) {
643 SmallVector<Instruction *, 4> ToRemoves;
644
645 // Walk the users of the function.
646 for (auto &U : F->uses()) {
647 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
648 const auto CITy = CI->getType();
649
650 // The fake SPIR-V intrinsic to generate.
651 auto SPIRVIntrinsic = Pair.second.first;
652
653 // The value to return for true.
654 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
655
656 // The value to return for false.
657 auto FalseValue = Constant::getNullValue(CITy);
658
659 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
660 M.getContext(),
661 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
662
663 auto NewFType =
664 FunctionType::get(CorrespondingBoolTy,
665 F->getFunctionType()->getParamType(0), false);
666
667 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
668
669 auto Arg = CI->getOperand(0);
670
671 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
672
673 const auto Select =
674 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
675
676 CI->replaceAllUsesWith(Select);
677
678 // Lastly, remember to remove the user.
679 ToRemoves.push_back(CI);
680 }
681 }
682
683 Changed = !ToRemoves.empty();
684
685 // And cleanup the calls we don't use anymore.
686 for (auto V : ToRemoves) {
687 V->eraseFromParent();
688 }
689
690 // And remove the function we don't need either too.
691 F->eraseFromParent();
692 }
693 }
694
695 return Changed;
696}
697
698bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
699 bool Changed = false;
700
701 const std::map<const char *, const char *> Map = {
702 {"_Z3alli", ""},
703 {"_Z3allDv2_i", "__spirv_allDv2_i"},
704 {"_Z3allDv3_i", "__spirv_allDv3_i"},
705 {"_Z3allDv4_i", "__spirv_allDv4_i"},
706 {"_Z3anyi", ""},
707 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
708 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
709 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
710 };
711
712 for (auto Pair : Map) {
713 // If we find a function with the matching name.
714 if (auto F = M.getFunction(Pair.first)) {
715 SmallVector<Instruction *, 4> ToRemoves;
716
717 // Walk the users of the function.
718 for (auto &U : F->uses()) {
719 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
720 // The fake SPIR-V intrinsic to generate.
721 auto SPIRVIntrinsic = Pair.second;
722
723 auto Arg = CI->getOperand(0);
724
725 Value *V;
726
727 // If we have a function to call, call it!
728 if (0 < strlen(SPIRVIntrinsic)) {
729 // The value for zero to compare against.
730 const auto ZeroValue = Constant::getNullValue(Arg->getType());
731
732 const auto Cmp = CmpInst::Create(
733 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
734 const auto NewFType = FunctionType::get(
735 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
736
737 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
738
739 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
740
741 // The value to return for true.
742 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
743
744 // The value to return for false.
745 const auto FalseValue = Constant::getNullValue(CI->getType());
746
747 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
748 } else {
749 V = BinaryOperator::Create(Instruction::LShr, Arg,
750 ConstantInt::get(CI->getType(), 31), "",
751 CI);
752 }
753
754 CI->replaceAllUsesWith(V);
755
756 // Lastly, remember to remove the user.
757 ToRemoves.push_back(CI);
758 }
759 }
760
761 Changed = !ToRemoves.empty();
762
763 // And cleanup the calls we don't use anymore.
764 for (auto V : ToRemoves) {
765 V->eraseFromParent();
766 }
767
768 // And remove the function we don't need either too.
769 F->eraseFromParent();
770 }
771 }
772
773 return Changed;
774}
775
776bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
777 bool Changed = false;
778
779 const std::map<const char *, Instruction::BinaryOps> Map = {
780 {"_Z7signbitf", Instruction::LShr},
781 {"_Z7signbitDv2_f", Instruction::AShr},
782 {"_Z7signbitDv3_f", Instruction::AShr},
783 {"_Z7signbitDv4_f", Instruction::AShr},
784 };
785
786 for (auto Pair : Map) {
787 // If we find a function with the matching name.
788 if (auto F = M.getFunction(Pair.first)) {
789 SmallVector<Instruction *, 4> ToRemoves;
790
791 // Walk the users of the function.
792 for (auto &U : F->uses()) {
793 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
794 auto Arg = CI->getOperand(0);
795
796 auto Bitcast =
797 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
798
799 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
800 ConstantInt::get(CI->getType(), 31),
801 "", CI);
802
803 CI->replaceAllUsesWith(Shr);
804
805 // Lastly, remember to remove the user.
806 ToRemoves.push_back(CI);
807 }
808 }
809
810 Changed = !ToRemoves.empty();
811
812 // And cleanup the calls we don't use anymore.
813 for (auto V : ToRemoves) {
814 V->eraseFromParent();
815 }
816
817 // And remove the function we don't need either too.
818 F->eraseFromParent();
819 }
820 }
821
822 return Changed;
823}
824
825bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
826 bool Changed = false;
827
828 const std::map<const char *,
829 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
830 Map = {
831 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
832 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
833 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
834 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
835 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
836 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
837 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
838 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
839 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
840 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
841 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
842 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
843 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
844 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
845 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
846 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
847 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
848 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
849 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
850 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
851 };
852
853 for (auto Pair : Map) {
854 // If we find a function with the matching name.
855 if (auto F = M.getFunction(Pair.first)) {
856 SmallVector<Instruction *, 4> ToRemoves;
857
858 // Walk the users of the function.
859 for (auto &U : F->uses()) {
860 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
861 // The multiply instruction to use.
862 auto MulInst = Pair.second.first;
863
864 // The add instruction to use.
865 auto AddInst = Pair.second.second;
866
867 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
868
869 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
870 CI->getArgOperand(1), "", CI);
871
872 if (Instruction::BinaryOpsEnd != AddInst) {
873 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
874 CI);
875 }
876
877 CI->replaceAllUsesWith(I);
878
879 // Lastly, remember to remove the user.
880 ToRemoves.push_back(CI);
881 }
882 }
883
884 Changed = !ToRemoves.empty();
885
886 // And cleanup the calls we don't use anymore.
887 for (auto V : ToRemoves) {
888 V->eraseFromParent();
889 }
890
891 // And remove the function we don't need either too.
892 F->eraseFromParent();
893 }
894 }
895
896 return Changed;
897}
898
899bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
900 bool Changed = false;
901
902 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
903 "_Z10vload_halfjPU3AS2KDh"};
904
905 for (auto Name : Map) {
906 // If we find a function with the matching name.
907 if (auto F = M.getFunction(Name)) {
908 SmallVector<Instruction *, 4> ToRemoves;
909
910 // Walk the users of the function.
911 for (auto &U : F->uses()) {
912 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
913 // The index argument from vload_half.
914 auto Arg0 = CI->getOperand(0);
915
916 // The pointer argument from vload_half.
917 auto Arg1 = CI->getOperand(1);
918
David Neto22f144c2017-06-12 14:26:21 -0400919 auto IntTy = Type::getInt32Ty(M.getContext());
920 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -0400921 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
922
David Neto22f144c2017-06-12 14:26:21 -0400923 // Our intrinsic to unpack a float2 from an int.
924 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
925
926 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
927
David Netoac825b82017-05-30 12:49:01 -0400928 if (f16bit_storage) {
929 auto ShortTy = Type::getInt16Ty(M.getContext());
930 auto ShortPointerTy = PointerType::get(
931 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -0400932
David Netoac825b82017-05-30 12:49:01 -0400933 // Cast the half* pointer to short*.
934 auto Cast =
935 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400936
David Netoac825b82017-05-30 12:49:01 -0400937 // Index into the correct address of the casted pointer.
938 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
939
940 // Load from the short* we casted to.
941 auto Load = new LoadInst(Index, "", CI);
942
943 // ZExt the short -> int.
944 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
945
946 // Get our float2.
947 auto Call = CallInst::Create(NewF, ZExt, "", CI);
948
949 // Extract out the bottom element which is our float result.
950 auto Extract = ExtractElementInst::Create(
951 Call, ConstantInt::get(IntTy, 0), "", CI);
952
953 CI->replaceAllUsesWith(Extract);
954 } else {
955 // Assume the pointer argument points to storage aligned to 32bits
956 // or more.
957 // TODO(dneto): Do more analysis to make sure this is true?
958 //
            // Replace call vload_half(i32 %index, half addrspace(1)* %base)
            // with:
            //
            //   %base_i32_ptr = bitcast half addrspace(1)* %base
            //                     to i32 addrspace(1)*
            //   %index_is_odd32 = and i32 %index, 1
            //   %index_i32 = lshr i32 %index, 1
            //   %in_ptr = getelementptr i32, i32 addrspace(1)* %base_i32_ptr,
            //                     %index_i32
            //   %value_i32 = load i32, i32 addrspace(1)* %in_ptr
            //   %converted = call <2 x float>
            //                     @spirv.unpack.v2f16(i32 %value_i32)
            //   %value = extractelement <2 x float> %converted,
            //                     %index_is_odd32
969
970 auto IntPointerTy = PointerType::get(
971 IntTy, Arg1->getType()->getPointerAddressSpace());
972
David Neto973e6a82017-05-30 13:48:18 -0400973 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -0400974 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -0400975 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -0400976 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
977
978 auto One = ConstantInt::get(IntTy, 1);
979 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
980 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
981
982 // Index into the correct address of the casted pointer.
983 auto Ptr =
984 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
985
986 // Load from the int* we casted to.
987 auto Load = new LoadInst(Ptr, "", CI);
988
989 // Get our float2.
990 auto Call = CallInst::Create(NewF, Load, "", CI);
991
992 // Extract out the float result, where the element number is
993 // determined by whether the original index was even or odd.
994 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
995
996 CI->replaceAllUsesWith(Extract);
997 }
David Neto22f144c2017-06-12 14:26:21 -0400998
999 // Lastly, remember to remove the user.
1000 ToRemoves.push_back(CI);
1001 }
1002 }
1003
1004 Changed = !ToRemoves.empty();
1005
1006 // And cleanup the calls we don't use anymore.
1007 for (auto V : ToRemoves) {
1008 V->eraseFromParent();
1009 }
1010
1011 // And remove the function we don't need either too.
1012 F->eraseFromParent();
1013 }
1014 }
1015
1016 return Changed;
1017}
1018
1019bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1020 bool Changed = false;
1021
1022 const std::vector<const char *> Map = {"_Z11vload_half2jPU3AS1KDh",
1023 "_Z11vload_half2jPU3AS2KDh"};
1024
1025 for (auto Name : Map) {
1026 // If we find a function with the matching name.
1027 if (auto F = M.getFunction(Name)) {
1028 SmallVector<Instruction *, 4> ToRemoves;
1029
1030 // Walk the users of the function.
1031 for (auto &U : F->uses()) {
1032 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1033 // The index argument from vload_half.
1034 auto Arg0 = CI->getOperand(0);
1035
1036 // The pointer argument from vload_half.
1037 auto Arg1 = CI->getOperand(1);
1038
1039 auto IntTy = Type::getInt32Ty(M.getContext());
1040 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1041 auto NewPointerTy = PointerType::get(
1042 IntTy, Arg1->getType()->getPointerAddressSpace());
1043 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1044
1045 // Cast the half* pointer to int*.
1046 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1047
1048 // Index into the correct address of the casted pointer.
1049 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1050
1051 // Load from the int* we casted to.
1052 auto Load = new LoadInst(Index, "", CI);
1053
1054 // Our intrinsic to unpack a float2 from an int.
1055 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1056
1057 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1058
1059 // Get our float2.
1060 auto Call = CallInst::Create(NewF, Load, "", CI);
1061
1062 CI->replaceAllUsesWith(Call);
1063
1064 // Lastly, remember to remove the user.
1065 ToRemoves.push_back(CI);
1066 }
1067 }
1068
1069 Changed = !ToRemoves.empty();
1070
1071 // And cleanup the calls we don't use anymore.
1072 for (auto V : ToRemoves) {
1073 V->eraseFromParent();
1074 }
1075
1076 // And remove the function we don't need either too.
1077 F->eraseFromParent();
1078 }
1079 }
1080
1081 return Changed;
1082}
1083
1084bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1085 bool Changed = false;
1086
1087 const std::vector<const char *> Map = {"_Z11vload_half4jPU3AS1KDh",
1088 "_Z11vload_half4jPU3AS2KDh"};
1089
1090 for (auto Name : Map) {
1091 // If we find a function with the matching name.
1092 if (auto F = M.getFunction(Name)) {
1093 SmallVector<Instruction *, 4> ToRemoves;
1094
1095 // Walk the users of the function.
1096 for (auto &U : F->uses()) {
1097 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1098 // The index argument from vload_half.
1099 auto Arg0 = CI->getOperand(0);
1100
1101 // The pointer argument from vload_half.
1102 auto Arg1 = CI->getOperand(1);
1103
1104 auto IntTy = Type::getInt32Ty(M.getContext());
1105 auto Int2Ty = VectorType::get(IntTy, 2);
1106 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1107 auto NewPointerTy = PointerType::get(
1108 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1109 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1110
1111 // Cast the half* pointer to int2*.
1112 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1113
1114 // Index into the correct address of the casted pointer.
1115 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1116
1117 // Load from the int2* we casted to.
1118 auto Load = new LoadInst(Index, "", CI);
1119
1120 // Extract each element from the loaded int2.
1121 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1122 "", CI);
1123 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1124 "", CI);
1125
1126 // Our intrinsic to unpack a float2 from an int.
1127 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1128
1129 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1130
1131 // Get the lower (x & y) components of our final float4.
1132 auto Lo = CallInst::Create(NewF, X, "", CI);
1133
1134 // Get the higher (z & w) components of our final float4.
1135 auto Hi = CallInst::Create(NewF, Y, "", CI);
1136
1137 Constant *ShuffleMask[4] = {
1138 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1139 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1140
1141 // Combine our two float2's into one float4.
1142 auto Combine = new ShuffleVectorInst(
1143 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1144
1145 CI->replaceAllUsesWith(Combine);
1146
1147 // Lastly, remember to remove the user.
1148 ToRemoves.push_back(CI);
1149 }
1150 }
1151
1152 Changed = !ToRemoves.empty();
1153
1154 // And cleanup the calls we don't use anymore.
1155 for (auto V : ToRemoves) {
1156 V->eraseFromParent();
1157 }
1158
1159 // And remove the function we don't need either too.
1160 F->eraseFromParent();
1161 }
1162 }
1163
1164 return Changed;
1165}
1166
1167bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1168 bool Changed = false;
1169
1170 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1171 "_Z15vstore_half_rtefjPU3AS1Dh",
1172 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1173
1174 for (auto Name : Map) {
1175 // If we find a function with the matching name.
1176 if (auto F = M.getFunction(Name)) {
1177 SmallVector<Instruction *, 4> ToRemoves;
1178
1179 // Walk the users of the function.
1180 for (auto &U : F->uses()) {
1181 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1182 // The value to store.
1183 auto Arg0 = CI->getOperand(0);
1184
1185 // The index argument from vstore_half.
1186 auto Arg1 = CI->getOperand(1);
1187
1188 // The pointer argument from vstore_half.
1189 auto Arg2 = CI->getOperand(2);
1190
David Neto22f144c2017-06-12 14:26:21 -04001191 auto IntTy = Type::getInt32Ty(M.getContext());
1192 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001193 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001194 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001195
1196 // Our intrinsic to pack a float2 to an int.
1197 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1198
1199 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1200
1201 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001202 auto TempVec =
1203 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1204 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001205
1206 // Pack the float2 -> half2 (in an int).
1207 auto X = CallInst::Create(NewF, TempVec, "", CI);
1208
David Neto17852de2017-05-29 17:29:31 -04001209 if (f16bit_storage) {
1210 auto ShortTy = Type::getInt16Ty(M.getContext());
1211 auto ShortPointerTy = PointerType::get(
1212 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001213
David Neto17852de2017-05-29 17:29:31 -04001214 // Truncate our i32 to an i16.
1215 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001216
David Neto17852de2017-05-29 17:29:31 -04001217 // Cast the half* pointer to short*.
1218 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001219
David Neto17852de2017-05-29 17:29:31 -04001220 // Index into the correct address of the casted pointer.
1221 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001222
David Neto17852de2017-05-29 17:29:31 -04001223 // Store to the int* we casted to.
1224 auto Store = new StoreInst(Trunc, Index, CI);
1225
1226 CI->replaceAllUsesWith(Store);
1227 } else {
1228 // We can only write to 32-bit aligned words.
1229 //
1230 // Assuming base is aligned to 32-bits, replace the equivalent of
1231 // vstore_half(value, index, base)
1232 // with:
1233 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1234 // uint32_t write_to_upper_half = index & 1u;
1235 // uint32_t shift = write_to_upper_half << 4;
1236 //
1237 // // Pack the float value as a half number in bottom 16 bits
1238 // // of an i32.
1239 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1240 //
1241 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1242 // ^ ((packed & 0xffff) << shift)
1243 // // We only need relaxed consistency, but OpenCL 1.2 only has
1244 // // sequentially consistent atomics.
1245 // // TODO(dneto): Use relaxed consistency.
1246 // atomic_xor(target_ptr, xor_value)
1247 auto IntPointerTy = PointerType::get(
1248 IntTy, Arg2->getType()->getPointerAddressSpace());
1249
1250 auto Four = ConstantInt::get(IntTy, 4);
1251 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1252
1253 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1254 // Compute index / 2
1255 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1256 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1257 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1258 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1259 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1260 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1261 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1262
1263 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1264 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1265 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1266
1267 // Generate the call to atomi_xor.
1268 SmallVector<Type *, 5> ParamTypes;
1269 // The pointer type.
1270 ParamTypes.push_back(IntPointerTy);
1271 // The Types for memory scope, semantics, and value.
1272 ParamTypes.push_back(IntTy);
1273 ParamTypes.push_back(IntTy);
1274 ParamTypes.push_back(IntTy);
1275 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1276 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1277
1278 const auto ConstantScopeDevice =
1279 ConstantInt::get(IntTy, spv::ScopeDevice);
1280 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1281 // (SPIR-V Workgroup).
1282 const auto AddrSpaceSemanticsBits =
1283 IntPointerTy->getPointerAddressSpace() == 1
1284 ? spv::MemorySemanticsUniformMemoryMask
1285 : spv::MemorySemanticsWorkgroupMemoryMask;
1286
1287 // We're using relaxed consistency here.
1288 const auto ConstantMemorySemantics =
1289 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1290 AddrSpaceSemanticsBits);
1291
1292 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1293 ConstantMemorySemantics, ValueToXor};
1294 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1295 }
David Neto22f144c2017-06-12 14:26:21 -04001296
1297 // Lastly, remember to remove the user.
1298 ToRemoves.push_back(CI);
1299 }
1300 }
1301
1302 Changed = !ToRemoves.empty();
1303
1304 // And cleanup the calls we don't use anymore.
1305 for (auto V : ToRemoves) {
1306 V->eraseFromParent();
1307 }
1308
1309 // And remove the function we don't need either too.
1310 F->eraseFromParent();
1311 }
1312 }
1313
1314 return Changed;
1315}
1316
1317bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1318 bool Changed = false;
1319
1320 const std::vector<const char *> Map = {"_Z12vstore_half2Dv2_fjPU3AS1Dh",
1321 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1322 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh"};
1323
1324 for (auto Name : Map) {
1325 // If we find a function with the matching name.
1326 if (auto F = M.getFunction(Name)) {
1327 SmallVector<Instruction *, 4> ToRemoves;
1328
1329 // Walk the users of the function.
1330 for (auto &U : F->uses()) {
1331 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1332 // The value to store.
1333 auto Arg0 = CI->getOperand(0);
1334
1335 // The index argument from vstore_half.
1336 auto Arg1 = CI->getOperand(1);
1337
1338 // The pointer argument from vstore_half.
1339 auto Arg2 = CI->getOperand(2);
1340
1341 auto IntTy = Type::getInt32Ty(M.getContext());
1342 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1343 auto NewPointerTy = PointerType::get(
1344 IntTy, Arg2->getType()->getPointerAddressSpace());
1345 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1346
1347 // Our intrinsic to pack a float2 to an int.
1348 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1349
1350 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1351
1352 // Turn the packed x & y into the final packing.
1353 auto X = CallInst::Create(NewF, Arg0, "", CI);
1354
1355 // Cast the half* pointer to int*.
1356 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1357
1358 // Index into the correct address of the casted pointer.
1359 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1360
1361 // Store to the int* we casted to.
1362 auto Store = new StoreInst(X, Index, CI);
1363
1364 CI->replaceAllUsesWith(Store);
1365
1366 // Lastly, remember to remove the user.
1367 ToRemoves.push_back(CI);
1368 }
1369 }
1370
1371 Changed = !ToRemoves.empty();
1372
1373 // And cleanup the calls we don't use anymore.
1374 for (auto V : ToRemoves) {
1375 V->eraseFromParent();
1376 }
1377
1378 // And remove the function we don't need either too.
1379 F->eraseFromParent();
1380 }
1381 }
1382
1383 return Changed;
1384}
1385
1386bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1387 bool Changed = false;
1388
1389 const std::vector<const char *> Map = {"_Z12vstore_half4Dv4_fjPU3AS1Dh",
1390 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1391 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh"};
1392
1393 for (auto Name : Map) {
1394 // If we find a function with the matching name.
1395 if (auto F = M.getFunction(Name)) {
1396 SmallVector<Instruction *, 4> ToRemoves;
1397
1398 // Walk the users of the function.
1399 for (auto &U : F->uses()) {
1400 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1401 // The value to store.
1402 auto Arg0 = CI->getOperand(0);
1403
1404 // The index argument from vstore_half.
1405 auto Arg1 = CI->getOperand(1);
1406
1407 // The pointer argument from vstore_half.
1408 auto Arg2 = CI->getOperand(2);
1409
1410 auto IntTy = Type::getInt32Ty(M.getContext());
1411 auto Int2Ty = VectorType::get(IntTy, 2);
1412 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1413 auto NewPointerTy = PointerType::get(
1414 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1415 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1416
1417 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1418 ConstantInt::get(IntTy, 1)};
1419
1420 // Extract out the x & y components of our to store value.
1421 auto Lo =
1422 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1423 ConstantVector::get(LoShuffleMask), "", CI);
1424
1425 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1426 ConstantInt::get(IntTy, 3)};
1427
1428 // Extract out the z & w components of our to store value.
1429 auto Hi =
1430 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1431 ConstantVector::get(HiShuffleMask), "", CI);
1432
1433 // Our intrinsic to pack a float2 to an int.
1434 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1435
1436 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1437
1438 // Turn the packed x & y into the final component of our int2.
1439 auto X = CallInst::Create(NewF, Lo, "", CI);
1440
1441 // Turn the packed z & w into the final component of our int2.
1442 auto Y = CallInst::Create(NewF, Hi, "", CI);
1443
1444 auto Combine = InsertElementInst::Create(
1445 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1446 Combine = InsertElementInst::Create(
1447 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1448
1449 // Cast the half* pointer to int2*.
1450 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1451
1452 // Index into the correct address of the casted pointer.
1453 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1454
1455 // Store to the int2* we casted to.
1456 auto Store = new StoreInst(Combine, Index, CI);
1457
1458 CI->replaceAllUsesWith(Store);
1459
1460 // Lastly, remember to remove the user.
1461 ToRemoves.push_back(CI);
1462 }
1463 }
1464
1465 Changed = !ToRemoves.empty();
1466
1467 // And cleanup the calls we don't use anymore.
1468 for (auto V : ToRemoves) {
1469 V->eraseFromParent();
1470 }
1471
1472 // And remove the function we don't need either too.
1473 F->eraseFromParent();
1474 }
1475 }
1476
1477 return Changed;
1478}
1479
1480bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1481 bool Changed = false;
1482
1483 const std::map<const char *, const char*> Map = {
1484 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1485 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1486 };
1487
1488 for (auto Pair : Map) {
1489 // If we find a function with the matching name.
1490 if (auto F = M.getFunction(Pair.first)) {
1491 SmallVector<Instruction *, 4> ToRemoves;
1492
1493 // Walk the users of the function.
1494 for (auto &U : F->uses()) {
1495 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1496 // The image.
1497 auto Arg0 = CI->getOperand(0);
1498
1499 // The sampler.
1500 auto Arg1 = CI->getOperand(1);
1501
1502 // The coordinate (integer type that we can't handle).
1503 auto Arg2 = CI->getOperand(2);
1504
1505 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1506
1507 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1508
1509 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1510
1511 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1512
1513 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1514
1515 CI->replaceAllUsesWith(NewCI);
1516
1517 // Lastly, remember to remove the user.
1518 ToRemoves.push_back(CI);
1519 }
1520 }
1521
1522 Changed = !ToRemoves.empty();
1523
1524 // And cleanup the calls we don't use anymore.
1525 for (auto V : ToRemoves) {
1526 V->eraseFromParent();
1527 }
1528
1529 // And remove the function we don't need either too.
1530 F->eraseFromParent();
1531 }
1532 }
1533
1534 return Changed;
1535}
1536
1537bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1538 bool Changed = false;
1539
1540 const std::map<const char *, const char *> Map = {
1541 {"_Z10atomic_addPU3AS1Vii", "spirv.atomic_add"},
1542 {"_Z10atomic_addPU3AS1Vjj", "spirv.atomic_add"},
1543 {"_Z10atomic_subPU3AS1Vii", "spirv.atomic_sub"},
1544 {"_Z10atomic_subPU3AS1Vjj", "spirv.atomic_sub"},
1545 {"_Z11atomic_xchgPU3AS1Vii", "spirv.atomic_exchange"},
1546 {"_Z11atomic_xchgPU3AS1Vjj", "spirv.atomic_exchange"},
1547 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1548 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1549 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1550 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1551 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
1552 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
1553 {"_Z10atomic_minPU3AS1Vii", "spirv.atomic_smin"},
1554 {"_Z10atomic_minPU3AS1Vjj", "spirv.atomic_umin"},
1555 {"_Z10atomic_maxPU3AS1Vii", "spirv.atomic_smax"},
1556 {"_Z10atomic_maxPU3AS1Vjj", "spirv.atomic_umax"},
1557 {"_Z10atomic_andPU3AS1Vii", "spirv.atomic_and"},
1558 {"_Z10atomic_andPU3AS1Vjj", "spirv.atomic_and"},
1559 {"_Z9atomic_orPU3AS1Vii", "spirv.atomic_or"},
1560 {"_Z9atomic_orPU3AS1Vjj", "spirv.atomic_or"},
1561 {"_Z10atomic_xorPU3AS1Vii", "spirv.atomic_xor"},
1562 {"_Z10atomic_xorPU3AS1Vjj", "spirv.atomic_xor"}};
1563
1564 for (auto Pair : Map) {
1565 // If we find a function with the matching name.
1566 if (auto F = M.getFunction(Pair.first)) {
1567 SmallVector<Instruction *, 4> ToRemoves;
1568
1569 // Walk the users of the function.
1570 for (auto &U : F->uses()) {
1571 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1572 auto FType = F->getFunctionType();
1573 SmallVector<Type *, 5> ParamTypes;
1574
1575 // The pointer type.
1576 ParamTypes.push_back(FType->getParamType(0));
1577
1578 auto IntTy = Type::getInt32Ty(M.getContext());
1579
1580 // The memory scope type.
1581 ParamTypes.push_back(IntTy);
1582
1583 // The memory semantics type.
1584 ParamTypes.push_back(IntTy);
1585
1586 if (2 < CI->getNumArgOperands()) {
1587 // The unequal memory semantics type.
1588 ParamTypes.push_back(IntTy);
1589
1590 // The value type.
1591 ParamTypes.push_back(FType->getParamType(2));
1592
1593 // The comparator type.
1594 ParamTypes.push_back(FType->getParamType(1));
1595 } else if (1 < CI->getNumArgOperands()) {
1596 // The value type.
1597 ParamTypes.push_back(FType->getParamType(1));
1598 }
1599
1600 auto NewFType =
1601 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1602 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1603
1604 // We need to map the OpenCL constants to the SPIR-V equivalents.
1605 const auto ConstantScopeDevice =
1606 ConstantInt::get(IntTy, spv::ScopeDevice);
1607 const auto ConstantMemorySemantics = ConstantInt::get(
1608 IntTy, spv::MemorySemanticsUniformMemoryMask |
1609 spv::MemorySemanticsSequentiallyConsistentMask);
1610
1611 SmallVector<Value *, 5> Params;
1612
1613 // The pointer.
1614 Params.push_back(CI->getArgOperand(0));
1615
1616 // The memory scope.
1617 Params.push_back(ConstantScopeDevice);
1618
1619 // The memory semantics.
1620 Params.push_back(ConstantMemorySemantics);
1621
1622 if (2 < CI->getNumArgOperands()) {
1623 // The unequal memory semantics.
1624 Params.push_back(ConstantMemorySemantics);
1625
1626 // The value.
1627 Params.push_back(CI->getArgOperand(2));
1628
1629 // The comparator.
1630 Params.push_back(CI->getArgOperand(1));
1631 } else if (1 < CI->getNumArgOperands()) {
1632 // The value.
1633 Params.push_back(CI->getArgOperand(1));
1634 }
1635
1636 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1637
1638 CI->replaceAllUsesWith(NewCI);
1639
1640 // Lastly, remember to remove the user.
1641 ToRemoves.push_back(CI);
1642 }
1643 }
1644
1645 Changed = !ToRemoves.empty();
1646
1647 // And cleanup the calls we don't use anymore.
1648 for (auto V : ToRemoves) {
1649 V->eraseFromParent();
1650 }
1651
1652 // And remove the function we don't need either too.
1653 F->eraseFromParent();
1654 }
1655 }
1656
1657 return Changed;
1658}
1659
1660bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
1661 bool Changed = false;
1662
1663 // If we find a function with the matching name.
1664 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
1665 SmallVector<Instruction *, 4> ToRemoves;
1666
1667 auto IntTy = Type::getInt32Ty(M.getContext());
1668 auto FloatTy = Type::getFloatTy(M.getContext());
1669
1670 Constant *DownShuffleMask[3] = {
1671 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1672 ConstantInt::get(IntTy, 2)};
1673
1674 Constant *UpShuffleMask[4] = {
1675 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1676 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1677
1678 Constant *FloatVec[3] = {
1679 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
1680 };
1681
1682 // Walk the users of the function.
1683 for (auto &U : F->uses()) {
1684 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1685 auto Vec4Ty = CI->getArgOperand(0)->getType();
1686 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1687 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
1688 auto Vec3Ty = Arg0->getType();
1689
1690 auto NewFType =
1691 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
1692
1693 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
1694
1695 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
1696
1697 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
1698
1699 CI->replaceAllUsesWith(Result);
1700
1701 // Lastly, remember to remove the user.
1702 ToRemoves.push_back(CI);
1703 }
1704 }
1705
1706 Changed = !ToRemoves.empty();
1707
1708 // And cleanup the calls we don't use anymore.
1709 for (auto V : ToRemoves) {
1710 V->eraseFromParent();
1711 }
1712
1713 // And remove the function we don't need either too.
1714 F->eraseFromParent();
1715 }
1716
1717 return Changed;
1718}