blob: cd285e1f8cde4f21e0b06ad59497a35d79bda85f [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto118188e2018-08-24 11:27:54 -040019#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
23#include "llvm/Pass.h"
24#include "llvm/Support/CommandLine.h"
25#include "llvm/Support/raw_ostream.h"
26#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040027
David Neto118188e2018-08-24 11:27:54 -040028#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto482550a2018-03-24 05:21:07 -070030#include "clspv/Option.h"
31
David Neto22f144c2017-06-12 14:26:21 -040032using namespace llvm;
33
34#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
35
36namespace {
// Returns the zero-based position of the most significant set bit of |v|
// (that is, floor(log2(v))). Both 0 and 1 yield 0.
//
// Note that, despite its name, this does not count leading zeros. The fence
// lowering code subtracts two results of this function to obtain the shift
// distance between two single-bit masks, which only needs bit positions.
uint32_t clz(uint32_t v) {
  uint32_t HighestBit = 0;

  // Every halving that still leaves a non-zero value means the top bit sat
  // one position higher than counted so far.
  while ((v >>= 1) != 0) {
    ++HighestBit;
  }

  return HighestBit;
}
56
57Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
58 if (1 == elements) {
59 return Type::getInt1Ty(C);
60 } else {
61 return VectorType::get(Type::getInt1Ty(C), elements);
62 }
63}
64
65struct ReplaceOpenCLBuiltinPass final : public ModulePass {
66 static char ID;
67 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
68
69 bool runOnModule(Module &M) override;
70 bool replaceRecip(Module &M);
71 bool replaceDivide(Module &M);
72 bool replaceExp10(Module &M);
73 bool replaceLog10(Module &M);
74 bool replaceBarrier(Module &M);
75 bool replaceMemFence(Module &M);
76 bool replaceRelational(Module &M);
77 bool replaceIsInfAndIsNan(Module &M);
78 bool replaceAllAndAny(Module &M);
79 bool replaceSignbit(Module &M);
80 bool replaceMadandMad24andMul24(Module &M);
81 bool replaceVloadHalf(Module &M);
82 bool replaceVloadHalf2(Module &M);
83 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -070084 bool replaceClspvVloadaHalf2(Module &M);
85 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040086 bool replaceVstoreHalf(Module &M);
87 bool replaceVstoreHalf2(Module &M);
88 bool replaceVstoreHalf4(Module &M);
89 bool replaceReadImageF(Module &M);
90 bool replaceAtomics(Module &M);
91 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040092 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070093 bool replaceVload(Module &M);
94 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040095};
96}
97
98char ReplaceOpenCLBuiltinPass::ID = 0;
99static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
100 "Replace OpenCL Builtins Pass");
101
102namespace clspv {
103ModulePass *createReplaceOpenCLBuiltinPass() {
104 return new ReplaceOpenCLBuiltinPass();
105}
106}
107
108bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
109 bool Changed = false;
110
111 Changed |= replaceRecip(M);
112 Changed |= replaceDivide(M);
113 Changed |= replaceExp10(M);
114 Changed |= replaceLog10(M);
115 Changed |= replaceBarrier(M);
116 Changed |= replaceMemFence(M);
117 Changed |= replaceRelational(M);
118 Changed |= replaceIsInfAndIsNan(M);
119 Changed |= replaceAllAndAny(M);
120 Changed |= replaceSignbit(M);
121 Changed |= replaceMadandMad24andMul24(M);
122 Changed |= replaceVloadHalf(M);
123 Changed |= replaceVloadHalf2(M);
124 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700125 Changed |= replaceClspvVloadaHalf2(M);
126 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400127 Changed |= replaceVstoreHalf(M);
128 Changed |= replaceVstoreHalf2(M);
129 Changed |= replaceVstoreHalf4(M);
130 Changed |= replaceReadImageF(M);
131 Changed |= replaceAtomics(M);
132 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400133 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700134 Changed |= replaceVload(M);
135 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400136
137 return Changed;
138}
139
140bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
141 bool Changed = false;
142
143 const char *Names[] = {
144 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
145 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
146 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
147 };
148
149 for (auto Name : Names) {
150 // If we find a function with the matching name.
151 if (auto F = M.getFunction(Name)) {
152 SmallVector<Instruction *, 4> ToRemoves;
153
154 // Walk the users of the function.
155 for (auto &U : F->uses()) {
156 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
157 // Recip has one arg.
158 auto Arg = CI->getOperand(0);
159
160 auto Div = BinaryOperator::Create(
161 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
162 CI);
163
164 CI->replaceAllUsesWith(Div);
165
166 // Lastly, remember to remove the user.
167 ToRemoves.push_back(CI);
168 }
169 }
170
171 Changed = !ToRemoves.empty();
172
173 // And cleanup the calls we don't use anymore.
174 for (auto V : ToRemoves) {
175 V->eraseFromParent();
176 }
177
178 // And remove the function we don't need either too.
179 F->eraseFromParent();
180 }
181 }
182
183 return Changed;
184}
185
186bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
187 bool Changed = false;
188
189 const char *Names[] = {
190 "_Z11half_divideff", "_Z13native_divideff",
191 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
192 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
193 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
194 };
195
196 for (auto Name : Names) {
197 // If we find a function with the matching name.
198 if (auto F = M.getFunction(Name)) {
199 SmallVector<Instruction *, 4> ToRemoves;
200
201 // Walk the users of the function.
202 for (auto &U : F->uses()) {
203 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
204 auto Div = BinaryOperator::Create(
205 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
206
207 CI->replaceAllUsesWith(Div);
208
209 // Lastly, remember to remove the user.
210 ToRemoves.push_back(CI);
211 }
212 }
213
214 Changed = !ToRemoves.empty();
215
216 // And cleanup the calls we don't use anymore.
217 for (auto V : ToRemoves) {
218 V->eraseFromParent();
219 }
220
221 // And remove the function we don't need either too.
222 F->eraseFromParent();
223 }
224 }
225
226 return Changed;
227}
228
229bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
230 bool Changed = false;
231
232 const std::map<const char *, const char *> Map = {
233 {"_Z5exp10f", "_Z3expf"},
234 {"_Z10half_exp10f", "_Z8half_expf"},
235 {"_Z12native_exp10f", "_Z10native_expf"},
236 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
237 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
238 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
239 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
240 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
241 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
242 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
243 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
244 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
245
246 for (auto Pair : Map) {
247 // If we find a function with the matching name.
248 if (auto F = M.getFunction(Pair.first)) {
249 SmallVector<Instruction *, 4> ToRemoves;
250
251 // Walk the users of the function.
252 for (auto &U : F->uses()) {
253 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
254 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
255
256 auto Arg = CI->getOperand(0);
257
258 // Constant of the natural log of 10 (ln(10)).
259 const double Ln10 =
260 2.302585092994045684017991454684364207601101488628772976033;
261
262 auto Mul = BinaryOperator::Create(
263 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
264 CI);
265
266 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
267
268 CI->replaceAllUsesWith(NewCI);
269
270 // Lastly, remember to remove the user.
271 ToRemoves.push_back(CI);
272 }
273 }
274
275 Changed = !ToRemoves.empty();
276
277 // And cleanup the calls we don't use anymore.
278 for (auto V : ToRemoves) {
279 V->eraseFromParent();
280 }
281
282 // And remove the function we don't need either too.
283 F->eraseFromParent();
284 }
285 }
286
287 return Changed;
288}
289
290bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
291 bool Changed = false;
292
293 const std::map<const char *, const char *> Map = {
294 {"_Z5log10f", "_Z3logf"},
295 {"_Z10half_log10f", "_Z8half_logf"},
296 {"_Z12native_log10f", "_Z10native_logf"},
297 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
298 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
299 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
300 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
301 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
302 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
303 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
304 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
305 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
306
307 for (auto Pair : Map) {
308 // If we find a function with the matching name.
309 if (auto F = M.getFunction(Pair.first)) {
310 SmallVector<Instruction *, 4> ToRemoves;
311
312 // Walk the users of the function.
313 for (auto &U : F->uses()) {
314 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
315 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
316
317 auto Arg = CI->getOperand(0);
318
319 // Constant of the reciprocal of the natural log of 10 (ln(10)).
320 const double Ln10 =
321 0.434294481903251827651128918916605082294397005803666566114;
322
323 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
324
325 auto Mul = BinaryOperator::Create(
326 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
327 "", CI);
328
329 CI->replaceAllUsesWith(Mul);
330
331 // Lastly, remember to remove the user.
332 ToRemoves.push_back(CI);
333 }
334 }
335
336 Changed = !ToRemoves.empty();
337
338 // And cleanup the calls we don't use anymore.
339 for (auto V : ToRemoves) {
340 V->eraseFromParent();
341 }
342
343 // And remove the function we don't need either too.
344 F->eraseFromParent();
345 }
346 }
347
348 return Changed;
349}
350
351bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
352 bool Changed = false;
353
354 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
355
356 const std::map<const char *, const char *> Map = {
357 {"_Z7barrierj", "__spirv_control_barrier"}};
358
359 for (auto Pair : Map) {
360 // If we find a function with the matching name.
361 if (auto F = M.getFunction(Pair.first)) {
362 SmallVector<Instruction *, 4> ToRemoves;
363
364 // Walk the users of the function.
365 for (auto &U : F->uses()) {
366 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
367 auto FType = F->getFunctionType();
368 SmallVector<Type *, 3> Params;
369 for (unsigned i = 0; i < 3; i++) {
370 Params.push_back(FType->getParamType(0));
371 }
372 auto NewFType =
373 FunctionType::get(FType->getReturnType(), Params, false);
374 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
375
376 auto Arg = CI->getOperand(0);
377
378 // We need to map the OpenCL constants to the SPIR-V equivalents.
379 const auto LocalMemFence =
380 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
381 const auto GlobalMemFence =
382 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
383 const auto ConstantSequentiallyConsistent = ConstantInt::get(
384 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
385 const auto ConstantScopeDevice =
386 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
387 const auto ConstantScopeWorkgroup =
388 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
389
390 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
391 const auto LocalMemFenceMask = BinaryOperator::Create(
392 Instruction::And, LocalMemFence, Arg, "", CI);
393 const auto WorkgroupShiftAmount =
394 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
395 clz(CLK_LOCAL_MEM_FENCE);
396 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
397 Instruction::Shl, LocalMemFenceMask,
398 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
399
400 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
401 const auto GlobalMemFenceMask = BinaryOperator::Create(
402 Instruction::And, GlobalMemFence, Arg, "", CI);
403 const auto UniformShiftAmount =
404 clz(spv::MemorySemanticsUniformMemoryMask) -
405 clz(CLK_GLOBAL_MEM_FENCE);
406 const auto MemorySemanticsUniform = BinaryOperator::Create(
407 Instruction::Shl, GlobalMemFenceMask,
408 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
409
410 // And combine the above together, also adding in
411 // MemorySemanticsSequentiallyConsistentMask.
412 auto MemorySemantics =
413 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
414 ConstantSequentiallyConsistent, "", CI);
415 MemorySemantics = BinaryOperator::Create(
416 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
417
418 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
419 // Device Scope, otherwise Workgroup Scope.
420 const auto Cmp =
421 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
422 GlobalMemFenceMask, GlobalMemFence, "", CI);
423 const auto MemoryScope = SelectInst::Create(
424 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
425
426 // Lastly, the Execution Scope is always Workgroup Scope.
427 const auto ExecutionScope = ConstantScopeWorkgroup;
428
429 auto NewCI = CallInst::Create(
430 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
431
432 CI->replaceAllUsesWith(NewCI);
433
434 // Lastly, remember to remove the user.
435 ToRemoves.push_back(CI);
436 }
437 }
438
439 Changed = !ToRemoves.empty();
440
441 // And cleanup the calls we don't use anymore.
442 for (auto V : ToRemoves) {
443 V->eraseFromParent();
444 }
445
446 // And remove the function we don't need either too.
447 F->eraseFromParent();
448 }
449 }
450
451 return Changed;
452}
453
454bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
455 bool Changed = false;
456
457 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
458
Neil Henning39672102017-09-29 14:33:13 +0100459 using Tuple = std::tuple<const char *, unsigned>;
460 const std::map<const char *, Tuple> Map = {
461 {"_Z9mem_fencej",
462 Tuple("__spirv_memory_barrier",
463 spv::MemorySemanticsSequentiallyConsistentMask)},
464 {"_Z14read_mem_fencej",
465 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
466 {"_Z15write_mem_fencej",
467 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400468
469 for (auto Pair : Map) {
470 // If we find a function with the matching name.
471 if (auto F = M.getFunction(Pair.first)) {
472 SmallVector<Instruction *, 4> ToRemoves;
473
474 // Walk the users of the function.
475 for (auto &U : F->uses()) {
476 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
477 auto FType = F->getFunctionType();
478 SmallVector<Type *, 2> Params;
479 for (unsigned i = 0; i < 2; i++) {
480 Params.push_back(FType->getParamType(0));
481 }
482 auto NewFType =
483 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100484 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400485
486 auto Arg = CI->getOperand(0);
487
488 // We need to map the OpenCL constants to the SPIR-V equivalents.
489 const auto LocalMemFence =
490 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
491 const auto GlobalMemFence =
492 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
493 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100494 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400495 const auto ConstantScopeDevice =
496 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
497
498 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
499 const auto LocalMemFenceMask = BinaryOperator::Create(
500 Instruction::And, LocalMemFence, Arg, "", CI);
501 const auto WorkgroupShiftAmount =
502 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
503 clz(CLK_LOCAL_MEM_FENCE);
504 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
505 Instruction::Shl, LocalMemFenceMask,
506 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
507
508 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
509 const auto GlobalMemFenceMask = BinaryOperator::Create(
510 Instruction::And, GlobalMemFence, Arg, "", CI);
511 const auto UniformShiftAmount =
512 clz(spv::MemorySemanticsUniformMemoryMask) -
513 clz(CLK_GLOBAL_MEM_FENCE);
514 const auto MemorySemanticsUniform = BinaryOperator::Create(
515 Instruction::Shl, GlobalMemFenceMask,
516 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
517
518 // And combine the above together, also adding in
519 // MemorySemanticsSequentiallyConsistentMask.
520 auto MemorySemantics =
521 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
522 ConstantMemorySemantics, "", CI);
523 MemorySemantics = BinaryOperator::Create(
524 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
525
526 // Memory Scope is always device.
527 const auto MemoryScope = ConstantScopeDevice;
528
529 auto NewCI =
530 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
531
532 CI->replaceAllUsesWith(NewCI);
533
534 // Lastly, remember to remove the user.
535 ToRemoves.push_back(CI);
536 }
537 }
538
539 Changed = !ToRemoves.empty();
540
541 // And cleanup the calls we don't use anymore.
542 for (auto V : ToRemoves) {
543 V->eraseFromParent();
544 }
545
546 // And remove the function we don't need either too.
547 F->eraseFromParent();
548 }
549 }
550
551 return Changed;
552}
553
554bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
555 bool Changed = false;
556
557 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
558 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
559 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
560 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
561 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
562 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
563 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
564 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
565 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
566 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
567 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
568 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
569 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
570 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
571 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
572 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
573 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
574 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
575 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
576 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
577 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
578 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
579 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
580 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
581 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
582 };
583
584 for (auto Pair : Map) {
585 // If we find a function with the matching name.
586 if (auto F = M.getFunction(Pair.first)) {
587 SmallVector<Instruction *, 4> ToRemoves;
588
589 // Walk the users of the function.
590 for (auto &U : F->uses()) {
591 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
592 // The predicate to use in the CmpInst.
593 auto Predicate = Pair.second.first;
594
595 // The value to return for true.
596 auto TrueValue =
597 ConstantInt::getSigned(CI->getType(), Pair.second.second);
598
599 // The value to return for false.
600 auto FalseValue = Constant::getNullValue(CI->getType());
601
602 auto Arg1 = CI->getOperand(0);
603 auto Arg2 = CI->getOperand(1);
604
605 const auto Cmp =
606 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
607
608 const auto Select =
609 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
610
611 CI->replaceAllUsesWith(Select);
612
613 // Lastly, remember to remove the user.
614 ToRemoves.push_back(CI);
615 }
616 }
617
618 Changed = !ToRemoves.empty();
619
620 // And cleanup the calls we don't use anymore.
621 for (auto V : ToRemoves) {
622 V->eraseFromParent();
623 }
624
625 // And remove the function we don't need either too.
626 F->eraseFromParent();
627 }
628 }
629
630 return Changed;
631}
632
633bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
634 bool Changed = false;
635
636 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
637 {"_Z5isinff", {"__spirv_isinff", 1}},
638 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
639 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
640 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
641 {"_Z5isnanf", {"__spirv_isnanf", 1}},
642 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
643 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
644 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
645 };
646
647 for (auto Pair : Map) {
648 // If we find a function with the matching name.
649 if (auto F = M.getFunction(Pair.first)) {
650 SmallVector<Instruction *, 4> ToRemoves;
651
652 // Walk the users of the function.
653 for (auto &U : F->uses()) {
654 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
655 const auto CITy = CI->getType();
656
657 // The fake SPIR-V intrinsic to generate.
658 auto SPIRVIntrinsic = Pair.second.first;
659
660 // The value to return for true.
661 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
662
663 // The value to return for false.
664 auto FalseValue = Constant::getNullValue(CITy);
665
666 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
667 M.getContext(),
668 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
669
670 auto NewFType =
671 FunctionType::get(CorrespondingBoolTy,
672 F->getFunctionType()->getParamType(0), false);
673
674 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
675
676 auto Arg = CI->getOperand(0);
677
678 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
679
680 const auto Select =
681 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
682
683 CI->replaceAllUsesWith(Select);
684
685 // Lastly, remember to remove the user.
686 ToRemoves.push_back(CI);
687 }
688 }
689
690 Changed = !ToRemoves.empty();
691
692 // And cleanup the calls we don't use anymore.
693 for (auto V : ToRemoves) {
694 V->eraseFromParent();
695 }
696
697 // And remove the function we don't need either too.
698 F->eraseFromParent();
699 }
700 }
701
702 return Changed;
703}
704
705bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
706 bool Changed = false;
707
708 const std::map<const char *, const char *> Map = {
709 {"_Z3alli", ""},
710 {"_Z3allDv2_i", "__spirv_allDv2_i"},
711 {"_Z3allDv3_i", "__spirv_allDv3_i"},
712 {"_Z3allDv4_i", "__spirv_allDv4_i"},
713 {"_Z3anyi", ""},
714 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
715 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
716 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
717 };
718
719 for (auto Pair : Map) {
720 // If we find a function with the matching name.
721 if (auto F = M.getFunction(Pair.first)) {
722 SmallVector<Instruction *, 4> ToRemoves;
723
724 // Walk the users of the function.
725 for (auto &U : F->uses()) {
726 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
727 // The fake SPIR-V intrinsic to generate.
728 auto SPIRVIntrinsic = Pair.second;
729
730 auto Arg = CI->getOperand(0);
731
732 Value *V;
733
734 // If we have a function to call, call it!
735 if (0 < strlen(SPIRVIntrinsic)) {
736 // The value for zero to compare against.
737 const auto ZeroValue = Constant::getNullValue(Arg->getType());
738
739 const auto Cmp = CmpInst::Create(
740 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
741 const auto NewFType = FunctionType::get(
742 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
743
744 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
745
746 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
747
748 // The value to return for true.
749 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
750
751 // The value to return for false.
752 const auto FalseValue = Constant::getNullValue(CI->getType());
753
754 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
755 } else {
756 V = BinaryOperator::Create(Instruction::LShr, Arg,
757 ConstantInt::get(CI->getType(), 31), "",
758 CI);
759 }
760
761 CI->replaceAllUsesWith(V);
762
763 // Lastly, remember to remove the user.
764 ToRemoves.push_back(CI);
765 }
766 }
767
768 Changed = !ToRemoves.empty();
769
770 // And cleanup the calls we don't use anymore.
771 for (auto V : ToRemoves) {
772 V->eraseFromParent();
773 }
774
775 // And remove the function we don't need either too.
776 F->eraseFromParent();
777 }
778 }
779
780 return Changed;
781}
782
783bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
784 bool Changed = false;
785
786 const std::map<const char *, Instruction::BinaryOps> Map = {
787 {"_Z7signbitf", Instruction::LShr},
788 {"_Z7signbitDv2_f", Instruction::AShr},
789 {"_Z7signbitDv3_f", Instruction::AShr},
790 {"_Z7signbitDv4_f", Instruction::AShr},
791 };
792
793 for (auto Pair : Map) {
794 // If we find a function with the matching name.
795 if (auto F = M.getFunction(Pair.first)) {
796 SmallVector<Instruction *, 4> ToRemoves;
797
798 // Walk the users of the function.
799 for (auto &U : F->uses()) {
800 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
801 auto Arg = CI->getOperand(0);
802
803 auto Bitcast =
804 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
805
806 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
807 ConstantInt::get(CI->getType(), 31),
808 "", CI);
809
810 CI->replaceAllUsesWith(Shr);
811
812 // Lastly, remember to remove the user.
813 ToRemoves.push_back(CI);
814 }
815 }
816
817 Changed = !ToRemoves.empty();
818
819 // And cleanup the calls we don't use anymore.
820 for (auto V : ToRemoves) {
821 V->eraseFromParent();
822 }
823
824 // And remove the function we don't need either too.
825 F->eraseFromParent();
826 }
827 }
828
829 return Changed;
830}
831
832bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
833 bool Changed = false;
834
835 const std::map<const char *,
836 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
837 Map = {
838 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
839 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
840 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
841 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
842 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
843 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
844 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
845 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
846 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
847 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
848 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
849 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
850 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
851 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
852 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
853 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
854 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
855 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
856 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
857 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
858 };
859
860 for (auto Pair : Map) {
861 // If we find a function with the matching name.
862 if (auto F = M.getFunction(Pair.first)) {
863 SmallVector<Instruction *, 4> ToRemoves;
864
865 // Walk the users of the function.
866 for (auto &U : F->uses()) {
867 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
868 // The multiply instruction to use.
869 auto MulInst = Pair.second.first;
870
871 // The add instruction to use.
872 auto AddInst = Pair.second.second;
873
874 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
875
876 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
877 CI->getArgOperand(1), "", CI);
878
879 if (Instruction::BinaryOpsEnd != AddInst) {
880 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
881 CI);
882 }
883
884 CI->replaceAllUsesWith(I);
885
886 // Lastly, remember to remove the user.
887 ToRemoves.push_back(CI);
888 }
889 }
890
891 Changed = !ToRemoves.empty();
892
893 // And cleanup the calls we don't use anymore.
894 for (auto V : ToRemoves) {
895 V->eraseFromParent();
896 }
897
898 // And remove the function we don't need either too.
899 F->eraseFromParent();
900 }
901 }
902
903 return Changed;
904}
905
Derek Chowcfd368b2017-10-19 20:58:45 -0700906bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
907 bool Changed = false;
908
909 struct VectorStoreOps {
910 const char* name;
911 int n;
912 Type* (*get_scalar_type_function)(LLVMContext&);
913 } vector_store_ops[] = {
914 // TODO(derekjchow): Expand this list.
915 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
916 };
917
David Neto544fffc2017-11-16 18:35:14 -0500918 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700919 auto Name = Op.name;
920 auto N = Op.n;
921 auto TypeFn = Op.get_scalar_type_function;
922 if (auto F = M.getFunction(Name)) {
923 SmallVector<Instruction *, 4> ToRemoves;
924
925 // Walk the users of the function.
926 for (auto &U : F->uses()) {
927 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
928 // The value argument from vstoren.
929 auto Arg0 = CI->getOperand(0);
930
931 // The index argument from vstoren.
932 auto Arg1 = CI->getOperand(1);
933
934 // The pointer argument from vstoren.
935 auto Arg2 = CI->getOperand(2);
936
937 // Get types.
938 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
939 auto ScalarNPointerTy = PointerType::get(
940 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
941
942 // Cast to scalarn
943 auto Cast = CastInst::CreatePointerCast(
944 Arg2, ScalarNPointerTy, "", CI);
945 // Index to correct address
946 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
947 // Store
948 auto Store = new StoreInst(Arg0, Index, CI);
949
950 CI->replaceAllUsesWith(Store);
951 ToRemoves.push_back(CI);
952 }
953 }
954
955 Changed = !ToRemoves.empty();
956
957 // And cleanup the calls we don't use anymore.
958 for (auto V : ToRemoves) {
959 V->eraseFromParent();
960 }
961
962 // And remove the function we don't need either too.
963 F->eraseFromParent();
964 }
965 }
966
967 return Changed;
968}
969
970bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
971 bool Changed = false;
972
973 struct VectorLoadOps {
974 const char* name;
975 int n;
976 Type* (*get_scalar_type_function)(LLVMContext&);
977 } vector_load_ops[] = {
978 // TODO(derekjchow): Expand this list.
979 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
980 };
981
David Neto544fffc2017-11-16 18:35:14 -0500982 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -0700983 auto Name = Op.name;
984 auto N = Op.n;
985 auto TypeFn = Op.get_scalar_type_function;
986 // If we find a function with the matching name.
987 if (auto F = M.getFunction(Name)) {
988 SmallVector<Instruction *, 4> ToRemoves;
989
990 // Walk the users of the function.
991 for (auto &U : F->uses()) {
992 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
993 // The index argument from vloadn.
994 auto Arg0 = CI->getOperand(0);
995
996 // The pointer argument from vloadn.
997 auto Arg1 = CI->getOperand(1);
998
999 // Get types.
1000 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1001 auto ScalarNPointerTy = PointerType::get(
1002 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1003
1004 // Cast to scalarn
1005 auto Cast = CastInst::CreatePointerCast(
1006 Arg1, ScalarNPointerTy, "", CI);
1007 // Index to correct address
1008 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1009 // Load
1010 auto Load = new LoadInst(Index, "", CI);
1011
1012 CI->replaceAllUsesWith(Load);
1013 ToRemoves.push_back(CI);
1014 }
1015 }
1016
1017 Changed = !ToRemoves.empty();
1018
1019 // And cleanup the calls we don't use anymore.
1020 for (auto V : ToRemoves) {
1021 V->eraseFromParent();
1022 }
1023
1024 // And remove the function we don't need either too.
1025 F->eraseFromParent();
1026
1027 }
1028 }
1029
1030 return Changed;
1031}
1032
David Neto22f144c2017-06-12 14:26:21 -04001033bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1034 bool Changed = false;
1035
1036 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1037 "_Z10vload_halfjPU3AS2KDh"};
1038
1039 for (auto Name : Map) {
1040 // If we find a function with the matching name.
1041 if (auto F = M.getFunction(Name)) {
1042 SmallVector<Instruction *, 4> ToRemoves;
1043
1044 // Walk the users of the function.
1045 for (auto &U : F->uses()) {
1046 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1047 // The index argument from vload_half.
1048 auto Arg0 = CI->getOperand(0);
1049
1050 // The pointer argument from vload_half.
1051 auto Arg1 = CI->getOperand(1);
1052
David Neto22f144c2017-06-12 14:26:21 -04001053 auto IntTy = Type::getInt32Ty(M.getContext());
1054 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001055 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1056
David Neto22f144c2017-06-12 14:26:21 -04001057 // Our intrinsic to unpack a float2 from an int.
1058 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1059
1060 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1061
David Neto482550a2018-03-24 05:21:07 -07001062 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001063 auto ShortTy = Type::getInt16Ty(M.getContext());
1064 auto ShortPointerTy = PointerType::get(
1065 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001066
David Netoac825b82017-05-30 12:49:01 -04001067 // Cast the half* pointer to short*.
1068 auto Cast =
1069 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001070
David Netoac825b82017-05-30 12:49:01 -04001071 // Index into the correct address of the casted pointer.
1072 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1073
1074 // Load from the short* we casted to.
1075 auto Load = new LoadInst(Index, "", CI);
1076
1077 // ZExt the short -> int.
1078 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1079
1080 // Get our float2.
1081 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1082
1083 // Extract out the bottom element which is our float result.
1084 auto Extract = ExtractElementInst::Create(
1085 Call, ConstantInt::get(IntTy, 0), "", CI);
1086
1087 CI->replaceAllUsesWith(Extract);
1088 } else {
1089 // Assume the pointer argument points to storage aligned to 32bits
1090 // or more.
1091 // TODO(dneto): Do more analysis to make sure this is true?
1092 //
1093 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1094 // with:
1095 //
1096 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1097 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1098 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1099 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1100 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1101 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1102 // x float> %converted, %index_is_odd32
1103
1104 auto IntPointerTy = PointerType::get(
1105 IntTy, Arg1->getType()->getPointerAddressSpace());
1106
David Neto973e6a82017-05-30 13:48:18 -04001107 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001108 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001109 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001110 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1111
1112 auto One = ConstantInt::get(IntTy, 1);
1113 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1114 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1115
1116 // Index into the correct address of the casted pointer.
1117 auto Ptr =
1118 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1119
1120 // Load from the int* we casted to.
1121 auto Load = new LoadInst(Ptr, "", CI);
1122
1123 // Get our float2.
1124 auto Call = CallInst::Create(NewF, Load, "", CI);
1125
1126 // Extract out the float result, where the element number is
1127 // determined by whether the original index was even or odd.
1128 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1129
1130 CI->replaceAllUsesWith(Extract);
1131 }
David Neto22f144c2017-06-12 14:26:21 -04001132
1133 // Lastly, remember to remove the user.
1134 ToRemoves.push_back(CI);
1135 }
1136 }
1137
1138 Changed = !ToRemoves.empty();
1139
1140 // And cleanup the calls we don't use anymore.
1141 for (auto V : ToRemoves) {
1142 V->eraseFromParent();
1143 }
1144
1145 // And remove the function we don't need either too.
1146 F->eraseFromParent();
1147 }
1148 }
1149
1150 return Changed;
1151}
1152
1153bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1154 bool Changed = false;
1155
David Neto556c7e62018-06-08 13:45:55 -07001156 const std::vector<const char *> Map = {
1157 "_Z11vload_half2jPU3AS1KDh",
1158 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
1159 "_Z11vload_half2jPU3AS2KDh",
1160 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
1161 };
David Neto22f144c2017-06-12 14:26:21 -04001162
1163 for (auto Name : Map) {
1164 // If we find a function with the matching name.
1165 if (auto F = M.getFunction(Name)) {
1166 SmallVector<Instruction *, 4> ToRemoves;
1167
1168 // Walk the users of the function.
1169 for (auto &U : F->uses()) {
1170 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1171 // The index argument from vload_half.
1172 auto Arg0 = CI->getOperand(0);
1173
1174 // The pointer argument from vload_half.
1175 auto Arg1 = CI->getOperand(1);
1176
1177 auto IntTy = Type::getInt32Ty(M.getContext());
1178 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1179 auto NewPointerTy = PointerType::get(
1180 IntTy, Arg1->getType()->getPointerAddressSpace());
1181 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1182
1183 // Cast the half* pointer to int*.
1184 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1185
1186 // Index into the correct address of the casted pointer.
1187 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1188
1189 // Load from the int* we casted to.
1190 auto Load = new LoadInst(Index, "", CI);
1191
1192 // Our intrinsic to unpack a float2 from an int.
1193 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1194
1195 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1196
1197 // Get our float2.
1198 auto Call = CallInst::Create(NewF, Load, "", CI);
1199
1200 CI->replaceAllUsesWith(Call);
1201
1202 // Lastly, remember to remove the user.
1203 ToRemoves.push_back(CI);
1204 }
1205 }
1206
1207 Changed = !ToRemoves.empty();
1208
1209 // And cleanup the calls we don't use anymore.
1210 for (auto V : ToRemoves) {
1211 V->eraseFromParent();
1212 }
1213
1214 // And remove the function we don't need either too.
1215 F->eraseFromParent();
1216 }
1217 }
1218
1219 return Changed;
1220}
1221
1222bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1223 bool Changed = false;
1224
David Neto556c7e62018-06-08 13:45:55 -07001225 const std::vector<const char *> Map = {
1226 "_Z11vload_half4jPU3AS1KDh",
1227 "_Z12vloada_half4jPU3AS1KDh",
1228 "_Z11vload_half4jPU3AS2KDh",
1229 "_Z12vloada_half4jPU3AS2KDh",
1230 };
David Neto22f144c2017-06-12 14:26:21 -04001231
1232 for (auto Name : Map) {
1233 // If we find a function with the matching name.
1234 if (auto F = M.getFunction(Name)) {
1235 SmallVector<Instruction *, 4> ToRemoves;
1236
1237 // Walk the users of the function.
1238 for (auto &U : F->uses()) {
1239 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1240 // The index argument from vload_half.
1241 auto Arg0 = CI->getOperand(0);
1242
1243 // The pointer argument from vload_half.
1244 auto Arg1 = CI->getOperand(1);
1245
1246 auto IntTy = Type::getInt32Ty(M.getContext());
1247 auto Int2Ty = VectorType::get(IntTy, 2);
1248 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1249 auto NewPointerTy = PointerType::get(
1250 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1251 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1252
1253 // Cast the half* pointer to int2*.
1254 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1255
1256 // Index into the correct address of the casted pointer.
1257 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1258
1259 // Load from the int2* we casted to.
1260 auto Load = new LoadInst(Index, "", CI);
1261
1262 // Extract each element from the loaded int2.
1263 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1264 "", CI);
1265 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1266 "", CI);
1267
1268 // Our intrinsic to unpack a float2 from an int.
1269 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1270
1271 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1272
1273 // Get the lower (x & y) components of our final float4.
1274 auto Lo = CallInst::Create(NewF, X, "", CI);
1275
1276 // Get the higher (z & w) components of our final float4.
1277 auto Hi = CallInst::Create(NewF, Y, "", CI);
1278
1279 Constant *ShuffleMask[4] = {
1280 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1281 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1282
1283 // Combine our two float2's into one float4.
1284 auto Combine = new ShuffleVectorInst(
1285 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1286
1287 CI->replaceAllUsesWith(Combine);
1288
1289 // Lastly, remember to remove the user.
1290 ToRemoves.push_back(CI);
1291 }
1292 }
1293
1294 Changed = !ToRemoves.empty();
1295
1296 // And cleanup the calls we don't use anymore.
1297 for (auto V : ToRemoves) {
1298 V->eraseFromParent();
1299 }
1300
1301 // And remove the function we don't need either too.
1302 F->eraseFromParent();
1303 }
1304 }
1305
1306 return Changed;
1307}
1308
David Neto6ad93232018-06-07 15:42:58 -07001309bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
1310 bool Changed = false;
1311
1312 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1313 //
1314 // %u = load i32 %ptr
1315 // %fxy = call <2 x float> Unpack2xHalf(u)
1316 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1317 const std::vector<const char *> Map = {
1318 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
1319 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
1320 "_Z20__clspv_vloada_half2jPKj", // private
1321 };
1322
1323 for (auto Name : Map) {
1324 // If we find a function with the matching name.
1325 if (auto F = M.getFunction(Name)) {
1326 SmallVector<Instruction *, 4> ToRemoves;
1327
1328 // Walk the users of the function.
1329 for (auto &U : F->uses()) {
1330 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
1331 auto Index = CI->getOperand(0);
1332 auto Ptr = CI->getOperand(1);
1333
1334 auto IntTy = Type::getInt32Ty(M.getContext());
1335 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1336 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1337
1338 auto IndexedPtr =
1339 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
1340 auto Load = new LoadInst(IndexedPtr, "", CI);
1341
1342 // Our intrinsic to unpack a float2 from an int.
1343 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1344
1345 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1346
1347 // Get our final float2.
1348 auto Result = CallInst::Create(NewF, Load, "", CI);
1349
1350 CI->replaceAllUsesWith(Result);
1351
1352 // Lastly, remember to remove the user.
1353 ToRemoves.push_back(CI);
1354 }
1355 }
1356
1357 Changed = true;
1358
1359 // And cleanup the calls we don't use anymore.
1360 for (auto V : ToRemoves) {
1361 V->eraseFromParent();
1362 }
1363
1364 // And remove the function we don't need either too.
1365 F->eraseFromParent();
1366 }
1367 }
1368
1369 return Changed;
1370}
1371
1372bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
1373 bool Changed = false;
1374
1375 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1376 //
1377 // %u2 = load <2 x i32> %ptr
1378 // %u2xy = extractelement %u2, 0
1379 // %u2zw = extractelement %u2, 1
1380 // %fxy = call <2 x float> Unpack2xHalf(uint)
1381 // %fzw = call <2 x float> Unpack2xHalf(uint)
1382 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1383 const std::vector<const char *> Map = {
1384 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
1385 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
1386 "_Z20__clspv_vloada_half4jPKDv2_j", // private
1387 };
1388
1389 for (auto Name : Map) {
1390 // If we find a function with the matching name.
1391 if (auto F = M.getFunction(Name)) {
1392 SmallVector<Instruction *, 4> ToRemoves;
1393
1394 // Walk the users of the function.
1395 for (auto &U : F->uses()) {
1396 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1397 auto Index = CI->getOperand(0);
1398 auto Ptr = CI->getOperand(1);
1399
1400 auto IntTy = Type::getInt32Ty(M.getContext());
1401 auto Int2Ty = VectorType::get(IntTy, 2);
1402 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1403 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1404
1405 auto IndexedPtr =
1406 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
1407 auto Load = new LoadInst(IndexedPtr, "", CI);
1408
1409 // Extract each element from the loaded int2.
1410 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1411 "", CI);
1412 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1413 "", CI);
1414
1415 // Our intrinsic to unpack a float2 from an int.
1416 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1417
1418 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1419
1420 // Get the lower (x & y) components of our final float4.
1421 auto Lo = CallInst::Create(NewF, X, "", CI);
1422
1423 // Get the higher (z & w) components of our final float4.
1424 auto Hi = CallInst::Create(NewF, Y, "", CI);
1425
1426 Constant *ShuffleMask[4] = {
1427 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1428 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1429
1430 // Combine our two float2's into one float4.
1431 auto Combine = new ShuffleVectorInst(
1432 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1433
1434 CI->replaceAllUsesWith(Combine);
1435
1436 // Lastly, remember to remove the user.
1437 ToRemoves.push_back(CI);
1438 }
1439 }
1440
1441 Changed = true;
1442
1443 // And cleanup the calls we don't use anymore.
1444 for (auto V : ToRemoves) {
1445 V->eraseFromParent();
1446 }
1447
1448 // And remove the function we don't need either too.
1449 F->eraseFromParent();
1450 }
1451 }
1452
1453 return Changed;
1454}
1455
David Neto22f144c2017-06-12 14:26:21 -04001456bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1457 bool Changed = false;
1458
1459 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1460 "_Z15vstore_half_rtefjPU3AS1Dh",
1461 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1462
1463 for (auto Name : Map) {
1464 // If we find a function with the matching name.
1465 if (auto F = M.getFunction(Name)) {
1466 SmallVector<Instruction *, 4> ToRemoves;
1467
1468 // Walk the users of the function.
1469 for (auto &U : F->uses()) {
1470 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1471 // The value to store.
1472 auto Arg0 = CI->getOperand(0);
1473
1474 // The index argument from vstore_half.
1475 auto Arg1 = CI->getOperand(1);
1476
1477 // The pointer argument from vstore_half.
1478 auto Arg2 = CI->getOperand(2);
1479
David Neto22f144c2017-06-12 14:26:21 -04001480 auto IntTy = Type::getInt32Ty(M.getContext());
1481 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001482 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001483 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001484
1485 // Our intrinsic to pack a float2 to an int.
1486 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1487
1488 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1489
1490 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001491 auto TempVec =
1492 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1493 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001494
1495 // Pack the float2 -> half2 (in an int).
1496 auto X = CallInst::Create(NewF, TempVec, "", CI);
1497
David Neto482550a2018-03-24 05:21:07 -07001498 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001499 auto ShortTy = Type::getInt16Ty(M.getContext());
1500 auto ShortPointerTy = PointerType::get(
1501 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001502
David Neto17852de2017-05-29 17:29:31 -04001503 // Truncate our i32 to an i16.
1504 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001505
David Neto17852de2017-05-29 17:29:31 -04001506 // Cast the half* pointer to short*.
1507 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001508
David Neto17852de2017-05-29 17:29:31 -04001509 // Index into the correct address of the casted pointer.
1510 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001511
David Neto17852de2017-05-29 17:29:31 -04001512 // Store to the int* we casted to.
1513 auto Store = new StoreInst(Trunc, Index, CI);
1514
1515 CI->replaceAllUsesWith(Store);
1516 } else {
1517 // We can only write to 32-bit aligned words.
1518 //
1519 // Assuming base is aligned to 32-bits, replace the equivalent of
1520 // vstore_half(value, index, base)
1521 // with:
1522 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1523 // uint32_t write_to_upper_half = index & 1u;
1524 // uint32_t shift = write_to_upper_half << 4;
1525 //
1526 // // Pack the float value as a half number in bottom 16 bits
1527 // // of an i32.
1528 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1529 //
1530 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1531 // ^ ((packed & 0xffff) << shift)
1532 // // We only need relaxed consistency, but OpenCL 1.2 only has
1533 // // sequentially consistent atomics.
1534 // // TODO(dneto): Use relaxed consistency.
1535 // atomic_xor(target_ptr, xor_value)
1536 auto IntPointerTy = PointerType::get(
1537 IntTy, Arg2->getType()->getPointerAddressSpace());
1538
1539 auto Four = ConstantInt::get(IntTy, 4);
1540 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1541
1542 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1543 // Compute index / 2
1544 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1545 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1546 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1547 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1548 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1549 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1550 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1551
1552 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1553 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1554 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1555
1556 // Generate the call to atomi_xor.
1557 SmallVector<Type *, 5> ParamTypes;
1558 // The pointer type.
1559 ParamTypes.push_back(IntPointerTy);
1560 // The Types for memory scope, semantics, and value.
1561 ParamTypes.push_back(IntTy);
1562 ParamTypes.push_back(IntTy);
1563 ParamTypes.push_back(IntTy);
1564 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1565 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1566
1567 const auto ConstantScopeDevice =
1568 ConstantInt::get(IntTy, spv::ScopeDevice);
1569 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1570 // (SPIR-V Workgroup).
1571 const auto AddrSpaceSemanticsBits =
1572 IntPointerTy->getPointerAddressSpace() == 1
1573 ? spv::MemorySemanticsUniformMemoryMask
1574 : spv::MemorySemanticsWorkgroupMemoryMask;
1575
1576 // We're using relaxed consistency here.
1577 const auto ConstantMemorySemantics =
1578 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1579 AddrSpaceSemanticsBits);
1580
1581 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1582 ConstantMemorySemantics, ValueToXor};
1583 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1584 }
David Neto22f144c2017-06-12 14:26:21 -04001585
1586 // Lastly, remember to remove the user.
1587 ToRemoves.push_back(CI);
1588 }
1589 }
1590
1591 Changed = !ToRemoves.empty();
1592
1593 // And cleanup the calls we don't use anymore.
1594 for (auto V : ToRemoves) {
1595 V->eraseFromParent();
1596 }
1597
1598 // And remove the function we don't need either too.
1599 F->eraseFromParent();
1600 }
1601 }
1602
1603 return Changed;
1604}
1605
1606bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1607 bool Changed = false;
1608
David Netoe2871522018-06-08 11:09:54 -07001609 const std::vector<const char *> Map = {
1610 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
1611 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
1612 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
1613 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
1614 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1615 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
1616 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
1617 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
1618 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
1619 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
1620 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
1621 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
1622 };
David Neto22f144c2017-06-12 14:26:21 -04001623
1624 for (auto Name : Map) {
1625 // If we find a function with the matching name.
1626 if (auto F = M.getFunction(Name)) {
1627 SmallVector<Instruction *, 4> ToRemoves;
1628
1629 // Walk the users of the function.
1630 for (auto &U : F->uses()) {
1631 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1632 // The value to store.
1633 auto Arg0 = CI->getOperand(0);
1634
1635 // The index argument from vstore_half.
1636 auto Arg1 = CI->getOperand(1);
1637
1638 // The pointer argument from vstore_half.
1639 auto Arg2 = CI->getOperand(2);
1640
1641 auto IntTy = Type::getInt32Ty(M.getContext());
1642 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1643 auto NewPointerTy = PointerType::get(
1644 IntTy, Arg2->getType()->getPointerAddressSpace());
1645 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1646
1647 // Our intrinsic to pack a float2 to an int.
1648 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1649
1650 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1651
1652 // Turn the packed x & y into the final packing.
1653 auto X = CallInst::Create(NewF, Arg0, "", CI);
1654
1655 // Cast the half* pointer to int*.
1656 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1657
1658 // Index into the correct address of the casted pointer.
1659 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1660
1661 // Store to the int* we casted to.
1662 auto Store = new StoreInst(X, Index, CI);
1663
1664 CI->replaceAllUsesWith(Store);
1665
1666 // Lastly, remember to remove the user.
1667 ToRemoves.push_back(CI);
1668 }
1669 }
1670
1671 Changed = !ToRemoves.empty();
1672
1673 // And cleanup the calls we don't use anymore.
1674 for (auto V : ToRemoves) {
1675 V->eraseFromParent();
1676 }
1677
1678 // And remove the function we don't need either too.
1679 F->eraseFromParent();
1680 }
1681 }
1682
1683 return Changed;
1684}
1685
1686bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1687 bool Changed = false;
1688
David Netoe2871522018-06-08 11:09:54 -07001689 const std::vector<const char *> Map = {
1690 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
1691 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
1692 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
1693 "_Z13vstorea_half4Dv4_fjPDh", // private
1694 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1695 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
1696 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
1697 "_Z17vstorea_half4_rteDv4_fjPDh", // private
1698 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
1699 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
1700 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
1701 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
1702 };
David Neto22f144c2017-06-12 14:26:21 -04001703
1704 for (auto Name : Map) {
1705 // If we find a function with the matching name.
1706 if (auto F = M.getFunction(Name)) {
1707 SmallVector<Instruction *, 4> ToRemoves;
1708
1709 // Walk the users of the function.
1710 for (auto &U : F->uses()) {
1711 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1712 // The value to store.
1713 auto Arg0 = CI->getOperand(0);
1714
1715 // The index argument from vstore_half.
1716 auto Arg1 = CI->getOperand(1);
1717
1718 // The pointer argument from vstore_half.
1719 auto Arg2 = CI->getOperand(2);
1720
1721 auto IntTy = Type::getInt32Ty(M.getContext());
1722 auto Int2Ty = VectorType::get(IntTy, 2);
1723 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1724 auto NewPointerTy = PointerType::get(
1725 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1726 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1727
1728 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1729 ConstantInt::get(IntTy, 1)};
1730
1731 // Extract out the x & y components of our to store value.
1732 auto Lo =
1733 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1734 ConstantVector::get(LoShuffleMask), "", CI);
1735
1736 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1737 ConstantInt::get(IntTy, 3)};
1738
1739 // Extract out the z & w components of our to store value.
1740 auto Hi =
1741 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1742 ConstantVector::get(HiShuffleMask), "", CI);
1743
1744 // Our intrinsic to pack a float2 to an int.
1745 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1746
1747 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1748
1749 // Turn the packed x & y into the final component of our int2.
1750 auto X = CallInst::Create(NewF, Lo, "", CI);
1751
1752 // Turn the packed z & w into the final component of our int2.
1753 auto Y = CallInst::Create(NewF, Hi, "", CI);
1754
1755 auto Combine = InsertElementInst::Create(
1756 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1757 Combine = InsertElementInst::Create(
1758 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1759
1760 // Cast the half* pointer to int2*.
1761 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1762
1763 // Index into the correct address of the casted pointer.
1764 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1765
1766 // Store to the int2* we casted to.
1767 auto Store = new StoreInst(Combine, Index, CI);
1768
1769 CI->replaceAllUsesWith(Store);
1770
1771 // Lastly, remember to remove the user.
1772 ToRemoves.push_back(CI);
1773 }
1774 }
1775
1776 Changed = !ToRemoves.empty();
1777
1778 // And cleanup the calls we don't use anymore.
1779 for (auto V : ToRemoves) {
1780 V->eraseFromParent();
1781 }
1782
1783 // And remove the function we don't need either too.
1784 F->eraseFromParent();
1785 }
1786 }
1787
1788 return Changed;
1789}
1790
1791bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1792 bool Changed = false;
1793
1794 const std::map<const char *, const char*> Map = {
1795 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1796 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1797 };
1798
1799 for (auto Pair : Map) {
1800 // If we find a function with the matching name.
1801 if (auto F = M.getFunction(Pair.first)) {
1802 SmallVector<Instruction *, 4> ToRemoves;
1803
1804 // Walk the users of the function.
1805 for (auto &U : F->uses()) {
1806 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1807 // The image.
1808 auto Arg0 = CI->getOperand(0);
1809
1810 // The sampler.
1811 auto Arg1 = CI->getOperand(1);
1812
1813 // The coordinate (integer type that we can't handle).
1814 auto Arg2 = CI->getOperand(2);
1815
1816 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1817
1818 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1819
1820 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1821
1822 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1823
1824 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1825
1826 CI->replaceAllUsesWith(NewCI);
1827
1828 // Lastly, remember to remove the user.
1829 ToRemoves.push_back(CI);
1830 }
1831 }
1832
1833 Changed = !ToRemoves.empty();
1834
1835 // And cleanup the calls we don't use anymore.
1836 for (auto V : ToRemoves) {
1837 V->eraseFromParent();
1838 }
1839
1840 // And remove the function we don't need either too.
1841 F->eraseFromParent();
1842 }
1843 }
1844
1845 return Changed;
1846}
1847
1848bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1849 bool Changed = false;
1850
1851 const std::map<const char *, const char *> Map = {
David Neto22f144c2017-06-12 14:26:21 -04001852 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1853 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1854 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1855 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1856 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01001857 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04001858
1859 for (auto Pair : Map) {
1860 // If we find a function with the matching name.
1861 if (auto F = M.getFunction(Pair.first)) {
1862 SmallVector<Instruction *, 4> ToRemoves;
1863
1864 // Walk the users of the function.
1865 for (auto &U : F->uses()) {
1866 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1867 auto FType = F->getFunctionType();
1868 SmallVector<Type *, 5> ParamTypes;
1869
1870 // The pointer type.
1871 ParamTypes.push_back(FType->getParamType(0));
1872
1873 auto IntTy = Type::getInt32Ty(M.getContext());
1874
1875 // The memory scope type.
1876 ParamTypes.push_back(IntTy);
1877
1878 // The memory semantics type.
1879 ParamTypes.push_back(IntTy);
1880
1881 if (2 < CI->getNumArgOperands()) {
1882 // The unequal memory semantics type.
1883 ParamTypes.push_back(IntTy);
1884
1885 // The value type.
1886 ParamTypes.push_back(FType->getParamType(2));
1887
1888 // The comparator type.
1889 ParamTypes.push_back(FType->getParamType(1));
1890 } else if (1 < CI->getNumArgOperands()) {
1891 // The value type.
1892 ParamTypes.push_back(FType->getParamType(1));
1893 }
1894
1895 auto NewFType =
1896 FunctionType::get(FType->getReturnType(), ParamTypes, false);
1897 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1898
1899 // We need to map the OpenCL constants to the SPIR-V equivalents.
1900 const auto ConstantScopeDevice =
1901 ConstantInt::get(IntTy, spv::ScopeDevice);
1902 const auto ConstantMemorySemantics = ConstantInt::get(
1903 IntTy, spv::MemorySemanticsUniformMemoryMask |
1904 spv::MemorySemanticsSequentiallyConsistentMask);
1905
1906 SmallVector<Value *, 5> Params;
1907
1908 // The pointer.
1909 Params.push_back(CI->getArgOperand(0));
1910
1911 // The memory scope.
1912 Params.push_back(ConstantScopeDevice);
1913
1914 // The memory semantics.
1915 Params.push_back(ConstantMemorySemantics);
1916
1917 if (2 < CI->getNumArgOperands()) {
1918 // The unequal memory semantics.
1919 Params.push_back(ConstantMemorySemantics);
1920
1921 // The value.
1922 Params.push_back(CI->getArgOperand(2));
1923
1924 // The comparator.
1925 Params.push_back(CI->getArgOperand(1));
1926 } else if (1 < CI->getNumArgOperands()) {
1927 // The value.
1928 Params.push_back(CI->getArgOperand(1));
1929 }
1930
1931 auto NewCI = CallInst::Create(NewF, Params, "", CI);
1932
1933 CI->replaceAllUsesWith(NewCI);
1934
1935 // Lastly, remember to remove the user.
1936 ToRemoves.push_back(CI);
1937 }
1938 }
1939
1940 Changed = !ToRemoves.empty();
1941
1942 // And cleanup the calls we don't use anymore.
1943 for (auto V : ToRemoves) {
1944 V->eraseFromParent();
1945 }
1946
1947 // And remove the function we don't need either too.
1948 F->eraseFromParent();
1949 }
1950 }
1951
Neil Henning39672102017-09-29 14:33:13 +01001952 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
1953 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
1954 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
1955 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
1956 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
1957 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
1958 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
1959 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
1960 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
1961 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
1962 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
1963 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
1964 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
1965 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
1966 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
1967 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
1968 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
1969
1970 for (auto Pair : Map2) {
1971 // If we find a function with the matching name.
1972 if (auto F = M.getFunction(Pair.first)) {
1973 SmallVector<Instruction *, 4> ToRemoves;
1974
1975 // Walk the users of the function.
1976 for (auto &U : F->uses()) {
1977 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1978 auto AtomicOp = new AtomicRMWInst(
1979 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
1980 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
1981
1982 CI->replaceAllUsesWith(AtomicOp);
1983
1984 // Lastly, remember to remove the user.
1985 ToRemoves.push_back(CI);
1986 }
1987 }
1988
1989 Changed = !ToRemoves.empty();
1990
1991 // And cleanup the calls we don't use anymore.
1992 for (auto V : ToRemoves) {
1993 V->eraseFromParent();
1994 }
1995
1996 // And remove the function we don't need either too.
1997 F->eraseFromParent();
1998 }
1999 }
2000
David Neto22f144c2017-06-12 14:26:21 -04002001 return Changed;
2002}
2003
2004bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
2005 bool Changed = false;
2006
2007 // If we find a function with the matching name.
2008 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
2009 SmallVector<Instruction *, 4> ToRemoves;
2010
2011 auto IntTy = Type::getInt32Ty(M.getContext());
2012 auto FloatTy = Type::getFloatTy(M.getContext());
2013
2014 Constant *DownShuffleMask[3] = {
2015 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2016 ConstantInt::get(IntTy, 2)};
2017
2018 Constant *UpShuffleMask[4] = {
2019 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2020 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2021
2022 Constant *FloatVec[3] = {
2023 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2024 };
2025
2026 // Walk the users of the function.
2027 for (auto &U : F->uses()) {
2028 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2029 auto Vec4Ty = CI->getArgOperand(0)->getType();
2030 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2031 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2032 auto Vec3Ty = Arg0->getType();
2033
2034 auto NewFType =
2035 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
2036
2037 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
2038
2039 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
2040
2041 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
2042
2043 CI->replaceAllUsesWith(Result);
2044
2045 // Lastly, remember to remove the user.
2046 ToRemoves.push_back(CI);
2047 }
2048 }
2049
2050 Changed = !ToRemoves.empty();
2051
2052 // And cleanup the calls we don't use anymore.
2053 for (auto V : ToRemoves) {
2054 V->eraseFromParent();
2055 }
2056
2057 // And remove the function we don't need either too.
2058 F->eraseFromParent();
2059 }
2060
2061 return Changed;
2062}
David Neto62653202017-10-16 19:05:18 -04002063
2064bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2065 bool Changed = false;
2066
2067 // OpenCL's float result = fract(float x, float* ptr)
2068 //
2069 // In the LLVM domain:
2070 //
2071 // %floor_result = call spir_func float @floor(float %x)
2072 // store float %floor_result, float * %ptr
2073 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2074 // %result = call spir_func float
2075 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2076 //
2077 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2078 // and clspv.fract occur in the SPIR-V generator pass:
2079 //
2080 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2081 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2082 // ...
2083 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2084 // OpStore %ptr %floor_result
2085 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2086 // %fract_result = OpExtInst %float
2087 // %glsl_ext Fmin %fract_intermediate %just_under_1
2088
2089
2090 using std::string;
2091
2092 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2093 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2094 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2095 auto make_quad = [](const char *a, const char *b, const char *c,
2096 const char *d) {
2097 return std::tuple<const char *, const char *, const char *, const char *>(
2098 a, b, c, d);
2099 };
2100 const std::vector<QuadType> Functions = {
2101 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2102 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2103 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2104 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2105 };
2106
2107 for (auto& quad : Functions) {
2108 const StringRef fract_name(std::get<0>(quad));
2109
2110 // If we find a function with the matching name.
2111 if (auto F = M.getFunction(fract_name)) {
2112 if (F->use_begin() == F->use_end())
2113 continue;
2114
2115 // We have some uses.
2116 Changed = true;
2117
2118 auto& Context = M.getContext();
2119
2120 const StringRef floor_name(std::get<1>(quad));
2121 const StringRef fmin_name(std::get<2>(quad));
2122 const StringRef clspv_fract_name(std::get<3>(quad));
2123
2124 // This is either float or a float vector. All the float-like
2125 // types are this type.
2126 auto result_ty = F->getReturnType();
2127
2128 Function* fmin_fn = M.getFunction(fmin_name);
2129 if (!fmin_fn) {
2130 // Make the fmin function.
2131 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
2132 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002133 fmin_fn->addFnAttr(Attribute::ReadNone);
2134 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2135 }
2136
2137 Function* floor_fn = M.getFunction(floor_name);
2138 if (!floor_fn) {
2139 // Make the floor function.
2140 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2141 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002142 floor_fn->addFnAttr(Attribute::ReadNone);
2143 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2144 }
2145
2146 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
2147 if (!clspv_fract_fn) {
2148 // Make the clspv_fract function.
2149 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2150 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002151 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2152 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2153 }
2154
2155 // Number of significant significand bits, whether represented or not.
2156 unsigned num_significand_bits;
2157 switch (result_ty->getScalarType()->getTypeID()) {
2158 case Type::HalfTyID:
2159 num_significand_bits = 11;
2160 break;
2161 case Type::FloatTyID:
2162 num_significand_bits = 24;
2163 break;
2164 case Type::DoubleTyID:
2165 num_significand_bits = 53;
2166 break;
2167 default:
2168 assert(false && "Unhandled float type when processing fract builtin");
2169 break;
2170 }
2171 // Beware that the disassembler displays this value as
2172 // OpConstant %float 1
2173 // which is not quite right.
2174 const double kJustUnderOneScalar =
2175 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2176
2177 Constant *just_under_one =
2178 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2179 if (result_ty->isVectorTy()) {
2180 just_under_one = ConstantVector::getSplat(
2181 result_ty->getVectorNumElements(), just_under_one);
2182 }
2183
2184 IRBuilder<> Builder(Context);
2185
2186 SmallVector<Instruction *, 4> ToRemoves;
2187
2188 // Walk the users of the function.
2189 for (auto &U : F->uses()) {
2190 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2191
2192 Builder.SetInsertPoint(CI);
2193 auto arg = CI->getArgOperand(0);
2194 auto ptr = CI->getArgOperand(1);
2195
2196 // Compute floor result and store it.
2197 auto floor = Builder.CreateCall(floor_fn, {arg});
2198 Builder.CreateStore(floor, ptr);
2199
2200 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2201 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2202
2203 CI->replaceAllUsesWith(fract_result);
2204
2205 // Lastly, remember to remove the user.
2206 ToRemoves.push_back(CI);
2207 }
2208 }
2209
2210 // And cleanup the calls we don't use anymore.
2211 for (auto V : ToRemoves) {
2212 V->eraseFromParent();
2213 }
2214
2215 // And remove the function we don't need either too.
2216 F->eraseFromParent();
2217 }
2218 }
2219
2220 return Changed;
2221}