blob: db941d804c61a0360c5608f5c878b8b9b9326d6c [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto118188e2018-08-24 11:27:54 -040019#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000023#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040024#include "llvm/Pass.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/raw_ostream.h"
27#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040028
David Neto118188e2018-08-24 11:27:54 -040029#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040030
David Neto482550a2018-03-24 05:21:07 -070031#include "clspv/Option.h"
32
David Neto22f144c2017-06-12 14:26:21 -040033using namespace llvm;
34
35#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
36
37namespace {
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)). Returns 0 when |v| is 0 or 1.
//
// NOTE: despite its name this is NOT a count of leading zeros. The callers
// visible in this file (replaceBarrier, replaceMemFence) only use the
// difference between two results, to compute the left shift that moves one
// single-bit mask onto another, so the historical name is kept.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the top bit: whenever anything is set above the low
  // |step| bits, discard those low bits and record |step| in the index.
  for (uint32_t step = 16; step != 0; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      index |= step;
    }
  }

  return index;
}
57
58Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
59 if (1 == elements) {
60 return Type::getInt1Ty(C);
61 } else {
62 return VectorType::get(Type::getInt1Ty(C), elements);
63 }
64}
65
66struct ReplaceOpenCLBuiltinPass final : public ModulePass {
67 static char ID;
68 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
69
70 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +000071 bool replaceAbs(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040072 bool replaceRecip(Module &M);
73 bool replaceDivide(Module &M);
74 bool replaceExp10(Module &M);
75 bool replaceLog10(Module &M);
76 bool replaceBarrier(Module &M);
77 bool replaceMemFence(Module &M);
78 bool replaceRelational(Module &M);
79 bool replaceIsInfAndIsNan(Module &M);
80 bool replaceAllAndAny(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +000081 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +000082 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +000083 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040084 bool replaceSignbit(Module &M);
85 bool replaceMadandMad24andMul24(Module &M);
86 bool replaceVloadHalf(Module &M);
87 bool replaceVloadHalf2(Module &M);
88 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -070089 bool replaceClspvVloadaHalf2(Module &M);
90 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040091 bool replaceVstoreHalf(Module &M);
92 bool replaceVstoreHalf2(Module &M);
93 bool replaceVstoreHalf4(Module &M);
94 bool replaceReadImageF(Module &M);
95 bool replaceAtomics(Module &M);
96 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040097 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070098 bool replaceVload(Module &M);
99 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400100};
101}
102
103char ReplaceOpenCLBuiltinPass::ID = 0;
104static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
105 "Replace OpenCL Builtins Pass");
106
107namespace clspv {
108ModulePass *createReplaceOpenCLBuiltinPass() {
109 return new ReplaceOpenCLBuiltinPass();
110}
111}
112
113bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
114 bool Changed = false;
115
Kévin Petit2444e9b2018-11-09 14:14:37 +0000116 Changed |= replaceAbs(M);
David Neto22f144c2017-06-12 14:26:21 -0400117 Changed |= replaceRecip(M);
118 Changed |= replaceDivide(M);
119 Changed |= replaceExp10(M);
120 Changed |= replaceLog10(M);
121 Changed |= replaceBarrier(M);
122 Changed |= replaceMemFence(M);
123 Changed |= replaceRelational(M);
124 Changed |= replaceIsInfAndIsNan(M);
125 Changed |= replaceAllAndAny(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000126 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000127 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000128 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400129 Changed |= replaceSignbit(M);
130 Changed |= replaceMadandMad24andMul24(M);
131 Changed |= replaceVloadHalf(M);
132 Changed |= replaceVloadHalf2(M);
133 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700134 Changed |= replaceClspvVloadaHalf2(M);
135 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400136 Changed |= replaceVstoreHalf(M);
137 Changed |= replaceVstoreHalf2(M);
138 Changed |= replaceVstoreHalf4(M);
139 Changed |= replaceReadImageF(M);
140 Changed |= replaceAtomics(M);
141 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400142 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700143 Changed |= replaceVload(M);
144 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400145
146 return Changed;
147}
148
Kévin Petit2444e9b2018-11-09 14:14:37 +0000149bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
150 bool Changed = false;
151
152 const char *Names[] = {
153 "_Z3abst",
154 "_Z3absDv2_t",
155 "_Z3absDv3_t",
156 "_Z3absDv4_t",
157 "_Z3absj",
158 "_Z3absDv2_j",
159 "_Z3absDv3_j",
160 "_Z3absDv4_j",
161 "_Z3absm",
162 "_Z3absDv2_m",
163 "_Z3absDv3_m",
164 "_Z3absDv4_m",
165 };
166
167 for (auto Name : Names) {
168 // If we find a function with the matching name.
169 if (auto F = M.getFunction(Name)) {
170 SmallVector<Instruction *, 4> ToRemoves;
171
172 // Walk the users of the function.
173 for (auto &U : F->uses()) {
174 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
175 // Abs has one arg.
176 auto Arg = CI->getOperand(0);
177
178 // Use the argument unchanged, we know it's unsigned
179 CI->replaceAllUsesWith(Arg);
180
181 // Lastly, remember to remove the user.
182 ToRemoves.push_back(CI);
183 }
184 }
185
186 Changed = !ToRemoves.empty();
187
188 // And cleanup the calls we don't use anymore.
189 for (auto V : ToRemoves) {
190 V->eraseFromParent();
191 }
192
193 // And remove the function we don't need either too.
194 F->eraseFromParent();
195 }
196 }
197
198 return Changed;
199}
200
David Neto22f144c2017-06-12 14:26:21 -0400201bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
202 bool Changed = false;
203
204 const char *Names[] = {
205 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
206 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
207 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
208 };
209
210 for (auto Name : Names) {
211 // If we find a function with the matching name.
212 if (auto F = M.getFunction(Name)) {
213 SmallVector<Instruction *, 4> ToRemoves;
214
215 // Walk the users of the function.
216 for (auto &U : F->uses()) {
217 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
218 // Recip has one arg.
219 auto Arg = CI->getOperand(0);
220
221 auto Div = BinaryOperator::Create(
222 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
223 CI);
224
225 CI->replaceAllUsesWith(Div);
226
227 // Lastly, remember to remove the user.
228 ToRemoves.push_back(CI);
229 }
230 }
231
232 Changed = !ToRemoves.empty();
233
234 // And cleanup the calls we don't use anymore.
235 for (auto V : ToRemoves) {
236 V->eraseFromParent();
237 }
238
239 // And remove the function we don't need either too.
240 F->eraseFromParent();
241 }
242 }
243
244 return Changed;
245}
246
247bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
248 bool Changed = false;
249
250 const char *Names[] = {
251 "_Z11half_divideff", "_Z13native_divideff",
252 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
253 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
254 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
255 };
256
257 for (auto Name : Names) {
258 // If we find a function with the matching name.
259 if (auto F = M.getFunction(Name)) {
260 SmallVector<Instruction *, 4> ToRemoves;
261
262 // Walk the users of the function.
263 for (auto &U : F->uses()) {
264 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
265 auto Div = BinaryOperator::Create(
266 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
267
268 CI->replaceAllUsesWith(Div);
269
270 // Lastly, remember to remove the user.
271 ToRemoves.push_back(CI);
272 }
273 }
274
275 Changed = !ToRemoves.empty();
276
277 // And cleanup the calls we don't use anymore.
278 for (auto V : ToRemoves) {
279 V->eraseFromParent();
280 }
281
282 // And remove the function we don't need either too.
283 F->eraseFromParent();
284 }
285 }
286
287 return Changed;
288}
289
290bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
291 bool Changed = false;
292
293 const std::map<const char *, const char *> Map = {
294 {"_Z5exp10f", "_Z3expf"},
295 {"_Z10half_exp10f", "_Z8half_expf"},
296 {"_Z12native_exp10f", "_Z10native_expf"},
297 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
298 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
299 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
300 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
301 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
302 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
303 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
304 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
305 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
306
307 for (auto Pair : Map) {
308 // If we find a function with the matching name.
309 if (auto F = M.getFunction(Pair.first)) {
310 SmallVector<Instruction *, 4> ToRemoves;
311
312 // Walk the users of the function.
313 for (auto &U : F->uses()) {
314 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
315 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
316
317 auto Arg = CI->getOperand(0);
318
319 // Constant of the natural log of 10 (ln(10)).
320 const double Ln10 =
321 2.302585092994045684017991454684364207601101488628772976033;
322
323 auto Mul = BinaryOperator::Create(
324 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
325 CI);
326
327 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
328
329 CI->replaceAllUsesWith(NewCI);
330
331 // Lastly, remember to remove the user.
332 ToRemoves.push_back(CI);
333 }
334 }
335
336 Changed = !ToRemoves.empty();
337
338 // And cleanup the calls we don't use anymore.
339 for (auto V : ToRemoves) {
340 V->eraseFromParent();
341 }
342
343 // And remove the function we don't need either too.
344 F->eraseFromParent();
345 }
346 }
347
348 return Changed;
349}
350
351bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
352 bool Changed = false;
353
354 const std::map<const char *, const char *> Map = {
355 {"_Z5log10f", "_Z3logf"},
356 {"_Z10half_log10f", "_Z8half_logf"},
357 {"_Z12native_log10f", "_Z10native_logf"},
358 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
359 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
360 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
361 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
362 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
363 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
364 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
365 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
366 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
367
368 for (auto Pair : Map) {
369 // If we find a function with the matching name.
370 if (auto F = M.getFunction(Pair.first)) {
371 SmallVector<Instruction *, 4> ToRemoves;
372
373 // Walk the users of the function.
374 for (auto &U : F->uses()) {
375 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
376 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
377
378 auto Arg = CI->getOperand(0);
379
380 // Constant of the reciprocal of the natural log of 10 (ln(10)).
381 const double Ln10 =
382 0.434294481903251827651128918916605082294397005803666566114;
383
384 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
385
386 auto Mul = BinaryOperator::Create(
387 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
388 "", CI);
389
390 CI->replaceAllUsesWith(Mul);
391
392 // Lastly, remember to remove the user.
393 ToRemoves.push_back(CI);
394 }
395 }
396
397 Changed = !ToRemoves.empty();
398
399 // And cleanup the calls we don't use anymore.
400 for (auto V : ToRemoves) {
401 V->eraseFromParent();
402 }
403
404 // And remove the function we don't need either too.
405 F->eraseFromParent();
406 }
407 }
408
409 return Changed;
410}
411
412bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
413 bool Changed = false;
414
415 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
416
417 const std::map<const char *, const char *> Map = {
418 {"_Z7barrierj", "__spirv_control_barrier"}};
419
420 for (auto Pair : Map) {
421 // If we find a function with the matching name.
422 if (auto F = M.getFunction(Pair.first)) {
423 SmallVector<Instruction *, 4> ToRemoves;
424
425 // Walk the users of the function.
426 for (auto &U : F->uses()) {
427 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
428 auto FType = F->getFunctionType();
429 SmallVector<Type *, 3> Params;
430 for (unsigned i = 0; i < 3; i++) {
431 Params.push_back(FType->getParamType(0));
432 }
433 auto NewFType =
434 FunctionType::get(FType->getReturnType(), Params, false);
435 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
436
437 auto Arg = CI->getOperand(0);
438
439 // We need to map the OpenCL constants to the SPIR-V equivalents.
440 const auto LocalMemFence =
441 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
442 const auto GlobalMemFence =
443 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
444 const auto ConstantSequentiallyConsistent = ConstantInt::get(
445 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
446 const auto ConstantScopeDevice =
447 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
448 const auto ConstantScopeWorkgroup =
449 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
450
451 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
452 const auto LocalMemFenceMask = BinaryOperator::Create(
453 Instruction::And, LocalMemFence, Arg, "", CI);
454 const auto WorkgroupShiftAmount =
455 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
456 clz(CLK_LOCAL_MEM_FENCE);
457 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
458 Instruction::Shl, LocalMemFenceMask,
459 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
460
461 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
462 const auto GlobalMemFenceMask = BinaryOperator::Create(
463 Instruction::And, GlobalMemFence, Arg, "", CI);
464 const auto UniformShiftAmount =
465 clz(spv::MemorySemanticsUniformMemoryMask) -
466 clz(CLK_GLOBAL_MEM_FENCE);
467 const auto MemorySemanticsUniform = BinaryOperator::Create(
468 Instruction::Shl, GlobalMemFenceMask,
469 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
470
471 // And combine the above together, also adding in
472 // MemorySemanticsSequentiallyConsistentMask.
473 auto MemorySemantics =
474 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
475 ConstantSequentiallyConsistent, "", CI);
476 MemorySemantics = BinaryOperator::Create(
477 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
478
479 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
480 // Device Scope, otherwise Workgroup Scope.
481 const auto Cmp =
482 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
483 GlobalMemFenceMask, GlobalMemFence, "", CI);
484 const auto MemoryScope = SelectInst::Create(
485 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
486
487 // Lastly, the Execution Scope is always Workgroup Scope.
488 const auto ExecutionScope = ConstantScopeWorkgroup;
489
490 auto NewCI = CallInst::Create(
491 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
492
493 CI->replaceAllUsesWith(NewCI);
494
495 // Lastly, remember to remove the user.
496 ToRemoves.push_back(CI);
497 }
498 }
499
500 Changed = !ToRemoves.empty();
501
502 // And cleanup the calls we don't use anymore.
503 for (auto V : ToRemoves) {
504 V->eraseFromParent();
505 }
506
507 // And remove the function we don't need either too.
508 F->eraseFromParent();
509 }
510 }
511
512 return Changed;
513}
514
515bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
516 bool Changed = false;
517
518 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
519
Neil Henning39672102017-09-29 14:33:13 +0100520 using Tuple = std::tuple<const char *, unsigned>;
521 const std::map<const char *, Tuple> Map = {
522 {"_Z9mem_fencej",
523 Tuple("__spirv_memory_barrier",
524 spv::MemorySemanticsSequentiallyConsistentMask)},
525 {"_Z14read_mem_fencej",
526 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
527 {"_Z15write_mem_fencej",
528 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400529
530 for (auto Pair : Map) {
531 // If we find a function with the matching name.
532 if (auto F = M.getFunction(Pair.first)) {
533 SmallVector<Instruction *, 4> ToRemoves;
534
535 // Walk the users of the function.
536 for (auto &U : F->uses()) {
537 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
538 auto FType = F->getFunctionType();
539 SmallVector<Type *, 2> Params;
540 for (unsigned i = 0; i < 2; i++) {
541 Params.push_back(FType->getParamType(0));
542 }
543 auto NewFType =
544 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100545 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400546
547 auto Arg = CI->getOperand(0);
548
549 // We need to map the OpenCL constants to the SPIR-V equivalents.
550 const auto LocalMemFence =
551 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
552 const auto GlobalMemFence =
553 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
554 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100555 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400556 const auto ConstantScopeDevice =
557 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
558
559 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
560 const auto LocalMemFenceMask = BinaryOperator::Create(
561 Instruction::And, LocalMemFence, Arg, "", CI);
562 const auto WorkgroupShiftAmount =
563 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
564 clz(CLK_LOCAL_MEM_FENCE);
565 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
566 Instruction::Shl, LocalMemFenceMask,
567 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
568
569 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
570 const auto GlobalMemFenceMask = BinaryOperator::Create(
571 Instruction::And, GlobalMemFence, Arg, "", CI);
572 const auto UniformShiftAmount =
573 clz(spv::MemorySemanticsUniformMemoryMask) -
574 clz(CLK_GLOBAL_MEM_FENCE);
575 const auto MemorySemanticsUniform = BinaryOperator::Create(
576 Instruction::Shl, GlobalMemFenceMask,
577 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
578
579 // And combine the above together, also adding in
580 // MemorySemanticsSequentiallyConsistentMask.
581 auto MemorySemantics =
582 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
583 ConstantMemorySemantics, "", CI);
584 MemorySemantics = BinaryOperator::Create(
585 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
586
587 // Memory Scope is always device.
588 const auto MemoryScope = ConstantScopeDevice;
589
590 auto NewCI =
591 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
592
593 CI->replaceAllUsesWith(NewCI);
594
595 // Lastly, remember to remove the user.
596 ToRemoves.push_back(CI);
597 }
598 }
599
600 Changed = !ToRemoves.empty();
601
602 // And cleanup the calls we don't use anymore.
603 for (auto V : ToRemoves) {
604 V->eraseFromParent();
605 }
606
607 // And remove the function we don't need either too.
608 F->eraseFromParent();
609 }
610 }
611
612 return Changed;
613}
614
615bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
616 bool Changed = false;
617
618 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
619 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
620 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
621 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
622 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
623 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
624 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
625 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
626 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
627 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
628 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
629 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
630 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
631 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
632 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
633 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
634 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
635 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
636 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
637 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
638 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
639 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
640 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
641 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
642 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
643 };
644
645 for (auto Pair : Map) {
646 // If we find a function with the matching name.
647 if (auto F = M.getFunction(Pair.first)) {
648 SmallVector<Instruction *, 4> ToRemoves;
649
650 // Walk the users of the function.
651 for (auto &U : F->uses()) {
652 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
653 // The predicate to use in the CmpInst.
654 auto Predicate = Pair.second.first;
655
656 // The value to return for true.
657 auto TrueValue =
658 ConstantInt::getSigned(CI->getType(), Pair.second.second);
659
660 // The value to return for false.
661 auto FalseValue = Constant::getNullValue(CI->getType());
662
663 auto Arg1 = CI->getOperand(0);
664 auto Arg2 = CI->getOperand(1);
665
666 const auto Cmp =
667 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
668
669 const auto Select =
670 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
671
672 CI->replaceAllUsesWith(Select);
673
674 // Lastly, remember to remove the user.
675 ToRemoves.push_back(CI);
676 }
677 }
678
679 Changed = !ToRemoves.empty();
680
681 // And cleanup the calls we don't use anymore.
682 for (auto V : ToRemoves) {
683 V->eraseFromParent();
684 }
685
686 // And remove the function we don't need either too.
687 F->eraseFromParent();
688 }
689 }
690
691 return Changed;
692}
693
694bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
695 bool Changed = false;
696
697 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
698 {"_Z5isinff", {"__spirv_isinff", 1}},
699 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
700 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
701 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
702 {"_Z5isnanf", {"__spirv_isnanf", 1}},
703 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
704 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
705 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
706 };
707
708 for (auto Pair : Map) {
709 // If we find a function with the matching name.
710 if (auto F = M.getFunction(Pair.first)) {
711 SmallVector<Instruction *, 4> ToRemoves;
712
713 // Walk the users of the function.
714 for (auto &U : F->uses()) {
715 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
716 const auto CITy = CI->getType();
717
718 // The fake SPIR-V intrinsic to generate.
719 auto SPIRVIntrinsic = Pair.second.first;
720
721 // The value to return for true.
722 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
723
724 // The value to return for false.
725 auto FalseValue = Constant::getNullValue(CITy);
726
727 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
728 M.getContext(),
729 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
730
731 auto NewFType =
732 FunctionType::get(CorrespondingBoolTy,
733 F->getFunctionType()->getParamType(0), false);
734
735 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
736
737 auto Arg = CI->getOperand(0);
738
739 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
740
741 const auto Select =
742 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
743
744 CI->replaceAllUsesWith(Select);
745
746 // Lastly, remember to remove the user.
747 ToRemoves.push_back(CI);
748 }
749 }
750
751 Changed = !ToRemoves.empty();
752
753 // And cleanup the calls we don't use anymore.
754 for (auto V : ToRemoves) {
755 V->eraseFromParent();
756 }
757
758 // And remove the function we don't need either too.
759 F->eraseFromParent();
760 }
761 }
762
763 return Changed;
764}
765
766bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
767 bool Changed = false;
768
769 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000770 // all
771 {"_Z3alls", ""},
772 {"_Z3allDv2_s", "__spirv_allDv2_s"},
773 {"_Z3allDv3_s", "__spirv_allDv3_s"},
774 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400775 {"_Z3alli", ""},
776 {"_Z3allDv2_i", "__spirv_allDv2_i"},
777 {"_Z3allDv3_i", "__spirv_allDv3_i"},
778 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000779 {"_Z3alll", ""},
780 {"_Z3allDv2_l", "__spirv_allDv2_l"},
781 {"_Z3allDv3_l", "__spirv_allDv3_l"},
782 {"_Z3allDv4_l", "__spirv_allDv4_l"},
783
784 // any
785 {"_Z3anys", ""},
786 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
787 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
788 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400789 {"_Z3anyi", ""},
790 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
791 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
792 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000793 {"_Z3anyl", ""},
794 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
795 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
796 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400797 };
798
799 for (auto Pair : Map) {
800 // If we find a function with the matching name.
801 if (auto F = M.getFunction(Pair.first)) {
802 SmallVector<Instruction *, 4> ToRemoves;
803
804 // Walk the users of the function.
805 for (auto &U : F->uses()) {
806 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
807 // The fake SPIR-V intrinsic to generate.
808 auto SPIRVIntrinsic = Pair.second;
809
810 auto Arg = CI->getOperand(0);
811
812 Value *V;
813
Kévin Petitfd27cca2018-10-31 13:00:17 +0000814 // If the argument is a 32-bit int, just use a shift
815 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
816 V = BinaryOperator::Create(Instruction::LShr, Arg,
817 ConstantInt::get(Arg->getType(), 31), "",
818 CI);
819 } else {
David Neto22f144c2017-06-12 14:26:21 -0400820 // The value for zero to compare against.
821 const auto ZeroValue = Constant::getNullValue(Arg->getType());
822
David Neto22f144c2017-06-12 14:26:21 -0400823 // The value to return for true.
824 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
825
826 // The value to return for false.
827 const auto FalseValue = Constant::getNullValue(CI->getType());
828
Kévin Petitfd27cca2018-10-31 13:00:17 +0000829 const auto Cmp = CmpInst::Create(
830 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
831
832 Value* SelectSource;
833
834 // If we have a function to call, call it!
835 if (0 < strlen(SPIRVIntrinsic)) {
836
837 const auto NewFType = FunctionType::get(
838 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
839
840 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
841
842 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
843
844 SelectSource = NewCI;
845
846 } else {
847 SelectSource = Cmp;
848 }
849
850 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400851 }
852
853 CI->replaceAllUsesWith(V);
854
855 // Lastly, remember to remove the user.
856 ToRemoves.push_back(CI);
857 }
858 }
859
860 Changed = !ToRemoves.empty();
861
862 // And cleanup the calls we don't use anymore.
863 for (auto V : ToRemoves) {
864 V->eraseFromParent();
865 }
866
867 // And remove the function we don't need either too.
868 F->eraseFromParent();
869 }
870 }
871
872 return Changed;
873}
874
Kévin Petitf5b78a22018-10-25 14:32:17 +0000875bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
876 bool Changed = false;
877
878 for (auto const &SymVal : M.getValueSymbolTable()) {
879 // Skip symbols whose name doesn't match
880 if (!SymVal.getKey().startswith("_Z6select")) {
881 continue;
882 }
883 // Is there a function going by that name?
884 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
885
886 SmallVector<Instruction *, 4> ToRemoves;
887
888 // Walk the users of the function.
889 for (auto &U : F->uses()) {
890 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
891
892 // Get arguments
893 auto FalseValue = CI->getOperand(0);
894 auto TrueValue = CI->getOperand(1);
895 auto PredicateValue = CI->getOperand(2);
896
897 // Don't touch overloads that aren't in OpenCL C
898 auto FalseType = FalseValue->getType();
899 auto TrueType = TrueValue->getType();
900 auto PredicateType = PredicateValue->getType();
901
902 if (FalseType != TrueType) {
903 continue;
904 }
905
906 if (!PredicateType->isIntOrIntVectorTy()) {
907 continue;
908 }
909
910 if (!FalseType->isIntOrIntVectorTy() &&
911 !FalseType->getScalarType()->isFloatingPointTy()) {
912 continue;
913 }
914
915 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
916 continue;
917 }
918
919 if (FalseType->getScalarSizeInBits() !=
920 PredicateType->getScalarSizeInBits()) {
921 continue;
922 }
923
924 if (FalseType->isVectorTy()) {
925 if (FalseType->getVectorNumElements() !=
926 PredicateType->getVectorNumElements()) {
927 continue;
928 }
929
930 if ((FalseType->getVectorNumElements() != 2) &&
931 (FalseType->getVectorNumElements() != 3) &&
932 (FalseType->getVectorNumElements() != 4) &&
933 (FalseType->getVectorNumElements() != 8) &&
934 (FalseType->getVectorNumElements() != 16)) {
935 continue;
936 }
937 }
938
939 // Create constant
940 const auto ZeroValue = Constant::getNullValue(PredicateType);
941
942 // Scalar and vector are to be treated differently
943 CmpInst::Predicate Pred;
944 if (PredicateType->isVectorTy()) {
945 Pred = CmpInst::ICMP_SLT;
946 } else {
947 Pred = CmpInst::ICMP_NE;
948 }
949
950 // Create comparison instruction
951 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
952 ZeroValue, "", CI);
953
954 // Create select
955 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
956
957 // Replace call with the selection
958 CI->replaceAllUsesWith(V);
959
960 // Lastly, remember to remove the user.
961 ToRemoves.push_back(CI);
962 }
963 }
964
965 Changed = !ToRemoves.empty();
966
967 // And cleanup the calls we don't use anymore.
968 for (auto V : ToRemoves) {
969 V->eraseFromParent();
970 }
971
972 // And remove the function we don't need either too.
973 F->eraseFromParent();
974 }
975 }
976
977 return Changed;
978}
979
Kévin Petite7d0cce2018-10-31 12:38:56 +0000980bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
981 bool Changed = false;
982
983 for (auto const &SymVal : M.getValueSymbolTable()) {
984 // Skip symbols whose name doesn't match
985 if (!SymVal.getKey().startswith("_Z9bitselect")) {
986 continue;
987 }
988 // Is there a function going by that name?
989 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
990
991 SmallVector<Instruction *, 4> ToRemoves;
992
993 // Walk the users of the function.
994 for (auto &U : F->uses()) {
995 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
996
997 if (CI->getNumOperands() != 4) {
998 continue;
999 }
1000
1001 // Get arguments
1002 auto FalseValue = CI->getOperand(0);
1003 auto TrueValue = CI->getOperand(1);
1004 auto PredicateValue = CI->getOperand(2);
1005
1006 // Don't touch overloads that aren't in OpenCL C
1007 auto FalseType = FalseValue->getType();
1008 auto TrueType = TrueValue->getType();
1009 auto PredicateType = PredicateValue->getType();
1010
1011 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1012 continue;
1013 }
1014
1015 if (TrueType->isVectorTy()) {
1016 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1017 !TrueType->getScalarType()->isIntegerTy()) {
1018 continue;
1019 }
1020 if ((TrueType->getVectorNumElements() != 2) &&
1021 (TrueType->getVectorNumElements() != 3) &&
1022 (TrueType->getVectorNumElements() != 4) &&
1023 (TrueType->getVectorNumElements() != 8) &&
1024 (TrueType->getVectorNumElements() != 16)) {
1025 continue;
1026 }
1027 }
1028
1029 // Remember the type of the operands
1030 auto OpType = TrueType;
1031
1032 // The actual bit selection will always be done on an integer type,
1033 // declare it here
1034 Type *BitType;
1035
1036 // If the operands are float, then bitcast them to int
1037 if (OpType->getScalarType()->isFloatingPointTy()) {
1038
1039 // First create the new type
1040 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1041 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1042 if (OpType->isVectorTy()) {
1043 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
1044 }
1045
1046 // Then bitcast all operands
1047 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
1048 BitType, "", CI);
1049 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
1050 BitType, "", CI);
1051 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1052
1053 } else {
1054 // The operands have an integer type, use it directly
1055 BitType = OpType;
1056 }
1057
1058 // All the operands are now always integers
1059 // implement as (c & b) | (~c & a)
1060
1061 // Create our negated predicate value
1062 auto AllOnes = Constant::getAllOnesValue(BitType);
1063 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1064 PredicateValue,
1065 AllOnes, "", CI);
1066
1067 // Then put everything together
1068 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1069 NotPredicateValue,
1070 FalseValue, "", CI);
1071 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1072 PredicateValue,
1073 TrueValue, "", CI);
1074
1075 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1076 BitsTrue, "", CI);
1077
1078 // If we were dealing with a floating point type, we must bitcast
1079 // the result back to that
1080 if (OpType->getScalarType()->isFloatingPointTy()) {
1081 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1082 }
1083
1084 // Replace call with our new code
1085 CI->replaceAllUsesWith(V);
1086
1087 // Lastly, remember to remove the user.
1088 ToRemoves.push_back(CI);
1089 }
1090 }
1091
1092 Changed = !ToRemoves.empty();
1093
1094 // And cleanup the calls we don't use anymore.
1095 for (auto V : ToRemoves) {
1096 V->eraseFromParent();
1097 }
1098
1099 // And remove the function we don't need either too.
1100 F->eraseFromParent();
1101 }
1102 }
1103
1104 return Changed;
1105}
1106
Kévin Petit6b0a9532018-10-30 20:00:39 +00001107bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1108 bool Changed = false;
1109
1110 const std::map<const char *, const char *> Map = {
1111 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1112 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1113 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1114 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1115 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1116 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1117 };
1118
1119 for (auto Pair : Map) {
1120 // If we find a function with the matching name.
1121 if (auto F = M.getFunction(Pair.first)) {
1122 SmallVector<Instruction *, 4> ToRemoves;
1123
1124 // Walk the users of the function.
1125 for (auto &U : F->uses()) {
1126 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1127
1128 auto ReplacementFn = Pair.second;
1129
1130 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1131 Value *VectorArg;
1132
1133 // First figure out which function we're dealing with
1134 if (F->getName().startswith("_Z10smoothstep")) {
1135 ArgsToSplat.push_back(CI->getOperand(1));
1136 VectorArg = CI->getOperand(2);
1137 } else {
1138 VectorArg = CI->getOperand(1);
1139 }
1140
1141 // Splat arguments that need to be
1142 SmallVector<Value*, 2> SplatArgs;
1143 auto VecType = VectorArg->getType();
1144
1145 for (auto arg : ArgsToSplat) {
1146 Value* NewVectorArg = UndefValue::get(VecType);
1147 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1148 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1149 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1150 }
1151 SplatArgs.push_back(NewVectorArg);
1152 }
1153
1154 // Replace the call with the vector/vector flavour
1155 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1156 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1157
1158 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1159
1160 SmallVector<Value*, 3> NewArgs;
1161 for (auto arg : SplatArgs) {
1162 NewArgs.push_back(arg);
1163 }
1164 NewArgs.push_back(VectorArg);
1165
1166 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1167
1168 CI->replaceAllUsesWith(NewCI);
1169
1170 // Lastly, remember to remove the user.
1171 ToRemoves.push_back(CI);
1172 }
1173 }
1174
1175 Changed = !ToRemoves.empty();
1176
1177 // And cleanup the calls we don't use anymore.
1178 for (auto V : ToRemoves) {
1179 V->eraseFromParent();
1180 }
1181
1182 // And remove the function we don't need either too.
1183 F->eraseFromParent();
1184 }
1185 }
1186
1187 return Changed;
1188}
1189
David Neto22f144c2017-06-12 14:26:21 -04001190bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1191 bool Changed = false;
1192
1193 const std::map<const char *, Instruction::BinaryOps> Map = {
1194 {"_Z7signbitf", Instruction::LShr},
1195 {"_Z7signbitDv2_f", Instruction::AShr},
1196 {"_Z7signbitDv3_f", Instruction::AShr},
1197 {"_Z7signbitDv4_f", Instruction::AShr},
1198 };
1199
1200 for (auto Pair : Map) {
1201 // If we find a function with the matching name.
1202 if (auto F = M.getFunction(Pair.first)) {
1203 SmallVector<Instruction *, 4> ToRemoves;
1204
1205 // Walk the users of the function.
1206 for (auto &U : F->uses()) {
1207 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1208 auto Arg = CI->getOperand(0);
1209
1210 auto Bitcast =
1211 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1212
1213 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1214 ConstantInt::get(CI->getType(), 31),
1215 "", CI);
1216
1217 CI->replaceAllUsesWith(Shr);
1218
1219 // Lastly, remember to remove the user.
1220 ToRemoves.push_back(CI);
1221 }
1222 }
1223
1224 Changed = !ToRemoves.empty();
1225
1226 // And cleanup the calls we don't use anymore.
1227 for (auto V : ToRemoves) {
1228 V->eraseFromParent();
1229 }
1230
1231 // And remove the function we don't need either too.
1232 F->eraseFromParent();
1233 }
1234 }
1235
1236 return Changed;
1237}
1238
1239bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1240 bool Changed = false;
1241
1242 const std::map<const char *,
1243 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1244 Map = {
1245 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1246 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1247 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1248 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1249 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1250 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1251 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1252 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1253 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1254 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1255 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1256 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1257 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1258 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1259 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1260 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1261 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1262 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1263 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1264 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1265 };
1266
1267 for (auto Pair : Map) {
1268 // If we find a function with the matching name.
1269 if (auto F = M.getFunction(Pair.first)) {
1270 SmallVector<Instruction *, 4> ToRemoves;
1271
1272 // Walk the users of the function.
1273 for (auto &U : F->uses()) {
1274 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1275 // The multiply instruction to use.
1276 auto MulInst = Pair.second.first;
1277
1278 // The add instruction to use.
1279 auto AddInst = Pair.second.second;
1280
1281 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1282
1283 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1284 CI->getArgOperand(1), "", CI);
1285
1286 if (Instruction::BinaryOpsEnd != AddInst) {
1287 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1288 CI);
1289 }
1290
1291 CI->replaceAllUsesWith(I);
1292
1293 // Lastly, remember to remove the user.
1294 ToRemoves.push_back(CI);
1295 }
1296 }
1297
1298 Changed = !ToRemoves.empty();
1299
1300 // And cleanup the calls we don't use anymore.
1301 for (auto V : ToRemoves) {
1302 V->eraseFromParent();
1303 }
1304
1305 // And remove the function we don't need either too.
1306 F->eraseFromParent();
1307 }
1308 }
1309
1310 return Changed;
1311}
1312
Derek Chowcfd368b2017-10-19 20:58:45 -07001313bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1314 bool Changed = false;
1315
1316 struct VectorStoreOps {
1317 const char* name;
1318 int n;
1319 Type* (*get_scalar_type_function)(LLVMContext&);
1320 } vector_store_ops[] = {
1321 // TODO(derekjchow): Expand this list.
1322 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1323 };
1324
David Neto544fffc2017-11-16 18:35:14 -05001325 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001326 auto Name = Op.name;
1327 auto N = Op.n;
1328 auto TypeFn = Op.get_scalar_type_function;
1329 if (auto F = M.getFunction(Name)) {
1330 SmallVector<Instruction *, 4> ToRemoves;
1331
1332 // Walk the users of the function.
1333 for (auto &U : F->uses()) {
1334 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1335 // The value argument from vstoren.
1336 auto Arg0 = CI->getOperand(0);
1337
1338 // The index argument from vstoren.
1339 auto Arg1 = CI->getOperand(1);
1340
1341 // The pointer argument from vstoren.
1342 auto Arg2 = CI->getOperand(2);
1343
1344 // Get types.
1345 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1346 auto ScalarNPointerTy = PointerType::get(
1347 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1348
1349 // Cast to scalarn
1350 auto Cast = CastInst::CreatePointerCast(
1351 Arg2, ScalarNPointerTy, "", CI);
1352 // Index to correct address
1353 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1354 // Store
1355 auto Store = new StoreInst(Arg0, Index, CI);
1356
1357 CI->replaceAllUsesWith(Store);
1358 ToRemoves.push_back(CI);
1359 }
1360 }
1361
1362 Changed = !ToRemoves.empty();
1363
1364 // And cleanup the calls we don't use anymore.
1365 for (auto V : ToRemoves) {
1366 V->eraseFromParent();
1367 }
1368
1369 // And remove the function we don't need either too.
1370 F->eraseFromParent();
1371 }
1372 }
1373
1374 return Changed;
1375}
1376
1377bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1378 bool Changed = false;
1379
1380 struct VectorLoadOps {
1381 const char* name;
1382 int n;
1383 Type* (*get_scalar_type_function)(LLVMContext&);
1384 } vector_load_ops[] = {
1385 // TODO(derekjchow): Expand this list.
1386 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
1387 };
1388
David Neto544fffc2017-11-16 18:35:14 -05001389 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001390 auto Name = Op.name;
1391 auto N = Op.n;
1392 auto TypeFn = Op.get_scalar_type_function;
1393 // If we find a function with the matching name.
1394 if (auto F = M.getFunction(Name)) {
1395 SmallVector<Instruction *, 4> ToRemoves;
1396
1397 // Walk the users of the function.
1398 for (auto &U : F->uses()) {
1399 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1400 // The index argument from vloadn.
1401 auto Arg0 = CI->getOperand(0);
1402
1403 // The pointer argument from vloadn.
1404 auto Arg1 = CI->getOperand(1);
1405
1406 // Get types.
1407 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1408 auto ScalarNPointerTy = PointerType::get(
1409 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1410
1411 // Cast to scalarn
1412 auto Cast = CastInst::CreatePointerCast(
1413 Arg1, ScalarNPointerTy, "", CI);
1414 // Index to correct address
1415 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1416 // Load
1417 auto Load = new LoadInst(Index, "", CI);
1418
1419 CI->replaceAllUsesWith(Load);
1420 ToRemoves.push_back(CI);
1421 }
1422 }
1423
1424 Changed = !ToRemoves.empty();
1425
1426 // And cleanup the calls we don't use anymore.
1427 for (auto V : ToRemoves) {
1428 V->eraseFromParent();
1429 }
1430
1431 // And remove the function we don't need either too.
1432 F->eraseFromParent();
1433
1434 }
1435 }
1436
1437 return Changed;
1438}
1439
David Neto22f144c2017-06-12 14:26:21 -04001440bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1441 bool Changed = false;
1442
1443 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1444 "_Z10vload_halfjPU3AS2KDh"};
1445
1446 for (auto Name : Map) {
1447 // If we find a function with the matching name.
1448 if (auto F = M.getFunction(Name)) {
1449 SmallVector<Instruction *, 4> ToRemoves;
1450
1451 // Walk the users of the function.
1452 for (auto &U : F->uses()) {
1453 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1454 // The index argument from vload_half.
1455 auto Arg0 = CI->getOperand(0);
1456
1457 // The pointer argument from vload_half.
1458 auto Arg1 = CI->getOperand(1);
1459
David Neto22f144c2017-06-12 14:26:21 -04001460 auto IntTy = Type::getInt32Ty(M.getContext());
1461 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001462 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1463
David Neto22f144c2017-06-12 14:26:21 -04001464 // Our intrinsic to unpack a float2 from an int.
1465 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1466
1467 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1468
David Neto482550a2018-03-24 05:21:07 -07001469 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001470 auto ShortTy = Type::getInt16Ty(M.getContext());
1471 auto ShortPointerTy = PointerType::get(
1472 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001473
David Netoac825b82017-05-30 12:49:01 -04001474 // Cast the half* pointer to short*.
1475 auto Cast =
1476 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001477
David Netoac825b82017-05-30 12:49:01 -04001478 // Index into the correct address of the casted pointer.
1479 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1480
1481 // Load from the short* we casted to.
1482 auto Load = new LoadInst(Index, "", CI);
1483
1484 // ZExt the short -> int.
1485 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1486
1487 // Get our float2.
1488 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1489
1490 // Extract out the bottom element which is our float result.
1491 auto Extract = ExtractElementInst::Create(
1492 Call, ConstantInt::get(IntTy, 0), "", CI);
1493
1494 CI->replaceAllUsesWith(Extract);
1495 } else {
1496 // Assume the pointer argument points to storage aligned to 32bits
1497 // or more.
1498 // TODO(dneto): Do more analysis to make sure this is true?
1499 //
1500 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1501 // with:
1502 //
1503 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1504 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1505 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1506 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1507 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1508 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1509 // x float> %converted, %index_is_odd32
1510
1511 auto IntPointerTy = PointerType::get(
1512 IntTy, Arg1->getType()->getPointerAddressSpace());
1513
David Neto973e6a82017-05-30 13:48:18 -04001514 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001515 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001516 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001517 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1518
1519 auto One = ConstantInt::get(IntTy, 1);
1520 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1521 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1522
1523 // Index into the correct address of the casted pointer.
1524 auto Ptr =
1525 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1526
1527 // Load from the int* we casted to.
1528 auto Load = new LoadInst(Ptr, "", CI);
1529
1530 // Get our float2.
1531 auto Call = CallInst::Create(NewF, Load, "", CI);
1532
1533 // Extract out the float result, where the element number is
1534 // determined by whether the original index was even or odd.
1535 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1536
1537 CI->replaceAllUsesWith(Extract);
1538 }
David Neto22f144c2017-06-12 14:26:21 -04001539
1540 // Lastly, remember to remove the user.
1541 ToRemoves.push_back(CI);
1542 }
1543 }
1544
1545 Changed = !ToRemoves.empty();
1546
1547 // And cleanup the calls we don't use anymore.
1548 for (auto V : ToRemoves) {
1549 V->eraseFromParent();
1550 }
1551
1552 // And remove the function we don't need either too.
1553 F->eraseFromParent();
1554 }
1555 }
1556
1557 return Changed;
1558}
1559
1560bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1561 bool Changed = false;
1562
David Neto556c7e62018-06-08 13:45:55 -07001563 const std::vector<const char *> Map = {
1564 "_Z11vload_half2jPU3AS1KDh",
1565 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
1566 "_Z11vload_half2jPU3AS2KDh",
1567 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
1568 };
David Neto22f144c2017-06-12 14:26:21 -04001569
1570 for (auto Name : Map) {
1571 // If we find a function with the matching name.
1572 if (auto F = M.getFunction(Name)) {
1573 SmallVector<Instruction *, 4> ToRemoves;
1574
1575 // Walk the users of the function.
1576 for (auto &U : F->uses()) {
1577 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1578 // The index argument from vload_half.
1579 auto Arg0 = CI->getOperand(0);
1580
1581 // The pointer argument from vload_half.
1582 auto Arg1 = CI->getOperand(1);
1583
1584 auto IntTy = Type::getInt32Ty(M.getContext());
1585 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1586 auto NewPointerTy = PointerType::get(
1587 IntTy, Arg1->getType()->getPointerAddressSpace());
1588 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1589
1590 // Cast the half* pointer to int*.
1591 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1592
1593 // Index into the correct address of the casted pointer.
1594 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1595
1596 // Load from the int* we casted to.
1597 auto Load = new LoadInst(Index, "", CI);
1598
1599 // Our intrinsic to unpack a float2 from an int.
1600 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1601
1602 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1603
1604 // Get our float2.
1605 auto Call = CallInst::Create(NewF, Load, "", CI);
1606
1607 CI->replaceAllUsesWith(Call);
1608
1609 // Lastly, remember to remove the user.
1610 ToRemoves.push_back(CI);
1611 }
1612 }
1613
1614 Changed = !ToRemoves.empty();
1615
1616 // And cleanup the calls we don't use anymore.
1617 for (auto V : ToRemoves) {
1618 V->eraseFromParent();
1619 }
1620
1621 // And remove the function we don't need either too.
1622 F->eraseFromParent();
1623 }
1624 }
1625
1626 return Changed;
1627}
1628
1629bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1630 bool Changed = false;
1631
David Neto556c7e62018-06-08 13:45:55 -07001632 const std::vector<const char *> Map = {
1633 "_Z11vload_half4jPU3AS1KDh",
1634 "_Z12vloada_half4jPU3AS1KDh",
1635 "_Z11vload_half4jPU3AS2KDh",
1636 "_Z12vloada_half4jPU3AS2KDh",
1637 };
David Neto22f144c2017-06-12 14:26:21 -04001638
1639 for (auto Name : Map) {
1640 // If we find a function with the matching name.
1641 if (auto F = M.getFunction(Name)) {
1642 SmallVector<Instruction *, 4> ToRemoves;
1643
1644 // Walk the users of the function.
1645 for (auto &U : F->uses()) {
1646 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1647 // The index argument from vload_half.
1648 auto Arg0 = CI->getOperand(0);
1649
1650 // The pointer argument from vload_half.
1651 auto Arg1 = CI->getOperand(1);
1652
1653 auto IntTy = Type::getInt32Ty(M.getContext());
1654 auto Int2Ty = VectorType::get(IntTy, 2);
1655 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1656 auto NewPointerTy = PointerType::get(
1657 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1658 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1659
1660 // Cast the half* pointer to int2*.
1661 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1662
1663 // Index into the correct address of the casted pointer.
1664 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1665
1666 // Load from the int2* we casted to.
1667 auto Load = new LoadInst(Index, "", CI);
1668
1669 // Extract each element from the loaded int2.
1670 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1671 "", CI);
1672 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1673 "", CI);
1674
1675 // Our intrinsic to unpack a float2 from an int.
1676 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1677
1678 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1679
1680 // Get the lower (x & y) components of our final float4.
1681 auto Lo = CallInst::Create(NewF, X, "", CI);
1682
1683 // Get the higher (z & w) components of our final float4.
1684 auto Hi = CallInst::Create(NewF, Y, "", CI);
1685
1686 Constant *ShuffleMask[4] = {
1687 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1688 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1689
1690 // Combine our two float2's into one float4.
1691 auto Combine = new ShuffleVectorInst(
1692 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1693
1694 CI->replaceAllUsesWith(Combine);
1695
1696 // Lastly, remember to remove the user.
1697 ToRemoves.push_back(CI);
1698 }
1699 }
1700
1701 Changed = !ToRemoves.empty();
1702
1703 // And cleanup the calls we don't use anymore.
1704 for (auto V : ToRemoves) {
1705 V->eraseFromParent();
1706 }
1707
1708 // And remove the function we don't need either too.
1709 F->eraseFromParent();
1710 }
1711 }
1712
1713 return Changed;
1714}
1715
David Neto6ad93232018-06-07 15:42:58 -07001716bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
1717 bool Changed = false;
1718
1719 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1720 //
1721 // %u = load i32 %ptr
1722 // %fxy = call <2 x float> Unpack2xHalf(u)
1723 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1724 const std::vector<const char *> Map = {
1725 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
1726 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
1727 "_Z20__clspv_vloada_half2jPKj", // private
1728 };
1729
1730 for (auto Name : Map) {
1731 // If we find a function with the matching name.
1732 if (auto F = M.getFunction(Name)) {
1733 SmallVector<Instruction *, 4> ToRemoves;
1734
1735 // Walk the users of the function.
1736 for (auto &U : F->uses()) {
1737 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
1738 auto Index = CI->getOperand(0);
1739 auto Ptr = CI->getOperand(1);
1740
1741 auto IntTy = Type::getInt32Ty(M.getContext());
1742 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1743 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1744
1745 auto IndexedPtr =
1746 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
1747 auto Load = new LoadInst(IndexedPtr, "", CI);
1748
1749 // Our intrinsic to unpack a float2 from an int.
1750 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1751
1752 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1753
1754 // Get our final float2.
1755 auto Result = CallInst::Create(NewF, Load, "", CI);
1756
1757 CI->replaceAllUsesWith(Result);
1758
1759 // Lastly, remember to remove the user.
1760 ToRemoves.push_back(CI);
1761 }
1762 }
1763
1764 Changed = true;
1765
1766 // And cleanup the calls we don't use anymore.
1767 for (auto V : ToRemoves) {
1768 V->eraseFromParent();
1769 }
1770
1771 // And remove the function we don't need either too.
1772 F->eraseFromParent();
1773 }
1774 }
1775
1776 return Changed;
1777}
1778
1779bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
1780 bool Changed = false;
1781
1782 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1783 //
1784 // %u2 = load <2 x i32> %ptr
1785 // %u2xy = extractelement %u2, 0
1786 // %u2zw = extractelement %u2, 1
1787 // %fxy = call <2 x float> Unpack2xHalf(uint)
1788 // %fzw = call <2 x float> Unpack2xHalf(uint)
1789 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1790 const std::vector<const char *> Map = {
1791 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
1792 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
1793 "_Z20__clspv_vloada_half4jPKDv2_j", // private
1794 };
1795
1796 for (auto Name : Map) {
1797 // If we find a function with the matching name.
1798 if (auto F = M.getFunction(Name)) {
1799 SmallVector<Instruction *, 4> ToRemoves;
1800
1801 // Walk the users of the function.
1802 for (auto &U : F->uses()) {
1803 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1804 auto Index = CI->getOperand(0);
1805 auto Ptr = CI->getOperand(1);
1806
1807 auto IntTy = Type::getInt32Ty(M.getContext());
1808 auto Int2Ty = VectorType::get(IntTy, 2);
1809 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1810 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1811
1812 auto IndexedPtr =
1813 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
1814 auto Load = new LoadInst(IndexedPtr, "", CI);
1815
1816 // Extract each element from the loaded int2.
1817 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1818 "", CI);
1819 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1820 "", CI);
1821
1822 // Our intrinsic to unpack a float2 from an int.
1823 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1824
1825 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1826
1827 // Get the lower (x & y) components of our final float4.
1828 auto Lo = CallInst::Create(NewF, X, "", CI);
1829
1830 // Get the higher (z & w) components of our final float4.
1831 auto Hi = CallInst::Create(NewF, Y, "", CI);
1832
1833 Constant *ShuffleMask[4] = {
1834 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1835 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1836
1837 // Combine our two float2's into one float4.
1838 auto Combine = new ShuffleVectorInst(
1839 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1840
1841 CI->replaceAllUsesWith(Combine);
1842
1843 // Lastly, remember to remove the user.
1844 ToRemoves.push_back(CI);
1845 }
1846 }
1847
1848 Changed = true;
1849
1850 // And cleanup the calls we don't use anymore.
1851 for (auto V : ToRemoves) {
1852 V->eraseFromParent();
1853 }
1854
1855 // And remove the function we don't need either too.
1856 F->eraseFromParent();
1857 }
1858 }
1859
1860 return Changed;
1861}
1862
David Neto22f144c2017-06-12 14:26:21 -04001863bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1864 bool Changed = false;
1865
1866 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1867 "_Z15vstore_half_rtefjPU3AS1Dh",
1868 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1869
1870 for (auto Name : Map) {
1871 // If we find a function with the matching name.
1872 if (auto F = M.getFunction(Name)) {
1873 SmallVector<Instruction *, 4> ToRemoves;
1874
1875 // Walk the users of the function.
1876 for (auto &U : F->uses()) {
1877 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1878 // The value to store.
1879 auto Arg0 = CI->getOperand(0);
1880
1881 // The index argument from vstore_half.
1882 auto Arg1 = CI->getOperand(1);
1883
1884 // The pointer argument from vstore_half.
1885 auto Arg2 = CI->getOperand(2);
1886
David Neto22f144c2017-06-12 14:26:21 -04001887 auto IntTy = Type::getInt32Ty(M.getContext());
1888 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001889 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001890 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001891
1892 // Our intrinsic to pack a float2 to an int.
1893 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1894
1895 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1896
1897 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001898 auto TempVec =
1899 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1900 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001901
1902 // Pack the float2 -> half2 (in an int).
1903 auto X = CallInst::Create(NewF, TempVec, "", CI);
1904
David Neto482550a2018-03-24 05:21:07 -07001905 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001906 auto ShortTy = Type::getInt16Ty(M.getContext());
1907 auto ShortPointerTy = PointerType::get(
1908 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001909
David Neto17852de2017-05-29 17:29:31 -04001910 // Truncate our i32 to an i16.
1911 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001912
David Neto17852de2017-05-29 17:29:31 -04001913 // Cast the half* pointer to short*.
1914 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001915
David Neto17852de2017-05-29 17:29:31 -04001916 // Index into the correct address of the casted pointer.
1917 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001918
David Neto17852de2017-05-29 17:29:31 -04001919 // Store to the int* we casted to.
1920 auto Store = new StoreInst(Trunc, Index, CI);
1921
1922 CI->replaceAllUsesWith(Store);
1923 } else {
1924 // We can only write to 32-bit aligned words.
1925 //
1926 // Assuming base is aligned to 32-bits, replace the equivalent of
1927 // vstore_half(value, index, base)
1928 // with:
1929 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1930 // uint32_t write_to_upper_half = index & 1u;
1931 // uint32_t shift = write_to_upper_half << 4;
1932 //
1933 // // Pack the float value as a half number in bottom 16 bits
1934 // // of an i32.
1935 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1936 //
1937 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1938 // ^ ((packed & 0xffff) << shift)
1939 // // We only need relaxed consistency, but OpenCL 1.2 only has
1940 // // sequentially consistent atomics.
1941 // // TODO(dneto): Use relaxed consistency.
1942 // atomic_xor(target_ptr, xor_value)
1943 auto IntPointerTy = PointerType::get(
1944 IntTy, Arg2->getType()->getPointerAddressSpace());
1945
1946 auto Four = ConstantInt::get(IntTy, 4);
1947 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1948
1949 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1950 // Compute index / 2
1951 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1952 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1953 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1954 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1955 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1956 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1957 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1958
1959 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1960 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1961 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1962
1963 // Generate the call to atomi_xor.
1964 SmallVector<Type *, 5> ParamTypes;
1965 // The pointer type.
1966 ParamTypes.push_back(IntPointerTy);
1967 // The Types for memory scope, semantics, and value.
1968 ParamTypes.push_back(IntTy);
1969 ParamTypes.push_back(IntTy);
1970 ParamTypes.push_back(IntTy);
1971 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1972 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1973
1974 const auto ConstantScopeDevice =
1975 ConstantInt::get(IntTy, spv::ScopeDevice);
1976 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1977 // (SPIR-V Workgroup).
1978 const auto AddrSpaceSemanticsBits =
1979 IntPointerTy->getPointerAddressSpace() == 1
1980 ? spv::MemorySemanticsUniformMemoryMask
1981 : spv::MemorySemanticsWorkgroupMemoryMask;
1982
1983 // We're using relaxed consistency here.
1984 const auto ConstantMemorySemantics =
1985 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1986 AddrSpaceSemanticsBits);
1987
1988 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1989 ConstantMemorySemantics, ValueToXor};
1990 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1991 }
David Neto22f144c2017-06-12 14:26:21 -04001992
1993 // Lastly, remember to remove the user.
1994 ToRemoves.push_back(CI);
1995 }
1996 }
1997
1998 Changed = !ToRemoves.empty();
1999
2000 // And cleanup the calls we don't use anymore.
2001 for (auto V : ToRemoves) {
2002 V->eraseFromParent();
2003 }
2004
2005 // And remove the function we don't need either too.
2006 F->eraseFromParent();
2007 }
2008 }
2009
2010 return Changed;
2011}
2012
2013bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
2014 bool Changed = false;
2015
David Netoe2871522018-06-08 11:09:54 -07002016 const std::vector<const char *> Map = {
2017 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2018 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2019 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2020 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2021 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2022 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2023 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2024 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2025 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2026 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2027 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2028 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2029 };
David Neto22f144c2017-06-12 14:26:21 -04002030
2031 for (auto Name : Map) {
2032 // If we find a function with the matching name.
2033 if (auto F = M.getFunction(Name)) {
2034 SmallVector<Instruction *, 4> ToRemoves;
2035
2036 // Walk the users of the function.
2037 for (auto &U : F->uses()) {
2038 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2039 // The value to store.
2040 auto Arg0 = CI->getOperand(0);
2041
2042 // The index argument from vstore_half.
2043 auto Arg1 = CI->getOperand(1);
2044
2045 // The pointer argument from vstore_half.
2046 auto Arg2 = CI->getOperand(2);
2047
2048 auto IntTy = Type::getInt32Ty(M.getContext());
2049 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2050 auto NewPointerTy = PointerType::get(
2051 IntTy, Arg2->getType()->getPointerAddressSpace());
2052 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2053
2054 // Our intrinsic to pack a float2 to an int.
2055 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2056
2057 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2058
2059 // Turn the packed x & y into the final packing.
2060 auto X = CallInst::Create(NewF, Arg0, "", CI);
2061
2062 // Cast the half* pointer to int*.
2063 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2064
2065 // Index into the correct address of the casted pointer.
2066 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
2067
2068 // Store to the int* we casted to.
2069 auto Store = new StoreInst(X, Index, CI);
2070
2071 CI->replaceAllUsesWith(Store);
2072
2073 // Lastly, remember to remove the user.
2074 ToRemoves.push_back(CI);
2075 }
2076 }
2077
2078 Changed = !ToRemoves.empty();
2079
2080 // And cleanup the calls we don't use anymore.
2081 for (auto V : ToRemoves) {
2082 V->eraseFromParent();
2083 }
2084
2085 // And remove the function we don't need either too.
2086 F->eraseFromParent();
2087 }
2088 }
2089
2090 return Changed;
2091}
2092
2093bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
2094 bool Changed = false;
2095
David Netoe2871522018-06-08 11:09:54 -07002096 const std::vector<const char *> Map = {
2097 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2098 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2099 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2100 "_Z13vstorea_half4Dv4_fjPDh", // private
2101 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2102 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2103 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2104 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2105 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2106 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2107 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2108 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2109 };
David Neto22f144c2017-06-12 14:26:21 -04002110
2111 for (auto Name : Map) {
2112 // If we find a function with the matching name.
2113 if (auto F = M.getFunction(Name)) {
2114 SmallVector<Instruction *, 4> ToRemoves;
2115
2116 // Walk the users of the function.
2117 for (auto &U : F->uses()) {
2118 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2119 // The value to store.
2120 auto Arg0 = CI->getOperand(0);
2121
2122 // The index argument from vstore_half.
2123 auto Arg1 = CI->getOperand(1);
2124
2125 // The pointer argument from vstore_half.
2126 auto Arg2 = CI->getOperand(2);
2127
2128 auto IntTy = Type::getInt32Ty(M.getContext());
2129 auto Int2Ty = VectorType::get(IntTy, 2);
2130 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2131 auto NewPointerTy = PointerType::get(
2132 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2133 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2134
2135 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2136 ConstantInt::get(IntTy, 1)};
2137
2138 // Extract out the x & y components of our to store value.
2139 auto Lo =
2140 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2141 ConstantVector::get(LoShuffleMask), "", CI);
2142
2143 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2144 ConstantInt::get(IntTy, 3)};
2145
2146 // Extract out the z & w components of our to store value.
2147 auto Hi =
2148 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2149 ConstantVector::get(HiShuffleMask), "", CI);
2150
2151 // Our intrinsic to pack a float2 to an int.
2152 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2153
2154 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2155
2156 // Turn the packed x & y into the final component of our int2.
2157 auto X = CallInst::Create(NewF, Lo, "", CI);
2158
2159 // Turn the packed z & w into the final component of our int2.
2160 auto Y = CallInst::Create(NewF, Hi, "", CI);
2161
2162 auto Combine = InsertElementInst::Create(
2163 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2164 Combine = InsertElementInst::Create(
2165 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
2166
2167 // Cast the half* pointer to int2*.
2168 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2169
2170 // Index into the correct address of the casted pointer.
2171 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
2172
2173 // Store to the int2* we casted to.
2174 auto Store = new StoreInst(Combine, Index, CI);
2175
2176 CI->replaceAllUsesWith(Store);
2177
2178 // Lastly, remember to remove the user.
2179 ToRemoves.push_back(CI);
2180 }
2181 }
2182
2183 Changed = !ToRemoves.empty();
2184
2185 // And cleanup the calls we don't use anymore.
2186 for (auto V : ToRemoves) {
2187 V->eraseFromParent();
2188 }
2189
2190 // And remove the function we don't need either too.
2191 F->eraseFromParent();
2192 }
2193 }
2194
2195 return Changed;
2196}
2197
2198bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2199 bool Changed = false;
2200
2201 const std::map<const char *, const char*> Map = {
2202 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2203 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2204 };
2205
2206 for (auto Pair : Map) {
2207 // If we find a function with the matching name.
2208 if (auto F = M.getFunction(Pair.first)) {
2209 SmallVector<Instruction *, 4> ToRemoves;
2210
2211 // Walk the users of the function.
2212 for (auto &U : F->uses()) {
2213 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2214 // The image.
2215 auto Arg0 = CI->getOperand(0);
2216
2217 // The sampler.
2218 auto Arg1 = CI->getOperand(1);
2219
2220 // The coordinate (integer type that we can't handle).
2221 auto Arg2 = CI->getOperand(2);
2222
2223 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2224
2225 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2226
2227 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2228
2229 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2230
2231 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2232
2233 CI->replaceAllUsesWith(NewCI);
2234
2235 // Lastly, remember to remove the user.
2236 ToRemoves.push_back(CI);
2237 }
2238 }
2239
2240 Changed = !ToRemoves.empty();
2241
2242 // And cleanup the calls we don't use anymore.
2243 for (auto V : ToRemoves) {
2244 V->eraseFromParent();
2245 }
2246
2247 // And remove the function we don't need either too.
2248 F->eraseFromParent();
2249 }
2250 }
2251
2252 return Changed;
2253}
2254
2255bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2256 bool Changed = false;
2257
2258 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002259 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
2260 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
2261 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
2262 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
2263 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
2264 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002265 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
2266 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
2267 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
2268 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
2269 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01002270 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002271
2272 for (auto Pair : Map) {
2273 // If we find a function with the matching name.
2274 if (auto F = M.getFunction(Pair.first)) {
2275 SmallVector<Instruction *, 4> ToRemoves;
2276
2277 // Walk the users of the function.
2278 for (auto &U : F->uses()) {
2279 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2280 auto FType = F->getFunctionType();
2281 SmallVector<Type *, 5> ParamTypes;
2282
2283 // The pointer type.
2284 ParamTypes.push_back(FType->getParamType(0));
2285
2286 auto IntTy = Type::getInt32Ty(M.getContext());
2287
2288 // The memory scope type.
2289 ParamTypes.push_back(IntTy);
2290
2291 // The memory semantics type.
2292 ParamTypes.push_back(IntTy);
2293
2294 if (2 < CI->getNumArgOperands()) {
2295 // The unequal memory semantics type.
2296 ParamTypes.push_back(IntTy);
2297
2298 // The value type.
2299 ParamTypes.push_back(FType->getParamType(2));
2300
2301 // The comparator type.
2302 ParamTypes.push_back(FType->getParamType(1));
2303 } else if (1 < CI->getNumArgOperands()) {
2304 // The value type.
2305 ParamTypes.push_back(FType->getParamType(1));
2306 }
2307
2308 auto NewFType =
2309 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2310 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2311
2312 // We need to map the OpenCL constants to the SPIR-V equivalents.
2313 const auto ConstantScopeDevice =
2314 ConstantInt::get(IntTy, spv::ScopeDevice);
2315 const auto ConstantMemorySemantics = ConstantInt::get(
2316 IntTy, spv::MemorySemanticsUniformMemoryMask |
2317 spv::MemorySemanticsSequentiallyConsistentMask);
2318
2319 SmallVector<Value *, 5> Params;
2320
2321 // The pointer.
2322 Params.push_back(CI->getArgOperand(0));
2323
2324 // The memory scope.
2325 Params.push_back(ConstantScopeDevice);
2326
2327 // The memory semantics.
2328 Params.push_back(ConstantMemorySemantics);
2329
2330 if (2 < CI->getNumArgOperands()) {
2331 // The unequal memory semantics.
2332 Params.push_back(ConstantMemorySemantics);
2333
2334 // The value.
2335 Params.push_back(CI->getArgOperand(2));
2336
2337 // The comparator.
2338 Params.push_back(CI->getArgOperand(1));
2339 } else if (1 < CI->getNumArgOperands()) {
2340 // The value.
2341 Params.push_back(CI->getArgOperand(1));
2342 }
2343
2344 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2345
2346 CI->replaceAllUsesWith(NewCI);
2347
2348 // Lastly, remember to remove the user.
2349 ToRemoves.push_back(CI);
2350 }
2351 }
2352
2353 Changed = !ToRemoves.empty();
2354
2355 // And cleanup the calls we don't use anymore.
2356 for (auto V : ToRemoves) {
2357 V->eraseFromParent();
2358 }
2359
2360 // And remove the function we don't need either too.
2361 F->eraseFromParent();
2362 }
2363 }
2364
Neil Henning39672102017-09-29 14:33:13 +01002365 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002366 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2367 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2368 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2369 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2370 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2371 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2372 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2373 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2374 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2375 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2376 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2377 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2378 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2379 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2380 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2381 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002382 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2383 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2384 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2385 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2386 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2387 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2388 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2389 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2390 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2391 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2392 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2393 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2394 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2395 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2396 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2397 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
2398
2399 for (auto Pair : Map2) {
2400 // If we find a function with the matching name.
2401 if (auto F = M.getFunction(Pair.first)) {
2402 SmallVector<Instruction *, 4> ToRemoves;
2403
2404 // Walk the users of the function.
2405 for (auto &U : F->uses()) {
2406 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2407 auto AtomicOp = new AtomicRMWInst(
2408 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2409 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2410
2411 CI->replaceAllUsesWith(AtomicOp);
2412
2413 // Lastly, remember to remove the user.
2414 ToRemoves.push_back(CI);
2415 }
2416 }
2417
2418 Changed = !ToRemoves.empty();
2419
2420 // And cleanup the calls we don't use anymore.
2421 for (auto V : ToRemoves) {
2422 V->eraseFromParent();
2423 }
2424
2425 // And remove the function we don't need either too.
2426 F->eraseFromParent();
2427 }
2428 }
2429
David Neto22f144c2017-06-12 14:26:21 -04002430 return Changed;
2431}
2432
2433bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
2434 bool Changed = false;
2435
2436 // If we find a function with the matching name.
2437 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
2438 SmallVector<Instruction *, 4> ToRemoves;
2439
2440 auto IntTy = Type::getInt32Ty(M.getContext());
2441 auto FloatTy = Type::getFloatTy(M.getContext());
2442
2443 Constant *DownShuffleMask[3] = {
2444 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2445 ConstantInt::get(IntTy, 2)};
2446
2447 Constant *UpShuffleMask[4] = {
2448 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2449 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2450
2451 Constant *FloatVec[3] = {
2452 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2453 };
2454
2455 // Walk the users of the function.
2456 for (auto &U : F->uses()) {
2457 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2458 auto Vec4Ty = CI->getArgOperand(0)->getType();
2459 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2460 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2461 auto Vec3Ty = Arg0->getType();
2462
2463 auto NewFType =
2464 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
2465
2466 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
2467
2468 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
2469
2470 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
2471
2472 CI->replaceAllUsesWith(Result);
2473
2474 // Lastly, remember to remove the user.
2475 ToRemoves.push_back(CI);
2476 }
2477 }
2478
2479 Changed = !ToRemoves.empty();
2480
2481 // And cleanup the calls we don't use anymore.
2482 for (auto V : ToRemoves) {
2483 V->eraseFromParent();
2484 }
2485
2486 // And remove the function we don't need either too.
2487 F->eraseFromParent();
2488 }
2489
2490 return Changed;
2491}
David Neto62653202017-10-16 19:05:18 -04002492
2493bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2494 bool Changed = false;
2495
2496 // OpenCL's float result = fract(float x, float* ptr)
2497 //
2498 // In the LLVM domain:
2499 //
2500 // %floor_result = call spir_func float @floor(float %x)
2501 // store float %floor_result, float * %ptr
2502 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2503 // %result = call spir_func float
2504 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2505 //
2506 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2507 // and clspv.fract occur in the SPIR-V generator pass:
2508 //
2509 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2510 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2511 // ...
2512 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2513 // OpStore %ptr %floor_result
2514 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2515 // %fract_result = OpExtInst %float
2516 // %glsl_ext Fmin %fract_intermediate %just_under_1
2517
2518
2519 using std::string;
2520
2521 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2522 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2523 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2524 auto make_quad = [](const char *a, const char *b, const char *c,
2525 const char *d) {
2526 return std::tuple<const char *, const char *, const char *, const char *>(
2527 a, b, c, d);
2528 };
2529 const std::vector<QuadType> Functions = {
2530 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2531 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2532 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2533 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2534 };
2535
2536 for (auto& quad : Functions) {
2537 const StringRef fract_name(std::get<0>(quad));
2538
2539 // If we find a function with the matching name.
2540 if (auto F = M.getFunction(fract_name)) {
2541 if (F->use_begin() == F->use_end())
2542 continue;
2543
2544 // We have some uses.
2545 Changed = true;
2546
2547 auto& Context = M.getContext();
2548
2549 const StringRef floor_name(std::get<1>(quad));
2550 const StringRef fmin_name(std::get<2>(quad));
2551 const StringRef clspv_fract_name(std::get<3>(quad));
2552
2553 // This is either float or a float vector. All the float-like
2554 // types are this type.
2555 auto result_ty = F->getReturnType();
2556
2557 Function* fmin_fn = M.getFunction(fmin_name);
2558 if (!fmin_fn) {
2559 // Make the fmin function.
2560 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
2561 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002562 fmin_fn->addFnAttr(Attribute::ReadNone);
2563 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2564 }
2565
2566 Function* floor_fn = M.getFunction(floor_name);
2567 if (!floor_fn) {
2568 // Make the floor function.
2569 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2570 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002571 floor_fn->addFnAttr(Attribute::ReadNone);
2572 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2573 }
2574
2575 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
2576 if (!clspv_fract_fn) {
2577 // Make the clspv_fract function.
2578 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2579 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002580 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2581 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2582 }
2583
2584 // Number of significant significand bits, whether represented or not.
2585 unsigned num_significand_bits;
2586 switch (result_ty->getScalarType()->getTypeID()) {
2587 case Type::HalfTyID:
2588 num_significand_bits = 11;
2589 break;
2590 case Type::FloatTyID:
2591 num_significand_bits = 24;
2592 break;
2593 case Type::DoubleTyID:
2594 num_significand_bits = 53;
2595 break;
2596 default:
2597 assert(false && "Unhandled float type when processing fract builtin");
2598 break;
2599 }
2600 // Beware that the disassembler displays this value as
2601 // OpConstant %float 1
2602 // which is not quite right.
2603 const double kJustUnderOneScalar =
2604 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2605
2606 Constant *just_under_one =
2607 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2608 if (result_ty->isVectorTy()) {
2609 just_under_one = ConstantVector::getSplat(
2610 result_ty->getVectorNumElements(), just_under_one);
2611 }
2612
2613 IRBuilder<> Builder(Context);
2614
2615 SmallVector<Instruction *, 4> ToRemoves;
2616
2617 // Walk the users of the function.
2618 for (auto &U : F->uses()) {
2619 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2620
2621 Builder.SetInsertPoint(CI);
2622 auto arg = CI->getArgOperand(0);
2623 auto ptr = CI->getArgOperand(1);
2624
2625 // Compute floor result and store it.
2626 auto floor = Builder.CreateCall(floor_fn, {arg});
2627 Builder.CreateStore(floor, ptr);
2628
2629 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2630 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2631
2632 CI->replaceAllUsesWith(fract_result);
2633
2634 // Lastly, remember to remove the user.
2635 ToRemoves.push_back(CI);
2636 }
2637 }
2638
2639 // And cleanup the calls we don't use anymore.
2640 for (auto V : ToRemoves) {
2641 V->eraseFromParent();
2642 }
2643
2644 // And remove the function we don't need either too.
2645 F->eraseFromParent();
2646 }
2647 }
2648
2649 return Changed;
2650}