blob: 7bcc6657f60c7a30415ee364738fd519c243d3a4 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto118188e2018-08-24 11:27:54 -040019#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000023#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040024#include "llvm/Pass.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/raw_ostream.h"
27#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040028
David Neto118188e2018-08-24 11:27:54 -040029#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040030
David Neto482550a2018-03-24 05:21:07 -070031#include "clspv/Option.h"
32
David Neto22f144c2017-06-12 14:26:21 -040033using namespace llvm;
34
35#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
36
37namespace {
// Despite its name, this does not count leading zeros: it returns the
// zero-based index of the most significant set bit of |v| (floor(log2(v))).
// Both 0 and 1 yield 0. Callers in this file subtract two results to obtain
// the shift distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t HighBitIndex = 0;

  // Binary search for the top bit using halving widths: 16, 8, 4, 2, 1.
  for (uint32_t Width = 16; Width != 0; Width >>= 1) {
    if ((v >> Width) != 0) {
      v >>= Width;
      HighBitIndex |= Width;
    }
  }

  return HighBitIndex;
}
57
58Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
59 if (1 == elements) {
60 return Type::getInt1Ty(C);
61 } else {
62 return VectorType::get(Type::getInt1Ty(C), elements);
63 }
64}
65
66struct ReplaceOpenCLBuiltinPass final : public ModulePass {
67 static char ID;
68 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
69
70 bool runOnModule(Module &M) override;
71 bool replaceRecip(Module &M);
72 bool replaceDivide(Module &M);
73 bool replaceExp10(Module &M);
74 bool replaceLog10(Module &M);
75 bool replaceBarrier(Module &M);
76 bool replaceMemFence(Module &M);
77 bool replaceRelational(Module &M);
78 bool replaceIsInfAndIsNan(Module &M);
79 bool replaceAllAndAny(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +000080 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +000081 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +000082 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040083 bool replaceSignbit(Module &M);
84 bool replaceMadandMad24andMul24(Module &M);
85 bool replaceVloadHalf(Module &M);
86 bool replaceVloadHalf2(Module &M);
87 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -070088 bool replaceClspvVloadaHalf2(Module &M);
89 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040090 bool replaceVstoreHalf(Module &M);
91 bool replaceVstoreHalf2(Module &M);
92 bool replaceVstoreHalf4(Module &M);
93 bool replaceReadImageF(Module &M);
94 bool replaceAtomics(Module &M);
95 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040096 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070097 bool replaceVload(Module &M);
98 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040099};
100}
101
102char ReplaceOpenCLBuiltinPass::ID = 0;
103static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
104 "Replace OpenCL Builtins Pass");
105
106namespace clspv {
107ModulePass *createReplaceOpenCLBuiltinPass() {
108 return new ReplaceOpenCLBuiltinPass();
109}
110}
111
112bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
113 bool Changed = false;
114
115 Changed |= replaceRecip(M);
116 Changed |= replaceDivide(M);
117 Changed |= replaceExp10(M);
118 Changed |= replaceLog10(M);
119 Changed |= replaceBarrier(M);
120 Changed |= replaceMemFence(M);
121 Changed |= replaceRelational(M);
122 Changed |= replaceIsInfAndIsNan(M);
123 Changed |= replaceAllAndAny(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000124 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000125 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000126 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400127 Changed |= replaceSignbit(M);
128 Changed |= replaceMadandMad24andMul24(M);
129 Changed |= replaceVloadHalf(M);
130 Changed |= replaceVloadHalf2(M);
131 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700132 Changed |= replaceClspvVloadaHalf2(M);
133 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400134 Changed |= replaceVstoreHalf(M);
135 Changed |= replaceVstoreHalf2(M);
136 Changed |= replaceVstoreHalf4(M);
137 Changed |= replaceReadImageF(M);
138 Changed |= replaceAtomics(M);
139 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400140 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700141 Changed |= replaceVload(M);
142 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400143
144 return Changed;
145}
146
147bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
148 bool Changed = false;
149
150 const char *Names[] = {
151 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
152 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
153 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
154 };
155
156 for (auto Name : Names) {
157 // If we find a function with the matching name.
158 if (auto F = M.getFunction(Name)) {
159 SmallVector<Instruction *, 4> ToRemoves;
160
161 // Walk the users of the function.
162 for (auto &U : F->uses()) {
163 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
164 // Recip has one arg.
165 auto Arg = CI->getOperand(0);
166
167 auto Div = BinaryOperator::Create(
168 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
169 CI);
170
171 CI->replaceAllUsesWith(Div);
172
173 // Lastly, remember to remove the user.
174 ToRemoves.push_back(CI);
175 }
176 }
177
178 Changed = !ToRemoves.empty();
179
180 // And cleanup the calls we don't use anymore.
181 for (auto V : ToRemoves) {
182 V->eraseFromParent();
183 }
184
185 // And remove the function we don't need either too.
186 F->eraseFromParent();
187 }
188 }
189
190 return Changed;
191}
192
193bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
194 bool Changed = false;
195
196 const char *Names[] = {
197 "_Z11half_divideff", "_Z13native_divideff",
198 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
199 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
200 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
201 };
202
203 for (auto Name : Names) {
204 // If we find a function with the matching name.
205 if (auto F = M.getFunction(Name)) {
206 SmallVector<Instruction *, 4> ToRemoves;
207
208 // Walk the users of the function.
209 for (auto &U : F->uses()) {
210 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
211 auto Div = BinaryOperator::Create(
212 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
213
214 CI->replaceAllUsesWith(Div);
215
216 // Lastly, remember to remove the user.
217 ToRemoves.push_back(CI);
218 }
219 }
220
221 Changed = !ToRemoves.empty();
222
223 // And cleanup the calls we don't use anymore.
224 for (auto V : ToRemoves) {
225 V->eraseFromParent();
226 }
227
228 // And remove the function we don't need either too.
229 F->eraseFromParent();
230 }
231 }
232
233 return Changed;
234}
235
236bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
237 bool Changed = false;
238
239 const std::map<const char *, const char *> Map = {
240 {"_Z5exp10f", "_Z3expf"},
241 {"_Z10half_exp10f", "_Z8half_expf"},
242 {"_Z12native_exp10f", "_Z10native_expf"},
243 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
244 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
245 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
246 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
247 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
248 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
249 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
250 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
251 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
252
253 for (auto Pair : Map) {
254 // If we find a function with the matching name.
255 if (auto F = M.getFunction(Pair.first)) {
256 SmallVector<Instruction *, 4> ToRemoves;
257
258 // Walk the users of the function.
259 for (auto &U : F->uses()) {
260 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
261 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
262
263 auto Arg = CI->getOperand(0);
264
265 // Constant of the natural log of 10 (ln(10)).
266 const double Ln10 =
267 2.302585092994045684017991454684364207601101488628772976033;
268
269 auto Mul = BinaryOperator::Create(
270 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
271 CI);
272
273 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
274
275 CI->replaceAllUsesWith(NewCI);
276
277 // Lastly, remember to remove the user.
278 ToRemoves.push_back(CI);
279 }
280 }
281
282 Changed = !ToRemoves.empty();
283
284 // And cleanup the calls we don't use anymore.
285 for (auto V : ToRemoves) {
286 V->eraseFromParent();
287 }
288
289 // And remove the function we don't need either too.
290 F->eraseFromParent();
291 }
292 }
293
294 return Changed;
295}
296
297bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
298 bool Changed = false;
299
300 const std::map<const char *, const char *> Map = {
301 {"_Z5log10f", "_Z3logf"},
302 {"_Z10half_log10f", "_Z8half_logf"},
303 {"_Z12native_log10f", "_Z10native_logf"},
304 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
305 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
306 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
307 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
308 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
309 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
310 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
311 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
312 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
313
314 for (auto Pair : Map) {
315 // If we find a function with the matching name.
316 if (auto F = M.getFunction(Pair.first)) {
317 SmallVector<Instruction *, 4> ToRemoves;
318
319 // Walk the users of the function.
320 for (auto &U : F->uses()) {
321 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
322 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
323
324 auto Arg = CI->getOperand(0);
325
326 // Constant of the reciprocal of the natural log of 10 (ln(10)).
327 const double Ln10 =
328 0.434294481903251827651128918916605082294397005803666566114;
329
330 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
331
332 auto Mul = BinaryOperator::Create(
333 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
334 "", CI);
335
336 CI->replaceAllUsesWith(Mul);
337
338 // Lastly, remember to remove the user.
339 ToRemoves.push_back(CI);
340 }
341 }
342
343 Changed = !ToRemoves.empty();
344
345 // And cleanup the calls we don't use anymore.
346 for (auto V : ToRemoves) {
347 V->eraseFromParent();
348 }
349
350 // And remove the function we don't need either too.
351 F->eraseFromParent();
352 }
353 }
354
355 return Changed;
356}
357
358bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
359 bool Changed = false;
360
361 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
362
363 const std::map<const char *, const char *> Map = {
364 {"_Z7barrierj", "__spirv_control_barrier"}};
365
366 for (auto Pair : Map) {
367 // If we find a function with the matching name.
368 if (auto F = M.getFunction(Pair.first)) {
369 SmallVector<Instruction *, 4> ToRemoves;
370
371 // Walk the users of the function.
372 for (auto &U : F->uses()) {
373 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
374 auto FType = F->getFunctionType();
375 SmallVector<Type *, 3> Params;
376 for (unsigned i = 0; i < 3; i++) {
377 Params.push_back(FType->getParamType(0));
378 }
379 auto NewFType =
380 FunctionType::get(FType->getReturnType(), Params, false);
381 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
382
383 auto Arg = CI->getOperand(0);
384
385 // We need to map the OpenCL constants to the SPIR-V equivalents.
386 const auto LocalMemFence =
387 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
388 const auto GlobalMemFence =
389 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
390 const auto ConstantSequentiallyConsistent = ConstantInt::get(
391 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
392 const auto ConstantScopeDevice =
393 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
394 const auto ConstantScopeWorkgroup =
395 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
396
397 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
398 const auto LocalMemFenceMask = BinaryOperator::Create(
399 Instruction::And, LocalMemFence, Arg, "", CI);
400 const auto WorkgroupShiftAmount =
401 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
402 clz(CLK_LOCAL_MEM_FENCE);
403 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
404 Instruction::Shl, LocalMemFenceMask,
405 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
406
407 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
408 const auto GlobalMemFenceMask = BinaryOperator::Create(
409 Instruction::And, GlobalMemFence, Arg, "", CI);
410 const auto UniformShiftAmount =
411 clz(spv::MemorySemanticsUniformMemoryMask) -
412 clz(CLK_GLOBAL_MEM_FENCE);
413 const auto MemorySemanticsUniform = BinaryOperator::Create(
414 Instruction::Shl, GlobalMemFenceMask,
415 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
416
417 // And combine the above together, also adding in
418 // MemorySemanticsSequentiallyConsistentMask.
419 auto MemorySemantics =
420 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
421 ConstantSequentiallyConsistent, "", CI);
422 MemorySemantics = BinaryOperator::Create(
423 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
424
425 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
426 // Device Scope, otherwise Workgroup Scope.
427 const auto Cmp =
428 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
429 GlobalMemFenceMask, GlobalMemFence, "", CI);
430 const auto MemoryScope = SelectInst::Create(
431 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
432
433 // Lastly, the Execution Scope is always Workgroup Scope.
434 const auto ExecutionScope = ConstantScopeWorkgroup;
435
436 auto NewCI = CallInst::Create(
437 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
438
439 CI->replaceAllUsesWith(NewCI);
440
441 // Lastly, remember to remove the user.
442 ToRemoves.push_back(CI);
443 }
444 }
445
446 Changed = !ToRemoves.empty();
447
448 // And cleanup the calls we don't use anymore.
449 for (auto V : ToRemoves) {
450 V->eraseFromParent();
451 }
452
453 // And remove the function we don't need either too.
454 F->eraseFromParent();
455 }
456 }
457
458 return Changed;
459}
460
461bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
462 bool Changed = false;
463
464 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
465
Neil Henning39672102017-09-29 14:33:13 +0100466 using Tuple = std::tuple<const char *, unsigned>;
467 const std::map<const char *, Tuple> Map = {
468 {"_Z9mem_fencej",
469 Tuple("__spirv_memory_barrier",
470 spv::MemorySemanticsSequentiallyConsistentMask)},
471 {"_Z14read_mem_fencej",
472 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
473 {"_Z15write_mem_fencej",
474 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400475
476 for (auto Pair : Map) {
477 // If we find a function with the matching name.
478 if (auto F = M.getFunction(Pair.first)) {
479 SmallVector<Instruction *, 4> ToRemoves;
480
481 // Walk the users of the function.
482 for (auto &U : F->uses()) {
483 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
484 auto FType = F->getFunctionType();
485 SmallVector<Type *, 2> Params;
486 for (unsigned i = 0; i < 2; i++) {
487 Params.push_back(FType->getParamType(0));
488 }
489 auto NewFType =
490 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100491 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400492
493 auto Arg = CI->getOperand(0);
494
495 // We need to map the OpenCL constants to the SPIR-V equivalents.
496 const auto LocalMemFence =
497 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
498 const auto GlobalMemFence =
499 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
500 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100501 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400502 const auto ConstantScopeDevice =
503 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
504
505 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
506 const auto LocalMemFenceMask = BinaryOperator::Create(
507 Instruction::And, LocalMemFence, Arg, "", CI);
508 const auto WorkgroupShiftAmount =
509 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
510 clz(CLK_LOCAL_MEM_FENCE);
511 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
512 Instruction::Shl, LocalMemFenceMask,
513 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
514
515 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
516 const auto GlobalMemFenceMask = BinaryOperator::Create(
517 Instruction::And, GlobalMemFence, Arg, "", CI);
518 const auto UniformShiftAmount =
519 clz(spv::MemorySemanticsUniformMemoryMask) -
520 clz(CLK_GLOBAL_MEM_FENCE);
521 const auto MemorySemanticsUniform = BinaryOperator::Create(
522 Instruction::Shl, GlobalMemFenceMask,
523 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
524
525 // And combine the above together, also adding in
526 // MemorySemanticsSequentiallyConsistentMask.
527 auto MemorySemantics =
528 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
529 ConstantMemorySemantics, "", CI);
530 MemorySemantics = BinaryOperator::Create(
531 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
532
533 // Memory Scope is always device.
534 const auto MemoryScope = ConstantScopeDevice;
535
536 auto NewCI =
537 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
538
539 CI->replaceAllUsesWith(NewCI);
540
541 // Lastly, remember to remove the user.
542 ToRemoves.push_back(CI);
543 }
544 }
545
546 Changed = !ToRemoves.empty();
547
548 // And cleanup the calls we don't use anymore.
549 for (auto V : ToRemoves) {
550 V->eraseFromParent();
551 }
552
553 // And remove the function we don't need either too.
554 F->eraseFromParent();
555 }
556 }
557
558 return Changed;
559}
560
561bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
562 bool Changed = false;
563
564 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
565 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
566 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
567 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
568 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
569 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
570 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
571 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
572 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
573 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
574 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
575 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
576 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
577 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
578 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
579 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
580 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
581 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
582 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
583 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
584 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
585 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
586 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
587 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
588 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
589 };
590
591 for (auto Pair : Map) {
592 // If we find a function with the matching name.
593 if (auto F = M.getFunction(Pair.first)) {
594 SmallVector<Instruction *, 4> ToRemoves;
595
596 // Walk the users of the function.
597 for (auto &U : F->uses()) {
598 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
599 // The predicate to use in the CmpInst.
600 auto Predicate = Pair.second.first;
601
602 // The value to return for true.
603 auto TrueValue =
604 ConstantInt::getSigned(CI->getType(), Pair.second.second);
605
606 // The value to return for false.
607 auto FalseValue = Constant::getNullValue(CI->getType());
608
609 auto Arg1 = CI->getOperand(0);
610 auto Arg2 = CI->getOperand(1);
611
612 const auto Cmp =
613 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
614
615 const auto Select =
616 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
617
618 CI->replaceAllUsesWith(Select);
619
620 // Lastly, remember to remove the user.
621 ToRemoves.push_back(CI);
622 }
623 }
624
625 Changed = !ToRemoves.empty();
626
627 // And cleanup the calls we don't use anymore.
628 for (auto V : ToRemoves) {
629 V->eraseFromParent();
630 }
631
632 // And remove the function we don't need either too.
633 F->eraseFromParent();
634 }
635 }
636
637 return Changed;
638}
639
640bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
641 bool Changed = false;
642
643 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
644 {"_Z5isinff", {"__spirv_isinff", 1}},
645 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
646 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
647 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
648 {"_Z5isnanf", {"__spirv_isnanf", 1}},
649 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
650 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
651 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
652 };
653
654 for (auto Pair : Map) {
655 // If we find a function with the matching name.
656 if (auto F = M.getFunction(Pair.first)) {
657 SmallVector<Instruction *, 4> ToRemoves;
658
659 // Walk the users of the function.
660 for (auto &U : F->uses()) {
661 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
662 const auto CITy = CI->getType();
663
664 // The fake SPIR-V intrinsic to generate.
665 auto SPIRVIntrinsic = Pair.second.first;
666
667 // The value to return for true.
668 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
669
670 // The value to return for false.
671 auto FalseValue = Constant::getNullValue(CITy);
672
673 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
674 M.getContext(),
675 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
676
677 auto NewFType =
678 FunctionType::get(CorrespondingBoolTy,
679 F->getFunctionType()->getParamType(0), false);
680
681 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
682
683 auto Arg = CI->getOperand(0);
684
685 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
686
687 const auto Select =
688 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
689
690 CI->replaceAllUsesWith(Select);
691
692 // Lastly, remember to remove the user.
693 ToRemoves.push_back(CI);
694 }
695 }
696
697 Changed = !ToRemoves.empty();
698
699 // And cleanup the calls we don't use anymore.
700 for (auto V : ToRemoves) {
701 V->eraseFromParent();
702 }
703
704 // And remove the function we don't need either too.
705 F->eraseFromParent();
706 }
707 }
708
709 return Changed;
710}
711
712bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
713 bool Changed = false;
714
715 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000716 // all
717 {"_Z3alls", ""},
718 {"_Z3allDv2_s", "__spirv_allDv2_s"},
719 {"_Z3allDv3_s", "__spirv_allDv3_s"},
720 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400721 {"_Z3alli", ""},
722 {"_Z3allDv2_i", "__spirv_allDv2_i"},
723 {"_Z3allDv3_i", "__spirv_allDv3_i"},
724 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000725 {"_Z3alll", ""},
726 {"_Z3allDv2_l", "__spirv_allDv2_l"},
727 {"_Z3allDv3_l", "__spirv_allDv3_l"},
728 {"_Z3allDv4_l", "__spirv_allDv4_l"},
729
730 // any
731 {"_Z3anys", ""},
732 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
733 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
734 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400735 {"_Z3anyi", ""},
736 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
737 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
738 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000739 {"_Z3anyl", ""},
740 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
741 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
742 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400743 };
744
745 for (auto Pair : Map) {
746 // If we find a function with the matching name.
747 if (auto F = M.getFunction(Pair.first)) {
748 SmallVector<Instruction *, 4> ToRemoves;
749
750 // Walk the users of the function.
751 for (auto &U : F->uses()) {
752 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
753 // The fake SPIR-V intrinsic to generate.
754 auto SPIRVIntrinsic = Pair.second;
755
756 auto Arg = CI->getOperand(0);
757
758 Value *V;
759
Kévin Petitfd27cca2018-10-31 13:00:17 +0000760 // If the argument is a 32-bit int, just use a shift
761 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
762 V = BinaryOperator::Create(Instruction::LShr, Arg,
763 ConstantInt::get(Arg->getType(), 31), "",
764 CI);
765 } else {
David Neto22f144c2017-06-12 14:26:21 -0400766 // The value for zero to compare against.
767 const auto ZeroValue = Constant::getNullValue(Arg->getType());
768
David Neto22f144c2017-06-12 14:26:21 -0400769 // The value to return for true.
770 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
771
772 // The value to return for false.
773 const auto FalseValue = Constant::getNullValue(CI->getType());
774
Kévin Petitfd27cca2018-10-31 13:00:17 +0000775 const auto Cmp = CmpInst::Create(
776 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
777
778 Value* SelectSource;
779
780 // If we have a function to call, call it!
781 if (0 < strlen(SPIRVIntrinsic)) {
782
783 const auto NewFType = FunctionType::get(
784 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
785
786 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
787
788 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
789
790 SelectSource = NewCI;
791
792 } else {
793 SelectSource = Cmp;
794 }
795
796 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400797 }
798
799 CI->replaceAllUsesWith(V);
800
801 // Lastly, remember to remove the user.
802 ToRemoves.push_back(CI);
803 }
804 }
805
806 Changed = !ToRemoves.empty();
807
808 // And cleanup the calls we don't use anymore.
809 for (auto V : ToRemoves) {
810 V->eraseFromParent();
811 }
812
813 // And remove the function we don't need either too.
814 F->eraseFromParent();
815 }
816 }
817
818 return Changed;
819}
820
Kévin Petitf5b78a22018-10-25 14:32:17 +0000821bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
822 bool Changed = false;
823
824 for (auto const &SymVal : M.getValueSymbolTable()) {
825 // Skip symbols whose name doesn't match
826 if (!SymVal.getKey().startswith("_Z6select")) {
827 continue;
828 }
829 // Is there a function going by that name?
830 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
831
832 SmallVector<Instruction *, 4> ToRemoves;
833
834 // Walk the users of the function.
835 for (auto &U : F->uses()) {
836 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
837
838 // Get arguments
839 auto FalseValue = CI->getOperand(0);
840 auto TrueValue = CI->getOperand(1);
841 auto PredicateValue = CI->getOperand(2);
842
843 // Don't touch overloads that aren't in OpenCL C
844 auto FalseType = FalseValue->getType();
845 auto TrueType = TrueValue->getType();
846 auto PredicateType = PredicateValue->getType();
847
848 if (FalseType != TrueType) {
849 continue;
850 }
851
852 if (!PredicateType->isIntOrIntVectorTy()) {
853 continue;
854 }
855
856 if (!FalseType->isIntOrIntVectorTy() &&
857 !FalseType->getScalarType()->isFloatingPointTy()) {
858 continue;
859 }
860
861 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
862 continue;
863 }
864
865 if (FalseType->getScalarSizeInBits() !=
866 PredicateType->getScalarSizeInBits()) {
867 continue;
868 }
869
870 if (FalseType->isVectorTy()) {
871 if (FalseType->getVectorNumElements() !=
872 PredicateType->getVectorNumElements()) {
873 continue;
874 }
875
876 if ((FalseType->getVectorNumElements() != 2) &&
877 (FalseType->getVectorNumElements() != 3) &&
878 (FalseType->getVectorNumElements() != 4) &&
879 (FalseType->getVectorNumElements() != 8) &&
880 (FalseType->getVectorNumElements() != 16)) {
881 continue;
882 }
883 }
884
885 // Create constant
886 const auto ZeroValue = Constant::getNullValue(PredicateType);
887
888 // Scalar and vector are to be treated differently
889 CmpInst::Predicate Pred;
890 if (PredicateType->isVectorTy()) {
891 Pred = CmpInst::ICMP_SLT;
892 } else {
893 Pred = CmpInst::ICMP_NE;
894 }
895
896 // Create comparison instruction
897 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
898 ZeroValue, "", CI);
899
900 // Create select
901 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
902
903 // Replace call with the selection
904 CI->replaceAllUsesWith(V);
905
906 // Lastly, remember to remove the user.
907 ToRemoves.push_back(CI);
908 }
909 }
910
911 Changed = !ToRemoves.empty();
912
913 // And cleanup the calls we don't use anymore.
914 for (auto V : ToRemoves) {
915 V->eraseFromParent();
916 }
917
918 // And remove the function we don't need either too.
919 F->eraseFromParent();
920 }
921 }
922
923 return Changed;
924}
925
Kévin Petite7d0cce2018-10-31 12:38:56 +0000926bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
927 bool Changed = false;
928
929 for (auto const &SymVal : M.getValueSymbolTable()) {
930 // Skip symbols whose name doesn't match
931 if (!SymVal.getKey().startswith("_Z9bitselect")) {
932 continue;
933 }
934 // Is there a function going by that name?
935 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
936
937 SmallVector<Instruction *, 4> ToRemoves;
938
939 // Walk the users of the function.
940 for (auto &U : F->uses()) {
941 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
942
943 if (CI->getNumOperands() != 4) {
944 continue;
945 }
946
947 // Get arguments
948 auto FalseValue = CI->getOperand(0);
949 auto TrueValue = CI->getOperand(1);
950 auto PredicateValue = CI->getOperand(2);
951
952 // Don't touch overloads that aren't in OpenCL C
953 auto FalseType = FalseValue->getType();
954 auto TrueType = TrueValue->getType();
955 auto PredicateType = PredicateValue->getType();
956
957 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
958 continue;
959 }
960
961 if (TrueType->isVectorTy()) {
962 if (!TrueType->getScalarType()->isFloatingPointTy() &&
963 !TrueType->getScalarType()->isIntegerTy()) {
964 continue;
965 }
966 if ((TrueType->getVectorNumElements() != 2) &&
967 (TrueType->getVectorNumElements() != 3) &&
968 (TrueType->getVectorNumElements() != 4) &&
969 (TrueType->getVectorNumElements() != 8) &&
970 (TrueType->getVectorNumElements() != 16)) {
971 continue;
972 }
973 }
974
975 // Remember the type of the operands
976 auto OpType = TrueType;
977
978 // The actual bit selection will always be done on an integer type,
979 // declare it here
980 Type *BitType;
981
982 // If the operands are float, then bitcast them to int
983 if (OpType->getScalarType()->isFloatingPointTy()) {
984
985 // First create the new type
986 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
987 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
988 if (OpType->isVectorTy()) {
989 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
990 }
991
992 // Then bitcast all operands
993 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
994 BitType, "", CI);
995 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
996 BitType, "", CI);
997 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
998
999 } else {
1000 // The operands have an integer type, use it directly
1001 BitType = OpType;
1002 }
1003
1004 // All the operands are now always integers
1005 // implement as (c & b) | (~c & a)
1006
1007 // Create our negated predicate value
1008 auto AllOnes = Constant::getAllOnesValue(BitType);
1009 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1010 PredicateValue,
1011 AllOnes, "", CI);
1012
1013 // Then put everything together
1014 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1015 NotPredicateValue,
1016 FalseValue, "", CI);
1017 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1018 PredicateValue,
1019 TrueValue, "", CI);
1020
1021 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1022 BitsTrue, "", CI);
1023
1024 // If we were dealing with a floating point type, we must bitcast
1025 // the result back to that
1026 if (OpType->getScalarType()->isFloatingPointTy()) {
1027 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1028 }
1029
1030 // Replace call with our new code
1031 CI->replaceAllUsesWith(V);
1032
1033 // Lastly, remember to remove the user.
1034 ToRemoves.push_back(CI);
1035 }
1036 }
1037
1038 Changed = !ToRemoves.empty();
1039
1040 // And cleanup the calls we don't use anymore.
1041 for (auto V : ToRemoves) {
1042 V->eraseFromParent();
1043 }
1044
1045 // And remove the function we don't need either too.
1046 F->eraseFromParent();
1047 }
1048 }
1049
1050 return Changed;
1051}
1052
Kévin Petit6b0a9532018-10-30 20:00:39 +00001053bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1054 bool Changed = false;
1055
1056 const std::map<const char *, const char *> Map = {
1057 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1058 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1059 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1060 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1061 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1062 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1063 };
1064
1065 for (auto Pair : Map) {
1066 // If we find a function with the matching name.
1067 if (auto F = M.getFunction(Pair.first)) {
1068 SmallVector<Instruction *, 4> ToRemoves;
1069
1070 // Walk the users of the function.
1071 for (auto &U : F->uses()) {
1072 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1073
1074 auto ReplacementFn = Pair.second;
1075
1076 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1077 Value *VectorArg;
1078
1079 // First figure out which function we're dealing with
1080 if (F->getName().startswith("_Z10smoothstep")) {
1081 ArgsToSplat.push_back(CI->getOperand(1));
1082 VectorArg = CI->getOperand(2);
1083 } else {
1084 VectorArg = CI->getOperand(1);
1085 }
1086
1087 // Splat arguments that need to be
1088 SmallVector<Value*, 2> SplatArgs;
1089 auto VecType = VectorArg->getType();
1090
1091 for (auto arg : ArgsToSplat) {
1092 Value* NewVectorArg = UndefValue::get(VecType);
1093 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1094 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1095 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1096 }
1097 SplatArgs.push_back(NewVectorArg);
1098 }
1099
1100 // Replace the call with the vector/vector flavour
1101 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1102 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1103
1104 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1105
1106 SmallVector<Value*, 3> NewArgs;
1107 for (auto arg : SplatArgs) {
1108 NewArgs.push_back(arg);
1109 }
1110 NewArgs.push_back(VectorArg);
1111
1112 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1113
1114 CI->replaceAllUsesWith(NewCI);
1115
1116 // Lastly, remember to remove the user.
1117 ToRemoves.push_back(CI);
1118 }
1119 }
1120
1121 Changed = !ToRemoves.empty();
1122
1123 // And cleanup the calls we don't use anymore.
1124 for (auto V : ToRemoves) {
1125 V->eraseFromParent();
1126 }
1127
1128 // And remove the function we don't need either too.
1129 F->eraseFromParent();
1130 }
1131 }
1132
1133 return Changed;
1134}
1135
David Neto22f144c2017-06-12 14:26:21 -04001136bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1137 bool Changed = false;
1138
1139 const std::map<const char *, Instruction::BinaryOps> Map = {
1140 {"_Z7signbitf", Instruction::LShr},
1141 {"_Z7signbitDv2_f", Instruction::AShr},
1142 {"_Z7signbitDv3_f", Instruction::AShr},
1143 {"_Z7signbitDv4_f", Instruction::AShr},
1144 };
1145
1146 for (auto Pair : Map) {
1147 // If we find a function with the matching name.
1148 if (auto F = M.getFunction(Pair.first)) {
1149 SmallVector<Instruction *, 4> ToRemoves;
1150
1151 // Walk the users of the function.
1152 for (auto &U : F->uses()) {
1153 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1154 auto Arg = CI->getOperand(0);
1155
1156 auto Bitcast =
1157 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1158
1159 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1160 ConstantInt::get(CI->getType(), 31),
1161 "", CI);
1162
1163 CI->replaceAllUsesWith(Shr);
1164
1165 // Lastly, remember to remove the user.
1166 ToRemoves.push_back(CI);
1167 }
1168 }
1169
1170 Changed = !ToRemoves.empty();
1171
1172 // And cleanup the calls we don't use anymore.
1173 for (auto V : ToRemoves) {
1174 V->eraseFromParent();
1175 }
1176
1177 // And remove the function we don't need either too.
1178 F->eraseFromParent();
1179 }
1180 }
1181
1182 return Changed;
1183}
1184
1185bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1186 bool Changed = false;
1187
1188 const std::map<const char *,
1189 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1190 Map = {
1191 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1192 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1193 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1194 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1195 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1196 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1197 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1198 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1199 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1200 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1201 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1202 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1203 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1204 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1205 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1206 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1207 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1208 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1209 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1210 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1211 };
1212
1213 for (auto Pair : Map) {
1214 // If we find a function with the matching name.
1215 if (auto F = M.getFunction(Pair.first)) {
1216 SmallVector<Instruction *, 4> ToRemoves;
1217
1218 // Walk the users of the function.
1219 for (auto &U : F->uses()) {
1220 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1221 // The multiply instruction to use.
1222 auto MulInst = Pair.second.first;
1223
1224 // The add instruction to use.
1225 auto AddInst = Pair.second.second;
1226
1227 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1228
1229 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1230 CI->getArgOperand(1), "", CI);
1231
1232 if (Instruction::BinaryOpsEnd != AddInst) {
1233 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1234 CI);
1235 }
1236
1237 CI->replaceAllUsesWith(I);
1238
1239 // Lastly, remember to remove the user.
1240 ToRemoves.push_back(CI);
1241 }
1242 }
1243
1244 Changed = !ToRemoves.empty();
1245
1246 // And cleanup the calls we don't use anymore.
1247 for (auto V : ToRemoves) {
1248 V->eraseFromParent();
1249 }
1250
1251 // And remove the function we don't need either too.
1252 F->eraseFromParent();
1253 }
1254 }
1255
1256 return Changed;
1257}
1258
Derek Chowcfd368b2017-10-19 20:58:45 -07001259bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1260 bool Changed = false;
1261
1262 struct VectorStoreOps {
1263 const char* name;
1264 int n;
1265 Type* (*get_scalar_type_function)(LLVMContext&);
1266 } vector_store_ops[] = {
1267 // TODO(derekjchow): Expand this list.
1268 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1269 };
1270
David Neto544fffc2017-11-16 18:35:14 -05001271 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001272 auto Name = Op.name;
1273 auto N = Op.n;
1274 auto TypeFn = Op.get_scalar_type_function;
1275 if (auto F = M.getFunction(Name)) {
1276 SmallVector<Instruction *, 4> ToRemoves;
1277
1278 // Walk the users of the function.
1279 for (auto &U : F->uses()) {
1280 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1281 // The value argument from vstoren.
1282 auto Arg0 = CI->getOperand(0);
1283
1284 // The index argument from vstoren.
1285 auto Arg1 = CI->getOperand(1);
1286
1287 // The pointer argument from vstoren.
1288 auto Arg2 = CI->getOperand(2);
1289
1290 // Get types.
1291 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1292 auto ScalarNPointerTy = PointerType::get(
1293 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1294
1295 // Cast to scalarn
1296 auto Cast = CastInst::CreatePointerCast(
1297 Arg2, ScalarNPointerTy, "", CI);
1298 // Index to correct address
1299 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1300 // Store
1301 auto Store = new StoreInst(Arg0, Index, CI);
1302
1303 CI->replaceAllUsesWith(Store);
1304 ToRemoves.push_back(CI);
1305 }
1306 }
1307
1308 Changed = !ToRemoves.empty();
1309
1310 // And cleanup the calls we don't use anymore.
1311 for (auto V : ToRemoves) {
1312 V->eraseFromParent();
1313 }
1314
1315 // And remove the function we don't need either too.
1316 F->eraseFromParent();
1317 }
1318 }
1319
1320 return Changed;
1321}
1322
1323bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1324 bool Changed = false;
1325
1326 struct VectorLoadOps {
1327 const char* name;
1328 int n;
1329 Type* (*get_scalar_type_function)(LLVMContext&);
1330 } vector_load_ops[] = {
1331 // TODO(derekjchow): Expand this list.
1332 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
1333 };
1334
David Neto544fffc2017-11-16 18:35:14 -05001335 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001336 auto Name = Op.name;
1337 auto N = Op.n;
1338 auto TypeFn = Op.get_scalar_type_function;
1339 // If we find a function with the matching name.
1340 if (auto F = M.getFunction(Name)) {
1341 SmallVector<Instruction *, 4> ToRemoves;
1342
1343 // Walk the users of the function.
1344 for (auto &U : F->uses()) {
1345 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1346 // The index argument from vloadn.
1347 auto Arg0 = CI->getOperand(0);
1348
1349 // The pointer argument from vloadn.
1350 auto Arg1 = CI->getOperand(1);
1351
1352 // Get types.
1353 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1354 auto ScalarNPointerTy = PointerType::get(
1355 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1356
1357 // Cast to scalarn
1358 auto Cast = CastInst::CreatePointerCast(
1359 Arg1, ScalarNPointerTy, "", CI);
1360 // Index to correct address
1361 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1362 // Load
1363 auto Load = new LoadInst(Index, "", CI);
1364
1365 CI->replaceAllUsesWith(Load);
1366 ToRemoves.push_back(CI);
1367 }
1368 }
1369
1370 Changed = !ToRemoves.empty();
1371
1372 // And cleanup the calls we don't use anymore.
1373 for (auto V : ToRemoves) {
1374 V->eraseFromParent();
1375 }
1376
1377 // And remove the function we don't need either too.
1378 F->eraseFromParent();
1379
1380 }
1381 }
1382
1383 return Changed;
1384}
1385
David Neto22f144c2017-06-12 14:26:21 -04001386bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1387 bool Changed = false;
1388
1389 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1390 "_Z10vload_halfjPU3AS2KDh"};
1391
1392 for (auto Name : Map) {
1393 // If we find a function with the matching name.
1394 if (auto F = M.getFunction(Name)) {
1395 SmallVector<Instruction *, 4> ToRemoves;
1396
1397 // Walk the users of the function.
1398 for (auto &U : F->uses()) {
1399 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1400 // The index argument from vload_half.
1401 auto Arg0 = CI->getOperand(0);
1402
1403 // The pointer argument from vload_half.
1404 auto Arg1 = CI->getOperand(1);
1405
David Neto22f144c2017-06-12 14:26:21 -04001406 auto IntTy = Type::getInt32Ty(M.getContext());
1407 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001408 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1409
David Neto22f144c2017-06-12 14:26:21 -04001410 // Our intrinsic to unpack a float2 from an int.
1411 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1412
1413 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1414
David Neto482550a2018-03-24 05:21:07 -07001415 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001416 auto ShortTy = Type::getInt16Ty(M.getContext());
1417 auto ShortPointerTy = PointerType::get(
1418 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001419
David Netoac825b82017-05-30 12:49:01 -04001420 // Cast the half* pointer to short*.
1421 auto Cast =
1422 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001423
David Netoac825b82017-05-30 12:49:01 -04001424 // Index into the correct address of the casted pointer.
1425 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1426
1427 // Load from the short* we casted to.
1428 auto Load = new LoadInst(Index, "", CI);
1429
1430 // ZExt the short -> int.
1431 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1432
1433 // Get our float2.
1434 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1435
1436 // Extract out the bottom element which is our float result.
1437 auto Extract = ExtractElementInst::Create(
1438 Call, ConstantInt::get(IntTy, 0), "", CI);
1439
1440 CI->replaceAllUsesWith(Extract);
1441 } else {
1442 // Assume the pointer argument points to storage aligned to 32bits
1443 // or more.
1444 // TODO(dneto): Do more analysis to make sure this is true?
1445 //
1446 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1447 // with:
1448 //
1449 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1450 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1451 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1452 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1453 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1454 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1455 // x float> %converted, %index_is_odd32
1456
1457 auto IntPointerTy = PointerType::get(
1458 IntTy, Arg1->getType()->getPointerAddressSpace());
1459
David Neto973e6a82017-05-30 13:48:18 -04001460 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001461 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001462 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001463 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1464
1465 auto One = ConstantInt::get(IntTy, 1);
1466 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1467 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1468
1469 // Index into the correct address of the casted pointer.
1470 auto Ptr =
1471 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1472
1473 // Load from the int* we casted to.
1474 auto Load = new LoadInst(Ptr, "", CI);
1475
1476 // Get our float2.
1477 auto Call = CallInst::Create(NewF, Load, "", CI);
1478
1479 // Extract out the float result, where the element number is
1480 // determined by whether the original index was even or odd.
1481 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1482
1483 CI->replaceAllUsesWith(Extract);
1484 }
David Neto22f144c2017-06-12 14:26:21 -04001485
1486 // Lastly, remember to remove the user.
1487 ToRemoves.push_back(CI);
1488 }
1489 }
1490
1491 Changed = !ToRemoves.empty();
1492
1493 // And cleanup the calls we don't use anymore.
1494 for (auto V : ToRemoves) {
1495 V->eraseFromParent();
1496 }
1497
1498 // And remove the function we don't need either too.
1499 F->eraseFromParent();
1500 }
1501 }
1502
1503 return Changed;
1504}
1505
1506bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1507 bool Changed = false;
1508
David Neto556c7e62018-06-08 13:45:55 -07001509 const std::vector<const char *> Map = {
1510 "_Z11vload_half2jPU3AS1KDh",
1511 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
1512 "_Z11vload_half2jPU3AS2KDh",
1513 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
1514 };
David Neto22f144c2017-06-12 14:26:21 -04001515
1516 for (auto Name : Map) {
1517 // If we find a function with the matching name.
1518 if (auto F = M.getFunction(Name)) {
1519 SmallVector<Instruction *, 4> ToRemoves;
1520
1521 // Walk the users of the function.
1522 for (auto &U : F->uses()) {
1523 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1524 // The index argument from vload_half.
1525 auto Arg0 = CI->getOperand(0);
1526
1527 // The pointer argument from vload_half.
1528 auto Arg1 = CI->getOperand(1);
1529
1530 auto IntTy = Type::getInt32Ty(M.getContext());
1531 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1532 auto NewPointerTy = PointerType::get(
1533 IntTy, Arg1->getType()->getPointerAddressSpace());
1534 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1535
1536 // Cast the half* pointer to int*.
1537 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1538
1539 // Index into the correct address of the casted pointer.
1540 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1541
1542 // Load from the int* we casted to.
1543 auto Load = new LoadInst(Index, "", CI);
1544
1545 // Our intrinsic to unpack a float2 from an int.
1546 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1547
1548 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1549
1550 // Get our float2.
1551 auto Call = CallInst::Create(NewF, Load, "", CI);
1552
1553 CI->replaceAllUsesWith(Call);
1554
1555 // Lastly, remember to remove the user.
1556 ToRemoves.push_back(CI);
1557 }
1558 }
1559
1560 Changed = !ToRemoves.empty();
1561
1562 // And cleanup the calls we don't use anymore.
1563 for (auto V : ToRemoves) {
1564 V->eraseFromParent();
1565 }
1566
1567 // And remove the function we don't need either too.
1568 F->eraseFromParent();
1569 }
1570 }
1571
1572 return Changed;
1573}
1574
1575bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1576 bool Changed = false;
1577
David Neto556c7e62018-06-08 13:45:55 -07001578 const std::vector<const char *> Map = {
1579 "_Z11vload_half4jPU3AS1KDh",
1580 "_Z12vloada_half4jPU3AS1KDh",
1581 "_Z11vload_half4jPU3AS2KDh",
1582 "_Z12vloada_half4jPU3AS2KDh",
1583 };
David Neto22f144c2017-06-12 14:26:21 -04001584
1585 for (auto Name : Map) {
1586 // If we find a function with the matching name.
1587 if (auto F = M.getFunction(Name)) {
1588 SmallVector<Instruction *, 4> ToRemoves;
1589
1590 // Walk the users of the function.
1591 for (auto &U : F->uses()) {
1592 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1593 // The index argument from vload_half.
1594 auto Arg0 = CI->getOperand(0);
1595
1596 // The pointer argument from vload_half.
1597 auto Arg1 = CI->getOperand(1);
1598
1599 auto IntTy = Type::getInt32Ty(M.getContext());
1600 auto Int2Ty = VectorType::get(IntTy, 2);
1601 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1602 auto NewPointerTy = PointerType::get(
1603 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1604 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1605
1606 // Cast the half* pointer to int2*.
1607 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1608
1609 // Index into the correct address of the casted pointer.
1610 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1611
1612 // Load from the int2* we casted to.
1613 auto Load = new LoadInst(Index, "", CI);
1614
1615 // Extract each element from the loaded int2.
1616 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1617 "", CI);
1618 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1619 "", CI);
1620
1621 // Our intrinsic to unpack a float2 from an int.
1622 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1623
1624 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1625
1626 // Get the lower (x & y) components of our final float4.
1627 auto Lo = CallInst::Create(NewF, X, "", CI);
1628
1629 // Get the higher (z & w) components of our final float4.
1630 auto Hi = CallInst::Create(NewF, Y, "", CI);
1631
1632 Constant *ShuffleMask[4] = {
1633 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1634 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1635
1636 // Combine our two float2's into one float4.
1637 auto Combine = new ShuffleVectorInst(
1638 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1639
1640 CI->replaceAllUsesWith(Combine);
1641
1642 // Lastly, remember to remove the user.
1643 ToRemoves.push_back(CI);
1644 }
1645 }
1646
1647 Changed = !ToRemoves.empty();
1648
1649 // And cleanup the calls we don't use anymore.
1650 for (auto V : ToRemoves) {
1651 V->eraseFromParent();
1652 }
1653
1654 // And remove the function we don't need either too.
1655 F->eraseFromParent();
1656 }
1657 }
1658
1659 return Changed;
1660}
1661
David Neto6ad93232018-06-07 15:42:58 -07001662bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
1663 bool Changed = false;
1664
1665 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1666 //
1667 // %u = load i32 %ptr
1668 // %fxy = call <2 x float> Unpack2xHalf(u)
1669 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1670 const std::vector<const char *> Map = {
1671 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
1672 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
1673 "_Z20__clspv_vloada_half2jPKj", // private
1674 };
1675
1676 for (auto Name : Map) {
1677 // If we find a function with the matching name.
1678 if (auto F = M.getFunction(Name)) {
1679 SmallVector<Instruction *, 4> ToRemoves;
1680
1681 // Walk the users of the function.
1682 for (auto &U : F->uses()) {
1683 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
1684 auto Index = CI->getOperand(0);
1685 auto Ptr = CI->getOperand(1);
1686
1687 auto IntTy = Type::getInt32Ty(M.getContext());
1688 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1689 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1690
1691 auto IndexedPtr =
1692 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
1693 auto Load = new LoadInst(IndexedPtr, "", CI);
1694
1695 // Our intrinsic to unpack a float2 from an int.
1696 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1697
1698 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1699
1700 // Get our final float2.
1701 auto Result = CallInst::Create(NewF, Load, "", CI);
1702
1703 CI->replaceAllUsesWith(Result);
1704
1705 // Lastly, remember to remove the user.
1706 ToRemoves.push_back(CI);
1707 }
1708 }
1709
1710 Changed = true;
1711
1712 // And cleanup the calls we don't use anymore.
1713 for (auto V : ToRemoves) {
1714 V->eraseFromParent();
1715 }
1716
1717 // And remove the function we don't need either too.
1718 F->eraseFromParent();
1719 }
1720 }
1721
1722 return Changed;
1723}
1724
1725bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
1726 bool Changed = false;
1727
1728 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1729 //
1730 // %u2 = load <2 x i32> %ptr
1731 // %u2xy = extractelement %u2, 0
1732 // %u2zw = extractelement %u2, 1
1733 // %fxy = call <2 x float> Unpack2xHalf(uint)
1734 // %fzw = call <2 x float> Unpack2xHalf(uint)
1735 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1736 const std::vector<const char *> Map = {
1737 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
1738 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
1739 "_Z20__clspv_vloada_half4jPKDv2_j", // private
1740 };
1741
1742 for (auto Name : Map) {
1743 // If we find a function with the matching name.
1744 if (auto F = M.getFunction(Name)) {
1745 SmallVector<Instruction *, 4> ToRemoves;
1746
1747 // Walk the users of the function.
1748 for (auto &U : F->uses()) {
1749 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1750 auto Index = CI->getOperand(0);
1751 auto Ptr = CI->getOperand(1);
1752
1753 auto IntTy = Type::getInt32Ty(M.getContext());
1754 auto Int2Ty = VectorType::get(IntTy, 2);
1755 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1756 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1757
1758 auto IndexedPtr =
1759 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
1760 auto Load = new LoadInst(IndexedPtr, "", CI);
1761
1762 // Extract each element from the loaded int2.
1763 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1764 "", CI);
1765 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1766 "", CI);
1767
1768 // Our intrinsic to unpack a float2 from an int.
1769 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1770
1771 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1772
1773 // Get the lower (x & y) components of our final float4.
1774 auto Lo = CallInst::Create(NewF, X, "", CI);
1775
1776 // Get the higher (z & w) components of our final float4.
1777 auto Hi = CallInst::Create(NewF, Y, "", CI);
1778
1779 Constant *ShuffleMask[4] = {
1780 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1781 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1782
1783 // Combine our two float2's into one float4.
1784 auto Combine = new ShuffleVectorInst(
1785 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1786
1787 CI->replaceAllUsesWith(Combine);
1788
1789 // Lastly, remember to remove the user.
1790 ToRemoves.push_back(CI);
1791 }
1792 }
1793
1794 Changed = true;
1795
1796 // And cleanup the calls we don't use anymore.
1797 for (auto V : ToRemoves) {
1798 V->eraseFromParent();
1799 }
1800
1801 // And remove the function we don't need either too.
1802 F->eraseFromParent();
1803 }
1804 }
1805
1806 return Changed;
1807}
1808
David Neto22f144c2017-06-12 14:26:21 -04001809bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1810 bool Changed = false;
1811
1812 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1813 "_Z15vstore_half_rtefjPU3AS1Dh",
1814 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1815
1816 for (auto Name : Map) {
1817 // If we find a function with the matching name.
1818 if (auto F = M.getFunction(Name)) {
1819 SmallVector<Instruction *, 4> ToRemoves;
1820
1821 // Walk the users of the function.
1822 for (auto &U : F->uses()) {
1823 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1824 // The value to store.
1825 auto Arg0 = CI->getOperand(0);
1826
1827 // The index argument from vstore_half.
1828 auto Arg1 = CI->getOperand(1);
1829
1830 // The pointer argument from vstore_half.
1831 auto Arg2 = CI->getOperand(2);
1832
David Neto22f144c2017-06-12 14:26:21 -04001833 auto IntTy = Type::getInt32Ty(M.getContext());
1834 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001835 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001836 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001837
1838 // Our intrinsic to pack a float2 to an int.
1839 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1840
1841 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1842
1843 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001844 auto TempVec =
1845 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1846 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001847
1848 // Pack the float2 -> half2 (in an int).
1849 auto X = CallInst::Create(NewF, TempVec, "", CI);
1850
David Neto482550a2018-03-24 05:21:07 -07001851 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001852 auto ShortTy = Type::getInt16Ty(M.getContext());
1853 auto ShortPointerTy = PointerType::get(
1854 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001855
David Neto17852de2017-05-29 17:29:31 -04001856 // Truncate our i32 to an i16.
1857 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001858
David Neto17852de2017-05-29 17:29:31 -04001859 // Cast the half* pointer to short*.
1860 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001861
David Neto17852de2017-05-29 17:29:31 -04001862 // Index into the correct address of the casted pointer.
1863 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001864
David Neto17852de2017-05-29 17:29:31 -04001865 // Store to the int* we casted to.
1866 auto Store = new StoreInst(Trunc, Index, CI);
1867
1868 CI->replaceAllUsesWith(Store);
1869 } else {
1870 // We can only write to 32-bit aligned words.
1871 //
1872 // Assuming base is aligned to 32-bits, replace the equivalent of
1873 // vstore_half(value, index, base)
1874 // with:
1875 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1876 // uint32_t write_to_upper_half = index & 1u;
1877 // uint32_t shift = write_to_upper_half << 4;
1878 //
1879 // // Pack the float value as a half number in bottom 16 bits
1880 // // of an i32.
1881 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1882 //
1883 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1884 // ^ ((packed & 0xffff) << shift)
1885 // // We only need relaxed consistency, but OpenCL 1.2 only has
1886 // // sequentially consistent atomics.
1887 // // TODO(dneto): Use relaxed consistency.
1888 // atomic_xor(target_ptr, xor_value)
1889 auto IntPointerTy = PointerType::get(
1890 IntTy, Arg2->getType()->getPointerAddressSpace());
1891
1892 auto Four = ConstantInt::get(IntTy, 4);
1893 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1894
1895 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1896 // Compute index / 2
1897 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1898 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1899 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1900 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1901 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1902 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1903 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1904
1905 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1906 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1907 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1908
1909 // Generate the call to atomi_xor.
1910 SmallVector<Type *, 5> ParamTypes;
1911 // The pointer type.
1912 ParamTypes.push_back(IntPointerTy);
1913 // The Types for memory scope, semantics, and value.
1914 ParamTypes.push_back(IntTy);
1915 ParamTypes.push_back(IntTy);
1916 ParamTypes.push_back(IntTy);
1917 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1918 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1919
1920 const auto ConstantScopeDevice =
1921 ConstantInt::get(IntTy, spv::ScopeDevice);
1922 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1923 // (SPIR-V Workgroup).
1924 const auto AddrSpaceSemanticsBits =
1925 IntPointerTy->getPointerAddressSpace() == 1
1926 ? spv::MemorySemanticsUniformMemoryMask
1927 : spv::MemorySemanticsWorkgroupMemoryMask;
1928
1929 // We're using relaxed consistency here.
1930 const auto ConstantMemorySemantics =
1931 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1932 AddrSpaceSemanticsBits);
1933
1934 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1935 ConstantMemorySemantics, ValueToXor};
1936 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1937 }
David Neto22f144c2017-06-12 14:26:21 -04001938
1939 // Lastly, remember to remove the user.
1940 ToRemoves.push_back(CI);
1941 }
1942 }
1943
1944 Changed = !ToRemoves.empty();
1945
1946 // And cleanup the calls we don't use anymore.
1947 for (auto V : ToRemoves) {
1948 V->eraseFromParent();
1949 }
1950
1951 // And remove the function we don't need either too.
1952 F->eraseFromParent();
1953 }
1954 }
1955
1956 return Changed;
1957}
1958
1959bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1960 bool Changed = false;
1961
David Netoe2871522018-06-08 11:09:54 -07001962 const std::vector<const char *> Map = {
1963 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
1964 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
1965 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
1966 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
1967 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1968 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
1969 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
1970 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
1971 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
1972 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
1973 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
1974 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
1975 };
David Neto22f144c2017-06-12 14:26:21 -04001976
1977 for (auto Name : Map) {
1978 // If we find a function with the matching name.
1979 if (auto F = M.getFunction(Name)) {
1980 SmallVector<Instruction *, 4> ToRemoves;
1981
1982 // Walk the users of the function.
1983 for (auto &U : F->uses()) {
1984 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1985 // The value to store.
1986 auto Arg0 = CI->getOperand(0);
1987
1988 // The index argument from vstore_half.
1989 auto Arg1 = CI->getOperand(1);
1990
1991 // The pointer argument from vstore_half.
1992 auto Arg2 = CI->getOperand(2);
1993
1994 auto IntTy = Type::getInt32Ty(M.getContext());
1995 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1996 auto NewPointerTy = PointerType::get(
1997 IntTy, Arg2->getType()->getPointerAddressSpace());
1998 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1999
2000 // Our intrinsic to pack a float2 to an int.
2001 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2002
2003 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2004
2005 // Turn the packed x & y into the final packing.
2006 auto X = CallInst::Create(NewF, Arg0, "", CI);
2007
2008 // Cast the half* pointer to int*.
2009 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2010
2011 // Index into the correct address of the casted pointer.
2012 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
2013
2014 // Store to the int* we casted to.
2015 auto Store = new StoreInst(X, Index, CI);
2016
2017 CI->replaceAllUsesWith(Store);
2018
2019 // Lastly, remember to remove the user.
2020 ToRemoves.push_back(CI);
2021 }
2022 }
2023
2024 Changed = !ToRemoves.empty();
2025
2026 // And cleanup the calls we don't use anymore.
2027 for (auto V : ToRemoves) {
2028 V->eraseFromParent();
2029 }
2030
2031 // And remove the function we don't need either too.
2032 F->eraseFromParent();
2033 }
2034 }
2035
2036 return Changed;
2037}
2038
2039bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
2040 bool Changed = false;
2041
David Netoe2871522018-06-08 11:09:54 -07002042 const std::vector<const char *> Map = {
2043 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2044 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2045 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2046 "_Z13vstorea_half4Dv4_fjPDh", // private
2047 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2048 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2049 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2050 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2051 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2052 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2053 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2054 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2055 };
David Neto22f144c2017-06-12 14:26:21 -04002056
2057 for (auto Name : Map) {
2058 // If we find a function with the matching name.
2059 if (auto F = M.getFunction(Name)) {
2060 SmallVector<Instruction *, 4> ToRemoves;
2061
2062 // Walk the users of the function.
2063 for (auto &U : F->uses()) {
2064 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2065 // The value to store.
2066 auto Arg0 = CI->getOperand(0);
2067
2068 // The index argument from vstore_half.
2069 auto Arg1 = CI->getOperand(1);
2070
2071 // The pointer argument from vstore_half.
2072 auto Arg2 = CI->getOperand(2);
2073
2074 auto IntTy = Type::getInt32Ty(M.getContext());
2075 auto Int2Ty = VectorType::get(IntTy, 2);
2076 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2077 auto NewPointerTy = PointerType::get(
2078 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2079 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2080
2081 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2082 ConstantInt::get(IntTy, 1)};
2083
2084 // Extract out the x & y components of our to store value.
2085 auto Lo =
2086 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2087 ConstantVector::get(LoShuffleMask), "", CI);
2088
2089 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2090 ConstantInt::get(IntTy, 3)};
2091
2092 // Extract out the z & w components of our to store value.
2093 auto Hi =
2094 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2095 ConstantVector::get(HiShuffleMask), "", CI);
2096
2097 // Our intrinsic to pack a float2 to an int.
2098 auto SPIRVIntrinsic = "spirv.pack.v2f16";
2099
2100 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2101
2102 // Turn the packed x & y into the final component of our int2.
2103 auto X = CallInst::Create(NewF, Lo, "", CI);
2104
2105 // Turn the packed z & w into the final component of our int2.
2106 auto Y = CallInst::Create(NewF, Hi, "", CI);
2107
2108 auto Combine = InsertElementInst::Create(
2109 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2110 Combine = InsertElementInst::Create(
2111 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
2112
2113 // Cast the half* pointer to int2*.
2114 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
2115
2116 // Index into the correct address of the casted pointer.
2117 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
2118
2119 // Store to the int2* we casted to.
2120 auto Store = new StoreInst(Combine, Index, CI);
2121
2122 CI->replaceAllUsesWith(Store);
2123
2124 // Lastly, remember to remove the user.
2125 ToRemoves.push_back(CI);
2126 }
2127 }
2128
2129 Changed = !ToRemoves.empty();
2130
2131 // And cleanup the calls we don't use anymore.
2132 for (auto V : ToRemoves) {
2133 V->eraseFromParent();
2134 }
2135
2136 // And remove the function we don't need either too.
2137 F->eraseFromParent();
2138 }
2139 }
2140
2141 return Changed;
2142}
2143
2144bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2145 bool Changed = false;
2146
2147 const std::map<const char *, const char*> Map = {
2148 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2149 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2150 };
2151
2152 for (auto Pair : Map) {
2153 // If we find a function with the matching name.
2154 if (auto F = M.getFunction(Pair.first)) {
2155 SmallVector<Instruction *, 4> ToRemoves;
2156
2157 // Walk the users of the function.
2158 for (auto &U : F->uses()) {
2159 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2160 // The image.
2161 auto Arg0 = CI->getOperand(0);
2162
2163 // The sampler.
2164 auto Arg1 = CI->getOperand(1);
2165
2166 // The coordinate (integer type that we can't handle).
2167 auto Arg2 = CI->getOperand(2);
2168
2169 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2170
2171 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2172
2173 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2174
2175 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2176
2177 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2178
2179 CI->replaceAllUsesWith(NewCI);
2180
2181 // Lastly, remember to remove the user.
2182 ToRemoves.push_back(CI);
2183 }
2184 }
2185
2186 Changed = !ToRemoves.empty();
2187
2188 // And cleanup the calls we don't use anymore.
2189 for (auto V : ToRemoves) {
2190 V->eraseFromParent();
2191 }
2192
2193 // And remove the function we don't need either too.
2194 F->eraseFromParent();
2195 }
2196 }
2197
2198 return Changed;
2199}
2200
2201bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2202 bool Changed = false;
2203
2204 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002205 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
2206 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
2207 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
2208 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
2209 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
2210 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002211 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
2212 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
2213 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
2214 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
2215 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01002216 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002217
2218 for (auto Pair : Map) {
2219 // If we find a function with the matching name.
2220 if (auto F = M.getFunction(Pair.first)) {
2221 SmallVector<Instruction *, 4> ToRemoves;
2222
2223 // Walk the users of the function.
2224 for (auto &U : F->uses()) {
2225 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2226 auto FType = F->getFunctionType();
2227 SmallVector<Type *, 5> ParamTypes;
2228
2229 // The pointer type.
2230 ParamTypes.push_back(FType->getParamType(0));
2231
2232 auto IntTy = Type::getInt32Ty(M.getContext());
2233
2234 // The memory scope type.
2235 ParamTypes.push_back(IntTy);
2236
2237 // The memory semantics type.
2238 ParamTypes.push_back(IntTy);
2239
2240 if (2 < CI->getNumArgOperands()) {
2241 // The unequal memory semantics type.
2242 ParamTypes.push_back(IntTy);
2243
2244 // The value type.
2245 ParamTypes.push_back(FType->getParamType(2));
2246
2247 // The comparator type.
2248 ParamTypes.push_back(FType->getParamType(1));
2249 } else if (1 < CI->getNumArgOperands()) {
2250 // The value type.
2251 ParamTypes.push_back(FType->getParamType(1));
2252 }
2253
2254 auto NewFType =
2255 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2256 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2257
2258 // We need to map the OpenCL constants to the SPIR-V equivalents.
2259 const auto ConstantScopeDevice =
2260 ConstantInt::get(IntTy, spv::ScopeDevice);
2261 const auto ConstantMemorySemantics = ConstantInt::get(
2262 IntTy, spv::MemorySemanticsUniformMemoryMask |
2263 spv::MemorySemanticsSequentiallyConsistentMask);
2264
2265 SmallVector<Value *, 5> Params;
2266
2267 // The pointer.
2268 Params.push_back(CI->getArgOperand(0));
2269
2270 // The memory scope.
2271 Params.push_back(ConstantScopeDevice);
2272
2273 // The memory semantics.
2274 Params.push_back(ConstantMemorySemantics);
2275
2276 if (2 < CI->getNumArgOperands()) {
2277 // The unequal memory semantics.
2278 Params.push_back(ConstantMemorySemantics);
2279
2280 // The value.
2281 Params.push_back(CI->getArgOperand(2));
2282
2283 // The comparator.
2284 Params.push_back(CI->getArgOperand(1));
2285 } else if (1 < CI->getNumArgOperands()) {
2286 // The value.
2287 Params.push_back(CI->getArgOperand(1));
2288 }
2289
2290 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2291
2292 CI->replaceAllUsesWith(NewCI);
2293
2294 // Lastly, remember to remove the user.
2295 ToRemoves.push_back(CI);
2296 }
2297 }
2298
2299 Changed = !ToRemoves.empty();
2300
2301 // And cleanup the calls we don't use anymore.
2302 for (auto V : ToRemoves) {
2303 V->eraseFromParent();
2304 }
2305
2306 // And remove the function we don't need either too.
2307 F->eraseFromParent();
2308 }
2309 }
2310
Neil Henning39672102017-09-29 14:33:13 +01002311 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002312 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2313 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2314 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2315 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2316 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2317 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2318 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2319 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2320 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2321 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2322 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2323 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2324 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2325 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2326 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2327 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002328 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2329 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2330 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2331 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2332 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2333 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2334 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2335 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2336 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2337 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2338 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2339 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2340 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2341 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2342 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2343 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
2344
2345 for (auto Pair : Map2) {
2346 // If we find a function with the matching name.
2347 if (auto F = M.getFunction(Pair.first)) {
2348 SmallVector<Instruction *, 4> ToRemoves;
2349
2350 // Walk the users of the function.
2351 for (auto &U : F->uses()) {
2352 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2353 auto AtomicOp = new AtomicRMWInst(
2354 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2355 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2356
2357 CI->replaceAllUsesWith(AtomicOp);
2358
2359 // Lastly, remember to remove the user.
2360 ToRemoves.push_back(CI);
2361 }
2362 }
2363
2364 Changed = !ToRemoves.empty();
2365
2366 // And cleanup the calls we don't use anymore.
2367 for (auto V : ToRemoves) {
2368 V->eraseFromParent();
2369 }
2370
2371 // And remove the function we don't need either too.
2372 F->eraseFromParent();
2373 }
2374 }
2375
David Neto22f144c2017-06-12 14:26:21 -04002376 return Changed;
2377}
2378
2379bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
2380 bool Changed = false;
2381
2382 // If we find a function with the matching name.
2383 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
2384 SmallVector<Instruction *, 4> ToRemoves;
2385
2386 auto IntTy = Type::getInt32Ty(M.getContext());
2387 auto FloatTy = Type::getFloatTy(M.getContext());
2388
2389 Constant *DownShuffleMask[3] = {
2390 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2391 ConstantInt::get(IntTy, 2)};
2392
2393 Constant *UpShuffleMask[4] = {
2394 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2395 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2396
2397 Constant *FloatVec[3] = {
2398 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2399 };
2400
2401 // Walk the users of the function.
2402 for (auto &U : F->uses()) {
2403 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2404 auto Vec4Ty = CI->getArgOperand(0)->getType();
2405 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2406 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2407 auto Vec3Ty = Arg0->getType();
2408
2409 auto NewFType =
2410 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
2411
2412 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
2413
2414 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
2415
2416 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
2417
2418 CI->replaceAllUsesWith(Result);
2419
2420 // Lastly, remember to remove the user.
2421 ToRemoves.push_back(CI);
2422 }
2423 }
2424
2425 Changed = !ToRemoves.empty();
2426
2427 // And cleanup the calls we don't use anymore.
2428 for (auto V : ToRemoves) {
2429 V->eraseFromParent();
2430 }
2431
2432 // And remove the function we don't need either too.
2433 F->eraseFromParent();
2434 }
2435
2436 return Changed;
2437}
David Neto62653202017-10-16 19:05:18 -04002438
2439bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2440 bool Changed = false;
2441
2442 // OpenCL's float result = fract(float x, float* ptr)
2443 //
2444 // In the LLVM domain:
2445 //
2446 // %floor_result = call spir_func float @floor(float %x)
2447 // store float %floor_result, float * %ptr
2448 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2449 // %result = call spir_func float
2450 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2451 //
2452 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2453 // and clspv.fract occur in the SPIR-V generator pass:
2454 //
2455 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2456 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2457 // ...
2458 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2459 // OpStore %ptr %floor_result
2460 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2461 // %fract_result = OpExtInst %float
2462 // %glsl_ext Fmin %fract_intermediate %just_under_1
2463
2464
2465 using std::string;
2466
2467 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2468 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2469 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2470 auto make_quad = [](const char *a, const char *b, const char *c,
2471 const char *d) {
2472 return std::tuple<const char *, const char *, const char *, const char *>(
2473 a, b, c, d);
2474 };
2475 const std::vector<QuadType> Functions = {
2476 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2477 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2478 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2479 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2480 };
2481
2482 for (auto& quad : Functions) {
2483 const StringRef fract_name(std::get<0>(quad));
2484
2485 // If we find a function with the matching name.
2486 if (auto F = M.getFunction(fract_name)) {
2487 if (F->use_begin() == F->use_end())
2488 continue;
2489
2490 // We have some uses.
2491 Changed = true;
2492
2493 auto& Context = M.getContext();
2494
2495 const StringRef floor_name(std::get<1>(quad));
2496 const StringRef fmin_name(std::get<2>(quad));
2497 const StringRef clspv_fract_name(std::get<3>(quad));
2498
2499 // This is either float or a float vector. All the float-like
2500 // types are this type.
2501 auto result_ty = F->getReturnType();
2502
2503 Function* fmin_fn = M.getFunction(fmin_name);
2504 if (!fmin_fn) {
2505 // Make the fmin function.
2506 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
2507 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002508 fmin_fn->addFnAttr(Attribute::ReadNone);
2509 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2510 }
2511
2512 Function* floor_fn = M.getFunction(floor_name);
2513 if (!floor_fn) {
2514 // Make the floor function.
2515 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2516 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002517 floor_fn->addFnAttr(Attribute::ReadNone);
2518 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2519 }
2520
2521 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
2522 if (!clspv_fract_fn) {
2523 // Make the clspv_fract function.
2524 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2525 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002526 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2527 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2528 }
2529
2530 // Number of significant significand bits, whether represented or not.
2531 unsigned num_significand_bits;
2532 switch (result_ty->getScalarType()->getTypeID()) {
2533 case Type::HalfTyID:
2534 num_significand_bits = 11;
2535 break;
2536 case Type::FloatTyID:
2537 num_significand_bits = 24;
2538 break;
2539 case Type::DoubleTyID:
2540 num_significand_bits = 53;
2541 break;
2542 default:
2543 assert(false && "Unhandled float type when processing fract builtin");
2544 break;
2545 }
2546 // Beware that the disassembler displays this value as
2547 // OpConstant %float 1
2548 // which is not quite right.
2549 const double kJustUnderOneScalar =
2550 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2551
2552 Constant *just_under_one =
2553 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2554 if (result_ty->isVectorTy()) {
2555 just_under_one = ConstantVector::getSplat(
2556 result_ty->getVectorNumElements(), just_under_one);
2557 }
2558
2559 IRBuilder<> Builder(Context);
2560
2561 SmallVector<Instruction *, 4> ToRemoves;
2562
2563 // Walk the users of the function.
2564 for (auto &U : F->uses()) {
2565 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2566
2567 Builder.SetInsertPoint(CI);
2568 auto arg = CI->getArgOperand(0);
2569 auto ptr = CI->getArgOperand(1);
2570
2571 // Compute floor result and store it.
2572 auto floor = Builder.CreateCall(floor_fn, {arg});
2573 Builder.CreateStore(floor, ptr);
2574
2575 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2576 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2577
2578 CI->replaceAllUsesWith(fract_result);
2579
2580 // Lastly, remember to remove the user.
2581 ToRemoves.push_back(CI);
2582 }
2583 }
2584
2585 // And cleanup the calls we don't use anymore.
2586 for (auto V : ToRemoves) {
2587 V->eraseFromParent();
2588 }
2589
2590 // And remove the function we don't need either too.
2591 F->eraseFromParent();
2592 }
2593 }
2594
2595 return Changed;
2596}