blob: c2d396448e479597cb5d1477e9d2324571306e50 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto118188e2018-08-24 11:27:54 -040019#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000023#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040024#include "llvm/Pass.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/raw_ostream.h"
27#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040028
David Neto118188e2018-08-24 11:27:54 -040029#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040030
David Neto482550a2018-03-24 05:21:07 -070031#include "clspv/Option.h"
32
David Neto22f144c2017-06-12 14:26:21 -040033using namespace llvm;
34
35#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
36
37namespace {
// Returns the zero-based index of the most significant set bit of |v|
// (i.e. floor(log2(v))), and 0 when |v| is 0.
//
// NOTE: despite the name this is not a count of leading zeros. Callers in
// this file subtract two results to obtain the shift distance between two
// single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Each right shift that leaves a non-zero value means there was a set bit
  // one position higher than the ones already counted.
  while ((v >>= 1) != 0) {
    ++highestBit;
  }

  return highestBit;
}
57
58Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
59 if (1 == elements) {
60 return Type::getInt1Ty(C);
61 } else {
62 return VectorType::get(Type::getInt1Ty(C), elements);
63 }
64}
65
66struct ReplaceOpenCLBuiltinPass final : public ModulePass {
67 static char ID;
68 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
69
70 bool runOnModule(Module &M) override;
71 bool replaceRecip(Module &M);
72 bool replaceDivide(Module &M);
73 bool replaceExp10(Module &M);
74 bool replaceLog10(Module &M);
75 bool replaceBarrier(Module &M);
76 bool replaceMemFence(Module &M);
77 bool replaceRelational(Module &M);
78 bool replaceIsInfAndIsNan(Module &M);
79 bool replaceAllAndAny(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +000080 bool replaceSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +000081 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040082 bool replaceSignbit(Module &M);
83 bool replaceMadandMad24andMul24(Module &M);
84 bool replaceVloadHalf(Module &M);
85 bool replaceVloadHalf2(Module &M);
86 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -070087 bool replaceClspvVloadaHalf2(Module &M);
88 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040089 bool replaceVstoreHalf(Module &M);
90 bool replaceVstoreHalf2(Module &M);
91 bool replaceVstoreHalf4(Module &M);
92 bool replaceReadImageF(Module &M);
93 bool replaceAtomics(Module &M);
94 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040095 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070096 bool replaceVload(Module &M);
97 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040098};
99}
100
101char ReplaceOpenCLBuiltinPass::ID = 0;
102static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
103 "Replace OpenCL Builtins Pass");
104
105namespace clspv {
106ModulePass *createReplaceOpenCLBuiltinPass() {
107 return new ReplaceOpenCLBuiltinPass();
108}
109}
110
111bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
112 bool Changed = false;
113
114 Changed |= replaceRecip(M);
115 Changed |= replaceDivide(M);
116 Changed |= replaceExp10(M);
117 Changed |= replaceLog10(M);
118 Changed |= replaceBarrier(M);
119 Changed |= replaceMemFence(M);
120 Changed |= replaceRelational(M);
121 Changed |= replaceIsInfAndIsNan(M);
122 Changed |= replaceAllAndAny(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000123 Changed |= replaceSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000124 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400125 Changed |= replaceSignbit(M);
126 Changed |= replaceMadandMad24andMul24(M);
127 Changed |= replaceVloadHalf(M);
128 Changed |= replaceVloadHalf2(M);
129 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700130 Changed |= replaceClspvVloadaHalf2(M);
131 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400132 Changed |= replaceVstoreHalf(M);
133 Changed |= replaceVstoreHalf2(M);
134 Changed |= replaceVstoreHalf4(M);
135 Changed |= replaceReadImageF(M);
136 Changed |= replaceAtomics(M);
137 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400138 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700139 Changed |= replaceVload(M);
140 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400141
142 return Changed;
143}
144
145bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
146 bool Changed = false;
147
148 const char *Names[] = {
149 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
150 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
151 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
152 };
153
154 for (auto Name : Names) {
155 // If we find a function with the matching name.
156 if (auto F = M.getFunction(Name)) {
157 SmallVector<Instruction *, 4> ToRemoves;
158
159 // Walk the users of the function.
160 for (auto &U : F->uses()) {
161 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
162 // Recip has one arg.
163 auto Arg = CI->getOperand(0);
164
165 auto Div = BinaryOperator::Create(
166 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
167 CI);
168
169 CI->replaceAllUsesWith(Div);
170
171 // Lastly, remember to remove the user.
172 ToRemoves.push_back(CI);
173 }
174 }
175
176 Changed = !ToRemoves.empty();
177
178 // And cleanup the calls we don't use anymore.
179 for (auto V : ToRemoves) {
180 V->eraseFromParent();
181 }
182
183 // And remove the function we don't need either too.
184 F->eraseFromParent();
185 }
186 }
187
188 return Changed;
189}
190
191bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
192 bool Changed = false;
193
194 const char *Names[] = {
195 "_Z11half_divideff", "_Z13native_divideff",
196 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
197 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
198 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
199 };
200
201 for (auto Name : Names) {
202 // If we find a function with the matching name.
203 if (auto F = M.getFunction(Name)) {
204 SmallVector<Instruction *, 4> ToRemoves;
205
206 // Walk the users of the function.
207 for (auto &U : F->uses()) {
208 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
209 auto Div = BinaryOperator::Create(
210 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
211
212 CI->replaceAllUsesWith(Div);
213
214 // Lastly, remember to remove the user.
215 ToRemoves.push_back(CI);
216 }
217 }
218
219 Changed = !ToRemoves.empty();
220
221 // And cleanup the calls we don't use anymore.
222 for (auto V : ToRemoves) {
223 V->eraseFromParent();
224 }
225
226 // And remove the function we don't need either too.
227 F->eraseFromParent();
228 }
229 }
230
231 return Changed;
232}
233
234bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
235 bool Changed = false;
236
237 const std::map<const char *, const char *> Map = {
238 {"_Z5exp10f", "_Z3expf"},
239 {"_Z10half_exp10f", "_Z8half_expf"},
240 {"_Z12native_exp10f", "_Z10native_expf"},
241 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
242 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
243 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
244 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
245 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
246 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
247 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
248 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
249 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
250
251 for (auto Pair : Map) {
252 // If we find a function with the matching name.
253 if (auto F = M.getFunction(Pair.first)) {
254 SmallVector<Instruction *, 4> ToRemoves;
255
256 // Walk the users of the function.
257 for (auto &U : F->uses()) {
258 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
259 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
260
261 auto Arg = CI->getOperand(0);
262
263 // Constant of the natural log of 10 (ln(10)).
264 const double Ln10 =
265 2.302585092994045684017991454684364207601101488628772976033;
266
267 auto Mul = BinaryOperator::Create(
268 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
269 CI);
270
271 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
272
273 CI->replaceAllUsesWith(NewCI);
274
275 // Lastly, remember to remove the user.
276 ToRemoves.push_back(CI);
277 }
278 }
279
280 Changed = !ToRemoves.empty();
281
282 // And cleanup the calls we don't use anymore.
283 for (auto V : ToRemoves) {
284 V->eraseFromParent();
285 }
286
287 // And remove the function we don't need either too.
288 F->eraseFromParent();
289 }
290 }
291
292 return Changed;
293}
294
295bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
296 bool Changed = false;
297
298 const std::map<const char *, const char *> Map = {
299 {"_Z5log10f", "_Z3logf"},
300 {"_Z10half_log10f", "_Z8half_logf"},
301 {"_Z12native_log10f", "_Z10native_logf"},
302 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
303 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
304 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
305 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
306 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
307 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
308 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
309 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
310 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
311
312 for (auto Pair : Map) {
313 // If we find a function with the matching name.
314 if (auto F = M.getFunction(Pair.first)) {
315 SmallVector<Instruction *, 4> ToRemoves;
316
317 // Walk the users of the function.
318 for (auto &U : F->uses()) {
319 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
320 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
321
322 auto Arg = CI->getOperand(0);
323
324 // Constant of the reciprocal of the natural log of 10 (ln(10)).
325 const double Ln10 =
326 0.434294481903251827651128918916605082294397005803666566114;
327
328 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
329
330 auto Mul = BinaryOperator::Create(
331 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
332 "", CI);
333
334 CI->replaceAllUsesWith(Mul);
335
336 // Lastly, remember to remove the user.
337 ToRemoves.push_back(CI);
338 }
339 }
340
341 Changed = !ToRemoves.empty();
342
343 // And cleanup the calls we don't use anymore.
344 for (auto V : ToRemoves) {
345 V->eraseFromParent();
346 }
347
348 // And remove the function we don't need either too.
349 F->eraseFromParent();
350 }
351 }
352
353 return Changed;
354}
355
356bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
357 bool Changed = false;
358
359 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
360
361 const std::map<const char *, const char *> Map = {
362 {"_Z7barrierj", "__spirv_control_barrier"}};
363
364 for (auto Pair : Map) {
365 // If we find a function with the matching name.
366 if (auto F = M.getFunction(Pair.first)) {
367 SmallVector<Instruction *, 4> ToRemoves;
368
369 // Walk the users of the function.
370 for (auto &U : F->uses()) {
371 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
372 auto FType = F->getFunctionType();
373 SmallVector<Type *, 3> Params;
374 for (unsigned i = 0; i < 3; i++) {
375 Params.push_back(FType->getParamType(0));
376 }
377 auto NewFType =
378 FunctionType::get(FType->getReturnType(), Params, false);
379 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
380
381 auto Arg = CI->getOperand(0);
382
383 // We need to map the OpenCL constants to the SPIR-V equivalents.
384 const auto LocalMemFence =
385 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
386 const auto GlobalMemFence =
387 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
388 const auto ConstantSequentiallyConsistent = ConstantInt::get(
389 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
390 const auto ConstantScopeDevice =
391 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
392 const auto ConstantScopeWorkgroup =
393 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
394
395 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
396 const auto LocalMemFenceMask = BinaryOperator::Create(
397 Instruction::And, LocalMemFence, Arg, "", CI);
398 const auto WorkgroupShiftAmount =
399 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
400 clz(CLK_LOCAL_MEM_FENCE);
401 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
402 Instruction::Shl, LocalMemFenceMask,
403 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
404
405 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
406 const auto GlobalMemFenceMask = BinaryOperator::Create(
407 Instruction::And, GlobalMemFence, Arg, "", CI);
408 const auto UniformShiftAmount =
409 clz(spv::MemorySemanticsUniformMemoryMask) -
410 clz(CLK_GLOBAL_MEM_FENCE);
411 const auto MemorySemanticsUniform = BinaryOperator::Create(
412 Instruction::Shl, GlobalMemFenceMask,
413 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
414
415 // And combine the above together, also adding in
416 // MemorySemanticsSequentiallyConsistentMask.
417 auto MemorySemantics =
418 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
419 ConstantSequentiallyConsistent, "", CI);
420 MemorySemantics = BinaryOperator::Create(
421 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
422
423 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
424 // Device Scope, otherwise Workgroup Scope.
425 const auto Cmp =
426 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
427 GlobalMemFenceMask, GlobalMemFence, "", CI);
428 const auto MemoryScope = SelectInst::Create(
429 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
430
431 // Lastly, the Execution Scope is always Workgroup Scope.
432 const auto ExecutionScope = ConstantScopeWorkgroup;
433
434 auto NewCI = CallInst::Create(
435 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
436
437 CI->replaceAllUsesWith(NewCI);
438
439 // Lastly, remember to remove the user.
440 ToRemoves.push_back(CI);
441 }
442 }
443
444 Changed = !ToRemoves.empty();
445
446 // And cleanup the calls we don't use anymore.
447 for (auto V : ToRemoves) {
448 V->eraseFromParent();
449 }
450
451 // And remove the function we don't need either too.
452 F->eraseFromParent();
453 }
454 }
455
456 return Changed;
457}
458
459bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
460 bool Changed = false;
461
462 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
463
Neil Henning39672102017-09-29 14:33:13 +0100464 using Tuple = std::tuple<const char *, unsigned>;
465 const std::map<const char *, Tuple> Map = {
466 {"_Z9mem_fencej",
467 Tuple("__spirv_memory_barrier",
468 spv::MemorySemanticsSequentiallyConsistentMask)},
469 {"_Z14read_mem_fencej",
470 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
471 {"_Z15write_mem_fencej",
472 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400473
474 for (auto Pair : Map) {
475 // If we find a function with the matching name.
476 if (auto F = M.getFunction(Pair.first)) {
477 SmallVector<Instruction *, 4> ToRemoves;
478
479 // Walk the users of the function.
480 for (auto &U : F->uses()) {
481 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
482 auto FType = F->getFunctionType();
483 SmallVector<Type *, 2> Params;
484 for (unsigned i = 0; i < 2; i++) {
485 Params.push_back(FType->getParamType(0));
486 }
487 auto NewFType =
488 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100489 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400490
491 auto Arg = CI->getOperand(0);
492
493 // We need to map the OpenCL constants to the SPIR-V equivalents.
494 const auto LocalMemFence =
495 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
496 const auto GlobalMemFence =
497 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
498 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100499 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400500 const auto ConstantScopeDevice =
501 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
502
503 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
504 const auto LocalMemFenceMask = BinaryOperator::Create(
505 Instruction::And, LocalMemFence, Arg, "", CI);
506 const auto WorkgroupShiftAmount =
507 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
508 clz(CLK_LOCAL_MEM_FENCE);
509 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
510 Instruction::Shl, LocalMemFenceMask,
511 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
512
513 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
514 const auto GlobalMemFenceMask = BinaryOperator::Create(
515 Instruction::And, GlobalMemFence, Arg, "", CI);
516 const auto UniformShiftAmount =
517 clz(spv::MemorySemanticsUniformMemoryMask) -
518 clz(CLK_GLOBAL_MEM_FENCE);
519 const auto MemorySemanticsUniform = BinaryOperator::Create(
520 Instruction::Shl, GlobalMemFenceMask,
521 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
522
523 // And combine the above together, also adding in
524 // MemorySemanticsSequentiallyConsistentMask.
525 auto MemorySemantics =
526 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
527 ConstantMemorySemantics, "", CI);
528 MemorySemantics = BinaryOperator::Create(
529 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
530
531 // Memory Scope is always device.
532 const auto MemoryScope = ConstantScopeDevice;
533
534 auto NewCI =
535 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
536
537 CI->replaceAllUsesWith(NewCI);
538
539 // Lastly, remember to remove the user.
540 ToRemoves.push_back(CI);
541 }
542 }
543
544 Changed = !ToRemoves.empty();
545
546 // And cleanup the calls we don't use anymore.
547 for (auto V : ToRemoves) {
548 V->eraseFromParent();
549 }
550
551 // And remove the function we don't need either too.
552 F->eraseFromParent();
553 }
554 }
555
556 return Changed;
557}
558
559bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
560 bool Changed = false;
561
562 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
563 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
564 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
565 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
566 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
567 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
568 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
569 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
570 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
571 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
572 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
573 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
574 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
575 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
576 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
577 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
578 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
579 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
580 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
581 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
582 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
583 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
584 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
585 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
586 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
587 };
588
589 for (auto Pair : Map) {
590 // If we find a function with the matching name.
591 if (auto F = M.getFunction(Pair.first)) {
592 SmallVector<Instruction *, 4> ToRemoves;
593
594 // Walk the users of the function.
595 for (auto &U : F->uses()) {
596 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
597 // The predicate to use in the CmpInst.
598 auto Predicate = Pair.second.first;
599
600 // The value to return for true.
601 auto TrueValue =
602 ConstantInt::getSigned(CI->getType(), Pair.second.second);
603
604 // The value to return for false.
605 auto FalseValue = Constant::getNullValue(CI->getType());
606
607 auto Arg1 = CI->getOperand(0);
608 auto Arg2 = CI->getOperand(1);
609
610 const auto Cmp =
611 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
612
613 const auto Select =
614 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
615
616 CI->replaceAllUsesWith(Select);
617
618 // Lastly, remember to remove the user.
619 ToRemoves.push_back(CI);
620 }
621 }
622
623 Changed = !ToRemoves.empty();
624
625 // And cleanup the calls we don't use anymore.
626 for (auto V : ToRemoves) {
627 V->eraseFromParent();
628 }
629
630 // And remove the function we don't need either too.
631 F->eraseFromParent();
632 }
633 }
634
635 return Changed;
636}
637
638bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
639 bool Changed = false;
640
641 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
642 {"_Z5isinff", {"__spirv_isinff", 1}},
643 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
644 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
645 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
646 {"_Z5isnanf", {"__spirv_isnanf", 1}},
647 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
648 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
649 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
650 };
651
652 for (auto Pair : Map) {
653 // If we find a function with the matching name.
654 if (auto F = M.getFunction(Pair.first)) {
655 SmallVector<Instruction *, 4> ToRemoves;
656
657 // Walk the users of the function.
658 for (auto &U : F->uses()) {
659 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
660 const auto CITy = CI->getType();
661
662 // The fake SPIR-V intrinsic to generate.
663 auto SPIRVIntrinsic = Pair.second.first;
664
665 // The value to return for true.
666 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
667
668 // The value to return for false.
669 auto FalseValue = Constant::getNullValue(CITy);
670
671 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
672 M.getContext(),
673 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
674
675 auto NewFType =
676 FunctionType::get(CorrespondingBoolTy,
677 F->getFunctionType()->getParamType(0), false);
678
679 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
680
681 auto Arg = CI->getOperand(0);
682
683 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
684
685 const auto Select =
686 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
687
688 CI->replaceAllUsesWith(Select);
689
690 // Lastly, remember to remove the user.
691 ToRemoves.push_back(CI);
692 }
693 }
694
695 Changed = !ToRemoves.empty();
696
697 // And cleanup the calls we don't use anymore.
698 for (auto V : ToRemoves) {
699 V->eraseFromParent();
700 }
701
702 // And remove the function we don't need either too.
703 F->eraseFromParent();
704 }
705 }
706
707 return Changed;
708}
709
710bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
711 bool Changed = false;
712
713 const std::map<const char *, const char *> Map = {
714 {"_Z3alli", ""},
715 {"_Z3allDv2_i", "__spirv_allDv2_i"},
716 {"_Z3allDv3_i", "__spirv_allDv3_i"},
717 {"_Z3allDv4_i", "__spirv_allDv4_i"},
718 {"_Z3anyi", ""},
719 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
720 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
721 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
722 };
723
724 for (auto Pair : Map) {
725 // If we find a function with the matching name.
726 if (auto F = M.getFunction(Pair.first)) {
727 SmallVector<Instruction *, 4> ToRemoves;
728
729 // Walk the users of the function.
730 for (auto &U : F->uses()) {
731 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
732 // The fake SPIR-V intrinsic to generate.
733 auto SPIRVIntrinsic = Pair.second;
734
735 auto Arg = CI->getOperand(0);
736
737 Value *V;
738
739 // If we have a function to call, call it!
740 if (0 < strlen(SPIRVIntrinsic)) {
741 // The value for zero to compare against.
742 const auto ZeroValue = Constant::getNullValue(Arg->getType());
743
744 const auto Cmp = CmpInst::Create(
745 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
746 const auto NewFType = FunctionType::get(
747 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
748
749 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
750
751 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
752
753 // The value to return for true.
754 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
755
756 // The value to return for false.
757 const auto FalseValue = Constant::getNullValue(CI->getType());
758
759 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
760 } else {
761 V = BinaryOperator::Create(Instruction::LShr, Arg,
762 ConstantInt::get(CI->getType(), 31), "",
763 CI);
764 }
765
766 CI->replaceAllUsesWith(V);
767
768 // Lastly, remember to remove the user.
769 ToRemoves.push_back(CI);
770 }
771 }
772
773 Changed = !ToRemoves.empty();
774
775 // And cleanup the calls we don't use anymore.
776 for (auto V : ToRemoves) {
777 V->eraseFromParent();
778 }
779
780 // And remove the function we don't need either too.
781 F->eraseFromParent();
782 }
783 }
784
785 return Changed;
786}
787
Kévin Petitf5b78a22018-10-25 14:32:17 +0000788bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
789 bool Changed = false;
790
791 for (auto const &SymVal : M.getValueSymbolTable()) {
792 // Skip symbols whose name doesn't match
793 if (!SymVal.getKey().startswith("_Z6select")) {
794 continue;
795 }
796 // Is there a function going by that name?
797 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
798
799 SmallVector<Instruction *, 4> ToRemoves;
800
801 // Walk the users of the function.
802 for (auto &U : F->uses()) {
803 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
804
805 // Get arguments
806 auto FalseValue = CI->getOperand(0);
807 auto TrueValue = CI->getOperand(1);
808 auto PredicateValue = CI->getOperand(2);
809
810 // Don't touch overloads that aren't in OpenCL C
811 auto FalseType = FalseValue->getType();
812 auto TrueType = TrueValue->getType();
813 auto PredicateType = PredicateValue->getType();
814
815 if (FalseType != TrueType) {
816 continue;
817 }
818
819 if (!PredicateType->isIntOrIntVectorTy()) {
820 continue;
821 }
822
823 if (!FalseType->isIntOrIntVectorTy() &&
824 !FalseType->getScalarType()->isFloatingPointTy()) {
825 continue;
826 }
827
828 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
829 continue;
830 }
831
832 if (FalseType->getScalarSizeInBits() !=
833 PredicateType->getScalarSizeInBits()) {
834 continue;
835 }
836
837 if (FalseType->isVectorTy()) {
838 if (FalseType->getVectorNumElements() !=
839 PredicateType->getVectorNumElements()) {
840 continue;
841 }
842
843 if ((FalseType->getVectorNumElements() != 2) &&
844 (FalseType->getVectorNumElements() != 3) &&
845 (FalseType->getVectorNumElements() != 4) &&
846 (FalseType->getVectorNumElements() != 8) &&
847 (FalseType->getVectorNumElements() != 16)) {
848 continue;
849 }
850 }
851
852 // Create constant
853 const auto ZeroValue = Constant::getNullValue(PredicateType);
854
855 // Scalar and vector are to be treated differently
856 CmpInst::Predicate Pred;
857 if (PredicateType->isVectorTy()) {
858 Pred = CmpInst::ICMP_SLT;
859 } else {
860 Pred = CmpInst::ICMP_NE;
861 }
862
863 // Create comparison instruction
864 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
865 ZeroValue, "", CI);
866
867 // Create select
868 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
869
870 // Replace call with the selection
871 CI->replaceAllUsesWith(V);
872
873 // Lastly, remember to remove the user.
874 ToRemoves.push_back(CI);
875 }
876 }
877
878 Changed = !ToRemoves.empty();
879
880 // And cleanup the calls we don't use anymore.
881 for (auto V : ToRemoves) {
882 V->eraseFromParent();
883 }
884
885 // And remove the function we don't need either too.
886 F->eraseFromParent();
887 }
888 }
889
890 return Changed;
891}
892
Kévin Petit6b0a9532018-10-30 20:00:39 +0000893bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
894 bool Changed = false;
895
896 const std::map<const char *, const char *> Map = {
897 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
898 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
899 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
900 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
901 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
902 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
903 };
904
905 for (auto Pair : Map) {
906 // If we find a function with the matching name.
907 if (auto F = M.getFunction(Pair.first)) {
908 SmallVector<Instruction *, 4> ToRemoves;
909
910 // Walk the users of the function.
911 for (auto &U : F->uses()) {
912 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
913
914 auto ReplacementFn = Pair.second;
915
916 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
917 Value *VectorArg;
918
919 // First figure out which function we're dealing with
920 if (F->getName().startswith("_Z10smoothstep")) {
921 ArgsToSplat.push_back(CI->getOperand(1));
922 VectorArg = CI->getOperand(2);
923 } else {
924 VectorArg = CI->getOperand(1);
925 }
926
927 // Splat arguments that need to be
928 SmallVector<Value*, 2> SplatArgs;
929 auto VecType = VectorArg->getType();
930
931 for (auto arg : ArgsToSplat) {
932 Value* NewVectorArg = UndefValue::get(VecType);
933 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
934 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
935 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
936 }
937 SplatArgs.push_back(NewVectorArg);
938 }
939
940 // Replace the call with the vector/vector flavour
941 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
942 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
943
944 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
945
946 SmallVector<Value*, 3> NewArgs;
947 for (auto arg : SplatArgs) {
948 NewArgs.push_back(arg);
949 }
950 NewArgs.push_back(VectorArg);
951
952 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
953
954 CI->replaceAllUsesWith(NewCI);
955
956 // Lastly, remember to remove the user.
957 ToRemoves.push_back(CI);
958 }
959 }
960
961 Changed = !ToRemoves.empty();
962
963 // And cleanup the calls we don't use anymore.
964 for (auto V : ToRemoves) {
965 V->eraseFromParent();
966 }
967
968 // And remove the function we don't need either too.
969 F->eraseFromParent();
970 }
971 }
972
973 return Changed;
974}
975
David Neto22f144c2017-06-12 14:26:21 -0400976bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
977 bool Changed = false;
978
979 const std::map<const char *, Instruction::BinaryOps> Map = {
980 {"_Z7signbitf", Instruction::LShr},
981 {"_Z7signbitDv2_f", Instruction::AShr},
982 {"_Z7signbitDv3_f", Instruction::AShr},
983 {"_Z7signbitDv4_f", Instruction::AShr},
984 };
985
986 for (auto Pair : Map) {
987 // If we find a function with the matching name.
988 if (auto F = M.getFunction(Pair.first)) {
989 SmallVector<Instruction *, 4> ToRemoves;
990
991 // Walk the users of the function.
992 for (auto &U : F->uses()) {
993 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
994 auto Arg = CI->getOperand(0);
995
996 auto Bitcast =
997 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
998
999 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1000 ConstantInt::get(CI->getType(), 31),
1001 "", CI);
1002
1003 CI->replaceAllUsesWith(Shr);
1004
1005 // Lastly, remember to remove the user.
1006 ToRemoves.push_back(CI);
1007 }
1008 }
1009
1010 Changed = !ToRemoves.empty();
1011
1012 // And cleanup the calls we don't use anymore.
1013 for (auto V : ToRemoves) {
1014 V->eraseFromParent();
1015 }
1016
1017 // And remove the function we don't need either too.
1018 F->eraseFromParent();
1019 }
1020 }
1021
1022 return Changed;
1023}
1024
1025bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1026 bool Changed = false;
1027
1028 const std::map<const char *,
1029 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1030 Map = {
1031 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1032 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1033 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1034 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1035 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1036 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1037 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1038 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1039 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1040 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1041 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1042 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1043 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1044 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1045 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1046 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1047 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1048 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1049 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1050 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1051 };
1052
1053 for (auto Pair : Map) {
1054 // If we find a function with the matching name.
1055 if (auto F = M.getFunction(Pair.first)) {
1056 SmallVector<Instruction *, 4> ToRemoves;
1057
1058 // Walk the users of the function.
1059 for (auto &U : F->uses()) {
1060 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1061 // The multiply instruction to use.
1062 auto MulInst = Pair.second.first;
1063
1064 // The add instruction to use.
1065 auto AddInst = Pair.second.second;
1066
1067 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1068
1069 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1070 CI->getArgOperand(1), "", CI);
1071
1072 if (Instruction::BinaryOpsEnd != AddInst) {
1073 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1074 CI);
1075 }
1076
1077 CI->replaceAllUsesWith(I);
1078
1079 // Lastly, remember to remove the user.
1080 ToRemoves.push_back(CI);
1081 }
1082 }
1083
1084 Changed = !ToRemoves.empty();
1085
1086 // And cleanup the calls we don't use anymore.
1087 for (auto V : ToRemoves) {
1088 V->eraseFromParent();
1089 }
1090
1091 // And remove the function we don't need either too.
1092 F->eraseFromParent();
1093 }
1094 }
1095
1096 return Changed;
1097}
1098
Derek Chowcfd368b2017-10-19 20:58:45 -07001099bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1100 bool Changed = false;
1101
1102 struct VectorStoreOps {
1103 const char* name;
1104 int n;
1105 Type* (*get_scalar_type_function)(LLVMContext&);
1106 } vector_store_ops[] = {
1107 // TODO(derekjchow): Expand this list.
1108 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1109 };
1110
David Neto544fffc2017-11-16 18:35:14 -05001111 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001112 auto Name = Op.name;
1113 auto N = Op.n;
1114 auto TypeFn = Op.get_scalar_type_function;
1115 if (auto F = M.getFunction(Name)) {
1116 SmallVector<Instruction *, 4> ToRemoves;
1117
1118 // Walk the users of the function.
1119 for (auto &U : F->uses()) {
1120 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1121 // The value argument from vstoren.
1122 auto Arg0 = CI->getOperand(0);
1123
1124 // The index argument from vstoren.
1125 auto Arg1 = CI->getOperand(1);
1126
1127 // The pointer argument from vstoren.
1128 auto Arg2 = CI->getOperand(2);
1129
1130 // Get types.
1131 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1132 auto ScalarNPointerTy = PointerType::get(
1133 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1134
1135 // Cast to scalarn
1136 auto Cast = CastInst::CreatePointerCast(
1137 Arg2, ScalarNPointerTy, "", CI);
1138 // Index to correct address
1139 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1140 // Store
1141 auto Store = new StoreInst(Arg0, Index, CI);
1142
1143 CI->replaceAllUsesWith(Store);
1144 ToRemoves.push_back(CI);
1145 }
1146 }
1147
1148 Changed = !ToRemoves.empty();
1149
1150 // And cleanup the calls we don't use anymore.
1151 for (auto V : ToRemoves) {
1152 V->eraseFromParent();
1153 }
1154
1155 // And remove the function we don't need either too.
1156 F->eraseFromParent();
1157 }
1158 }
1159
1160 return Changed;
1161}
1162
1163bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1164 bool Changed = false;
1165
1166 struct VectorLoadOps {
1167 const char* name;
1168 int n;
1169 Type* (*get_scalar_type_function)(LLVMContext&);
1170 } vector_load_ops[] = {
1171 // TODO(derekjchow): Expand this list.
1172 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
1173 };
1174
David Neto544fffc2017-11-16 18:35:14 -05001175 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001176 auto Name = Op.name;
1177 auto N = Op.n;
1178 auto TypeFn = Op.get_scalar_type_function;
1179 // If we find a function with the matching name.
1180 if (auto F = M.getFunction(Name)) {
1181 SmallVector<Instruction *, 4> ToRemoves;
1182
1183 // Walk the users of the function.
1184 for (auto &U : F->uses()) {
1185 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1186 // The index argument from vloadn.
1187 auto Arg0 = CI->getOperand(0);
1188
1189 // The pointer argument from vloadn.
1190 auto Arg1 = CI->getOperand(1);
1191
1192 // Get types.
1193 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1194 auto ScalarNPointerTy = PointerType::get(
1195 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1196
1197 // Cast to scalarn
1198 auto Cast = CastInst::CreatePointerCast(
1199 Arg1, ScalarNPointerTy, "", CI);
1200 // Index to correct address
1201 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1202 // Load
1203 auto Load = new LoadInst(Index, "", CI);
1204
1205 CI->replaceAllUsesWith(Load);
1206 ToRemoves.push_back(CI);
1207 }
1208 }
1209
1210 Changed = !ToRemoves.empty();
1211
1212 // And cleanup the calls we don't use anymore.
1213 for (auto V : ToRemoves) {
1214 V->eraseFromParent();
1215 }
1216
1217 // And remove the function we don't need either too.
1218 F->eraseFromParent();
1219
1220 }
1221 }
1222
1223 return Changed;
1224}
1225
David Neto22f144c2017-06-12 14:26:21 -04001226bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1227 bool Changed = false;
1228
1229 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1230 "_Z10vload_halfjPU3AS2KDh"};
1231
1232 for (auto Name : Map) {
1233 // If we find a function with the matching name.
1234 if (auto F = M.getFunction(Name)) {
1235 SmallVector<Instruction *, 4> ToRemoves;
1236
1237 // Walk the users of the function.
1238 for (auto &U : F->uses()) {
1239 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1240 // The index argument from vload_half.
1241 auto Arg0 = CI->getOperand(0);
1242
1243 // The pointer argument from vload_half.
1244 auto Arg1 = CI->getOperand(1);
1245
David Neto22f144c2017-06-12 14:26:21 -04001246 auto IntTy = Type::getInt32Ty(M.getContext());
1247 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001248 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1249
David Neto22f144c2017-06-12 14:26:21 -04001250 // Our intrinsic to unpack a float2 from an int.
1251 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1252
1253 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1254
David Neto482550a2018-03-24 05:21:07 -07001255 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001256 auto ShortTy = Type::getInt16Ty(M.getContext());
1257 auto ShortPointerTy = PointerType::get(
1258 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001259
David Netoac825b82017-05-30 12:49:01 -04001260 // Cast the half* pointer to short*.
1261 auto Cast =
1262 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001263
David Netoac825b82017-05-30 12:49:01 -04001264 // Index into the correct address of the casted pointer.
1265 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1266
1267 // Load from the short* we casted to.
1268 auto Load = new LoadInst(Index, "", CI);
1269
1270 // ZExt the short -> int.
1271 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1272
1273 // Get our float2.
1274 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1275
1276 // Extract out the bottom element which is our float result.
1277 auto Extract = ExtractElementInst::Create(
1278 Call, ConstantInt::get(IntTy, 0), "", CI);
1279
1280 CI->replaceAllUsesWith(Extract);
1281 } else {
1282 // Assume the pointer argument points to storage aligned to 32bits
1283 // or more.
1284 // TODO(dneto): Do more analysis to make sure this is true?
1285 //
1286 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1287 // with:
1288 //
1289 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1290 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1291 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1292 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1293 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1294 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1295 // x float> %converted, %index_is_odd32
1296
1297 auto IntPointerTy = PointerType::get(
1298 IntTy, Arg1->getType()->getPointerAddressSpace());
1299
David Neto973e6a82017-05-30 13:48:18 -04001300 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001301 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001302 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001303 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1304
1305 auto One = ConstantInt::get(IntTy, 1);
1306 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1307 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1308
1309 // Index into the correct address of the casted pointer.
1310 auto Ptr =
1311 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1312
1313 // Load from the int* we casted to.
1314 auto Load = new LoadInst(Ptr, "", CI);
1315
1316 // Get our float2.
1317 auto Call = CallInst::Create(NewF, Load, "", CI);
1318
1319 // Extract out the float result, where the element number is
1320 // determined by whether the original index was even or odd.
1321 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1322
1323 CI->replaceAllUsesWith(Extract);
1324 }
David Neto22f144c2017-06-12 14:26:21 -04001325
1326 // Lastly, remember to remove the user.
1327 ToRemoves.push_back(CI);
1328 }
1329 }
1330
1331 Changed = !ToRemoves.empty();
1332
1333 // And cleanup the calls we don't use anymore.
1334 for (auto V : ToRemoves) {
1335 V->eraseFromParent();
1336 }
1337
1338 // And remove the function we don't need either too.
1339 F->eraseFromParent();
1340 }
1341 }
1342
1343 return Changed;
1344}
1345
1346bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1347 bool Changed = false;
1348
David Neto556c7e62018-06-08 13:45:55 -07001349 const std::vector<const char *> Map = {
1350 "_Z11vload_half2jPU3AS1KDh",
1351 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
1352 "_Z11vload_half2jPU3AS2KDh",
1353 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
1354 };
David Neto22f144c2017-06-12 14:26:21 -04001355
1356 for (auto Name : Map) {
1357 // If we find a function with the matching name.
1358 if (auto F = M.getFunction(Name)) {
1359 SmallVector<Instruction *, 4> ToRemoves;
1360
1361 // Walk the users of the function.
1362 for (auto &U : F->uses()) {
1363 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1364 // The index argument from vload_half.
1365 auto Arg0 = CI->getOperand(0);
1366
1367 // The pointer argument from vload_half.
1368 auto Arg1 = CI->getOperand(1);
1369
1370 auto IntTy = Type::getInt32Ty(M.getContext());
1371 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1372 auto NewPointerTy = PointerType::get(
1373 IntTy, Arg1->getType()->getPointerAddressSpace());
1374 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1375
1376 // Cast the half* pointer to int*.
1377 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1378
1379 // Index into the correct address of the casted pointer.
1380 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1381
1382 // Load from the int* we casted to.
1383 auto Load = new LoadInst(Index, "", CI);
1384
1385 // Our intrinsic to unpack a float2 from an int.
1386 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1387
1388 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1389
1390 // Get our float2.
1391 auto Call = CallInst::Create(NewF, Load, "", CI);
1392
1393 CI->replaceAllUsesWith(Call);
1394
1395 // Lastly, remember to remove the user.
1396 ToRemoves.push_back(CI);
1397 }
1398 }
1399
1400 Changed = !ToRemoves.empty();
1401
1402 // And cleanup the calls we don't use anymore.
1403 for (auto V : ToRemoves) {
1404 V->eraseFromParent();
1405 }
1406
1407 // And remove the function we don't need either too.
1408 F->eraseFromParent();
1409 }
1410 }
1411
1412 return Changed;
1413}
1414
1415bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1416 bool Changed = false;
1417
David Neto556c7e62018-06-08 13:45:55 -07001418 const std::vector<const char *> Map = {
1419 "_Z11vload_half4jPU3AS1KDh",
1420 "_Z12vloada_half4jPU3AS1KDh",
1421 "_Z11vload_half4jPU3AS2KDh",
1422 "_Z12vloada_half4jPU3AS2KDh",
1423 };
David Neto22f144c2017-06-12 14:26:21 -04001424
1425 for (auto Name : Map) {
1426 // If we find a function with the matching name.
1427 if (auto F = M.getFunction(Name)) {
1428 SmallVector<Instruction *, 4> ToRemoves;
1429
1430 // Walk the users of the function.
1431 for (auto &U : F->uses()) {
1432 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1433 // The index argument from vload_half.
1434 auto Arg0 = CI->getOperand(0);
1435
1436 // The pointer argument from vload_half.
1437 auto Arg1 = CI->getOperand(1);
1438
1439 auto IntTy = Type::getInt32Ty(M.getContext());
1440 auto Int2Ty = VectorType::get(IntTy, 2);
1441 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1442 auto NewPointerTy = PointerType::get(
1443 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1444 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1445
1446 // Cast the half* pointer to int2*.
1447 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1448
1449 // Index into the correct address of the casted pointer.
1450 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1451
1452 // Load from the int2* we casted to.
1453 auto Load = new LoadInst(Index, "", CI);
1454
1455 // Extract each element from the loaded int2.
1456 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1457 "", CI);
1458 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1459 "", CI);
1460
1461 // Our intrinsic to unpack a float2 from an int.
1462 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1463
1464 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1465
1466 // Get the lower (x & y) components of our final float4.
1467 auto Lo = CallInst::Create(NewF, X, "", CI);
1468
1469 // Get the higher (z & w) components of our final float4.
1470 auto Hi = CallInst::Create(NewF, Y, "", CI);
1471
1472 Constant *ShuffleMask[4] = {
1473 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1474 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1475
1476 // Combine our two float2's into one float4.
1477 auto Combine = new ShuffleVectorInst(
1478 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1479
1480 CI->replaceAllUsesWith(Combine);
1481
1482 // Lastly, remember to remove the user.
1483 ToRemoves.push_back(CI);
1484 }
1485 }
1486
1487 Changed = !ToRemoves.empty();
1488
1489 // And cleanup the calls we don't use anymore.
1490 for (auto V : ToRemoves) {
1491 V->eraseFromParent();
1492 }
1493
1494 // And remove the function we don't need either too.
1495 F->eraseFromParent();
1496 }
1497 }
1498
1499 return Changed;
1500}
1501
David Neto6ad93232018-06-07 15:42:58 -07001502bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
1503 bool Changed = false;
1504
1505 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1506 //
1507 // %u = load i32 %ptr
1508 // %fxy = call <2 x float> Unpack2xHalf(u)
1509 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1510 const std::vector<const char *> Map = {
1511 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
1512 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
1513 "_Z20__clspv_vloada_half2jPKj", // private
1514 };
1515
1516 for (auto Name : Map) {
1517 // If we find a function with the matching name.
1518 if (auto F = M.getFunction(Name)) {
1519 SmallVector<Instruction *, 4> ToRemoves;
1520
1521 // Walk the users of the function.
1522 for (auto &U : F->uses()) {
1523 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
1524 auto Index = CI->getOperand(0);
1525 auto Ptr = CI->getOperand(1);
1526
1527 auto IntTy = Type::getInt32Ty(M.getContext());
1528 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1529 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1530
1531 auto IndexedPtr =
1532 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
1533 auto Load = new LoadInst(IndexedPtr, "", CI);
1534
1535 // Our intrinsic to unpack a float2 from an int.
1536 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1537
1538 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1539
1540 // Get our final float2.
1541 auto Result = CallInst::Create(NewF, Load, "", CI);
1542
1543 CI->replaceAllUsesWith(Result);
1544
1545 // Lastly, remember to remove the user.
1546 ToRemoves.push_back(CI);
1547 }
1548 }
1549
1550 Changed = true;
1551
1552 // And cleanup the calls we don't use anymore.
1553 for (auto V : ToRemoves) {
1554 V->eraseFromParent();
1555 }
1556
1557 // And remove the function we don't need either too.
1558 F->eraseFromParent();
1559 }
1560 }
1561
1562 return Changed;
1563}
1564
1565bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
1566 bool Changed = false;
1567
1568 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1569 //
1570 // %u2 = load <2 x i32> %ptr
1571 // %u2xy = extractelement %u2, 0
1572 // %u2zw = extractelement %u2, 1
1573 // %fxy = call <2 x float> Unpack2xHalf(uint)
1574 // %fzw = call <2 x float> Unpack2xHalf(uint)
1575 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1576 const std::vector<const char *> Map = {
1577 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
1578 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
1579 "_Z20__clspv_vloada_half4jPKDv2_j", // private
1580 };
1581
1582 for (auto Name : Map) {
1583 // If we find a function with the matching name.
1584 if (auto F = M.getFunction(Name)) {
1585 SmallVector<Instruction *, 4> ToRemoves;
1586
1587 // Walk the users of the function.
1588 for (auto &U : F->uses()) {
1589 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1590 auto Index = CI->getOperand(0);
1591 auto Ptr = CI->getOperand(1);
1592
1593 auto IntTy = Type::getInt32Ty(M.getContext());
1594 auto Int2Ty = VectorType::get(IntTy, 2);
1595 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1596 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1597
1598 auto IndexedPtr =
1599 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
1600 auto Load = new LoadInst(IndexedPtr, "", CI);
1601
1602 // Extract each element from the loaded int2.
1603 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1604 "", CI);
1605 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1606 "", CI);
1607
1608 // Our intrinsic to unpack a float2 from an int.
1609 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1610
1611 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1612
1613 // Get the lower (x & y) components of our final float4.
1614 auto Lo = CallInst::Create(NewF, X, "", CI);
1615
1616 // Get the higher (z & w) components of our final float4.
1617 auto Hi = CallInst::Create(NewF, Y, "", CI);
1618
1619 Constant *ShuffleMask[4] = {
1620 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1621 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1622
1623 // Combine our two float2's into one float4.
1624 auto Combine = new ShuffleVectorInst(
1625 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1626
1627 CI->replaceAllUsesWith(Combine);
1628
1629 // Lastly, remember to remove the user.
1630 ToRemoves.push_back(CI);
1631 }
1632 }
1633
1634 Changed = true;
1635
1636 // And cleanup the calls we don't use anymore.
1637 for (auto V : ToRemoves) {
1638 V->eraseFromParent();
1639 }
1640
1641 // And remove the function we don't need either too.
1642 F->eraseFromParent();
1643 }
1644 }
1645
1646 return Changed;
1647}
1648
David Neto22f144c2017-06-12 14:26:21 -04001649bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1650 bool Changed = false;
1651
1652 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1653 "_Z15vstore_half_rtefjPU3AS1Dh",
1654 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1655
1656 for (auto Name : Map) {
1657 // If we find a function with the matching name.
1658 if (auto F = M.getFunction(Name)) {
1659 SmallVector<Instruction *, 4> ToRemoves;
1660
1661 // Walk the users of the function.
1662 for (auto &U : F->uses()) {
1663 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1664 // The value to store.
1665 auto Arg0 = CI->getOperand(0);
1666
1667 // The index argument from vstore_half.
1668 auto Arg1 = CI->getOperand(1);
1669
1670 // The pointer argument from vstore_half.
1671 auto Arg2 = CI->getOperand(2);
1672
David Neto22f144c2017-06-12 14:26:21 -04001673 auto IntTy = Type::getInt32Ty(M.getContext());
1674 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001675 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001676 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001677
1678 // Our intrinsic to pack a float2 to an int.
1679 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1680
1681 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1682
1683 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001684 auto TempVec =
1685 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1686 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001687
1688 // Pack the float2 -> half2 (in an int).
1689 auto X = CallInst::Create(NewF, TempVec, "", CI);
1690
David Neto482550a2018-03-24 05:21:07 -07001691 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001692 auto ShortTy = Type::getInt16Ty(M.getContext());
1693 auto ShortPointerTy = PointerType::get(
1694 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001695
David Neto17852de2017-05-29 17:29:31 -04001696 // Truncate our i32 to an i16.
1697 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001698
David Neto17852de2017-05-29 17:29:31 -04001699 // Cast the half* pointer to short*.
1700 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001701
David Neto17852de2017-05-29 17:29:31 -04001702 // Index into the correct address of the casted pointer.
1703 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001704
David Neto17852de2017-05-29 17:29:31 -04001705 // Store to the int* we casted to.
1706 auto Store = new StoreInst(Trunc, Index, CI);
1707
1708 CI->replaceAllUsesWith(Store);
1709 } else {
1710 // We can only write to 32-bit aligned words.
1711 //
1712 // Assuming base is aligned to 32-bits, replace the equivalent of
1713 // vstore_half(value, index, base)
1714 // with:
1715 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1716 // uint32_t write_to_upper_half = index & 1u;
1717 // uint32_t shift = write_to_upper_half << 4;
1718 //
1719 // // Pack the float value as a half number in bottom 16 bits
1720 // // of an i32.
1721 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1722 //
1723 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1724 // ^ ((packed & 0xffff) << shift)
1725 // // We only need relaxed consistency, but OpenCL 1.2 only has
1726 // // sequentially consistent atomics.
1727 // // TODO(dneto): Use relaxed consistency.
1728 // atomic_xor(target_ptr, xor_value)
1729 auto IntPointerTy = PointerType::get(
1730 IntTy, Arg2->getType()->getPointerAddressSpace());
1731
1732 auto Four = ConstantInt::get(IntTy, 4);
1733 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1734
1735 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1736 // Compute index / 2
1737 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1738 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1739 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1740 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1741 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1742 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1743 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1744
1745 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1746 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1747 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1748
1749 // Generate the call to atomi_xor.
1750 SmallVector<Type *, 5> ParamTypes;
1751 // The pointer type.
1752 ParamTypes.push_back(IntPointerTy);
1753 // The Types for memory scope, semantics, and value.
1754 ParamTypes.push_back(IntTy);
1755 ParamTypes.push_back(IntTy);
1756 ParamTypes.push_back(IntTy);
1757 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1758 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1759
1760 const auto ConstantScopeDevice =
1761 ConstantInt::get(IntTy, spv::ScopeDevice);
1762 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1763 // (SPIR-V Workgroup).
1764 const auto AddrSpaceSemanticsBits =
1765 IntPointerTy->getPointerAddressSpace() == 1
1766 ? spv::MemorySemanticsUniformMemoryMask
1767 : spv::MemorySemanticsWorkgroupMemoryMask;
1768
1769 // We're using relaxed consistency here.
1770 const auto ConstantMemorySemantics =
1771 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1772 AddrSpaceSemanticsBits);
1773
1774 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1775 ConstantMemorySemantics, ValueToXor};
1776 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1777 }
David Neto22f144c2017-06-12 14:26:21 -04001778
1779 // Lastly, remember to remove the user.
1780 ToRemoves.push_back(CI);
1781 }
1782 }
1783
1784 Changed = !ToRemoves.empty();
1785
1786 // And cleanup the calls we don't use anymore.
1787 for (auto V : ToRemoves) {
1788 V->eraseFromParent();
1789 }
1790
1791 // And remove the function we don't need either too.
1792 F->eraseFromParent();
1793 }
1794 }
1795
1796 return Changed;
1797}
1798
1799bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1800 bool Changed = false;
1801
David Netoe2871522018-06-08 11:09:54 -07001802 const std::vector<const char *> Map = {
1803 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
1804 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
1805 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
1806 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
1807 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1808 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
1809 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
1810 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
1811 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
1812 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
1813 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
1814 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
1815 };
David Neto22f144c2017-06-12 14:26:21 -04001816
1817 for (auto Name : Map) {
1818 // If we find a function with the matching name.
1819 if (auto F = M.getFunction(Name)) {
1820 SmallVector<Instruction *, 4> ToRemoves;
1821
1822 // Walk the users of the function.
1823 for (auto &U : F->uses()) {
1824 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1825 // The value to store.
1826 auto Arg0 = CI->getOperand(0);
1827
1828 // The index argument from vstore_half.
1829 auto Arg1 = CI->getOperand(1);
1830
1831 // The pointer argument from vstore_half.
1832 auto Arg2 = CI->getOperand(2);
1833
1834 auto IntTy = Type::getInt32Ty(M.getContext());
1835 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1836 auto NewPointerTy = PointerType::get(
1837 IntTy, Arg2->getType()->getPointerAddressSpace());
1838 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1839
1840 // Our intrinsic to pack a float2 to an int.
1841 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1842
1843 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1844
1845 // Turn the packed x & y into the final packing.
1846 auto X = CallInst::Create(NewF, Arg0, "", CI);
1847
1848 // Cast the half* pointer to int*.
1849 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1850
1851 // Index into the correct address of the casted pointer.
1852 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1853
1854 // Store to the int* we casted to.
1855 auto Store = new StoreInst(X, Index, CI);
1856
1857 CI->replaceAllUsesWith(Store);
1858
1859 // Lastly, remember to remove the user.
1860 ToRemoves.push_back(CI);
1861 }
1862 }
1863
1864 Changed = !ToRemoves.empty();
1865
1866 // And cleanup the calls we don't use anymore.
1867 for (auto V : ToRemoves) {
1868 V->eraseFromParent();
1869 }
1870
1871 // And remove the function we don't need either too.
1872 F->eraseFromParent();
1873 }
1874 }
1875
1876 return Changed;
1877}
1878
1879bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1880 bool Changed = false;
1881
David Netoe2871522018-06-08 11:09:54 -07001882 const std::vector<const char *> Map = {
1883 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
1884 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
1885 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
1886 "_Z13vstorea_half4Dv4_fjPDh", // private
1887 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1888 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
1889 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
1890 "_Z17vstorea_half4_rteDv4_fjPDh", // private
1891 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
1892 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
1893 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
1894 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
1895 };
David Neto22f144c2017-06-12 14:26:21 -04001896
1897 for (auto Name : Map) {
1898 // If we find a function with the matching name.
1899 if (auto F = M.getFunction(Name)) {
1900 SmallVector<Instruction *, 4> ToRemoves;
1901
1902 // Walk the users of the function.
1903 for (auto &U : F->uses()) {
1904 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1905 // The value to store.
1906 auto Arg0 = CI->getOperand(0);
1907
1908 // The index argument from vstore_half.
1909 auto Arg1 = CI->getOperand(1);
1910
1911 // The pointer argument from vstore_half.
1912 auto Arg2 = CI->getOperand(2);
1913
1914 auto IntTy = Type::getInt32Ty(M.getContext());
1915 auto Int2Ty = VectorType::get(IntTy, 2);
1916 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1917 auto NewPointerTy = PointerType::get(
1918 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1919 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1920
1921 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1922 ConstantInt::get(IntTy, 1)};
1923
1924 // Extract out the x & y components of our to store value.
1925 auto Lo =
1926 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1927 ConstantVector::get(LoShuffleMask), "", CI);
1928
1929 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1930 ConstantInt::get(IntTy, 3)};
1931
1932 // Extract out the z & w components of our to store value.
1933 auto Hi =
1934 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1935 ConstantVector::get(HiShuffleMask), "", CI);
1936
1937 // Our intrinsic to pack a float2 to an int.
1938 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1939
1940 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1941
1942 // Turn the packed x & y into the final component of our int2.
1943 auto X = CallInst::Create(NewF, Lo, "", CI);
1944
1945 // Turn the packed z & w into the final component of our int2.
1946 auto Y = CallInst::Create(NewF, Hi, "", CI);
1947
1948 auto Combine = InsertElementInst::Create(
1949 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1950 Combine = InsertElementInst::Create(
1951 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1952
1953 // Cast the half* pointer to int2*.
1954 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1955
1956 // Index into the correct address of the casted pointer.
1957 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1958
1959 // Store to the int2* we casted to.
1960 auto Store = new StoreInst(Combine, Index, CI);
1961
1962 CI->replaceAllUsesWith(Store);
1963
1964 // Lastly, remember to remove the user.
1965 ToRemoves.push_back(CI);
1966 }
1967 }
1968
1969 Changed = !ToRemoves.empty();
1970
1971 // And cleanup the calls we don't use anymore.
1972 for (auto V : ToRemoves) {
1973 V->eraseFromParent();
1974 }
1975
1976 // And remove the function we don't need either too.
1977 F->eraseFromParent();
1978 }
1979 }
1980
1981 return Changed;
1982}
1983
1984bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1985 bool Changed = false;
1986
1987 const std::map<const char *, const char*> Map = {
1988 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1989 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1990 };
1991
1992 for (auto Pair : Map) {
1993 // If we find a function with the matching name.
1994 if (auto F = M.getFunction(Pair.first)) {
1995 SmallVector<Instruction *, 4> ToRemoves;
1996
1997 // Walk the users of the function.
1998 for (auto &U : F->uses()) {
1999 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2000 // The image.
2001 auto Arg0 = CI->getOperand(0);
2002
2003 // The sampler.
2004 auto Arg1 = CI->getOperand(1);
2005
2006 // The coordinate (integer type that we can't handle).
2007 auto Arg2 = CI->getOperand(2);
2008
2009 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2010
2011 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2012
2013 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2014
2015 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2016
2017 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2018
2019 CI->replaceAllUsesWith(NewCI);
2020
2021 // Lastly, remember to remove the user.
2022 ToRemoves.push_back(CI);
2023 }
2024 }
2025
2026 Changed = !ToRemoves.empty();
2027
2028 // And cleanup the calls we don't use anymore.
2029 for (auto V : ToRemoves) {
2030 V->eraseFromParent();
2031 }
2032
2033 // And remove the function we don't need either too.
2034 F->eraseFromParent();
2035 }
2036 }
2037
2038 return Changed;
2039}
2040
2041bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2042 bool Changed = false;
2043
2044 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002045 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
2046 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
2047 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
2048 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
2049 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
2050 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002051 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
2052 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
2053 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
2054 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
2055 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01002056 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002057
2058 for (auto Pair : Map) {
2059 // If we find a function with the matching name.
2060 if (auto F = M.getFunction(Pair.first)) {
2061 SmallVector<Instruction *, 4> ToRemoves;
2062
2063 // Walk the users of the function.
2064 for (auto &U : F->uses()) {
2065 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2066 auto FType = F->getFunctionType();
2067 SmallVector<Type *, 5> ParamTypes;
2068
2069 // The pointer type.
2070 ParamTypes.push_back(FType->getParamType(0));
2071
2072 auto IntTy = Type::getInt32Ty(M.getContext());
2073
2074 // The memory scope type.
2075 ParamTypes.push_back(IntTy);
2076
2077 // The memory semantics type.
2078 ParamTypes.push_back(IntTy);
2079
2080 if (2 < CI->getNumArgOperands()) {
2081 // The unequal memory semantics type.
2082 ParamTypes.push_back(IntTy);
2083
2084 // The value type.
2085 ParamTypes.push_back(FType->getParamType(2));
2086
2087 // The comparator type.
2088 ParamTypes.push_back(FType->getParamType(1));
2089 } else if (1 < CI->getNumArgOperands()) {
2090 // The value type.
2091 ParamTypes.push_back(FType->getParamType(1));
2092 }
2093
2094 auto NewFType =
2095 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2096 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2097
2098 // We need to map the OpenCL constants to the SPIR-V equivalents.
2099 const auto ConstantScopeDevice =
2100 ConstantInt::get(IntTy, spv::ScopeDevice);
2101 const auto ConstantMemorySemantics = ConstantInt::get(
2102 IntTy, spv::MemorySemanticsUniformMemoryMask |
2103 spv::MemorySemanticsSequentiallyConsistentMask);
2104
2105 SmallVector<Value *, 5> Params;
2106
2107 // The pointer.
2108 Params.push_back(CI->getArgOperand(0));
2109
2110 // The memory scope.
2111 Params.push_back(ConstantScopeDevice);
2112
2113 // The memory semantics.
2114 Params.push_back(ConstantMemorySemantics);
2115
2116 if (2 < CI->getNumArgOperands()) {
2117 // The unequal memory semantics.
2118 Params.push_back(ConstantMemorySemantics);
2119
2120 // The value.
2121 Params.push_back(CI->getArgOperand(2));
2122
2123 // The comparator.
2124 Params.push_back(CI->getArgOperand(1));
2125 } else if (1 < CI->getNumArgOperands()) {
2126 // The value.
2127 Params.push_back(CI->getArgOperand(1));
2128 }
2129
2130 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2131
2132 CI->replaceAllUsesWith(NewCI);
2133
2134 // Lastly, remember to remove the user.
2135 ToRemoves.push_back(CI);
2136 }
2137 }
2138
2139 Changed = !ToRemoves.empty();
2140
2141 // And cleanup the calls we don't use anymore.
2142 for (auto V : ToRemoves) {
2143 V->eraseFromParent();
2144 }
2145
2146 // And remove the function we don't need either too.
2147 F->eraseFromParent();
2148 }
2149 }
2150
Neil Henning39672102017-09-29 14:33:13 +01002151 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002152 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2153 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2154 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2155 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2156 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2157 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2158 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2159 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2160 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2161 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2162 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2163 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2164 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2165 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2166 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2167 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002168 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2169 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2170 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2171 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2172 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2173 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2174 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2175 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2176 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2177 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2178 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2179 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2180 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2181 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2182 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2183 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
2184
2185 for (auto Pair : Map2) {
2186 // If we find a function with the matching name.
2187 if (auto F = M.getFunction(Pair.first)) {
2188 SmallVector<Instruction *, 4> ToRemoves;
2189
2190 // Walk the users of the function.
2191 for (auto &U : F->uses()) {
2192 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2193 auto AtomicOp = new AtomicRMWInst(
2194 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2195 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2196
2197 CI->replaceAllUsesWith(AtomicOp);
2198
2199 // Lastly, remember to remove the user.
2200 ToRemoves.push_back(CI);
2201 }
2202 }
2203
2204 Changed = !ToRemoves.empty();
2205
2206 // And cleanup the calls we don't use anymore.
2207 for (auto V : ToRemoves) {
2208 V->eraseFromParent();
2209 }
2210
2211 // And remove the function we don't need either too.
2212 F->eraseFromParent();
2213 }
2214 }
2215
David Neto22f144c2017-06-12 14:26:21 -04002216 return Changed;
2217}
2218
2219bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
2220 bool Changed = false;
2221
2222 // If we find a function with the matching name.
2223 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
2224 SmallVector<Instruction *, 4> ToRemoves;
2225
2226 auto IntTy = Type::getInt32Ty(M.getContext());
2227 auto FloatTy = Type::getFloatTy(M.getContext());
2228
2229 Constant *DownShuffleMask[3] = {
2230 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2231 ConstantInt::get(IntTy, 2)};
2232
2233 Constant *UpShuffleMask[4] = {
2234 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2235 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2236
2237 Constant *FloatVec[3] = {
2238 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2239 };
2240
2241 // Walk the users of the function.
2242 for (auto &U : F->uses()) {
2243 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2244 auto Vec4Ty = CI->getArgOperand(0)->getType();
2245 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2246 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2247 auto Vec3Ty = Arg0->getType();
2248
2249 auto NewFType =
2250 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
2251
2252 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
2253
2254 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
2255
2256 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
2257
2258 CI->replaceAllUsesWith(Result);
2259
2260 // Lastly, remember to remove the user.
2261 ToRemoves.push_back(CI);
2262 }
2263 }
2264
2265 Changed = !ToRemoves.empty();
2266
2267 // And cleanup the calls we don't use anymore.
2268 for (auto V : ToRemoves) {
2269 V->eraseFromParent();
2270 }
2271
2272 // And remove the function we don't need either too.
2273 F->eraseFromParent();
2274 }
2275
2276 return Changed;
2277}
David Neto62653202017-10-16 19:05:18 -04002278
2279bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2280 bool Changed = false;
2281
2282 // OpenCL's float result = fract(float x, float* ptr)
2283 //
2284 // In the LLVM domain:
2285 //
2286 // %floor_result = call spir_func float @floor(float %x)
2287 // store float %floor_result, float * %ptr
2288 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2289 // %result = call spir_func float
2290 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2291 //
2292 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2293 // and clspv.fract occur in the SPIR-V generator pass:
2294 //
2295 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2296 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2297 // ...
2298 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2299 // OpStore %ptr %floor_result
2300 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2301 // %fract_result = OpExtInst %float
2302 // %glsl_ext Fmin %fract_intermediate %just_under_1
2303
2304
2305 using std::string;
2306
2307 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2308 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2309 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2310 auto make_quad = [](const char *a, const char *b, const char *c,
2311 const char *d) {
2312 return std::tuple<const char *, const char *, const char *, const char *>(
2313 a, b, c, d);
2314 };
2315 const std::vector<QuadType> Functions = {
2316 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2317 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2318 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2319 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2320 };
2321
2322 for (auto& quad : Functions) {
2323 const StringRef fract_name(std::get<0>(quad));
2324
2325 // If we find a function with the matching name.
2326 if (auto F = M.getFunction(fract_name)) {
2327 if (F->use_begin() == F->use_end())
2328 continue;
2329
2330 // We have some uses.
2331 Changed = true;
2332
2333 auto& Context = M.getContext();
2334
2335 const StringRef floor_name(std::get<1>(quad));
2336 const StringRef fmin_name(std::get<2>(quad));
2337 const StringRef clspv_fract_name(std::get<3>(quad));
2338
2339 // This is either float or a float vector. All the float-like
2340 // types are this type.
2341 auto result_ty = F->getReturnType();
2342
2343 Function* fmin_fn = M.getFunction(fmin_name);
2344 if (!fmin_fn) {
2345 // Make the fmin function.
2346 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
2347 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002348 fmin_fn->addFnAttr(Attribute::ReadNone);
2349 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2350 }
2351
2352 Function* floor_fn = M.getFunction(floor_name);
2353 if (!floor_fn) {
2354 // Make the floor function.
2355 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2356 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002357 floor_fn->addFnAttr(Attribute::ReadNone);
2358 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2359 }
2360
2361 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
2362 if (!clspv_fract_fn) {
2363 // Make the clspv_fract function.
2364 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2365 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002366 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2367 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2368 }
2369
2370 // Number of significant significand bits, whether represented or not.
2371 unsigned num_significand_bits;
2372 switch (result_ty->getScalarType()->getTypeID()) {
2373 case Type::HalfTyID:
2374 num_significand_bits = 11;
2375 break;
2376 case Type::FloatTyID:
2377 num_significand_bits = 24;
2378 break;
2379 case Type::DoubleTyID:
2380 num_significand_bits = 53;
2381 break;
2382 default:
2383 assert(false && "Unhandled float type when processing fract builtin");
2384 break;
2385 }
2386 // Beware that the disassembler displays this value as
2387 // OpConstant %float 1
2388 // which is not quite right.
2389 const double kJustUnderOneScalar =
2390 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2391
2392 Constant *just_under_one =
2393 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2394 if (result_ty->isVectorTy()) {
2395 just_under_one = ConstantVector::getSplat(
2396 result_ty->getVectorNumElements(), just_under_one);
2397 }
2398
2399 IRBuilder<> Builder(Context);
2400
2401 SmallVector<Instruction *, 4> ToRemoves;
2402
2403 // Walk the users of the function.
2404 for (auto &U : F->uses()) {
2405 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2406
2407 Builder.SetInsertPoint(CI);
2408 auto arg = CI->getArgOperand(0);
2409 auto ptr = CI->getArgOperand(1);
2410
2411 // Compute floor result and store it.
2412 auto floor = Builder.CreateCall(floor_fn, {arg});
2413 Builder.CreateStore(floor, ptr);
2414
2415 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2416 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2417
2418 CI->replaceAllUsesWith(fract_result);
2419
2420 // Lastly, remember to remove the user.
2421 ToRemoves.push_back(CI);
2422 }
2423 }
2424
2425 // And cleanup the calls we don't use anymore.
2426 for (auto V : ToRemoves) {
2427 V->eraseFromParent();
2428 }
2429
2430 // And remove the function we don't need either too.
2431 F->eraseFromParent();
2432 }
2433 }
2434
2435 return Changed;
2436}