blob: 2dc81b59728cdf88b39c0bcfc55af5245b239e62 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
David Neto118188e2018-08-24 11:27:54 -040019#include "llvm/IR/Constants.h"
20#include "llvm/IR/Instructions.h"
21#include "llvm/IR/IRBuilder.h"
22#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000023#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040024#include "llvm/Pass.h"
25#include "llvm/Support/CommandLine.h"
26#include "llvm/Support/raw_ostream.h"
27#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040028
David Neto118188e2018-08-24 11:27:54 -040029#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040030
David Neto482550a2018-03-24 05:21:07 -070031#include "clspv/Option.h"
32
David Neto22f144c2017-06-12 14:26:21 -040033using namespace llvm;
34
35#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
36
37namespace {
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)). Returns 0 when |v| is 0.
//
// NOTE: despite its name this does not count leading zeros. The callers
// visible in this file only subtract two results from each other to obtain
// the shift distance between two single-bit masks, for which the bit index is
// exactly what is required.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the highest set bit: look at the upper part of the
  // remaining window and, if anything is set there, drop the lower part and
  // remember how far we shifted.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      index |= width;
    }
  }

  return index;
}
57
58Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
59 if (1 == elements) {
60 return Type::getInt1Ty(C);
61 } else {
62 return VectorType::get(Type::getInt1Ty(C), elements);
63 }
64}
65
66struct ReplaceOpenCLBuiltinPass final : public ModulePass {
67 static char ID;
68 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
69
70 bool runOnModule(Module &M) override;
71 bool replaceRecip(Module &M);
72 bool replaceDivide(Module &M);
73 bool replaceExp10(Module &M);
74 bool replaceLog10(Module &M);
75 bool replaceBarrier(Module &M);
76 bool replaceMemFence(Module &M);
77 bool replaceRelational(Module &M);
78 bool replaceIsInfAndIsNan(Module &M);
79 bool replaceAllAndAny(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +000080 bool replaceSelect(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040081 bool replaceSignbit(Module &M);
82 bool replaceMadandMad24andMul24(Module &M);
83 bool replaceVloadHalf(Module &M);
84 bool replaceVloadHalf2(Module &M);
85 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -070086 bool replaceClspvVloadaHalf2(Module &M);
87 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040088 bool replaceVstoreHalf(Module &M);
89 bool replaceVstoreHalf2(Module &M);
90 bool replaceVstoreHalf4(Module &M);
91 bool replaceReadImageF(Module &M);
92 bool replaceAtomics(Module &M);
93 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -040094 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -070095 bool replaceVload(Module &M);
96 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -040097};
98}
99
100char ReplaceOpenCLBuiltinPass::ID = 0;
101static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
102 "Replace OpenCL Builtins Pass");
103
104namespace clspv {
105ModulePass *createReplaceOpenCLBuiltinPass() {
106 return new ReplaceOpenCLBuiltinPass();
107}
108}
109
110bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
111 bool Changed = false;
112
113 Changed |= replaceRecip(M);
114 Changed |= replaceDivide(M);
115 Changed |= replaceExp10(M);
116 Changed |= replaceLog10(M);
117 Changed |= replaceBarrier(M);
118 Changed |= replaceMemFence(M);
119 Changed |= replaceRelational(M);
120 Changed |= replaceIsInfAndIsNan(M);
121 Changed |= replaceAllAndAny(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000122 Changed |= replaceSelect(M);
David Neto22f144c2017-06-12 14:26:21 -0400123 Changed |= replaceSignbit(M);
124 Changed |= replaceMadandMad24andMul24(M);
125 Changed |= replaceVloadHalf(M);
126 Changed |= replaceVloadHalf2(M);
127 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700128 Changed |= replaceClspvVloadaHalf2(M);
129 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400130 Changed |= replaceVstoreHalf(M);
131 Changed |= replaceVstoreHalf2(M);
132 Changed |= replaceVstoreHalf4(M);
133 Changed |= replaceReadImageF(M);
134 Changed |= replaceAtomics(M);
135 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400136 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700137 Changed |= replaceVload(M);
138 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400139
140 return Changed;
141}
142
143bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
144 bool Changed = false;
145
146 const char *Names[] = {
147 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
148 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
149 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
150 };
151
152 for (auto Name : Names) {
153 // If we find a function with the matching name.
154 if (auto F = M.getFunction(Name)) {
155 SmallVector<Instruction *, 4> ToRemoves;
156
157 // Walk the users of the function.
158 for (auto &U : F->uses()) {
159 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
160 // Recip has one arg.
161 auto Arg = CI->getOperand(0);
162
163 auto Div = BinaryOperator::Create(
164 Instruction::FDiv, ConstantFP::get(Arg->getType(), 1.0), Arg, "",
165 CI);
166
167 CI->replaceAllUsesWith(Div);
168
169 // Lastly, remember to remove the user.
170 ToRemoves.push_back(CI);
171 }
172 }
173
174 Changed = !ToRemoves.empty();
175
176 // And cleanup the calls we don't use anymore.
177 for (auto V : ToRemoves) {
178 V->eraseFromParent();
179 }
180
181 // And remove the function we don't need either too.
182 F->eraseFromParent();
183 }
184 }
185
186 return Changed;
187}
188
189bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
190 bool Changed = false;
191
192 const char *Names[] = {
193 "_Z11half_divideff", "_Z13native_divideff",
194 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
195 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
196 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
197 };
198
199 for (auto Name : Names) {
200 // If we find a function with the matching name.
201 if (auto F = M.getFunction(Name)) {
202 SmallVector<Instruction *, 4> ToRemoves;
203
204 // Walk the users of the function.
205 for (auto &U : F->uses()) {
206 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
207 auto Div = BinaryOperator::Create(
208 Instruction::FDiv, CI->getOperand(0), CI->getOperand(1), "", CI);
209
210 CI->replaceAllUsesWith(Div);
211
212 // Lastly, remember to remove the user.
213 ToRemoves.push_back(CI);
214 }
215 }
216
217 Changed = !ToRemoves.empty();
218
219 // And cleanup the calls we don't use anymore.
220 for (auto V : ToRemoves) {
221 V->eraseFromParent();
222 }
223
224 // And remove the function we don't need either too.
225 F->eraseFromParent();
226 }
227 }
228
229 return Changed;
230}
231
232bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
233 bool Changed = false;
234
235 const std::map<const char *, const char *> Map = {
236 {"_Z5exp10f", "_Z3expf"},
237 {"_Z10half_exp10f", "_Z8half_expf"},
238 {"_Z12native_exp10f", "_Z10native_expf"},
239 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
240 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
241 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
242 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
243 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
244 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
245 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
246 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
247 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
248
249 for (auto Pair : Map) {
250 // If we find a function with the matching name.
251 if (auto F = M.getFunction(Pair.first)) {
252 SmallVector<Instruction *, 4> ToRemoves;
253
254 // Walk the users of the function.
255 for (auto &U : F->uses()) {
256 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
257 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
258
259 auto Arg = CI->getOperand(0);
260
261 // Constant of the natural log of 10 (ln(10)).
262 const double Ln10 =
263 2.302585092994045684017991454684364207601101488628772976033;
264
265 auto Mul = BinaryOperator::Create(
266 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
267 CI);
268
269 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
270
271 CI->replaceAllUsesWith(NewCI);
272
273 // Lastly, remember to remove the user.
274 ToRemoves.push_back(CI);
275 }
276 }
277
278 Changed = !ToRemoves.empty();
279
280 // And cleanup the calls we don't use anymore.
281 for (auto V : ToRemoves) {
282 V->eraseFromParent();
283 }
284
285 // And remove the function we don't need either too.
286 F->eraseFromParent();
287 }
288 }
289
290 return Changed;
291}
292
293bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
294 bool Changed = false;
295
296 const std::map<const char *, const char *> Map = {
297 {"_Z5log10f", "_Z3logf"},
298 {"_Z10half_log10f", "_Z8half_logf"},
299 {"_Z12native_log10f", "_Z10native_logf"},
300 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
301 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
302 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
303 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
304 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
305 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
306 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
307 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
308 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
309
310 for (auto Pair : Map) {
311 // If we find a function with the matching name.
312 if (auto F = M.getFunction(Pair.first)) {
313 SmallVector<Instruction *, 4> ToRemoves;
314
315 // Walk the users of the function.
316 for (auto &U : F->uses()) {
317 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
318 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
319
320 auto Arg = CI->getOperand(0);
321
322 // Constant of the reciprocal of the natural log of 10 (ln(10)).
323 const double Ln10 =
324 0.434294481903251827651128918916605082294397005803666566114;
325
326 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
327
328 auto Mul = BinaryOperator::Create(
329 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
330 "", CI);
331
332 CI->replaceAllUsesWith(Mul);
333
334 // Lastly, remember to remove the user.
335 ToRemoves.push_back(CI);
336 }
337 }
338
339 Changed = !ToRemoves.empty();
340
341 // And cleanup the calls we don't use anymore.
342 for (auto V : ToRemoves) {
343 V->eraseFromParent();
344 }
345
346 // And remove the function we don't need either too.
347 F->eraseFromParent();
348 }
349 }
350
351 return Changed;
352}
353
354bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
355 bool Changed = false;
356
357 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
358
359 const std::map<const char *, const char *> Map = {
360 {"_Z7barrierj", "__spirv_control_barrier"}};
361
362 for (auto Pair : Map) {
363 // If we find a function with the matching name.
364 if (auto F = M.getFunction(Pair.first)) {
365 SmallVector<Instruction *, 4> ToRemoves;
366
367 // Walk the users of the function.
368 for (auto &U : F->uses()) {
369 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
370 auto FType = F->getFunctionType();
371 SmallVector<Type *, 3> Params;
372 for (unsigned i = 0; i < 3; i++) {
373 Params.push_back(FType->getParamType(0));
374 }
375 auto NewFType =
376 FunctionType::get(FType->getReturnType(), Params, false);
377 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
378
379 auto Arg = CI->getOperand(0);
380
381 // We need to map the OpenCL constants to the SPIR-V equivalents.
382 const auto LocalMemFence =
383 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
384 const auto GlobalMemFence =
385 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
386 const auto ConstantSequentiallyConsistent = ConstantInt::get(
387 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
388 const auto ConstantScopeDevice =
389 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
390 const auto ConstantScopeWorkgroup =
391 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
392
393 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
394 const auto LocalMemFenceMask = BinaryOperator::Create(
395 Instruction::And, LocalMemFence, Arg, "", CI);
396 const auto WorkgroupShiftAmount =
397 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
398 clz(CLK_LOCAL_MEM_FENCE);
399 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
400 Instruction::Shl, LocalMemFenceMask,
401 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
402
403 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
404 const auto GlobalMemFenceMask = BinaryOperator::Create(
405 Instruction::And, GlobalMemFence, Arg, "", CI);
406 const auto UniformShiftAmount =
407 clz(spv::MemorySemanticsUniformMemoryMask) -
408 clz(CLK_GLOBAL_MEM_FENCE);
409 const auto MemorySemanticsUniform = BinaryOperator::Create(
410 Instruction::Shl, GlobalMemFenceMask,
411 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
412
413 // And combine the above together, also adding in
414 // MemorySemanticsSequentiallyConsistentMask.
415 auto MemorySemantics =
416 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
417 ConstantSequentiallyConsistent, "", CI);
418 MemorySemantics = BinaryOperator::Create(
419 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
420
421 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
422 // Device Scope, otherwise Workgroup Scope.
423 const auto Cmp =
424 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
425 GlobalMemFenceMask, GlobalMemFence, "", CI);
426 const auto MemoryScope = SelectInst::Create(
427 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
428
429 // Lastly, the Execution Scope is always Workgroup Scope.
430 const auto ExecutionScope = ConstantScopeWorkgroup;
431
432 auto NewCI = CallInst::Create(
433 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
434
435 CI->replaceAllUsesWith(NewCI);
436
437 // Lastly, remember to remove the user.
438 ToRemoves.push_back(CI);
439 }
440 }
441
442 Changed = !ToRemoves.empty();
443
444 // And cleanup the calls we don't use anymore.
445 for (auto V : ToRemoves) {
446 V->eraseFromParent();
447 }
448
449 // And remove the function we don't need either too.
450 F->eraseFromParent();
451 }
452 }
453
454 return Changed;
455}
456
457bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
458 bool Changed = false;
459
460 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
461
Neil Henning39672102017-09-29 14:33:13 +0100462 using Tuple = std::tuple<const char *, unsigned>;
463 const std::map<const char *, Tuple> Map = {
464 {"_Z9mem_fencej",
465 Tuple("__spirv_memory_barrier",
466 spv::MemorySemanticsSequentiallyConsistentMask)},
467 {"_Z14read_mem_fencej",
468 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
469 {"_Z15write_mem_fencej",
470 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400471
472 for (auto Pair : Map) {
473 // If we find a function with the matching name.
474 if (auto F = M.getFunction(Pair.first)) {
475 SmallVector<Instruction *, 4> ToRemoves;
476
477 // Walk the users of the function.
478 for (auto &U : F->uses()) {
479 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
480 auto FType = F->getFunctionType();
481 SmallVector<Type *, 2> Params;
482 for (unsigned i = 0; i < 2; i++) {
483 Params.push_back(FType->getParamType(0));
484 }
485 auto NewFType =
486 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100487 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400488
489 auto Arg = CI->getOperand(0);
490
491 // We need to map the OpenCL constants to the SPIR-V equivalents.
492 const auto LocalMemFence =
493 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
494 const auto GlobalMemFence =
495 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
496 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100497 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400498 const auto ConstantScopeDevice =
499 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
500
501 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
502 const auto LocalMemFenceMask = BinaryOperator::Create(
503 Instruction::And, LocalMemFence, Arg, "", CI);
504 const auto WorkgroupShiftAmount =
505 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
506 clz(CLK_LOCAL_MEM_FENCE);
507 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
508 Instruction::Shl, LocalMemFenceMask,
509 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
510
511 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
512 const auto GlobalMemFenceMask = BinaryOperator::Create(
513 Instruction::And, GlobalMemFence, Arg, "", CI);
514 const auto UniformShiftAmount =
515 clz(spv::MemorySemanticsUniformMemoryMask) -
516 clz(CLK_GLOBAL_MEM_FENCE);
517 const auto MemorySemanticsUniform = BinaryOperator::Create(
518 Instruction::Shl, GlobalMemFenceMask,
519 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
520
521 // And combine the above together, also adding in
522 // MemorySemanticsSequentiallyConsistentMask.
523 auto MemorySemantics =
524 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
525 ConstantMemorySemantics, "", CI);
526 MemorySemantics = BinaryOperator::Create(
527 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
528
529 // Memory Scope is always device.
530 const auto MemoryScope = ConstantScopeDevice;
531
532 auto NewCI =
533 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
534
535 CI->replaceAllUsesWith(NewCI);
536
537 // Lastly, remember to remove the user.
538 ToRemoves.push_back(CI);
539 }
540 }
541
542 Changed = !ToRemoves.empty();
543
544 // And cleanup the calls we don't use anymore.
545 for (auto V : ToRemoves) {
546 V->eraseFromParent();
547 }
548
549 // And remove the function we don't need either too.
550 F->eraseFromParent();
551 }
552 }
553
554 return Changed;
555}
556
557bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
558 bool Changed = false;
559
560 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
561 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
562 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
563 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
564 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
565 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
566 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
567 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
568 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
569 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
570 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
571 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
572 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
573 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
574 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
575 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
576 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
577 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
578 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
579 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
580 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
581 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
582 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
583 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
584 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
585 };
586
587 for (auto Pair : Map) {
588 // If we find a function with the matching name.
589 if (auto F = M.getFunction(Pair.first)) {
590 SmallVector<Instruction *, 4> ToRemoves;
591
592 // Walk the users of the function.
593 for (auto &U : F->uses()) {
594 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
595 // The predicate to use in the CmpInst.
596 auto Predicate = Pair.second.first;
597
598 // The value to return for true.
599 auto TrueValue =
600 ConstantInt::getSigned(CI->getType(), Pair.second.second);
601
602 // The value to return for false.
603 auto FalseValue = Constant::getNullValue(CI->getType());
604
605 auto Arg1 = CI->getOperand(0);
606 auto Arg2 = CI->getOperand(1);
607
608 const auto Cmp =
609 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
610
611 const auto Select =
612 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
613
614 CI->replaceAllUsesWith(Select);
615
616 // Lastly, remember to remove the user.
617 ToRemoves.push_back(CI);
618 }
619 }
620
621 Changed = !ToRemoves.empty();
622
623 // And cleanup the calls we don't use anymore.
624 for (auto V : ToRemoves) {
625 V->eraseFromParent();
626 }
627
628 // And remove the function we don't need either too.
629 F->eraseFromParent();
630 }
631 }
632
633 return Changed;
634}
635
636bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
637 bool Changed = false;
638
639 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
640 {"_Z5isinff", {"__spirv_isinff", 1}},
641 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
642 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
643 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
644 {"_Z5isnanf", {"__spirv_isnanf", 1}},
645 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
646 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
647 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
648 };
649
650 for (auto Pair : Map) {
651 // If we find a function with the matching name.
652 if (auto F = M.getFunction(Pair.first)) {
653 SmallVector<Instruction *, 4> ToRemoves;
654
655 // Walk the users of the function.
656 for (auto &U : F->uses()) {
657 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
658 const auto CITy = CI->getType();
659
660 // The fake SPIR-V intrinsic to generate.
661 auto SPIRVIntrinsic = Pair.second.first;
662
663 // The value to return for true.
664 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
665
666 // The value to return for false.
667 auto FalseValue = Constant::getNullValue(CITy);
668
669 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
670 M.getContext(),
671 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
672
673 auto NewFType =
674 FunctionType::get(CorrespondingBoolTy,
675 F->getFunctionType()->getParamType(0), false);
676
677 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
678
679 auto Arg = CI->getOperand(0);
680
681 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
682
683 const auto Select =
684 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
685
686 CI->replaceAllUsesWith(Select);
687
688 // Lastly, remember to remove the user.
689 ToRemoves.push_back(CI);
690 }
691 }
692
693 Changed = !ToRemoves.empty();
694
695 // And cleanup the calls we don't use anymore.
696 for (auto V : ToRemoves) {
697 V->eraseFromParent();
698 }
699
700 // And remove the function we don't need either too.
701 F->eraseFromParent();
702 }
703 }
704
705 return Changed;
706}
707
708bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
709 bool Changed = false;
710
711 const std::map<const char *, const char *> Map = {
712 {"_Z3alli", ""},
713 {"_Z3allDv2_i", "__spirv_allDv2_i"},
714 {"_Z3allDv3_i", "__spirv_allDv3_i"},
715 {"_Z3allDv4_i", "__spirv_allDv4_i"},
716 {"_Z3anyi", ""},
717 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
718 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
719 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
720 };
721
722 for (auto Pair : Map) {
723 // If we find a function with the matching name.
724 if (auto F = M.getFunction(Pair.first)) {
725 SmallVector<Instruction *, 4> ToRemoves;
726
727 // Walk the users of the function.
728 for (auto &U : F->uses()) {
729 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
730 // The fake SPIR-V intrinsic to generate.
731 auto SPIRVIntrinsic = Pair.second;
732
733 auto Arg = CI->getOperand(0);
734
735 Value *V;
736
737 // If we have a function to call, call it!
738 if (0 < strlen(SPIRVIntrinsic)) {
739 // The value for zero to compare against.
740 const auto ZeroValue = Constant::getNullValue(Arg->getType());
741
742 const auto Cmp = CmpInst::Create(
743 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
744 const auto NewFType = FunctionType::get(
745 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
746
747 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
748
749 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
750
751 // The value to return for true.
752 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
753
754 // The value to return for false.
755 const auto FalseValue = Constant::getNullValue(CI->getType());
756
757 V = SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
758 } else {
759 V = BinaryOperator::Create(Instruction::LShr, Arg,
760 ConstantInt::get(CI->getType(), 31), "",
761 CI);
762 }
763
764 CI->replaceAllUsesWith(V);
765
766 // Lastly, remember to remove the user.
767 ToRemoves.push_back(CI);
768 }
769 }
770
771 Changed = !ToRemoves.empty();
772
773 // And cleanup the calls we don't use anymore.
774 for (auto V : ToRemoves) {
775 V->eraseFromParent();
776 }
777
778 // And remove the function we don't need either too.
779 F->eraseFromParent();
780 }
781 }
782
783 return Changed;
784}
785
Kévin Petitf5b78a22018-10-25 14:32:17 +0000786bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
787 bool Changed = false;
788
789 for (auto const &SymVal : M.getValueSymbolTable()) {
790 // Skip symbols whose name doesn't match
791 if (!SymVal.getKey().startswith("_Z6select")) {
792 continue;
793 }
794 // Is there a function going by that name?
795 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
796
797 SmallVector<Instruction *, 4> ToRemoves;
798
799 // Walk the users of the function.
800 for (auto &U : F->uses()) {
801 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
802
803 // Get arguments
804 auto FalseValue = CI->getOperand(0);
805 auto TrueValue = CI->getOperand(1);
806 auto PredicateValue = CI->getOperand(2);
807
808 // Don't touch overloads that aren't in OpenCL C
809 auto FalseType = FalseValue->getType();
810 auto TrueType = TrueValue->getType();
811 auto PredicateType = PredicateValue->getType();
812
813 if (FalseType != TrueType) {
814 continue;
815 }
816
817 if (!PredicateType->isIntOrIntVectorTy()) {
818 continue;
819 }
820
821 if (!FalseType->isIntOrIntVectorTy() &&
822 !FalseType->getScalarType()->isFloatingPointTy()) {
823 continue;
824 }
825
826 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
827 continue;
828 }
829
830 if (FalseType->getScalarSizeInBits() !=
831 PredicateType->getScalarSizeInBits()) {
832 continue;
833 }
834
835 if (FalseType->isVectorTy()) {
836 if (FalseType->getVectorNumElements() !=
837 PredicateType->getVectorNumElements()) {
838 continue;
839 }
840
841 if ((FalseType->getVectorNumElements() != 2) &&
842 (FalseType->getVectorNumElements() != 3) &&
843 (FalseType->getVectorNumElements() != 4) &&
844 (FalseType->getVectorNumElements() != 8) &&
845 (FalseType->getVectorNumElements() != 16)) {
846 continue;
847 }
848 }
849
850 // Create constant
851 const auto ZeroValue = Constant::getNullValue(PredicateType);
852
853 // Scalar and vector are to be treated differently
854 CmpInst::Predicate Pred;
855 if (PredicateType->isVectorTy()) {
856 Pred = CmpInst::ICMP_SLT;
857 } else {
858 Pred = CmpInst::ICMP_NE;
859 }
860
861 // Create comparison instruction
862 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
863 ZeroValue, "", CI);
864
865 // Create select
866 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
867
868 // Replace call with the selection
869 CI->replaceAllUsesWith(V);
870
871 // Lastly, remember to remove the user.
872 ToRemoves.push_back(CI);
873 }
874 }
875
876 Changed = !ToRemoves.empty();
877
878 // And cleanup the calls we don't use anymore.
879 for (auto V : ToRemoves) {
880 V->eraseFromParent();
881 }
882
883 // And remove the function we don't need either too.
884 F->eraseFromParent();
885 }
886 }
887
888 return Changed;
889}
890
David Neto22f144c2017-06-12 14:26:21 -0400891bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
892 bool Changed = false;
893
894 const std::map<const char *, Instruction::BinaryOps> Map = {
895 {"_Z7signbitf", Instruction::LShr},
896 {"_Z7signbitDv2_f", Instruction::AShr},
897 {"_Z7signbitDv3_f", Instruction::AShr},
898 {"_Z7signbitDv4_f", Instruction::AShr},
899 };
900
901 for (auto Pair : Map) {
902 // If we find a function with the matching name.
903 if (auto F = M.getFunction(Pair.first)) {
904 SmallVector<Instruction *, 4> ToRemoves;
905
906 // Walk the users of the function.
907 for (auto &U : F->uses()) {
908 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
909 auto Arg = CI->getOperand(0);
910
911 auto Bitcast =
912 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
913
914 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
915 ConstantInt::get(CI->getType(), 31),
916 "", CI);
917
918 CI->replaceAllUsesWith(Shr);
919
920 // Lastly, remember to remove the user.
921 ToRemoves.push_back(CI);
922 }
923 }
924
925 Changed = !ToRemoves.empty();
926
927 // And cleanup the calls we don't use anymore.
928 for (auto V : ToRemoves) {
929 V->eraseFromParent();
930 }
931
932 // And remove the function we don't need either too.
933 F->eraseFromParent();
934 }
935 }
936
937 return Changed;
938}
939
940bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
941 bool Changed = false;
942
943 const std::map<const char *,
944 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
945 Map = {
946 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
947 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
948 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
949 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
950 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
951 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
952 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
953 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
954 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
955 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
956 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
957 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
958 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
959 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
960 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
961 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
962 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
963 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
964 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
965 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
966 };
967
968 for (auto Pair : Map) {
969 // If we find a function with the matching name.
970 if (auto F = M.getFunction(Pair.first)) {
971 SmallVector<Instruction *, 4> ToRemoves;
972
973 // Walk the users of the function.
974 for (auto &U : F->uses()) {
975 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
976 // The multiply instruction to use.
977 auto MulInst = Pair.second.first;
978
979 // The add instruction to use.
980 auto AddInst = Pair.second.second;
981
982 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
983
984 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
985 CI->getArgOperand(1), "", CI);
986
987 if (Instruction::BinaryOpsEnd != AddInst) {
988 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
989 CI);
990 }
991
992 CI->replaceAllUsesWith(I);
993
994 // Lastly, remember to remove the user.
995 ToRemoves.push_back(CI);
996 }
997 }
998
999 Changed = !ToRemoves.empty();
1000
1001 // And cleanup the calls we don't use anymore.
1002 for (auto V : ToRemoves) {
1003 V->eraseFromParent();
1004 }
1005
1006 // And remove the function we don't need either too.
1007 F->eraseFromParent();
1008 }
1009 }
1010
1011 return Changed;
1012}
1013
Derek Chowcfd368b2017-10-19 20:58:45 -07001014bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1015 bool Changed = false;
1016
1017 struct VectorStoreOps {
1018 const char* name;
1019 int n;
1020 Type* (*get_scalar_type_function)(LLVMContext&);
1021 } vector_store_ops[] = {
1022 // TODO(derekjchow): Expand this list.
1023 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1024 };
1025
David Neto544fffc2017-11-16 18:35:14 -05001026 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001027 auto Name = Op.name;
1028 auto N = Op.n;
1029 auto TypeFn = Op.get_scalar_type_function;
1030 if (auto F = M.getFunction(Name)) {
1031 SmallVector<Instruction *, 4> ToRemoves;
1032
1033 // Walk the users of the function.
1034 for (auto &U : F->uses()) {
1035 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1036 // The value argument from vstoren.
1037 auto Arg0 = CI->getOperand(0);
1038
1039 // The index argument from vstoren.
1040 auto Arg1 = CI->getOperand(1);
1041
1042 // The pointer argument from vstoren.
1043 auto Arg2 = CI->getOperand(2);
1044
1045 // Get types.
1046 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1047 auto ScalarNPointerTy = PointerType::get(
1048 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1049
1050 // Cast to scalarn
1051 auto Cast = CastInst::CreatePointerCast(
1052 Arg2, ScalarNPointerTy, "", CI);
1053 // Index to correct address
1054 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1055 // Store
1056 auto Store = new StoreInst(Arg0, Index, CI);
1057
1058 CI->replaceAllUsesWith(Store);
1059 ToRemoves.push_back(CI);
1060 }
1061 }
1062
1063 Changed = !ToRemoves.empty();
1064
1065 // And cleanup the calls we don't use anymore.
1066 for (auto V : ToRemoves) {
1067 V->eraseFromParent();
1068 }
1069
1070 // And remove the function we don't need either too.
1071 F->eraseFromParent();
1072 }
1073 }
1074
1075 return Changed;
1076}
1077
1078bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1079 bool Changed = false;
1080
1081 struct VectorLoadOps {
1082 const char* name;
1083 int n;
1084 Type* (*get_scalar_type_function)(LLVMContext&);
1085 } vector_load_ops[] = {
1086 // TODO(derekjchow): Expand this list.
1087 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
1088 };
1089
David Neto544fffc2017-11-16 18:35:14 -05001090 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001091 auto Name = Op.name;
1092 auto N = Op.n;
1093 auto TypeFn = Op.get_scalar_type_function;
1094 // If we find a function with the matching name.
1095 if (auto F = M.getFunction(Name)) {
1096 SmallVector<Instruction *, 4> ToRemoves;
1097
1098 // Walk the users of the function.
1099 for (auto &U : F->uses()) {
1100 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1101 // The index argument from vloadn.
1102 auto Arg0 = CI->getOperand(0);
1103
1104 // The pointer argument from vloadn.
1105 auto Arg1 = CI->getOperand(1);
1106
1107 // Get types.
1108 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1109 auto ScalarNPointerTy = PointerType::get(
1110 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1111
1112 // Cast to scalarn
1113 auto Cast = CastInst::CreatePointerCast(
1114 Arg1, ScalarNPointerTy, "", CI);
1115 // Index to correct address
1116 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1117 // Load
1118 auto Load = new LoadInst(Index, "", CI);
1119
1120 CI->replaceAllUsesWith(Load);
1121 ToRemoves.push_back(CI);
1122 }
1123 }
1124
1125 Changed = !ToRemoves.empty();
1126
1127 // And cleanup the calls we don't use anymore.
1128 for (auto V : ToRemoves) {
1129 V->eraseFromParent();
1130 }
1131
1132 // And remove the function we don't need either too.
1133 F->eraseFromParent();
1134
1135 }
1136 }
1137
1138 return Changed;
1139}
1140
David Neto22f144c2017-06-12 14:26:21 -04001141bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
1142 bool Changed = false;
1143
1144 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
1145 "_Z10vload_halfjPU3AS2KDh"};
1146
1147 for (auto Name : Map) {
1148 // If we find a function with the matching name.
1149 if (auto F = M.getFunction(Name)) {
1150 SmallVector<Instruction *, 4> ToRemoves;
1151
1152 // Walk the users of the function.
1153 for (auto &U : F->uses()) {
1154 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1155 // The index argument from vload_half.
1156 auto Arg0 = CI->getOperand(0);
1157
1158 // The pointer argument from vload_half.
1159 auto Arg1 = CI->getOperand(1);
1160
David Neto22f144c2017-06-12 14:26:21 -04001161 auto IntTy = Type::getInt32Ty(M.getContext());
1162 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001163 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1164
David Neto22f144c2017-06-12 14:26:21 -04001165 // Our intrinsic to unpack a float2 from an int.
1166 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1167
1168 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1169
David Neto482550a2018-03-24 05:21:07 -07001170 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04001171 auto ShortTy = Type::getInt16Ty(M.getContext());
1172 auto ShortPointerTy = PointerType::get(
1173 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001174
David Netoac825b82017-05-30 12:49:01 -04001175 // Cast the half* pointer to short*.
1176 auto Cast =
1177 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001178
David Netoac825b82017-05-30 12:49:01 -04001179 // Index into the correct address of the casted pointer.
1180 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1181
1182 // Load from the short* we casted to.
1183 auto Load = new LoadInst(Index, "", CI);
1184
1185 // ZExt the short -> int.
1186 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1187
1188 // Get our float2.
1189 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1190
1191 // Extract out the bottom element which is our float result.
1192 auto Extract = ExtractElementInst::Create(
1193 Call, ConstantInt::get(IntTy, 0), "", CI);
1194
1195 CI->replaceAllUsesWith(Extract);
1196 } else {
1197 // Assume the pointer argument points to storage aligned to 32bits
1198 // or more.
1199 // TODO(dneto): Do more analysis to make sure this is true?
1200 //
1201 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1202 // with:
1203 //
1204 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1205 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1206 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1207 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1208 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1209 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1210 // x float> %converted, %index_is_odd32
1211
1212 auto IntPointerTy = PointerType::get(
1213 IntTy, Arg1->getType()->getPointerAddressSpace());
1214
David Neto973e6a82017-05-30 13:48:18 -04001215 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04001216 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04001217 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04001218 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1219
1220 auto One = ConstantInt::get(IntTy, 1);
1221 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1222 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1223
1224 // Index into the correct address of the casted pointer.
1225 auto Ptr =
1226 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1227
1228 // Load from the int* we casted to.
1229 auto Load = new LoadInst(Ptr, "", CI);
1230
1231 // Get our float2.
1232 auto Call = CallInst::Create(NewF, Load, "", CI);
1233
1234 // Extract out the float result, where the element number is
1235 // determined by whether the original index was even or odd.
1236 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1237
1238 CI->replaceAllUsesWith(Extract);
1239 }
David Neto22f144c2017-06-12 14:26:21 -04001240
1241 // Lastly, remember to remove the user.
1242 ToRemoves.push_back(CI);
1243 }
1244 }
1245
1246 Changed = !ToRemoves.empty();
1247
1248 // And cleanup the calls we don't use anymore.
1249 for (auto V : ToRemoves) {
1250 V->eraseFromParent();
1251 }
1252
1253 // And remove the function we don't need either too.
1254 F->eraseFromParent();
1255 }
1256 }
1257
1258 return Changed;
1259}
1260
1261bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
1262 bool Changed = false;
1263
David Neto556c7e62018-06-08 13:45:55 -07001264 const std::vector<const char *> Map = {
1265 "_Z11vload_half2jPU3AS1KDh",
1266 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
1267 "_Z11vload_half2jPU3AS2KDh",
1268 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
1269 };
David Neto22f144c2017-06-12 14:26:21 -04001270
1271 for (auto Name : Map) {
1272 // If we find a function with the matching name.
1273 if (auto F = M.getFunction(Name)) {
1274 SmallVector<Instruction *, 4> ToRemoves;
1275
1276 // Walk the users of the function.
1277 for (auto &U : F->uses()) {
1278 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1279 // The index argument from vload_half.
1280 auto Arg0 = CI->getOperand(0);
1281
1282 // The pointer argument from vload_half.
1283 auto Arg1 = CI->getOperand(1);
1284
1285 auto IntTy = Type::getInt32Ty(M.getContext());
1286 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1287 auto NewPointerTy = PointerType::get(
1288 IntTy, Arg1->getType()->getPointerAddressSpace());
1289 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1290
1291 // Cast the half* pointer to int*.
1292 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1293
1294 // Index into the correct address of the casted pointer.
1295 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
1296
1297 // Load from the int* we casted to.
1298 auto Load = new LoadInst(Index, "", CI);
1299
1300 // Our intrinsic to unpack a float2 from an int.
1301 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1302
1303 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1304
1305 // Get our float2.
1306 auto Call = CallInst::Create(NewF, Load, "", CI);
1307
1308 CI->replaceAllUsesWith(Call);
1309
1310 // Lastly, remember to remove the user.
1311 ToRemoves.push_back(CI);
1312 }
1313 }
1314
1315 Changed = !ToRemoves.empty();
1316
1317 // And cleanup the calls we don't use anymore.
1318 for (auto V : ToRemoves) {
1319 V->eraseFromParent();
1320 }
1321
1322 // And remove the function we don't need either too.
1323 F->eraseFromParent();
1324 }
1325 }
1326
1327 return Changed;
1328}
1329
1330bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
1331 bool Changed = false;
1332
David Neto556c7e62018-06-08 13:45:55 -07001333 const std::vector<const char *> Map = {
1334 "_Z11vload_half4jPU3AS1KDh",
1335 "_Z12vloada_half4jPU3AS1KDh",
1336 "_Z11vload_half4jPU3AS2KDh",
1337 "_Z12vloada_half4jPU3AS2KDh",
1338 };
David Neto22f144c2017-06-12 14:26:21 -04001339
1340 for (auto Name : Map) {
1341 // If we find a function with the matching name.
1342 if (auto F = M.getFunction(Name)) {
1343 SmallVector<Instruction *, 4> ToRemoves;
1344
1345 // Walk the users of the function.
1346 for (auto &U : F->uses()) {
1347 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1348 // The index argument from vload_half.
1349 auto Arg0 = CI->getOperand(0);
1350
1351 // The pointer argument from vload_half.
1352 auto Arg1 = CI->getOperand(1);
1353
1354 auto IntTy = Type::getInt32Ty(M.getContext());
1355 auto Int2Ty = VectorType::get(IntTy, 2);
1356 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1357 auto NewPointerTy = PointerType::get(
1358 Int2Ty, Arg1->getType()->getPointerAddressSpace());
1359 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1360
1361 // Cast the half* pointer to int2*.
1362 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
1363
1364 // Index into the correct address of the casted pointer.
1365 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
1366
1367 // Load from the int2* we casted to.
1368 auto Load = new LoadInst(Index, "", CI);
1369
1370 // Extract each element from the loaded int2.
1371 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1372 "", CI);
1373 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1374 "", CI);
1375
1376 // Our intrinsic to unpack a float2 from an int.
1377 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1378
1379 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1380
1381 // Get the lower (x & y) components of our final float4.
1382 auto Lo = CallInst::Create(NewF, X, "", CI);
1383
1384 // Get the higher (z & w) components of our final float4.
1385 auto Hi = CallInst::Create(NewF, Y, "", CI);
1386
1387 Constant *ShuffleMask[4] = {
1388 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1389 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1390
1391 // Combine our two float2's into one float4.
1392 auto Combine = new ShuffleVectorInst(
1393 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1394
1395 CI->replaceAllUsesWith(Combine);
1396
1397 // Lastly, remember to remove the user.
1398 ToRemoves.push_back(CI);
1399 }
1400 }
1401
1402 Changed = !ToRemoves.empty();
1403
1404 // And cleanup the calls we don't use anymore.
1405 for (auto V : ToRemoves) {
1406 V->eraseFromParent();
1407 }
1408
1409 // And remove the function we don't need either too.
1410 F->eraseFromParent();
1411 }
1412 }
1413
1414 return Changed;
1415}
1416
David Neto6ad93232018-06-07 15:42:58 -07001417bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
1418 bool Changed = false;
1419
1420 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1421 //
1422 // %u = load i32 %ptr
1423 // %fxy = call <2 x float> Unpack2xHalf(u)
1424 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1425 const std::vector<const char *> Map = {
1426 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
1427 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
1428 "_Z20__clspv_vloada_half2jPKj", // private
1429 };
1430
1431 for (auto Name : Map) {
1432 // If we find a function with the matching name.
1433 if (auto F = M.getFunction(Name)) {
1434 SmallVector<Instruction *, 4> ToRemoves;
1435
1436 // Walk the users of the function.
1437 for (auto &U : F->uses()) {
1438 if (auto* CI = dyn_cast<CallInst>(U.getUser())) {
1439 auto Index = CI->getOperand(0);
1440 auto Ptr = CI->getOperand(1);
1441
1442 auto IntTy = Type::getInt32Ty(M.getContext());
1443 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1444 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1445
1446 auto IndexedPtr =
1447 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
1448 auto Load = new LoadInst(IndexedPtr, "", CI);
1449
1450 // Our intrinsic to unpack a float2 from an int.
1451 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1452
1453 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1454
1455 // Get our final float2.
1456 auto Result = CallInst::Create(NewF, Load, "", CI);
1457
1458 CI->replaceAllUsesWith(Result);
1459
1460 // Lastly, remember to remove the user.
1461 ToRemoves.push_back(CI);
1462 }
1463 }
1464
1465 Changed = true;
1466
1467 // And cleanup the calls we don't use anymore.
1468 for (auto V : ToRemoves) {
1469 V->eraseFromParent();
1470 }
1471
1472 // And remove the function we don't need either too.
1473 F->eraseFromParent();
1474 }
1475 }
1476
1477 return Changed;
1478}
1479
1480bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
1481 bool Changed = false;
1482
1483 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1484 //
1485 // %u2 = load <2 x i32> %ptr
1486 // %u2xy = extractelement %u2, 0
1487 // %u2zw = extractelement %u2, 1
1488 // %fxy = call <2 x float> Unpack2xHalf(uint)
1489 // %fzw = call <2 x float> Unpack2xHalf(uint)
1490 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
1491 const std::vector<const char *> Map = {
1492 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
1493 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
1494 "_Z20__clspv_vloada_half4jPKDv2_j", // private
1495 };
1496
1497 for (auto Name : Map) {
1498 // If we find a function with the matching name.
1499 if (auto F = M.getFunction(Name)) {
1500 SmallVector<Instruction *, 4> ToRemoves;
1501
1502 // Walk the users of the function.
1503 for (auto &U : F->uses()) {
1504 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1505 auto Index = CI->getOperand(0);
1506 auto Ptr = CI->getOperand(1);
1507
1508 auto IntTy = Type::getInt32Ty(M.getContext());
1509 auto Int2Ty = VectorType::get(IntTy, 2);
1510 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1511 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1512
1513 auto IndexedPtr =
1514 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
1515 auto Load = new LoadInst(IndexedPtr, "", CI);
1516
1517 // Extract each element from the loaded int2.
1518 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
1519 "", CI);
1520 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
1521 "", CI);
1522
1523 // Our intrinsic to unpack a float2 from an int.
1524 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
1525
1526 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1527
1528 // Get the lower (x & y) components of our final float4.
1529 auto Lo = CallInst::Create(NewF, X, "", CI);
1530
1531 // Get the higher (z & w) components of our final float4.
1532 auto Hi = CallInst::Create(NewF, Y, "", CI);
1533
1534 Constant *ShuffleMask[4] = {
1535 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1536 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
1537
1538 // Combine our two float2's into one float4.
1539 auto Combine = new ShuffleVectorInst(
1540 Lo, Hi, ConstantVector::get(ShuffleMask), "", CI);
1541
1542 CI->replaceAllUsesWith(Combine);
1543
1544 // Lastly, remember to remove the user.
1545 ToRemoves.push_back(CI);
1546 }
1547 }
1548
1549 Changed = true;
1550
1551 // And cleanup the calls we don't use anymore.
1552 for (auto V : ToRemoves) {
1553 V->eraseFromParent();
1554 }
1555
1556 // And remove the function we don't need either too.
1557 F->eraseFromParent();
1558 }
1559 }
1560
1561 return Changed;
1562}
1563
David Neto22f144c2017-06-12 14:26:21 -04001564bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
1565 bool Changed = false;
1566
1567 const std::vector<const char *> Map = {"_Z11vstore_halffjPU3AS1Dh",
1568 "_Z15vstore_half_rtefjPU3AS1Dh",
1569 "_Z15vstore_half_rtzfjPU3AS1Dh"};
1570
1571 for (auto Name : Map) {
1572 // If we find a function with the matching name.
1573 if (auto F = M.getFunction(Name)) {
1574 SmallVector<Instruction *, 4> ToRemoves;
1575
1576 // Walk the users of the function.
1577 for (auto &U : F->uses()) {
1578 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1579 // The value to store.
1580 auto Arg0 = CI->getOperand(0);
1581
1582 // The index argument from vstore_half.
1583 auto Arg1 = CI->getOperand(1);
1584
1585 // The pointer argument from vstore_half.
1586 auto Arg2 = CI->getOperand(2);
1587
David Neto22f144c2017-06-12 14:26:21 -04001588 auto IntTy = Type::getInt32Ty(M.getContext());
1589 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04001590 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto17852de2017-05-29 17:29:31 -04001591 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001592
1593 // Our intrinsic to pack a float2 to an int.
1594 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1595
1596 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1597
1598 // Insert our value into a float2 so that we can pack it.
David Neto17852de2017-05-29 17:29:31 -04001599 auto TempVec =
1600 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
1601 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001602
1603 // Pack the float2 -> half2 (in an int).
1604 auto X = CallInst::Create(NewF, TempVec, "", CI);
1605
David Neto482550a2018-03-24 05:21:07 -07001606 if (clspv::Option::F16BitStorage()) {
David Neto17852de2017-05-29 17:29:31 -04001607 auto ShortTy = Type::getInt16Ty(M.getContext());
1608 auto ShortPointerTy = PointerType::get(
1609 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001610
David Neto17852de2017-05-29 17:29:31 -04001611 // Truncate our i32 to an i16.
1612 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001613
David Neto17852de2017-05-29 17:29:31 -04001614 // Cast the half* pointer to short*.
1615 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001616
David Neto17852de2017-05-29 17:29:31 -04001617 // Index into the correct address of the casted pointer.
1618 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001619
David Neto17852de2017-05-29 17:29:31 -04001620 // Store to the int* we casted to.
1621 auto Store = new StoreInst(Trunc, Index, CI);
1622
1623 CI->replaceAllUsesWith(Store);
1624 } else {
1625 // We can only write to 32-bit aligned words.
1626 //
1627 // Assuming base is aligned to 32-bits, replace the equivalent of
1628 // vstore_half(value, index, base)
1629 // with:
1630 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1631 // uint32_t write_to_upper_half = index & 1u;
1632 // uint32_t shift = write_to_upper_half << 4;
1633 //
1634 // // Pack the float value as a half number in bottom 16 bits
1635 // // of an i32.
1636 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1637 //
1638 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1639 // ^ ((packed & 0xffff) << shift)
1640 // // We only need relaxed consistency, but OpenCL 1.2 only has
1641 // // sequentially consistent atomics.
1642 // // TODO(dneto): Use relaxed consistency.
1643 // atomic_xor(target_ptr, xor_value)
1644 auto IntPointerTy = PointerType::get(
1645 IntTy, Arg2->getType()->getPointerAddressSpace());
1646
1647 auto Four = ConstantInt::get(IntTy, 4);
1648 auto FFFF = ConstantInt::get(IntTy, 0xffff);
1649
1650 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
1651 // Compute index / 2
1652 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1653 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1654 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
1655 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
1656 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
1657 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1658 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
1659
1660 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1661 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1662 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
1663
1664 // Generate the call to atomi_xor.
1665 SmallVector<Type *, 5> ParamTypes;
1666 // The pointer type.
1667 ParamTypes.push_back(IntPointerTy);
1668 // The Types for memory scope, semantics, and value.
1669 ParamTypes.push_back(IntTy);
1670 ParamTypes.push_back(IntTy);
1671 ParamTypes.push_back(IntTy);
1672 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1673 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
1674
1675 const auto ConstantScopeDevice =
1676 ConstantInt::get(IntTy, spv::ScopeDevice);
1677 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1678 // (SPIR-V Workgroup).
1679 const auto AddrSpaceSemanticsBits =
1680 IntPointerTy->getPointerAddressSpace() == 1
1681 ? spv::MemorySemanticsUniformMemoryMask
1682 : spv::MemorySemanticsWorkgroupMemoryMask;
1683
1684 // We're using relaxed consistency here.
1685 const auto ConstantMemorySemantics =
1686 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1687 AddrSpaceSemanticsBits);
1688
1689 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1690 ConstantMemorySemantics, ValueToXor};
1691 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
1692 }
David Neto22f144c2017-06-12 14:26:21 -04001693
1694 // Lastly, remember to remove the user.
1695 ToRemoves.push_back(CI);
1696 }
1697 }
1698
1699 Changed = !ToRemoves.empty();
1700
1701 // And cleanup the calls we don't use anymore.
1702 for (auto V : ToRemoves) {
1703 V->eraseFromParent();
1704 }
1705
1706 // And remove the function we don't need either too.
1707 F->eraseFromParent();
1708 }
1709 }
1710
1711 return Changed;
1712}
1713
1714bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
1715 bool Changed = false;
1716
David Netoe2871522018-06-08 11:09:54 -07001717 const std::vector<const char *> Map = {
1718 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
1719 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
1720 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
1721 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
1722 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
1723 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
1724 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
1725 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
1726 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
1727 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
1728 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
1729 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
1730 };
David Neto22f144c2017-06-12 14:26:21 -04001731
1732 for (auto Name : Map) {
1733 // If we find a function with the matching name.
1734 if (auto F = M.getFunction(Name)) {
1735 SmallVector<Instruction *, 4> ToRemoves;
1736
1737 // Walk the users of the function.
1738 for (auto &U : F->uses()) {
1739 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1740 // The value to store.
1741 auto Arg0 = CI->getOperand(0);
1742
1743 // The index argument from vstore_half.
1744 auto Arg1 = CI->getOperand(1);
1745
1746 // The pointer argument from vstore_half.
1747 auto Arg2 = CI->getOperand(2);
1748
1749 auto IntTy = Type::getInt32Ty(M.getContext());
1750 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1751 auto NewPointerTy = PointerType::get(
1752 IntTy, Arg2->getType()->getPointerAddressSpace());
1753 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1754
1755 // Our intrinsic to pack a float2 to an int.
1756 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1757
1758 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1759
1760 // Turn the packed x & y into the final packing.
1761 auto X = CallInst::Create(NewF, Arg0, "", CI);
1762
1763 // Cast the half* pointer to int*.
1764 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1765
1766 // Index into the correct address of the casted pointer.
1767 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
1768
1769 // Store to the int* we casted to.
1770 auto Store = new StoreInst(X, Index, CI);
1771
1772 CI->replaceAllUsesWith(Store);
1773
1774 // Lastly, remember to remove the user.
1775 ToRemoves.push_back(CI);
1776 }
1777 }
1778
1779 Changed = !ToRemoves.empty();
1780
1781 // And cleanup the calls we don't use anymore.
1782 for (auto V : ToRemoves) {
1783 V->eraseFromParent();
1784 }
1785
1786 // And remove the function we don't need either too.
1787 F->eraseFromParent();
1788 }
1789 }
1790
1791 return Changed;
1792}
1793
1794bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
1795 bool Changed = false;
1796
David Netoe2871522018-06-08 11:09:54 -07001797 const std::vector<const char *> Map = {
1798 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
1799 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
1800 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
1801 "_Z13vstorea_half4Dv4_fjPDh", // private
1802 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
1803 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
1804 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
1805 "_Z17vstorea_half4_rteDv4_fjPDh", // private
1806 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
1807 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
1808 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
1809 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
1810 };
David Neto22f144c2017-06-12 14:26:21 -04001811
1812 for (auto Name : Map) {
1813 // If we find a function with the matching name.
1814 if (auto F = M.getFunction(Name)) {
1815 SmallVector<Instruction *, 4> ToRemoves;
1816
1817 // Walk the users of the function.
1818 for (auto &U : F->uses()) {
1819 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1820 // The value to store.
1821 auto Arg0 = CI->getOperand(0);
1822
1823 // The index argument from vstore_half.
1824 auto Arg1 = CI->getOperand(1);
1825
1826 // The pointer argument from vstore_half.
1827 auto Arg2 = CI->getOperand(2);
1828
1829 auto IntTy = Type::getInt32Ty(M.getContext());
1830 auto Int2Ty = VectorType::get(IntTy, 2);
1831 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
1832 auto NewPointerTy = PointerType::get(
1833 Int2Ty, Arg2->getType()->getPointerAddressSpace());
1834 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1835
1836 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1837 ConstantInt::get(IntTy, 1)};
1838
1839 // Extract out the x & y components of our to store value.
1840 auto Lo =
1841 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1842 ConstantVector::get(LoShuffleMask), "", CI);
1843
1844 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1845 ConstantInt::get(IntTy, 3)};
1846
1847 // Extract out the z & w components of our to store value.
1848 auto Hi =
1849 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1850 ConstantVector::get(HiShuffleMask), "", CI);
1851
1852 // Our intrinsic to pack a float2 to an int.
1853 auto SPIRVIntrinsic = "spirv.pack.v2f16";
1854
1855 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1856
1857 // Turn the packed x & y into the final component of our int2.
1858 auto X = CallInst::Create(NewF, Lo, "", CI);
1859
1860 // Turn the packed z & w into the final component of our int2.
1861 auto Y = CallInst::Create(NewF, Hi, "", CI);
1862
1863 auto Combine = InsertElementInst::Create(
1864 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
1865 Combine = InsertElementInst::Create(
1866 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
1867
1868 // Cast the half* pointer to int2*.
1869 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
1870
1871 // Index into the correct address of the casted pointer.
1872 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
1873
1874 // Store to the int2* we casted to.
1875 auto Store = new StoreInst(Combine, Index, CI);
1876
1877 CI->replaceAllUsesWith(Store);
1878
1879 // Lastly, remember to remove the user.
1880 ToRemoves.push_back(CI);
1881 }
1882 }
1883
1884 Changed = !ToRemoves.empty();
1885
1886 // And cleanup the calls we don't use anymore.
1887 for (auto V : ToRemoves) {
1888 V->eraseFromParent();
1889 }
1890
1891 // And remove the function we don't need either too.
1892 F->eraseFromParent();
1893 }
1894 }
1895
1896 return Changed;
1897}
1898
1899bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
1900 bool Changed = false;
1901
1902 const std::map<const char *, const char*> Map = {
1903 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
1904 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
1905 };
1906
1907 for (auto Pair : Map) {
1908 // If we find a function with the matching name.
1909 if (auto F = M.getFunction(Pair.first)) {
1910 SmallVector<Instruction *, 4> ToRemoves;
1911
1912 // Walk the users of the function.
1913 for (auto &U : F->uses()) {
1914 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1915 // The image.
1916 auto Arg0 = CI->getOperand(0);
1917
1918 // The sampler.
1919 auto Arg1 = CI->getOperand(1);
1920
1921 // The coordinate (integer type that we can't handle).
1922 auto Arg2 = CI->getOperand(2);
1923
1924 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
1925
1926 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
1927
1928 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
1929
1930 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
1931
1932 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
1933
1934 CI->replaceAllUsesWith(NewCI);
1935
1936 // Lastly, remember to remove the user.
1937 ToRemoves.push_back(CI);
1938 }
1939 }
1940
1941 Changed = !ToRemoves.empty();
1942
1943 // And cleanup the calls we don't use anymore.
1944 for (auto V : ToRemoves) {
1945 V->eraseFromParent();
1946 }
1947
1948 // And remove the function we don't need either too.
1949 F->eraseFromParent();
1950 }
1951 }
1952
1953 return Changed;
1954}
1955
1956bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
1957 bool Changed = false;
1958
1959 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00001960 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
1961 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
1962 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
1963 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
1964 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
1965 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04001966 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
1967 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
1968 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
1969 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
1970 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Neil Henning39672102017-09-29 14:33:13 +01001971 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04001972
1973 for (auto Pair : Map) {
1974 // If we find a function with the matching name.
1975 if (auto F = M.getFunction(Pair.first)) {
1976 SmallVector<Instruction *, 4> ToRemoves;
1977
1978 // Walk the users of the function.
1979 for (auto &U : F->uses()) {
1980 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1981 auto FType = F->getFunctionType();
1982 SmallVector<Type *, 5> ParamTypes;
1983
1984 // The pointer type.
1985 ParamTypes.push_back(FType->getParamType(0));
1986
1987 auto IntTy = Type::getInt32Ty(M.getContext());
1988
1989 // The memory scope type.
1990 ParamTypes.push_back(IntTy);
1991
1992 // The memory semantics type.
1993 ParamTypes.push_back(IntTy);
1994
1995 if (2 < CI->getNumArgOperands()) {
1996 // The unequal memory semantics type.
1997 ParamTypes.push_back(IntTy);
1998
1999 // The value type.
2000 ParamTypes.push_back(FType->getParamType(2));
2001
2002 // The comparator type.
2003 ParamTypes.push_back(FType->getParamType(1));
2004 } else if (1 < CI->getNumArgOperands()) {
2005 // The value type.
2006 ParamTypes.push_back(FType->getParamType(1));
2007 }
2008
2009 auto NewFType =
2010 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2011 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2012
2013 // We need to map the OpenCL constants to the SPIR-V equivalents.
2014 const auto ConstantScopeDevice =
2015 ConstantInt::get(IntTy, spv::ScopeDevice);
2016 const auto ConstantMemorySemantics = ConstantInt::get(
2017 IntTy, spv::MemorySemanticsUniformMemoryMask |
2018 spv::MemorySemanticsSequentiallyConsistentMask);
2019
2020 SmallVector<Value *, 5> Params;
2021
2022 // The pointer.
2023 Params.push_back(CI->getArgOperand(0));
2024
2025 // The memory scope.
2026 Params.push_back(ConstantScopeDevice);
2027
2028 // The memory semantics.
2029 Params.push_back(ConstantMemorySemantics);
2030
2031 if (2 < CI->getNumArgOperands()) {
2032 // The unequal memory semantics.
2033 Params.push_back(ConstantMemorySemantics);
2034
2035 // The value.
2036 Params.push_back(CI->getArgOperand(2));
2037
2038 // The comparator.
2039 Params.push_back(CI->getArgOperand(1));
2040 } else if (1 < CI->getNumArgOperands()) {
2041 // The value.
2042 Params.push_back(CI->getArgOperand(1));
2043 }
2044
2045 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2046
2047 CI->replaceAllUsesWith(NewCI);
2048
2049 // Lastly, remember to remove the user.
2050 ToRemoves.push_back(CI);
2051 }
2052 }
2053
2054 Changed = !ToRemoves.empty();
2055
2056 // And cleanup the calls we don't use anymore.
2057 for (auto V : ToRemoves) {
2058 V->eraseFromParent();
2059 }
2060
2061 // And remove the function we don't need either too.
2062 F->eraseFromParent();
2063 }
2064 }
2065
Neil Henning39672102017-09-29 14:33:13 +01002066 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002067 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2068 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2069 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2070 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2071 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2072 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2073 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2074 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2075 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2076 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2077 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2078 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2079 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2080 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2081 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2082 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002083 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
2084 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
2085 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
2086 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
2087 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
2088 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
2089 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
2090 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
2091 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
2092 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
2093 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
2094 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
2095 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
2096 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
2097 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
2098 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor}};
2099
2100 for (auto Pair : Map2) {
2101 // If we find a function with the matching name.
2102 if (auto F = M.getFunction(Pair.first)) {
2103 SmallVector<Instruction *, 4> ToRemoves;
2104
2105 // Walk the users of the function.
2106 for (auto &U : F->uses()) {
2107 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2108 auto AtomicOp = new AtomicRMWInst(
2109 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2110 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2111
2112 CI->replaceAllUsesWith(AtomicOp);
2113
2114 // Lastly, remember to remove the user.
2115 ToRemoves.push_back(CI);
2116 }
2117 }
2118
2119 Changed = !ToRemoves.empty();
2120
2121 // And cleanup the calls we don't use anymore.
2122 for (auto V : ToRemoves) {
2123 V->eraseFromParent();
2124 }
2125
2126 // And remove the function we don't need either too.
2127 F->eraseFromParent();
2128 }
2129 }
2130
David Neto22f144c2017-06-12 14:26:21 -04002131 return Changed;
2132}
2133
2134bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
2135 bool Changed = false;
2136
2137 // If we find a function with the matching name.
2138 if (auto F = M.getFunction("_Z5crossDv4_fS_")) {
2139 SmallVector<Instruction *, 4> ToRemoves;
2140
2141 auto IntTy = Type::getInt32Ty(M.getContext());
2142 auto FloatTy = Type::getFloatTy(M.getContext());
2143
2144 Constant *DownShuffleMask[3] = {
2145 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2146 ConstantInt::get(IntTy, 2)};
2147
2148 Constant *UpShuffleMask[4] = {
2149 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2150 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2151
2152 Constant *FloatVec[3] = {
2153 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2154 };
2155
2156 // Walk the users of the function.
2157 for (auto &U : F->uses()) {
2158 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2159 auto Vec4Ty = CI->getArgOperand(0)->getType();
2160 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2161 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2162 auto Vec3Ty = Arg0->getType();
2163
2164 auto NewFType =
2165 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
2166
2167 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
2168
2169 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
2170
2171 auto Result = new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
2172
2173 CI->replaceAllUsesWith(Result);
2174
2175 // Lastly, remember to remove the user.
2176 ToRemoves.push_back(CI);
2177 }
2178 }
2179
2180 Changed = !ToRemoves.empty();
2181
2182 // And cleanup the calls we don't use anymore.
2183 for (auto V : ToRemoves) {
2184 V->eraseFromParent();
2185 }
2186
2187 // And remove the function we don't need either too.
2188 F->eraseFromParent();
2189 }
2190
2191 return Changed;
2192}
David Neto62653202017-10-16 19:05:18 -04002193
2194bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2195 bool Changed = false;
2196
2197 // OpenCL's float result = fract(float x, float* ptr)
2198 //
2199 // In the LLVM domain:
2200 //
2201 // %floor_result = call spir_func float @floor(float %x)
2202 // store float %floor_result, float * %ptr
2203 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2204 // %result = call spir_func float
2205 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2206 //
2207 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2208 // and clspv.fract occur in the SPIR-V generator pass:
2209 //
2210 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2211 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2212 // ...
2213 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2214 // OpStore %ptr %floor_result
2215 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2216 // %fract_result = OpExtInst %float
2217 // %glsl_ext Fmin %fract_intermediate %just_under_1
2218
2219
2220 using std::string;
2221
2222 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2223 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2224 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2225 auto make_quad = [](const char *a, const char *b, const char *c,
2226 const char *d) {
2227 return std::tuple<const char *, const char *, const char *, const char *>(
2228 a, b, c, d);
2229 };
2230 const std::vector<QuadType> Functions = {
2231 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2232 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2233 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2234 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2235 };
2236
2237 for (auto& quad : Functions) {
2238 const StringRef fract_name(std::get<0>(quad));
2239
2240 // If we find a function with the matching name.
2241 if (auto F = M.getFunction(fract_name)) {
2242 if (F->use_begin() == F->use_end())
2243 continue;
2244
2245 // We have some uses.
2246 Changed = true;
2247
2248 auto& Context = M.getContext();
2249
2250 const StringRef floor_name(std::get<1>(quad));
2251 const StringRef fmin_name(std::get<2>(quad));
2252 const StringRef clspv_fract_name(std::get<3>(quad));
2253
2254 // This is either float or a float vector. All the float-like
2255 // types are this type.
2256 auto result_ty = F->getReturnType();
2257
2258 Function* fmin_fn = M.getFunction(fmin_name);
2259 if (!fmin_fn) {
2260 // Make the fmin function.
2261 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
2262 fmin_fn = cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002263 fmin_fn->addFnAttr(Attribute::ReadNone);
2264 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2265 }
2266
2267 Function* floor_fn = M.getFunction(floor_name);
2268 if (!floor_fn) {
2269 // Make the floor function.
2270 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2271 floor_fn = cast<Function>(M.getOrInsertFunction(floor_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002272 floor_fn->addFnAttr(Attribute::ReadNone);
2273 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2274 }
2275
2276 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
2277 if (!clspv_fract_fn) {
2278 // Make the clspv_fract function.
2279 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2280 clspv_fract_fn = cast<Function>(M.getOrInsertFunction(clspv_fract_name, fn_ty));
David Neto62653202017-10-16 19:05:18 -04002281 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2282 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2283 }
2284
2285 // Number of significant significand bits, whether represented or not.
2286 unsigned num_significand_bits;
2287 switch (result_ty->getScalarType()->getTypeID()) {
2288 case Type::HalfTyID:
2289 num_significand_bits = 11;
2290 break;
2291 case Type::FloatTyID:
2292 num_significand_bits = 24;
2293 break;
2294 case Type::DoubleTyID:
2295 num_significand_bits = 53;
2296 break;
2297 default:
2298 assert(false && "Unhandled float type when processing fract builtin");
2299 break;
2300 }
2301 // Beware that the disassembler displays this value as
2302 // OpConstant %float 1
2303 // which is not quite right.
2304 const double kJustUnderOneScalar =
2305 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2306
2307 Constant *just_under_one =
2308 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2309 if (result_ty->isVectorTy()) {
2310 just_under_one = ConstantVector::getSplat(
2311 result_ty->getVectorNumElements(), just_under_one);
2312 }
2313
2314 IRBuilder<> Builder(Context);
2315
2316 SmallVector<Instruction *, 4> ToRemoves;
2317
2318 // Walk the users of the function.
2319 for (auto &U : F->uses()) {
2320 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2321
2322 Builder.SetInsertPoint(CI);
2323 auto arg = CI->getArgOperand(0);
2324 auto ptr = CI->getArgOperand(1);
2325
2326 // Compute floor result and store it.
2327 auto floor = Builder.CreateCall(floor_fn, {arg});
2328 Builder.CreateStore(floor, ptr);
2329
2330 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2331 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2332
2333 CI->replaceAllUsesWith(fract_result);
2334
2335 // Lastly, remember to remove the user.
2336 ToRemoves.push_back(CI);
2337 }
2338 }
2339
2340 // And cleanup the calls we don't use anymore.
2341 for (auto V : ToRemoves) {
2342 V->eraseFromParent();
2343 }
2344
2345 // And remove the function we don't need either too.
2346 F->eraseFromParent();
2347 }
2348 }
2349
2350 return Changed;
2351}