// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#include <math.h>

#include <cassert>
#include <cstdint>
#include <functional>
#include <map>
#include <string>
#include <tuple>
#include <vector>

#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "spirv/1.0/spirv.hpp"

#include "clspv/Option.h"
#include "SPIRVOp.h"

using namespace llvm;

#define DEBUG_TYPE "ReplaceOpenCLBuiltin"

namespace {

/// Type information recorded for one argument of a mangled builtin name.
struct ArgTypeInfo {
  /// Signedness of the argument's scalar (or vector element) type.
  enum class SignedNess {
    None,     ///< No signedness; the name parser uses this for 'f' (float).
    Unsigned, ///< Unsigned integer type.
    Signed    ///< Signed integer type.
  };
  SignedNess signedness;
};

50struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000051 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000052 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000053
Kévin Petit91bc72e2019-04-08 15:17:46 +010054 bool isArgSigned(size_t arg) const {
55 assert(argTypeInfos.size() > arg);
56 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000057 }
58
Kévin Petit91bc72e2019-04-08 15:17:46 +010059 static FunctionInfo getFromMangledName(StringRef name) {
60 FunctionInfo fi;
61 if (!getFromMangledNameCheck(name, &fi)) {
62 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000063 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010064 return fi;
65 }
Kévin Petit8a560882019-03-21 15:24:34 +000066
Kévin Petit91bc72e2019-04-08 15:17:46 +010067 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
68 if (!name.consume_front("_Z")) {
69 return false;
70 }
71 size_t nameLen;
72 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000073 return false;
74 }
75
Kévin Petit91bc72e2019-04-08 15:17:46 +010076 finfo->name = name.take_front(nameLen);
77 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000080
Kévin Petit91bc72e2019-04-08 15:17:46 +010081 while (name.size() != 0) {
82
83 ArgTypeInfo ti;
84
85 // Try parsing a vector prefix
86 if (name.consume_front("Dv")) {
87 int numElems;
88 if (name.consumeInteger(10, numElems)) {
89 return false;
90 }
91
92 if (!name.consume_front("_")) {
93 return false;
94 }
95 }
96
97 // Parse the base type
98 char typeCode = name.front();
99 name = name.drop_front(1);
100 switch(typeCode) {
101 case 'c': // char
102 case 'a': // signed char
103 case 's': // short
104 case 'i': // int
105 case 'l': // long
106 ti.signedness = ArgTypeInfo::SignedNess::Signed;
107 break;
108 case 'h': // unsigned char
109 case 't': // unsigned short
110 case 'j': // unsigned int
111 case 'm': // unsigned long
112 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
113 break;
114 case 'f':
115 ti.signedness = ArgTypeInfo::SignedNess::None;
116 break;
117 case 'S':
118 ti = prev_ti;
119 if (!name.consume_front("_")) {
120 return false;
121 }
122 break;
123 default:
124 return false;
125 }
126
127 finfo->argTypeInfos.push_back(ti);
128
129 prev_ti = ti;
130 }
131
132 return true;
133 };
Kévin Petit8a560882019-03-21 15:24:34 +0000134};
135
/// Returns the zero-based index of the most significant set bit of \p v,
/// i.e. floor(log2(v)); returns 0 when \p v is 0.
///
/// NOTE: despite the name this is not a count of leading zeros. Callers in
/// this file subtract two results to obtain the left-shift distance between
/// two single-bit masks, which is what this value is suited for.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;
  // Each shift that leaves a non-zero value means a set bit exists above the
  // current index.
  while ((v >>= 1) != 0) {
    ++index;
  }
  return index;
}

156Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
157 if (1 == elements) {
158 return Type::getInt1Ty(C);
159 } else {
160 return VectorType::get(Type::getInt1Ty(C), elements);
161 }
162}
163
164struct ReplaceOpenCLBuiltinPass final : public ModulePass {
165 static char ID;
166 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
167
168 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000169 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100170 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100171 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400172 bool replaceRecip(Module &M);
173 bool replaceDivide(Module &M);
174 bool replaceExp10(Module &M);
175 bool replaceLog10(Module &M);
176 bool replaceBarrier(Module &M);
177 bool replaceMemFence(Module &M);
178 bool replaceRelational(Module &M);
179 bool replaceIsInfAndIsNan(Module &M);
180 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000181 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000182 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000183 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000184 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000185 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000186 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000187 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400188 bool replaceSignbit(Module &M);
189 bool replaceMadandMad24andMul24(Module &M);
190 bool replaceVloadHalf(Module &M);
191 bool replaceVloadHalf2(Module &M);
192 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700193 bool replaceClspvVloadaHalf2(Module &M);
194 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400195 bool replaceVstoreHalf(Module &M);
196 bool replaceVstoreHalf2(Module &M);
197 bool replaceVstoreHalf4(Module &M);
198 bool replaceReadImageF(Module &M);
199 bool replaceAtomics(Module &M);
200 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400201 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700202 bool replaceVload(Module &M);
203 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400204};
} // namespace

207char ReplaceOpenCLBuiltinPass::ID = 0;
208static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
209 "Replace OpenCL Builtins Pass");
210
211namespace clspv {
212ModulePass *createReplaceOpenCLBuiltinPass() {
213 return new ReplaceOpenCLBuiltinPass();
214}
215}
216
217bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
218 bool Changed = false;
219
Kévin Petit2444e9b2018-11-09 14:14:37 +0000220 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100221 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100222 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400223 Changed |= replaceRecip(M);
224 Changed |= replaceDivide(M);
225 Changed |= replaceExp10(M);
226 Changed |= replaceLog10(M);
227 Changed |= replaceBarrier(M);
228 Changed |= replaceMemFence(M);
229 Changed |= replaceRelational(M);
230 Changed |= replaceIsInfAndIsNan(M);
231 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000232 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000233 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000234 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000235 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000236 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000237 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000238 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400239 Changed |= replaceSignbit(M);
240 Changed |= replaceMadandMad24andMul24(M);
241 Changed |= replaceVloadHalf(M);
242 Changed |= replaceVloadHalf2(M);
243 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700244 Changed |= replaceClspvVloadaHalf2(M);
245 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400246 Changed |= replaceVstoreHalf(M);
247 Changed |= replaceVstoreHalf2(M);
248 Changed |= replaceVstoreHalf4(M);
249 Changed |= replaceReadImageF(M);
250 Changed |= replaceAtomics(M);
251 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400252 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700253 Changed |= replaceVload(M);
254 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400255
256 return Changed;
257}
258
Kévin Petite8edce32019-04-10 14:23:32 +0100259bool replaceCallsWithValue(
260 Module &M,
261 std::vector<const char*> Names,
262 std::function<Value*(CallInst*)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000263
Kévin Petite8edce32019-04-10 14:23:32 +0100264 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000265
266 for (auto Name : Names) {
267 // If we find a function with the matching name.
268 if (auto F = M.getFunction(Name)) {
269 SmallVector<Instruction *, 4> ToRemoves;
270
271 // Walk the users of the function.
272 for (auto &U : F->uses()) {
273 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000274
Kévin Petite8edce32019-04-10 14:23:32 +0100275 auto NewValue = Replacer(CI);
276
277 if (NewValue != nullptr) {
278 CI->replaceAllUsesWith(NewValue);
279 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000280
281 // Lastly, remember to remove the user.
282 ToRemoves.push_back(CI);
283 }
284 }
285
286 Changed = !ToRemoves.empty();
287
288 // And cleanup the calls we don't use anymore.
289 for (auto V : ToRemoves) {
290 V->eraseFromParent();
291 }
292
293 // And remove the function we don't need either too.
294 F->eraseFromParent();
295 }
296 }
297
298 return Changed;
299}
300
Kévin Petite8edce32019-04-10 14:23:32 +0100301bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100302
Kévin Petite8edce32019-04-10 14:23:32 +0100303 std::vector<const char *> Names = {
304 "_Z3absh",
305 "_Z3absDv2_h",
306 "_Z3absDv3_h",
307 "_Z3absDv4_h",
308 "_Z3abst",
309 "_Z3absDv2_t",
310 "_Z3absDv3_t",
311 "_Z3absDv4_t",
312 "_Z3absj",
313 "_Z3absDv2_j",
314 "_Z3absDv3_j",
315 "_Z3absDv4_j",
316 "_Z3absm",
317 "_Z3absDv2_m",
318 "_Z3absDv3_m",
319 "_Z3absDv4_m",
320 };
321
322 return replaceCallsWithValue(M, Names, [](CallInst* CI) {
323 return CI->getOperand(0);
324 });
325}
326
327bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
328
329 std::vector<const char *> Names = {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100330 "_Z8abs_diffcc",
331 "_Z8abs_diffDv2_cS_",
332 "_Z8abs_diffDv3_cS_",
333 "_Z8abs_diffDv4_cS_",
334 "_Z8abs_diffhh",
335 "_Z8abs_diffDv2_hS_",
336 "_Z8abs_diffDv3_hS_",
337 "_Z8abs_diffDv4_hS_",
338 "_Z8abs_diffss",
339 "_Z8abs_diffDv2_sS_",
340 "_Z8abs_diffDv3_sS_",
341 "_Z8abs_diffDv4_sS_",
342 "_Z8abs_difftt",
343 "_Z8abs_diffDv2_tS_",
344 "_Z8abs_diffDv3_tS_",
345 "_Z8abs_diffDv4_tS_",
346 "_Z8abs_diffii",
347 "_Z8abs_diffDv2_iS_",
348 "_Z8abs_diffDv3_iS_",
349 "_Z8abs_diffDv4_iS_",
350 "_Z8abs_diffjj",
351 "_Z8abs_diffDv2_jS_",
352 "_Z8abs_diffDv3_jS_",
353 "_Z8abs_diffDv4_jS_",
354 "_Z8abs_diffll",
355 "_Z8abs_diffDv2_lS_",
356 "_Z8abs_diffDv3_lS_",
357 "_Z8abs_diffDv4_lS_",
358 "_Z8abs_diffmm",
359 "_Z8abs_diffDv2_mS_",
360 "_Z8abs_diffDv3_mS_",
361 "_Z8abs_diffDv4_mS_",
362 };
363
Kévin Petite8edce32019-04-10 14:23:32 +0100364 return replaceCallsWithValue(M, Names, [](CallInst* CI) {
365 auto XValue = CI->getOperand(0);
366 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100367
Kévin Petite8edce32019-04-10 14:23:32 +0100368 IRBuilder<> Builder(CI);
369 auto XmY = Builder.CreateSub(XValue, YValue);
370 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100371
Kévin Petite8edce32019-04-10 14:23:32 +0100372 Value* Cmp;
373 auto F = CI->getCalledFunction();
374 auto finfo = FunctionInfo::getFromMangledName(F->getName());
375 if (finfo.isArgSigned(0)) {
376 Cmp = Builder.CreateICmpSGT(YValue, XValue);
377 } else {
378 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100379 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100380
Kévin Petite8edce32019-04-10 14:23:32 +0100381 return Builder.CreateSelect(Cmp, YmX, XmY);
382 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100383}
384
Kévin Petit8c1be282019-04-02 19:34:25 +0100385bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100386
Kévin Petite8edce32019-04-10 14:23:32 +0100387 std::vector<const char *> Names = {
Kévin Petit8c1be282019-04-02 19:34:25 +0100388 "_Z8copysignff",
389 "_Z8copysignDv2_fS_",
390 "_Z8copysignDv3_fS_",
391 "_Z8copysignDv4_fS_",
392 };
393
Kévin Petite8edce32019-04-10 14:23:32 +0100394 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
395 auto XValue = CI->getOperand(0);
396 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100397
Kévin Petite8edce32019-04-10 14:23:32 +0100398 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100399
Kévin Petite8edce32019-04-10 14:23:32 +0100400 Type* IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
401 if (Ty->isVectorTy()) {
402 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100403 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100404
Kévin Petite8edce32019-04-10 14:23:32 +0100405 // Return X with the sign of Y
406
407 // Sign bit masks
408 auto SignBit = IntTy->getScalarSizeInBits() - 1;
409 auto SignBitMask = 1 << SignBit;
410 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
411 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
412
413 IRBuilder<> Builder(CI);
414
415 // Extract sign of Y
416 auto YInt = Builder.CreateBitCast(YValue, IntTy);
417 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
418
419 // Clear sign bit in X
420 auto XInt = Builder.CreateBitCast(XValue, IntTy);
421 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
422
423 // Insert sign bit of Y into X
424 auto NewXInt = Builder.CreateOr(XInt, YSign);
425
426 // And cast back to floating-point
427 return Builder.CreateBitCast(NewXInt, Ty);
428 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100429}
430
David Neto22f144c2017-06-12 14:26:21 -0400431bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400432
Kévin Petite8edce32019-04-10 14:23:32 +0100433 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400434 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
435 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
436 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
437 };
438
Kévin Petite8edce32019-04-10 14:23:32 +0100439 return replaceCallsWithValue(M, Names, [](CallInst* CI) {
440 // Recip has one arg.
441 auto Arg = CI->getOperand(0);
442 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
443 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
444 });
David Neto22f144c2017-06-12 14:26:21 -0400445}
446
447bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400448
Kévin Petite8edce32019-04-10 14:23:32 +0100449 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400450 "_Z11half_divideff", "_Z13native_divideff",
451 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
452 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
453 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
454 };
455
Kévin Petite8edce32019-04-10 14:23:32 +0100456 return replaceCallsWithValue(M, Names, [](CallInst* CI) {
457 auto Op0 = CI->getOperand(0);
458 auto Op1 = CI->getOperand(1);
459 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
460 });
David Neto22f144c2017-06-12 14:26:21 -0400461}
462
463bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
464 bool Changed = false;
465
466 const std::map<const char *, const char *> Map = {
467 {"_Z5exp10f", "_Z3expf"},
468 {"_Z10half_exp10f", "_Z8half_expf"},
469 {"_Z12native_exp10f", "_Z10native_expf"},
470 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
471 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
472 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
473 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
474 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
475 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
476 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
477 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
478 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
479
480 for (auto Pair : Map) {
481 // If we find a function with the matching name.
482 if (auto F = M.getFunction(Pair.first)) {
483 SmallVector<Instruction *, 4> ToRemoves;
484
485 // Walk the users of the function.
486 for (auto &U : F->uses()) {
487 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
488 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
489
490 auto Arg = CI->getOperand(0);
491
492 // Constant of the natural log of 10 (ln(10)).
493 const double Ln10 =
494 2.302585092994045684017991454684364207601101488628772976033;
495
496 auto Mul = BinaryOperator::Create(
497 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
498 CI);
499
500 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
501
502 CI->replaceAllUsesWith(NewCI);
503
504 // Lastly, remember to remove the user.
505 ToRemoves.push_back(CI);
506 }
507 }
508
509 Changed = !ToRemoves.empty();
510
511 // And cleanup the calls we don't use anymore.
512 for (auto V : ToRemoves) {
513 V->eraseFromParent();
514 }
515
516 // And remove the function we don't need either too.
517 F->eraseFromParent();
518 }
519 }
520
521 return Changed;
522}
523
524bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
525 bool Changed = false;
526
527 const std::map<const char *, const char *> Map = {
528 {"_Z5log10f", "_Z3logf"},
529 {"_Z10half_log10f", "_Z8half_logf"},
530 {"_Z12native_log10f", "_Z10native_logf"},
531 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
532 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
533 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
534 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
535 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
536 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
537 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
538 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
539 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
540
541 for (auto Pair : Map) {
542 // If we find a function with the matching name.
543 if (auto F = M.getFunction(Pair.first)) {
544 SmallVector<Instruction *, 4> ToRemoves;
545
546 // Walk the users of the function.
547 for (auto &U : F->uses()) {
548 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
549 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
550
551 auto Arg = CI->getOperand(0);
552
553 // Constant of the reciprocal of the natural log of 10 (ln(10)).
554 const double Ln10 =
555 0.434294481903251827651128918916605082294397005803666566114;
556
557 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
558
559 auto Mul = BinaryOperator::Create(
560 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
561 "", CI);
562
563 CI->replaceAllUsesWith(Mul);
564
565 // Lastly, remember to remove the user.
566 ToRemoves.push_back(CI);
567 }
568 }
569
570 Changed = !ToRemoves.empty();
571
572 // And cleanup the calls we don't use anymore.
573 for (auto V : ToRemoves) {
574 V->eraseFromParent();
575 }
576
577 // And remove the function we don't need either too.
578 F->eraseFromParent();
579 }
580 }
581
582 return Changed;
583}
584
585bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
586 bool Changed = false;
587
588 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
589
590 const std::map<const char *, const char *> Map = {
591 {"_Z7barrierj", "__spirv_control_barrier"}};
592
593 for (auto Pair : Map) {
594 // If we find a function with the matching name.
595 if (auto F = M.getFunction(Pair.first)) {
596 SmallVector<Instruction *, 4> ToRemoves;
597
598 // Walk the users of the function.
599 for (auto &U : F->uses()) {
600 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
601 auto FType = F->getFunctionType();
602 SmallVector<Type *, 3> Params;
603 for (unsigned i = 0; i < 3; i++) {
604 Params.push_back(FType->getParamType(0));
605 }
606 auto NewFType =
607 FunctionType::get(FType->getReturnType(), Params, false);
608 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
609
610 auto Arg = CI->getOperand(0);
611
612 // We need to map the OpenCL constants to the SPIR-V equivalents.
613 const auto LocalMemFence =
614 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
615 const auto GlobalMemFence =
616 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
617 const auto ConstantSequentiallyConsistent = ConstantInt::get(
618 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
619 const auto ConstantScopeDevice =
620 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
621 const auto ConstantScopeWorkgroup =
622 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
623
624 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
625 const auto LocalMemFenceMask = BinaryOperator::Create(
626 Instruction::And, LocalMemFence, Arg, "", CI);
627 const auto WorkgroupShiftAmount =
628 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
629 clz(CLK_LOCAL_MEM_FENCE);
630 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
631 Instruction::Shl, LocalMemFenceMask,
632 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
633
634 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
635 const auto GlobalMemFenceMask = BinaryOperator::Create(
636 Instruction::And, GlobalMemFence, Arg, "", CI);
637 const auto UniformShiftAmount =
638 clz(spv::MemorySemanticsUniformMemoryMask) -
639 clz(CLK_GLOBAL_MEM_FENCE);
640 const auto MemorySemanticsUniform = BinaryOperator::Create(
641 Instruction::Shl, GlobalMemFenceMask,
642 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
643
644 // And combine the above together, also adding in
645 // MemorySemanticsSequentiallyConsistentMask.
646 auto MemorySemantics =
647 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
648 ConstantSequentiallyConsistent, "", CI);
649 MemorySemantics = BinaryOperator::Create(
650 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
651
652 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
653 // Device Scope, otherwise Workgroup Scope.
654 const auto Cmp =
655 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
656 GlobalMemFenceMask, GlobalMemFence, "", CI);
657 const auto MemoryScope = SelectInst::Create(
658 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
659
660 // Lastly, the Execution Scope is always Workgroup Scope.
661 const auto ExecutionScope = ConstantScopeWorkgroup;
662
663 auto NewCI = CallInst::Create(
664 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
665
666 CI->replaceAllUsesWith(NewCI);
667
668 // Lastly, remember to remove the user.
669 ToRemoves.push_back(CI);
670 }
671 }
672
673 Changed = !ToRemoves.empty();
674
675 // And cleanup the calls we don't use anymore.
676 for (auto V : ToRemoves) {
677 V->eraseFromParent();
678 }
679
680 // And remove the function we don't need either too.
681 F->eraseFromParent();
682 }
683 }
684
685 return Changed;
686}
687
688bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
689 bool Changed = false;
690
691 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
692
Neil Henning39672102017-09-29 14:33:13 +0100693 using Tuple = std::tuple<const char *, unsigned>;
694 const std::map<const char *, Tuple> Map = {
695 {"_Z9mem_fencej",
696 Tuple("__spirv_memory_barrier",
697 spv::MemorySemanticsSequentiallyConsistentMask)},
698 {"_Z14read_mem_fencej",
699 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
700 {"_Z15write_mem_fencej",
701 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400702
703 for (auto Pair : Map) {
704 // If we find a function with the matching name.
705 if (auto F = M.getFunction(Pair.first)) {
706 SmallVector<Instruction *, 4> ToRemoves;
707
708 // Walk the users of the function.
709 for (auto &U : F->uses()) {
710 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
711 auto FType = F->getFunctionType();
712 SmallVector<Type *, 2> Params;
713 for (unsigned i = 0; i < 2; i++) {
714 Params.push_back(FType->getParamType(0));
715 }
716 auto NewFType =
717 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100718 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400719
720 auto Arg = CI->getOperand(0);
721
722 // We need to map the OpenCL constants to the SPIR-V equivalents.
723 const auto LocalMemFence =
724 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
725 const auto GlobalMemFence =
726 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
727 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100728 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400729 const auto ConstantScopeDevice =
730 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
731
732 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
733 const auto LocalMemFenceMask = BinaryOperator::Create(
734 Instruction::And, LocalMemFence, Arg, "", CI);
735 const auto WorkgroupShiftAmount =
736 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
737 clz(CLK_LOCAL_MEM_FENCE);
738 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
739 Instruction::Shl, LocalMemFenceMask,
740 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
741
742 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
743 const auto GlobalMemFenceMask = BinaryOperator::Create(
744 Instruction::And, GlobalMemFence, Arg, "", CI);
745 const auto UniformShiftAmount =
746 clz(spv::MemorySemanticsUniformMemoryMask) -
747 clz(CLK_GLOBAL_MEM_FENCE);
748 const auto MemorySemanticsUniform = BinaryOperator::Create(
749 Instruction::Shl, GlobalMemFenceMask,
750 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
751
752 // And combine the above together, also adding in
753 // MemorySemanticsSequentiallyConsistentMask.
754 auto MemorySemantics =
755 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
756 ConstantMemorySemantics, "", CI);
757 MemorySemantics = BinaryOperator::Create(
758 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
759
760 // Memory Scope is always device.
761 const auto MemoryScope = ConstantScopeDevice;
762
763 auto NewCI =
764 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
765
766 CI->replaceAllUsesWith(NewCI);
767
768 // Lastly, remember to remove the user.
769 ToRemoves.push_back(CI);
770 }
771 }
772
773 Changed = !ToRemoves.empty();
774
775 // And cleanup the calls we don't use anymore.
776 for (auto V : ToRemoves) {
777 V->eraseFromParent();
778 }
779
780 // And remove the function we don't need either too.
781 F->eraseFromParent();
782 }
783 }
784
785 return Changed;
786}
787
788bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
789 bool Changed = false;
790
791 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
792 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
793 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
794 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
795 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
796 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
797 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
798 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
799 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
800 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
801 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
802 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
803 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
804 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
805 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
806 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
807 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
808 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
809 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
810 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
811 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
812 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
813 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
814 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
815 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
816 };
817
818 for (auto Pair : Map) {
819 // If we find a function with the matching name.
820 if (auto F = M.getFunction(Pair.first)) {
821 SmallVector<Instruction *, 4> ToRemoves;
822
823 // Walk the users of the function.
824 for (auto &U : F->uses()) {
825 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
826 // The predicate to use in the CmpInst.
827 auto Predicate = Pair.second.first;
828
829 // The value to return for true.
830 auto TrueValue =
831 ConstantInt::getSigned(CI->getType(), Pair.second.second);
832
833 // The value to return for false.
834 auto FalseValue = Constant::getNullValue(CI->getType());
835
836 auto Arg1 = CI->getOperand(0);
837 auto Arg2 = CI->getOperand(1);
838
839 const auto Cmp =
840 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
841
842 const auto Select =
843 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
844
845 CI->replaceAllUsesWith(Select);
846
847 // Lastly, remember to remove the user.
848 ToRemoves.push_back(CI);
849 }
850 }
851
852 Changed = !ToRemoves.empty();
853
854 // And cleanup the calls we don't use anymore.
855 for (auto V : ToRemoves) {
856 V->eraseFromParent();
857 }
858
859 // And remove the function we don't need either too.
860 F->eraseFromParent();
861 }
862 }
863
864 return Changed;
865}
866
867bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
868 bool Changed = false;
869
870 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
871 {"_Z5isinff", {"__spirv_isinff", 1}},
872 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
873 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
874 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
875 {"_Z5isnanf", {"__spirv_isnanf", 1}},
876 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
877 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
878 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
879 };
880
881 for (auto Pair : Map) {
882 // If we find a function with the matching name.
883 if (auto F = M.getFunction(Pair.first)) {
884 SmallVector<Instruction *, 4> ToRemoves;
885
886 // Walk the users of the function.
887 for (auto &U : F->uses()) {
888 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
889 const auto CITy = CI->getType();
890
891 // The fake SPIR-V intrinsic to generate.
892 auto SPIRVIntrinsic = Pair.second.first;
893
894 // The value to return for true.
895 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
896
897 // The value to return for false.
898 auto FalseValue = Constant::getNullValue(CITy);
899
900 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
901 M.getContext(),
902 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
903
904 auto NewFType =
905 FunctionType::get(CorrespondingBoolTy,
906 F->getFunctionType()->getParamType(0), false);
907
908 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
909
910 auto Arg = CI->getOperand(0);
911
912 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
913
914 const auto Select =
915 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
916
917 CI->replaceAllUsesWith(Select);
918
919 // Lastly, remember to remove the user.
920 ToRemoves.push_back(CI);
921 }
922 }
923
924 Changed = !ToRemoves.empty();
925
926 // And cleanup the calls we don't use anymore.
927 for (auto V : ToRemoves) {
928 V->eraseFromParent();
929 }
930
931 // And remove the function we don't need either too.
932 F->eraseFromParent();
933 }
934 }
935
936 return Changed;
937}
938
939bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
940 bool Changed = false;
941
942 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000943 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500944 {"_Z3allc", ""},
945 {"_Z3allDv2_c", "__spirv_allDv2_c"},
946 {"_Z3allDv3_c", "__spirv_allDv3_c"},
947 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000948 {"_Z3alls", ""},
949 {"_Z3allDv2_s", "__spirv_allDv2_s"},
950 {"_Z3allDv3_s", "__spirv_allDv3_s"},
951 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400952 {"_Z3alli", ""},
953 {"_Z3allDv2_i", "__spirv_allDv2_i"},
954 {"_Z3allDv3_i", "__spirv_allDv3_i"},
955 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000956 {"_Z3alll", ""},
957 {"_Z3allDv2_l", "__spirv_allDv2_l"},
958 {"_Z3allDv3_l", "__spirv_allDv3_l"},
959 {"_Z3allDv4_l", "__spirv_allDv4_l"},
960
961 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500962 {"_Z3anyc", ""},
963 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
964 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
965 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000966 {"_Z3anys", ""},
967 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
968 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
969 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400970 {"_Z3anyi", ""},
971 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
972 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
973 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000974 {"_Z3anyl", ""},
975 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
976 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
977 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400978 };
979
980 for (auto Pair : Map) {
981 // If we find a function with the matching name.
982 if (auto F = M.getFunction(Pair.first)) {
983 SmallVector<Instruction *, 4> ToRemoves;
984
985 // Walk the users of the function.
986 for (auto &U : F->uses()) {
987 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
988 // The fake SPIR-V intrinsic to generate.
989 auto SPIRVIntrinsic = Pair.second;
990
991 auto Arg = CI->getOperand(0);
992
993 Value *V;
994
Kévin Petitfd27cca2018-10-31 13:00:17 +0000995 // If the argument is a 32-bit int, just use a shift
996 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
997 V = BinaryOperator::Create(Instruction::LShr, Arg,
998 ConstantInt::get(Arg->getType(), 31), "",
999 CI);
1000 } else {
David Neto22f144c2017-06-12 14:26:21 -04001001 // The value for zero to compare against.
1002 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1003
David Neto22f144c2017-06-12 14:26:21 -04001004 // The value to return for true.
1005 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1006
1007 // The value to return for false.
1008 const auto FalseValue = Constant::getNullValue(CI->getType());
1009
Kévin Petitfd27cca2018-10-31 13:00:17 +00001010 const auto Cmp = CmpInst::Create(
1011 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1012
1013 Value* SelectSource;
1014
1015 // If we have a function to call, call it!
1016 if (0 < strlen(SPIRVIntrinsic)) {
1017
1018 const auto NewFType = FunctionType::get(
1019 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
1020
1021 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1022
1023 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
1024
1025 SelectSource = NewCI;
1026
1027 } else {
1028 SelectSource = Cmp;
1029 }
1030
1031 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001032 }
1033
1034 CI->replaceAllUsesWith(V);
1035
1036 // Lastly, remember to remove the user.
1037 ToRemoves.push_back(CI);
1038 }
1039 }
1040
1041 Changed = !ToRemoves.empty();
1042
1043 // And cleanup the calls we don't use anymore.
1044 for (auto V : ToRemoves) {
1045 V->eraseFromParent();
1046 }
1047
1048 // And remove the function we don't need either too.
1049 F->eraseFromParent();
1050 }
1051 }
1052
1053 return Changed;
1054}
1055
Kévin Petitbf0036c2019-03-06 13:57:10 +00001056bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1057 bool Changed = false;
1058
1059 for (auto const &SymVal : M.getValueSymbolTable()) {
1060 // Skip symbols whose name doesn't match
1061 if (!SymVal.getKey().startswith("_Z8upsample")) {
1062 continue;
1063 }
1064 // Is there a function going by that name?
1065 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1066
1067 SmallVector<Instruction *, 4> ToRemoves;
1068
1069 // Walk the users of the function.
1070 for (auto &U : F->uses()) {
1071 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1072
1073 // Get arguments
1074 auto HiValue = CI->getOperand(0);
1075 auto LoValue = CI->getOperand(1);
1076
1077 // Don't touch overloads that aren't in OpenCL C
1078 auto HiType = HiValue->getType();
1079 auto LoType = LoValue->getType();
1080
1081 if (HiType != LoType) {
1082 continue;
1083 }
1084
1085 if (!HiType->isIntOrIntVectorTy()) {
1086 continue;
1087 }
1088
1089 if (HiType->getScalarSizeInBits() * 2 !=
1090 CI->getType()->getScalarSizeInBits()) {
1091 continue;
1092 }
1093
1094 if ((HiType->getScalarSizeInBits() != 8) &&
1095 (HiType->getScalarSizeInBits() != 16) &&
1096 (HiType->getScalarSizeInBits() != 32)) {
1097 continue;
1098 }
1099
1100 if (HiType->isVectorTy()) {
1101 if ((HiType->getVectorNumElements() != 2) &&
1102 (HiType->getVectorNumElements() != 3) &&
1103 (HiType->getVectorNumElements() != 4) &&
1104 (HiType->getVectorNumElements() != 8) &&
1105 (HiType->getVectorNumElements() != 16)) {
1106 continue;
1107 }
1108 }
1109
1110 // Convert both operands to the result type
1111 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(),
1112 "", CI);
1113 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(),
1114 "", CI);
1115
1116 // Shift high operand
1117 auto ShiftAmount = ConstantInt::get(CI->getType(),
1118 HiType->getScalarSizeInBits());
1119 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1120 ShiftAmount, "", CI);
1121
1122 // OR both results
1123 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1124 "", CI);
1125
1126 // Replace call with the expression
1127 CI->replaceAllUsesWith(V);
1128
1129 // Lastly, remember to remove the user.
1130 ToRemoves.push_back(CI);
1131 }
1132 }
1133
1134 Changed = !ToRemoves.empty();
1135
1136 // And cleanup the calls we don't use anymore.
1137 for (auto V : ToRemoves) {
1138 V->eraseFromParent();
1139 }
1140
1141 // And remove the function we don't need either too.
1142 F->eraseFromParent();
1143 }
1144 }
1145
1146 return Changed;
1147}
1148
Kévin Petitd44eef52019-03-08 13:22:14 +00001149bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1150 bool Changed = false;
1151
1152 for (auto const &SymVal : M.getValueSymbolTable()) {
1153 // Skip symbols whose name doesn't match
1154 if (!SymVal.getKey().startswith("_Z6rotate")) {
1155 continue;
1156 }
1157 // Is there a function going by that name?
1158 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1159
1160 SmallVector<Instruction *, 4> ToRemoves;
1161
1162 // Walk the users of the function.
1163 for (auto &U : F->uses()) {
1164 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1165
1166 // Get arguments
1167 auto SrcValue = CI->getOperand(0);
1168 auto RotAmount = CI->getOperand(1);
1169
1170 // Don't touch overloads that aren't in OpenCL C
1171 auto SrcType = SrcValue->getType();
1172 auto RotType = RotAmount->getType();
1173
1174 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1175 continue;
1176 }
1177
1178 if (!SrcType->isIntOrIntVectorTy()) {
1179 continue;
1180 }
1181
1182 if ((SrcType->getScalarSizeInBits() != 8) &&
1183 (SrcType->getScalarSizeInBits() != 16) &&
1184 (SrcType->getScalarSizeInBits() != 32) &&
1185 (SrcType->getScalarSizeInBits() != 64)) {
1186 continue;
1187 }
1188
1189 if (SrcType->isVectorTy()) {
1190 if ((SrcType->getVectorNumElements() != 2) &&
1191 (SrcType->getVectorNumElements() != 3) &&
1192 (SrcType->getVectorNumElements() != 4) &&
1193 (SrcType->getVectorNumElements() != 8) &&
1194 (SrcType->getVectorNumElements() != 16)) {
1195 continue;
1196 }
1197 }
1198
1199 // The approach used is to shift the top bits down, the bottom bits up
1200 // and OR the two shifted values.
1201
1202 // The rotation amount is to be treated modulo the element size.
1203 // Since SPIR-V shift ops don't support this, let's apply the
1204 // modulo ahead of shifting. The element size is always a power of
1205 // two so we can just AND with a mask.
1206 auto ModMask = ConstantInt::get(SrcType,
1207 SrcType->getScalarSizeInBits() - 1);
1208 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1209 ModMask, "", CI);
1210
1211 // Let's calc the amount by which to shift top bits down
1212 auto ScalarSize = ConstantInt::get(SrcType,
1213 SrcType->getScalarSizeInBits());
1214 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1215 RotAmount, "", CI);
1216
1217 // Now shift the bottom bits up and the top bits down
1218 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1219 RotAmount, "", CI);
1220 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1221 DownAmount, "", CI);
1222
1223 // Finally OR the two shifted values
1224 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1225 HiRotated, "", CI);
1226
1227 // Replace call with the expression
1228 CI->replaceAllUsesWith(V);
1229
1230 // Lastly, remember to remove the user.
1231 ToRemoves.push_back(CI);
1232 }
1233 }
1234
1235 Changed = !ToRemoves.empty();
1236
1237 // And cleanup the calls we don't use anymore.
1238 for (auto V : ToRemoves) {
1239 V->eraseFromParent();
1240 }
1241
1242 // And remove the function we don't need either too.
1243 F->eraseFromParent();
1244 }
1245 }
1246
1247 return Changed;
1248}
1249
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001250bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1251 bool Changed = false;
1252
1253 for (auto const &SymVal : M.getValueSymbolTable()) {
1254
1255 // Skip symbols whose name obviously doesn't match
1256 if (!SymVal.getKey().contains("convert_")) {
1257 continue;
1258 }
1259
1260 // Is there a function going by that name?
1261 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1262
1263 // Get info from the mangled name
1264 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001265 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001266
1267 // All functions of interest are handled by our mangled name parser
1268 if (!parsed) {
1269 continue;
1270 }
1271
1272 // Move on if this isn't a call to convert_
1273 if (!finfo.name.startswith("convert_")) {
1274 continue;
1275 }
1276
1277 // Extract the destination type from the function name
1278 StringRef DstTypeName = finfo.name;
1279 DstTypeName.consume_front("convert_");
1280
1281 auto DstSignedNess = StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1282 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1283 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1284 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1285 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1286 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1287 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1288 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1289 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1290 .Default(ArgTypeInfo::SignedNess::None);
1291
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001292 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001293 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001294
1295 SmallVector<Instruction *, 4> ToRemoves;
1296
1297 // Walk the users of the function.
1298 for (auto &U : F->uses()) {
1299 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1300
1301 // Get arguments
1302 auto SrcValue = CI->getOperand(0);
1303
1304 // Don't touch overloads that aren't in OpenCL C
1305 auto SrcType = SrcValue->getType();
1306 auto DstType = CI->getType();
1307
1308 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1309 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1310 continue;
1311 }
1312
1313 if (SrcType->isVectorTy()) {
1314
1315 if (SrcType->getVectorNumElements() !=
1316 DstType->getVectorNumElements()) {
1317 continue;
1318 }
1319
1320 if ((SrcType->getVectorNumElements() != 2) &&
1321 (SrcType->getVectorNumElements() != 3) &&
1322 (SrcType->getVectorNumElements() != 4) &&
1323 (SrcType->getVectorNumElements() != 8) &&
1324 (SrcType->getVectorNumElements() != 16)) {
1325 continue;
1326 }
1327 }
1328
1329 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1330 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1331
1332 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1333 bool DstIsInt = DstType->isIntOrIntVectorTy();
1334
1335 Value *V;
1336 if (SrcIsFloat && DstIsFloat) {
1337 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1338 } else if (SrcIsFloat && DstIsInt) {
1339 if (DstIsSigned) {
1340 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1341 } else {
1342 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1343 }
1344 } else if (SrcIsInt && DstIsFloat) {
1345 if (SrcIsSigned) {
1346 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1347 } else {
1348 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1349 }
1350 } else if (SrcIsInt && DstIsInt) {
1351 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1352 } else {
1353 // Not something we're supposed to handle, just move on
1354 continue;
1355 }
1356
1357 // Replace call with the expression
1358 CI->replaceAllUsesWith(V);
1359
1360 // Lastly, remember to remove the user.
1361 ToRemoves.push_back(CI);
1362 }
1363 }
1364
1365 Changed = !ToRemoves.empty();
1366
1367 // And cleanup the calls we don't use anymore.
1368 for (auto V : ToRemoves) {
1369 V->eraseFromParent();
1370 }
1371
1372 // And remove the function we don't need either too.
1373 F->eraseFromParent();
1374 }
1375 }
1376
1377 return Changed;
1378}
1379
Kévin Petit8a560882019-03-21 15:24:34 +00001380bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1381 bool Changed = false;
1382
Kévin Petit617a76d2019-04-04 13:54:16 +01001383 SmallVector<Function*, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001384
Kévin Petit617a76d2019-04-04 13:54:16 +01001385 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001386 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1387 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1388
1389 // Skip symbols whose name doesn't match
1390 if (!isMad && !isMul) {
1391 continue;
1392 }
1393
1394 // Is there a function going by that name?
1395 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001396 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001397 }
1398 }
1399
Kévin Petit617a76d2019-04-04 13:54:16 +01001400 for (auto F : FnWorklist) {
1401 SmallVector<Instruction *, 4> ToRemoves;
1402
1403 bool isMad = F->getName().startswith("_Z6mad_hi");
1404 // Walk the users of the function.
1405 for (auto &U : F->uses()) {
1406 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1407
1408 // Get arguments
1409 auto AValue = CI->getOperand(0);
1410 auto BValue = CI->getOperand(1);
1411 auto CValue = CI->getOperand(2);
1412
1413 // Don't touch overloads that aren't in OpenCL C
1414 auto AType = AValue->getType();
1415 auto BType = BValue->getType();
1416 auto CType = CValue->getType();
1417
1418 if ((AType != BType) || (CI->getType() != AType) ||
1419 (isMad && (AType != CType))) {
1420 continue;
1421 }
1422
1423 if (!AType->isIntOrIntVectorTy()) {
1424 continue;
1425 }
1426
1427 if ((AType->getScalarSizeInBits() != 8) &&
1428 (AType->getScalarSizeInBits() != 16) &&
1429 (AType->getScalarSizeInBits() != 32) &&
1430 (AType->getScalarSizeInBits() != 64)) {
1431 continue;
1432 }
1433
1434 if (AType->isVectorTy()) {
1435 if ((AType->getVectorNumElements() != 2) &&
1436 (AType->getVectorNumElements() != 3) &&
1437 (AType->getVectorNumElements() != 4) &&
1438 (AType->getVectorNumElements() != 8) &&
1439 (AType->getVectorNumElements() != 16)) {
1440 continue;
1441 }
1442 }
1443
1444 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001445 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001446
1447 // Select the appropriate signed/unsigned SPIR-V op
1448 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001449 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001450 opcode = spv::OpSMulExtended;
1451 } else {
1452 opcode = spv::OpUMulExtended;
1453 }
1454
1455 // Our SPIR-V op returns a struct, create a type for it
1456 SmallVector<Type*, 2> TwoValueType = {
1457 AType,
1458 AType
1459 };
1460 auto ExMulRetType = StructType::create(TwoValueType);
1461
1462 // Call the SPIR-V op
1463 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1464 ExMulRetType, {AValue, BValue});
1465
1466 // Get the high part of the result
1467 unsigned Idxs[] = {1};
1468 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1469
1470 // If we're handling a mad_hi, add the third argument to the result
1471 if (isMad) {
1472 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1473 }
1474
1475 // Replace call with the expression
1476 CI->replaceAllUsesWith(V);
1477
1478 // Lastly, remember to remove the user.
1479 ToRemoves.push_back(CI);
1480 }
1481 }
1482
1483 Changed = !ToRemoves.empty();
1484
1485 // And cleanup the calls we don't use anymore.
1486 for (auto V : ToRemoves) {
1487 V->eraseFromParent();
1488 }
1489
1490 // And remove the function we don't need either too.
1491 F->eraseFromParent();
1492 }
1493
Kévin Petit8a560882019-03-21 15:24:34 +00001494 return Changed;
1495}
1496
Kévin Petitf5b78a22018-10-25 14:32:17 +00001497bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1498 bool Changed = false;
1499
1500 for (auto const &SymVal : M.getValueSymbolTable()) {
1501 // Skip symbols whose name doesn't match
1502 if (!SymVal.getKey().startswith("_Z6select")) {
1503 continue;
1504 }
1505 // Is there a function going by that name?
1506 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1507
1508 SmallVector<Instruction *, 4> ToRemoves;
1509
1510 // Walk the users of the function.
1511 for (auto &U : F->uses()) {
1512 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1513
1514 // Get arguments
1515 auto FalseValue = CI->getOperand(0);
1516 auto TrueValue = CI->getOperand(1);
1517 auto PredicateValue = CI->getOperand(2);
1518
1519 // Don't touch overloads that aren't in OpenCL C
1520 auto FalseType = FalseValue->getType();
1521 auto TrueType = TrueValue->getType();
1522 auto PredicateType = PredicateValue->getType();
1523
1524 if (FalseType != TrueType) {
1525 continue;
1526 }
1527
1528 if (!PredicateType->isIntOrIntVectorTy()) {
1529 continue;
1530 }
1531
1532 if (!FalseType->isIntOrIntVectorTy() &&
1533 !FalseType->getScalarType()->isFloatingPointTy()) {
1534 continue;
1535 }
1536
1537 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1538 continue;
1539 }
1540
1541 if (FalseType->getScalarSizeInBits() !=
1542 PredicateType->getScalarSizeInBits()) {
1543 continue;
1544 }
1545
1546 if (FalseType->isVectorTy()) {
1547 if (FalseType->getVectorNumElements() !=
1548 PredicateType->getVectorNumElements()) {
1549 continue;
1550 }
1551
1552 if ((FalseType->getVectorNumElements() != 2) &&
1553 (FalseType->getVectorNumElements() != 3) &&
1554 (FalseType->getVectorNumElements() != 4) &&
1555 (FalseType->getVectorNumElements() != 8) &&
1556 (FalseType->getVectorNumElements() != 16)) {
1557 continue;
1558 }
1559 }
1560
1561 // Create constant
1562 const auto ZeroValue = Constant::getNullValue(PredicateType);
1563
1564 // Scalar and vector are to be treated differently
1565 CmpInst::Predicate Pred;
1566 if (PredicateType->isVectorTy()) {
1567 Pred = CmpInst::ICMP_SLT;
1568 } else {
1569 Pred = CmpInst::ICMP_NE;
1570 }
1571
1572 // Create comparison instruction
1573 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1574 ZeroValue, "", CI);
1575
1576 // Create select
1577 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1578
1579 // Replace call with the selection
1580 CI->replaceAllUsesWith(V);
1581
1582 // Lastly, remember to remove the user.
1583 ToRemoves.push_back(CI);
1584 }
1585 }
1586
1587 Changed = !ToRemoves.empty();
1588
1589 // And cleanup the calls we don't use anymore.
1590 for (auto V : ToRemoves) {
1591 V->eraseFromParent();
1592 }
1593
1594 // And remove the function we don't need either too.
1595 F->eraseFromParent();
1596 }
1597 }
1598
1599 return Changed;
1600}
1601
Kévin Petite7d0cce2018-10-31 12:38:56 +00001602bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1603 bool Changed = false;
1604
1605 for (auto const &SymVal : M.getValueSymbolTable()) {
1606 // Skip symbols whose name doesn't match
1607 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1608 continue;
1609 }
1610 // Is there a function going by that name?
1611 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1612
1613 SmallVector<Instruction *, 4> ToRemoves;
1614
1615 // Walk the users of the function.
1616 for (auto &U : F->uses()) {
1617 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1618
1619 if (CI->getNumOperands() != 4) {
1620 continue;
1621 }
1622
1623 // Get arguments
1624 auto FalseValue = CI->getOperand(0);
1625 auto TrueValue = CI->getOperand(1);
1626 auto PredicateValue = CI->getOperand(2);
1627
1628 // Don't touch overloads that aren't in OpenCL C
1629 auto FalseType = FalseValue->getType();
1630 auto TrueType = TrueValue->getType();
1631 auto PredicateType = PredicateValue->getType();
1632
1633 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1634 continue;
1635 }
1636
1637 if (TrueType->isVectorTy()) {
1638 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1639 !TrueType->getScalarType()->isIntegerTy()) {
1640 continue;
1641 }
1642 if ((TrueType->getVectorNumElements() != 2) &&
1643 (TrueType->getVectorNumElements() != 3) &&
1644 (TrueType->getVectorNumElements() != 4) &&
1645 (TrueType->getVectorNumElements() != 8) &&
1646 (TrueType->getVectorNumElements() != 16)) {
1647 continue;
1648 }
1649 }
1650
1651 // Remember the type of the operands
1652 auto OpType = TrueType;
1653
1654 // The actual bit selection will always be done on an integer type,
1655 // declare it here
1656 Type *BitType;
1657
1658 // If the operands are float, then bitcast them to int
1659 if (OpType->getScalarType()->isFloatingPointTy()) {
1660
1661 // First create the new type
1662 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1663 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1664 if (OpType->isVectorTy()) {
1665 BitType = VectorType::get(BitType, OpType->getVectorNumElements());
1666 }
1667
1668 // Then bitcast all operands
1669 PredicateValue = CastInst::CreateZExtOrBitCast(PredicateValue,
1670 BitType, "", CI);
1671 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue,
1672 BitType, "", CI);
1673 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1674
1675 } else {
1676 // The operands have an integer type, use it directly
1677 BitType = OpType;
1678 }
1679
1680 // All the operands are now always integers
1681 // implement as (c & b) | (~c & a)
1682
1683 // Create our negated predicate value
1684 auto AllOnes = Constant::getAllOnesValue(BitType);
1685 auto NotPredicateValue = BinaryOperator::Create(Instruction::Xor,
1686 PredicateValue,
1687 AllOnes, "", CI);
1688
1689 // Then put everything together
1690 auto BitsFalse = BinaryOperator::Create(Instruction::And,
1691 NotPredicateValue,
1692 FalseValue, "", CI);
1693 auto BitsTrue = BinaryOperator::Create(Instruction::And,
1694 PredicateValue,
1695 TrueValue, "", CI);
1696
1697 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1698 BitsTrue, "", CI);
1699
1700 // If we were dealing with a floating point type, we must bitcast
1701 // the result back to that
1702 if (OpType->getScalarType()->isFloatingPointTy()) {
1703 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1704 }
1705
1706 // Replace call with our new code
1707 CI->replaceAllUsesWith(V);
1708
1709 // Lastly, remember to remove the user.
1710 ToRemoves.push_back(CI);
1711 }
1712 }
1713
1714 Changed = !ToRemoves.empty();
1715
1716 // And cleanup the calls we don't use anymore.
1717 for (auto V : ToRemoves) {
1718 V->eraseFromParent();
1719 }
1720
1721 // And remove the function we don't need either too.
1722 F->eraseFromParent();
1723 }
1724 }
1725
1726 return Changed;
1727}
1728
Kévin Petit6b0a9532018-10-30 20:00:39 +00001729bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1730 bool Changed = false;
1731
1732 const std::map<const char *, const char *> Map = {
1733 { "_Z4stepfDv2_f", "_Z4stepDv2_fS_" },
1734 { "_Z4stepfDv3_f", "_Z4stepDv3_fS_" },
1735 { "_Z4stepfDv4_f", "_Z4stepDv4_fS_" },
1736 { "_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_" },
1737 { "_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_" },
1738 { "_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_" },
1739 };
1740
1741 for (auto Pair : Map) {
1742 // If we find a function with the matching name.
1743 if (auto F = M.getFunction(Pair.first)) {
1744 SmallVector<Instruction *, 4> ToRemoves;
1745
1746 // Walk the users of the function.
1747 for (auto &U : F->uses()) {
1748 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1749
1750 auto ReplacementFn = Pair.second;
1751
1752 SmallVector<Value*, 2> ArgsToSplat = {CI->getOperand(0)};
1753 Value *VectorArg;
1754
1755 // First figure out which function we're dealing with
1756 if (F->getName().startswith("_Z10smoothstep")) {
1757 ArgsToSplat.push_back(CI->getOperand(1));
1758 VectorArg = CI->getOperand(2);
1759 } else {
1760 VectorArg = CI->getOperand(1);
1761 }
1762
1763 // Splat arguments that need to be
1764 SmallVector<Value*, 2> SplatArgs;
1765 auto VecType = VectorArg->getType();
1766
1767 for (auto arg : ArgsToSplat) {
1768 Value* NewVectorArg = UndefValue::get(VecType);
1769 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
1770 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1771 NewVectorArg = InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1772 }
1773 SplatArgs.push_back(NewVectorArg);
1774 }
1775
1776 // Replace the call with the vector/vector flavour
1777 SmallVector<Type*, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1778 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1779
1780 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1781
1782 SmallVector<Value*, 3> NewArgs;
1783 for (auto arg : SplatArgs) {
1784 NewArgs.push_back(arg);
1785 }
1786 NewArgs.push_back(VectorArg);
1787
1788 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1789
1790 CI->replaceAllUsesWith(NewCI);
1791
1792 // Lastly, remember to remove the user.
1793 ToRemoves.push_back(CI);
1794 }
1795 }
1796
1797 Changed = !ToRemoves.empty();
1798
1799 // And cleanup the calls we don't use anymore.
1800 for (auto V : ToRemoves) {
1801 V->eraseFromParent();
1802 }
1803
1804 // And remove the function we don't need either too.
1805 F->eraseFromParent();
1806 }
1807 }
1808
1809 return Changed;
1810}
1811
David Neto22f144c2017-06-12 14:26:21 -04001812bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1813 bool Changed = false;
1814
1815 const std::map<const char *, Instruction::BinaryOps> Map = {
1816 {"_Z7signbitf", Instruction::LShr},
1817 {"_Z7signbitDv2_f", Instruction::AShr},
1818 {"_Z7signbitDv3_f", Instruction::AShr},
1819 {"_Z7signbitDv4_f", Instruction::AShr},
1820 };
1821
1822 for (auto Pair : Map) {
1823 // If we find a function with the matching name.
1824 if (auto F = M.getFunction(Pair.first)) {
1825 SmallVector<Instruction *, 4> ToRemoves;
1826
1827 // Walk the users of the function.
1828 for (auto &U : F->uses()) {
1829 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1830 auto Arg = CI->getOperand(0);
1831
1832 auto Bitcast =
1833 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1834
1835 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1836 ConstantInt::get(CI->getType(), 31),
1837 "", CI);
1838
1839 CI->replaceAllUsesWith(Shr);
1840
1841 // Lastly, remember to remove the user.
1842 ToRemoves.push_back(CI);
1843 }
1844 }
1845
1846 Changed = !ToRemoves.empty();
1847
1848 // And cleanup the calls we don't use anymore.
1849 for (auto V : ToRemoves) {
1850 V->eraseFromParent();
1851 }
1852
1853 // And remove the function we don't need either too.
1854 F->eraseFromParent();
1855 }
1856 }
1857
1858 return Changed;
1859}
1860
1861bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1862 bool Changed = false;
1863
1864 const std::map<const char *,
1865 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1866 Map = {
1867 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1868 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1869 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1870 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1871 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1872 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1873 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1874 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1875 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1876 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1877 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1878 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1879 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1880 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1881 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1882 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1883 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1884 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1885 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1886 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1887 };
1888
1889 for (auto Pair : Map) {
1890 // If we find a function with the matching name.
1891 if (auto F = M.getFunction(Pair.first)) {
1892 SmallVector<Instruction *, 4> ToRemoves;
1893
1894 // Walk the users of the function.
1895 for (auto &U : F->uses()) {
1896 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1897 // The multiply instruction to use.
1898 auto MulInst = Pair.second.first;
1899
1900 // The add instruction to use.
1901 auto AddInst = Pair.second.second;
1902
1903 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1904
1905 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1906 CI->getArgOperand(1), "", CI);
1907
1908 if (Instruction::BinaryOpsEnd != AddInst) {
1909 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1910 CI);
1911 }
1912
1913 CI->replaceAllUsesWith(I);
1914
1915 // Lastly, remember to remove the user.
1916 ToRemoves.push_back(CI);
1917 }
1918 }
1919
1920 Changed = !ToRemoves.empty();
1921
1922 // And cleanup the calls we don't use anymore.
1923 for (auto V : ToRemoves) {
1924 V->eraseFromParent();
1925 }
1926
1927 // And remove the function we don't need either too.
1928 F->eraseFromParent();
1929 }
1930 }
1931
1932 return Changed;
1933}
1934
Derek Chowcfd368b2017-10-19 20:58:45 -07001935bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1936 bool Changed = false;
1937
1938 struct VectorStoreOps {
1939 const char* name;
1940 int n;
1941 Type* (*get_scalar_type_function)(LLVMContext&);
1942 } vector_store_ops[] = {
1943 // TODO(derekjchow): Expand this list.
1944 { "_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy }
1945 };
1946
David Neto544fffc2017-11-16 18:35:14 -05001947 for (const auto& Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001948 auto Name = Op.name;
1949 auto N = Op.n;
1950 auto TypeFn = Op.get_scalar_type_function;
1951 if (auto F = M.getFunction(Name)) {
1952 SmallVector<Instruction *, 4> ToRemoves;
1953
1954 // Walk the users of the function.
1955 for (auto &U : F->uses()) {
1956 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1957 // The value argument from vstoren.
1958 auto Arg0 = CI->getOperand(0);
1959
1960 // The index argument from vstoren.
1961 auto Arg1 = CI->getOperand(1);
1962
1963 // The pointer argument from vstoren.
1964 auto Arg2 = CI->getOperand(2);
1965
1966 // Get types.
1967 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1968 auto ScalarNPointerTy = PointerType::get(
1969 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1970
1971 // Cast to scalarn
1972 auto Cast = CastInst::CreatePointerCast(
1973 Arg2, ScalarNPointerTy, "", CI);
1974 // Index to correct address
1975 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1976 // Store
1977 auto Store = new StoreInst(Arg0, Index, CI);
1978
1979 CI->replaceAllUsesWith(Store);
1980 ToRemoves.push_back(CI);
1981 }
1982 }
1983
1984 Changed = !ToRemoves.empty();
1985
1986 // And cleanup the calls we don't use anymore.
1987 for (auto V : ToRemoves) {
1988 V->eraseFromParent();
1989 }
1990
1991 // And remove the function we don't need either too.
1992 F->eraseFromParent();
1993 }
1994 }
1995
1996 return Changed;
1997}
1998
1999bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2000 bool Changed = false;
2001
2002 struct VectorLoadOps {
2003 const char* name;
2004 int n;
2005 Type* (*get_scalar_type_function)(LLVMContext&);
2006 } vector_load_ops[] = {
2007 // TODO(derekjchow): Expand this list.
2008 { "_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy }
2009 };
2010
David Neto544fffc2017-11-16 18:35:14 -05002011 for (const auto& Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002012 auto Name = Op.name;
2013 auto N = Op.n;
2014 auto TypeFn = Op.get_scalar_type_function;
2015 // If we find a function with the matching name.
2016 if (auto F = M.getFunction(Name)) {
2017 SmallVector<Instruction *, 4> ToRemoves;
2018
2019 // Walk the users of the function.
2020 for (auto &U : F->uses()) {
2021 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2022 // The index argument from vloadn.
2023 auto Arg0 = CI->getOperand(0);
2024
2025 // The pointer argument from vloadn.
2026 auto Arg1 = CI->getOperand(1);
2027
2028 // Get types.
2029 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
2030 auto ScalarNPointerTy = PointerType::get(
2031 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
2032
2033 // Cast to scalarn
2034 auto Cast = CastInst::CreatePointerCast(
2035 Arg1, ScalarNPointerTy, "", CI);
2036 // Index to correct address
2037 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2038 // Load
2039 auto Load = new LoadInst(Index, "", CI);
2040
2041 CI->replaceAllUsesWith(Load);
2042 ToRemoves.push_back(CI);
2043 }
2044 }
2045
2046 Changed = !ToRemoves.empty();
2047
2048 // And cleanup the calls we don't use anymore.
2049 for (auto V : ToRemoves) {
2050 V->eraseFromParent();
2051 }
2052
2053 // And remove the function we don't need either too.
2054 F->eraseFromParent();
2055
2056 }
2057 }
2058
2059 return Changed;
2060}
2061
David Neto22f144c2017-06-12 14:26:21 -04002062bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2063 bool Changed = false;
2064
2065 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2066 "_Z10vload_halfjPU3AS2KDh"};
2067
2068 for (auto Name : Map) {
2069 // If we find a function with the matching name.
2070 if (auto F = M.getFunction(Name)) {
2071 SmallVector<Instruction *, 4> ToRemoves;
2072
2073 // Walk the users of the function.
2074 for (auto &U : F->uses()) {
2075 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2076 // The index argument from vload_half.
2077 auto Arg0 = CI->getOperand(0);
2078
2079 // The pointer argument from vload_half.
2080 auto Arg1 = CI->getOperand(1);
2081
David Neto22f144c2017-06-12 14:26:21 -04002082 auto IntTy = Type::getInt32Ty(M.getContext());
2083 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002084 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2085
David Neto22f144c2017-06-12 14:26:21 -04002086 // Our intrinsic to unpack a float2 from an int.
2087 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2088
2089 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2090
David Neto482550a2018-03-24 05:21:07 -07002091 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002092 auto ShortTy = Type::getInt16Ty(M.getContext());
2093 auto ShortPointerTy = PointerType::get(
2094 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002095
David Netoac825b82017-05-30 12:49:01 -04002096 // Cast the half* pointer to short*.
2097 auto Cast =
2098 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002099
David Netoac825b82017-05-30 12:49:01 -04002100 // Index into the correct address of the casted pointer.
2101 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2102
2103 // Load from the short* we casted to.
2104 auto Load = new LoadInst(Index, "", CI);
2105
2106 // ZExt the short -> int.
2107 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2108
2109 // Get our float2.
2110 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2111
2112 // Extract out the bottom element which is our float result.
2113 auto Extract = ExtractElementInst::Create(
2114 Call, ConstantInt::get(IntTy, 0), "", CI);
2115
2116 CI->replaceAllUsesWith(Extract);
2117 } else {
2118 // Assume the pointer argument points to storage aligned to 32bits
2119 // or more.
2120 // TODO(dneto): Do more analysis to make sure this is true?
2121 //
2122 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2123 // with:
2124 //
2125 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2126 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2127 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2128 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2129 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2130 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2131 // x float> %converted, %index_is_odd32
2132
2133 auto IntPointerTy = PointerType::get(
2134 IntTy, Arg1->getType()->getPointerAddressSpace());
2135
David Neto973e6a82017-05-30 13:48:18 -04002136 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002137 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002138 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002139 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2140
2141 auto One = ConstantInt::get(IntTy, 1);
2142 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2143 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2144
2145 // Index into the correct address of the casted pointer.
2146 auto Ptr =
2147 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2148
2149 // Load from the int* we casted to.
2150 auto Load = new LoadInst(Ptr, "", CI);
2151
2152 // Get our float2.
2153 auto Call = CallInst::Create(NewF, Load, "", CI);
2154
2155 // Extract out the float result, where the element number is
2156 // determined by whether the original index was even or odd.
2157 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2158
2159 CI->replaceAllUsesWith(Extract);
2160 }
David Neto22f144c2017-06-12 14:26:21 -04002161
2162 // Lastly, remember to remove the user.
2163 ToRemoves.push_back(CI);
2164 }
2165 }
2166
2167 Changed = !ToRemoves.empty();
2168
2169 // And cleanup the calls we don't use anymore.
2170 for (auto V : ToRemoves) {
2171 V->eraseFromParent();
2172 }
2173
2174 // And remove the function we don't need either too.
2175 F->eraseFromParent();
2176 }
2177 }
2178
2179 return Changed;
2180}
2181
2182bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002183
Kévin Petite8edce32019-04-10 14:23:32 +01002184 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002185 "_Z11vload_half2jPU3AS1KDh",
2186 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2187 "_Z11vload_half2jPU3AS2KDh",
2188 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2189 };
David Neto22f144c2017-06-12 14:26:21 -04002190
Kévin Petite8edce32019-04-10 14:23:32 +01002191 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2192 // The index argument from vload_half.
2193 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002194
Kévin Petite8edce32019-04-10 14:23:32 +01002195 // The pointer argument from vload_half.
2196 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002197
Kévin Petite8edce32019-04-10 14:23:32 +01002198 auto IntTy = Type::getInt32Ty(M.getContext());
2199 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2200 auto NewPointerTy = PointerType::get(
2201 IntTy, Arg1->getType()->getPointerAddressSpace());
2202 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002203
Kévin Petite8edce32019-04-10 14:23:32 +01002204 // Cast the half* pointer to int*.
2205 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002206
Kévin Petite8edce32019-04-10 14:23:32 +01002207 // Index into the correct address of the casted pointer.
2208 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002209
Kévin Petite8edce32019-04-10 14:23:32 +01002210 // Load from the int* we casted to.
2211 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002212
Kévin Petite8edce32019-04-10 14:23:32 +01002213 // Our intrinsic to unpack a float2 from an int.
2214 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002215
Kévin Petite8edce32019-04-10 14:23:32 +01002216 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002217
Kévin Petite8edce32019-04-10 14:23:32 +01002218 // Get our float2.
2219 return CallInst::Create(NewF, Load, "", CI);
2220 });
David Neto22f144c2017-06-12 14:26:21 -04002221}
2222
2223bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002224
Kévin Petite8edce32019-04-10 14:23:32 +01002225 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002226 "_Z11vload_half4jPU3AS1KDh",
2227 "_Z12vloada_half4jPU3AS1KDh",
2228 "_Z11vload_half4jPU3AS2KDh",
2229 "_Z12vloada_half4jPU3AS2KDh",
2230 };
David Neto22f144c2017-06-12 14:26:21 -04002231
Kévin Petite8edce32019-04-10 14:23:32 +01002232 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2233 // The index argument from vload_half.
2234 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002235
Kévin Petite8edce32019-04-10 14:23:32 +01002236 // The pointer argument from vload_half.
2237 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002238
Kévin Petite8edce32019-04-10 14:23:32 +01002239 auto IntTy = Type::getInt32Ty(M.getContext());
2240 auto Int2Ty = VectorType::get(IntTy, 2);
2241 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2242 auto NewPointerTy = PointerType::get(
2243 Int2Ty, Arg1->getType()->getPointerAddressSpace());
2244 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002245
Kévin Petite8edce32019-04-10 14:23:32 +01002246 // Cast the half* pointer to int2*.
2247 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002248
Kévin Petite8edce32019-04-10 14:23:32 +01002249 // Index into the correct address of the casted pointer.
2250 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002251
Kévin Petite8edce32019-04-10 14:23:32 +01002252 // Load from the int2* we casted to.
2253 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002254
Kévin Petite8edce32019-04-10 14:23:32 +01002255 // Extract each element from the loaded int2.
2256 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2257 "", CI);
2258 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2259 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002260
Kévin Petite8edce32019-04-10 14:23:32 +01002261 // Our intrinsic to unpack a float2 from an int.
2262 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002263
Kévin Petite8edce32019-04-10 14:23:32 +01002264 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002265
Kévin Petite8edce32019-04-10 14:23:32 +01002266 // Get the lower (x & y) components of our final float4.
2267 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002268
Kévin Petite8edce32019-04-10 14:23:32 +01002269 // Get the higher (z & w) components of our final float4.
2270 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 Constant *ShuffleMask[4] = {
2273 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2274 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002275
Kévin Petite8edce32019-04-10 14:23:32 +01002276 // Combine our two float2's into one float4.
2277 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask),
2278 "", CI);
2279 });
David Neto22f144c2017-06-12 14:26:21 -04002280}
2281
David Neto6ad93232018-06-07 15:42:58 -07002282bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002283
2284 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2285 //
2286 // %u = load i32 %ptr
2287 // %fxy = call <2 x float> Unpack2xHalf(u)
2288 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002289 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002290 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2291 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2292 "_Z20__clspv_vloada_half2jPKj", // private
2293 };
2294
Kévin Petite8edce32019-04-10 14:23:32 +01002295 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2296 auto Index = CI->getOperand(0);
2297 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002298
Kévin Petite8edce32019-04-10 14:23:32 +01002299 auto IntTy = Type::getInt32Ty(M.getContext());
2300 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2301 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002302
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto IndexedPtr =
2304 GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
2305 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002306
Kévin Petite8edce32019-04-10 14:23:32 +01002307 // Our intrinsic to unpack a float2 from an int.
2308 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002309
Kévin Petite8edce32019-04-10 14:23:32 +01002310 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002311
Kévin Petite8edce32019-04-10 14:23:32 +01002312 // Get our final float2.
2313 return CallInst::Create(NewF, Load, "", CI);
2314 });
David Neto6ad93232018-06-07 15:42:58 -07002315}
2316
2317bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002318
2319 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2320 //
2321 // %u2 = load <2 x i32> %ptr
2322 // %u2xy = extractelement %u2, 0
2323 // %u2zw = extractelement %u2, 1
2324 // %fxy = call <2 x float> Unpack2xHalf(uint)
2325 // %fzw = call <2 x float> Unpack2xHalf(uint)
2326 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002327 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002328 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2329 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2330 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2331 };
2332
Kévin Petite8edce32019-04-10 14:23:32 +01002333 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2334 auto Index = CI->getOperand(0);
2335 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002336
Kévin Petite8edce32019-04-10 14:23:32 +01002337 auto IntTy = Type::getInt32Ty(M.getContext());
2338 auto Int2Ty = VectorType::get(IntTy, 2);
2339 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2340 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002341
Kévin Petite8edce32019-04-10 14:23:32 +01002342 auto IndexedPtr =
2343 GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
2344 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002345
Kévin Petite8edce32019-04-10 14:23:32 +01002346 // Extract each element from the loaded int2.
2347 auto X = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0),
2348 "", CI);
2349 auto Y = ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1),
2350 "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002351
Kévin Petite8edce32019-04-10 14:23:32 +01002352 // Our intrinsic to unpack a float2 from an int.
2353 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002354
Kévin Petite8edce32019-04-10 14:23:32 +01002355 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002356
Kévin Petite8edce32019-04-10 14:23:32 +01002357 // Get the lower (x & y) components of our final float4.
2358 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002359
Kévin Petite8edce32019-04-10 14:23:32 +01002360 // Get the higher (z & w) components of our final float4.
2361 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 Constant *ShuffleMask[4] = {
2364 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2365 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002366
Kévin Petite8edce32019-04-10 14:23:32 +01002367 // Combine our two float2's into one float4.
2368 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask),
2369 "", CI);
2370 });
David Neto6ad93232018-06-07 15:42:58 -07002371}
2372
David Neto22f144c2017-06-12 14:26:21 -04002373bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2376 "_Z15vstore_half_rtefjPU3AS1Dh",
2377 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002378
Kévin Petite8edce32019-04-10 14:23:32 +01002379 return replaceCallsWithValue(M, Names, [&M](CallInst* CI) {
2380 // The value to store.
2381 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002382
Kévin Petite8edce32019-04-10 14:23:32 +01002383 // The index argument from vstore_half.
2384 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002385
Kévin Petite8edce32019-04-10 14:23:32 +01002386 // The pointer argument from vstore_half.
2387 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002388
Kévin Petite8edce32019-04-10 14:23:32 +01002389 auto IntTy = Type::getInt32Ty(M.getContext());
2390 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2391 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2392 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002393
Kévin Petite8edce32019-04-10 14:23:32 +01002394 // Our intrinsic to pack a float2 to an int.
2395 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002396
Kévin Petite8edce32019-04-10 14:23:32 +01002397 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002398
Kévin Petite8edce32019-04-10 14:23:32 +01002399 // Insert our value into a float2 so that we can pack it.
2400 auto TempVec =
2401 InsertElementInst::Create(UndefValue::get(Float2Ty), Arg0,
2402 ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002403
Kévin Petite8edce32019-04-10 14:23:32 +01002404 // Pack the float2 -> half2 (in an int).
2405 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002406
Kévin Petite8edce32019-04-10 14:23:32 +01002407 Value *Ret;
2408 if (clspv::Option::F16BitStorage()) {
2409 auto ShortTy = Type::getInt16Ty(M.getContext());
2410 auto ShortPointerTy = PointerType::get(
2411 ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002412
Kévin Petite8edce32019-04-10 14:23:32 +01002413 // Truncate our i32 to an i16.
2414 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002415
Kévin Petite8edce32019-04-10 14:23:32 +01002416 // Cast the half* pointer to short*.
2417 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002418
Kévin Petite8edce32019-04-10 14:23:32 +01002419 // Index into the correct address of the casted pointer.
2420 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002421
Kévin Petite8edce32019-04-10 14:23:32 +01002422 // Store to the int* we casted to.
2423 Ret = new StoreInst(Trunc, Index, CI);
2424 } else {
2425 // We can only write to 32-bit aligned words.
2426 //
2427 // Assuming base is aligned to 32-bits, replace the equivalent of
2428 // vstore_half(value, index, base)
2429 // with:
2430 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2431 // uint32_t write_to_upper_half = index & 1u;
2432 // uint32_t shift = write_to_upper_half << 4;
2433 //
2434 // // Pack the float value as a half number in bottom 16 bits
2435 // // of an i32.
2436 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2437 //
2438 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2439 // ^ ((packed & 0xffff) << shift)
2440 // // We only need relaxed consistency, but OpenCL 1.2 only has
2441 // // sequentially consistent atomics.
2442 // // TODO(dneto): Use relaxed consistency.
2443 // atomic_xor(target_ptr, xor_value)
2444 auto IntPointerTy = PointerType::get(
2445 IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002446
Kévin Petite8edce32019-04-10 14:23:32 +01002447 auto Four = ConstantInt::get(IntTy, 4);
2448 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002449
Kévin Petite8edce32019-04-10 14:23:32 +01002450 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
2451 // Compute index / 2
2452 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2453 auto BaseI32Ptr = CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2454 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32, "base_i32_ptr", CI);
2455 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2456 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
2457 auto MaskBitsToWrite = BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2458 auto MaskedCurrent = BinaryOperator::CreateAnd(MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 auto XLowerBits = BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2461 auto NewBitsToWrite = BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2462 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite, "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002463
Kévin Petite8edce32019-04-10 14:23:32 +01002464 // Generate the call to atomi_xor.
2465 SmallVector<Type *, 5> ParamTypes;
2466 // The pointer type.
2467 ParamTypes.push_back(IntPointerTy);
2468 // The Types for memory scope, semantics, and value.
2469 ParamTypes.push_back(IntTy);
2470 ParamTypes.push_back(IntTy);
2471 ParamTypes.push_back(IntTy);
2472 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2473 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002474
Kévin Petite8edce32019-04-10 14:23:32 +01002475 const auto ConstantScopeDevice =
2476 ConstantInt::get(IntTy, spv::ScopeDevice);
2477 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2478 // (SPIR-V Workgroup).
2479 const auto AddrSpaceSemanticsBits =
2480 IntPointerTy->getPointerAddressSpace() == 1
2481 ? spv::MemorySemanticsUniformMemoryMask
2482 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002483
Kévin Petite8edce32019-04-10 14:23:32 +01002484 // We're using relaxed consistency here.
2485 const auto ConstantMemorySemantics =
2486 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2487 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002488
Kévin Petite8edce32019-04-10 14:23:32 +01002489 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2490 ConstantMemorySemantics, ValueToXor};
2491 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2492 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002493 }
David Neto22f144c2017-06-12 14:26:21 -04002494
Kévin Petite8edce32019-04-10 14:23:32 +01002495 return Ret;
2496 });
David Neto22f144c2017-06-12 14:26:21 -04002497}
2498
2499bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002500
Kévin Petite8edce32019-04-10 14:23:32 +01002501 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002502 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2503 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2504 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2505 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2506 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2507 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2508 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2509 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2510 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2511 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2512 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2513 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2514 };
David Neto22f144c2017-06-12 14:26:21 -04002515
Kévin Petite8edce32019-04-10 14:23:32 +01002516 return replaceCallsWithValue(M, Names, [&M](CallInst* CI) {
2517 // The value to store.
2518 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002519
Kévin Petite8edce32019-04-10 14:23:32 +01002520 // The index argument from vstore_half.
2521 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002522
Kévin Petite8edce32019-04-10 14:23:32 +01002523 // The pointer argument from vstore_half.
2524 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002525
Kévin Petite8edce32019-04-10 14:23:32 +01002526 auto IntTy = Type::getInt32Ty(M.getContext());
2527 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2528 auto NewPointerTy = PointerType::get(
2529 IntTy, Arg2->getType()->getPointerAddressSpace());
2530 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002531
Kévin Petite8edce32019-04-10 14:23:32 +01002532 // Our intrinsic to pack a float2 to an int.
2533 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002534
Kévin Petite8edce32019-04-10 14:23:32 +01002535 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002536
Kévin Petite8edce32019-04-10 14:23:32 +01002537 // Turn the packed x & y into the final packing.
2538 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // Cast the half* pointer to int*.
2541 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 // Index into the correct address of the casted pointer.
2544 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002545
Kévin Petite8edce32019-04-10 14:23:32 +01002546 // Store to the int* we casted to.
2547 return new StoreInst(X, Index, CI);
2548 });
David Neto22f144c2017-06-12 14:26:21 -04002549}
2550
2551bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002552
Kévin Petite8edce32019-04-10 14:23:32 +01002553 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002554 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2555 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2556 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2557 "_Z13vstorea_half4Dv4_fjPDh", // private
2558 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2559 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2560 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2561 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2562 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2563 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2564 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2565 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2566 };
David Neto22f144c2017-06-12 14:26:21 -04002567
Kévin Petite8edce32019-04-10 14:23:32 +01002568 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2569 // The value to store.
2570 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002571
Kévin Petite8edce32019-04-10 14:23:32 +01002572 // The index argument from vstore_half.
2573 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002574
Kévin Petite8edce32019-04-10 14:23:32 +01002575 // The pointer argument from vstore_half.
2576 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002577
Kévin Petite8edce32019-04-10 14:23:32 +01002578 auto IntTy = Type::getInt32Ty(M.getContext());
2579 auto Int2Ty = VectorType::get(IntTy, 2);
2580 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2581 auto NewPointerTy = PointerType::get(
2582 Int2Ty, Arg2->getType()->getPointerAddressSpace());
2583 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002584
Kévin Petite8edce32019-04-10 14:23:32 +01002585 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2586 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002587
Kévin Petite8edce32019-04-10 14:23:32 +01002588 // Extract out the x & y components of our to store value.
2589 auto Lo =
2590 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2591 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002592
Kévin Petite8edce32019-04-10 14:23:32 +01002593 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2594 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002595
Kévin Petite8edce32019-04-10 14:23:32 +01002596 // Extract out the z & w components of our to store value.
2597 auto Hi =
2598 new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2599 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002600
Kévin Petite8edce32019-04-10 14:23:32 +01002601 // Our intrinsic to pack a float2 to an int.
2602 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002603
Kévin Petite8edce32019-04-10 14:23:32 +01002604 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002605
Kévin Petite8edce32019-04-10 14:23:32 +01002606 // Turn the packed x & y into the final component of our int2.
2607 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002608
Kévin Petite8edce32019-04-10 14:23:32 +01002609 // Turn the packed z & w into the final component of our int2.
2610 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002611
Kévin Petite8edce32019-04-10 14:23:32 +01002612 auto Combine = InsertElementInst::Create(
2613 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
2614 Combine = InsertElementInst::Create(
2615 Combine, Y, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002616
Kévin Petite8edce32019-04-10 14:23:32 +01002617 // Cast the half* pointer to int2*.
2618 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002619
Kévin Petite8edce32019-04-10 14:23:32 +01002620 // Index into the correct address of the casted pointer.
2621 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002622
Kévin Petite8edce32019-04-10 14:23:32 +01002623 // Store to the int2* we casted to.
2624 return new StoreInst(Combine, Index, CI);
2625 });
David Neto22f144c2017-06-12 14:26:21 -04002626}
2627
2628bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2629 bool Changed = false;
2630
2631 const std::map<const char *, const char*> Map = {
2632 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f" },
2633 { "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i", "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f" }
2634 };
2635
2636 for (auto Pair : Map) {
2637 // If we find a function with the matching name.
2638 if (auto F = M.getFunction(Pair.first)) {
2639 SmallVector<Instruction *, 4> ToRemoves;
2640
2641 // Walk the users of the function.
2642 for (auto &U : F->uses()) {
2643 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2644 // The image.
2645 auto Arg0 = CI->getOperand(0);
2646
2647 // The sampler.
2648 auto Arg1 = CI->getOperand(1);
2649
2650 // The coordinate (integer type that we can't handle).
2651 auto Arg2 = CI->getOperand(2);
2652
2653 auto FloatVecTy = VectorType::get(Type::getFloatTy(M.getContext()), Arg2->getType()->getVectorNumElements());
2654
2655 auto NewFType = FunctionType::get(CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy}, false);
2656
2657 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2658
2659 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
2660
2661 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2662
2663 CI->replaceAllUsesWith(NewCI);
2664
2665 // Lastly, remember to remove the user.
2666 ToRemoves.push_back(CI);
2667 }
2668 }
2669
2670 Changed = !ToRemoves.empty();
2671
2672 // And cleanup the calls we don't use anymore.
2673 for (auto V : ToRemoves) {
2674 V->eraseFromParent();
2675 }
2676
2677 // And remove the function we don't need either too.
2678 F->eraseFromParent();
2679 }
2680 }
2681
2682 return Changed;
2683}
2684
2685bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2686 bool Changed = false;
2687
2688 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002689 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002690 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002691 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002692 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002693 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002694 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002695 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002696 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002697 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002698 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002699 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002700 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002701 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002702 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002703 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002704 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002705 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002706 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002707 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002708 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002709 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002710 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2711 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2712 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002713
2714 for (auto Pair : Map) {
2715 // If we find a function with the matching name.
2716 if (auto F = M.getFunction(Pair.first)) {
2717 SmallVector<Instruction *, 4> ToRemoves;
2718
2719 // Walk the users of the function.
2720 for (auto &U : F->uses()) {
2721 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2722 auto FType = F->getFunctionType();
2723 SmallVector<Type *, 5> ParamTypes;
2724
2725 // The pointer type.
2726 ParamTypes.push_back(FType->getParamType(0));
2727
2728 auto IntTy = Type::getInt32Ty(M.getContext());
2729
2730 // The memory scope type.
2731 ParamTypes.push_back(IntTy);
2732
2733 // The memory semantics type.
2734 ParamTypes.push_back(IntTy);
2735
2736 if (2 < CI->getNumArgOperands()) {
2737 // The unequal memory semantics type.
2738 ParamTypes.push_back(IntTy);
2739
2740 // The value type.
2741 ParamTypes.push_back(FType->getParamType(2));
2742
2743 // The comparator type.
2744 ParamTypes.push_back(FType->getParamType(1));
2745 } else if (1 < CI->getNumArgOperands()) {
2746 // The value type.
2747 ParamTypes.push_back(FType->getParamType(1));
2748 }
2749
2750 auto NewFType =
2751 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2752 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2753
2754 // We need to map the OpenCL constants to the SPIR-V equivalents.
2755 const auto ConstantScopeDevice =
2756 ConstantInt::get(IntTy, spv::ScopeDevice);
2757 const auto ConstantMemorySemantics = ConstantInt::get(
2758 IntTy, spv::MemorySemanticsUniformMemoryMask |
2759 spv::MemorySemanticsSequentiallyConsistentMask);
2760
2761 SmallVector<Value *, 5> Params;
2762
2763 // The pointer.
2764 Params.push_back(CI->getArgOperand(0));
2765
2766 // The memory scope.
2767 Params.push_back(ConstantScopeDevice);
2768
2769 // The memory semantics.
2770 Params.push_back(ConstantMemorySemantics);
2771
2772 if (2 < CI->getNumArgOperands()) {
2773 // The unequal memory semantics.
2774 Params.push_back(ConstantMemorySemantics);
2775
2776 // The value.
2777 Params.push_back(CI->getArgOperand(2));
2778
2779 // The comparator.
2780 Params.push_back(CI->getArgOperand(1));
2781 } else if (1 < CI->getNumArgOperands()) {
2782 // The value.
2783 Params.push_back(CI->getArgOperand(1));
2784 }
2785
2786 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2787
2788 CI->replaceAllUsesWith(NewCI);
2789
2790 // Lastly, remember to remove the user.
2791 ToRemoves.push_back(CI);
2792 }
2793 }
2794
2795 Changed = !ToRemoves.empty();
2796
2797 // And cleanup the calls we don't use anymore.
2798 for (auto V : ToRemoves) {
2799 V->eraseFromParent();
2800 }
2801
2802 // And remove the function we don't need either too.
2803 F->eraseFromParent();
2804 }
2805 }
2806
Neil Henning39672102017-09-29 14:33:13 +01002807 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002808 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002809 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002810 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002811 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002812 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002813 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002814 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002815 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002816 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002817 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002818 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002819 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002820 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002821 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002822 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002823 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002824 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002825 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002826 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002827 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002828 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002829 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002830 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002831 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002832 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002833 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002834 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002835 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002836 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002837 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002838 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002839 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002840 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002841 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002842 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002843 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002844 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002845 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002846 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002847 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002848 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002849 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002850 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002851 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002852 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002853 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002854 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002855 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002856 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002857 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002858 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002859 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002860 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002861 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002862 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002863 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002864 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002865 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002866 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002867 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002868 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002869 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2870 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2871 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002872
2873 for (auto Pair : Map2) {
2874 // If we find a function with the matching name.
2875 if (auto F = M.getFunction(Pair.first)) {
2876 SmallVector<Instruction *, 4> ToRemoves;
2877
2878 // Walk the users of the function.
2879 for (auto &U : F->uses()) {
2880 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2881 auto AtomicOp = new AtomicRMWInst(
2882 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2883 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2884
2885 CI->replaceAllUsesWith(AtomicOp);
2886
2887 // Lastly, remember to remove the user.
2888 ToRemoves.push_back(CI);
2889 }
2890 }
2891
2892 Changed = !ToRemoves.empty();
2893
2894 // And cleanup the calls we don't use anymore.
2895 for (auto V : ToRemoves) {
2896 V->eraseFromParent();
2897 }
2898
2899 // And remove the function we don't need either too.
2900 F->eraseFromParent();
2901 }
2902 }
2903
David Neto22f144c2017-06-12 14:26:21 -04002904 return Changed;
2905}
2906
2907bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002908
Kévin Petite8edce32019-04-10 14:23:32 +01002909 std::vector<const char*> Names = {
2910 "_Z5crossDv4_fS_",
2911 };
2912
2913 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002914
2915 auto IntTy = Type::getInt32Ty(M.getContext());
2916 auto FloatTy = Type::getFloatTy(M.getContext());
2917
2918 Constant *DownShuffleMask[3] = {
2919 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2920 ConstantInt::get(IntTy, 2)};
2921
2922 Constant *UpShuffleMask[4] = {
2923 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2924 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2925
2926 Constant *FloatVec[3] = {
2927 ConstantFP::get(FloatTy, 0.0f), UndefValue::get(FloatTy), UndefValue::get(FloatTy)
2928 };
2929
Kévin Petite8edce32019-04-10 14:23:32 +01002930 auto Vec4Ty = CI->getArgOperand(0)->getType();
2931 auto Arg0 = new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2932 auto Arg1 = new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty), ConstantVector::get(DownShuffleMask), "", CI);
2933 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002934
Kévin Petite8edce32019-04-10 14:23:32 +01002935 auto NewFType =
2936 FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002937
Kévin Petite8edce32019-04-10 14:23:32 +01002938 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002939
Kévin Petite8edce32019-04-10 14:23:32 +01002940 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002941
Kévin Petite8edce32019-04-10 14:23:32 +01002942 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec), ConstantVector::get(UpShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002943
Kévin Petite8edce32019-04-10 14:23:32 +01002944 });
David Neto22f144c2017-06-12 14:26:21 -04002945}
David Neto62653202017-10-16 19:05:18 -04002946
2947bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2948 bool Changed = false;
2949
2950 // OpenCL's float result = fract(float x, float* ptr)
2951 //
2952 // In the LLVM domain:
2953 //
2954 // %floor_result = call spir_func float @floor(float %x)
2955 // store float %floor_result, float * %ptr
2956 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2957 // %result = call spir_func float
2958 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2959 //
2960 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2961 // and clspv.fract occur in the SPIR-V generator pass:
2962 //
2963 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2964 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2965 // ...
2966 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2967 // OpStore %ptr %floor_result
2968 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2969 // %fract_result = OpExtInst %float
2970 // %glsl_ext Fmin %fract_intermediate %just_under_1
2971
2972
2973 using std::string;
2974
2975 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2976 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
2977 using QuadType = std::tuple<const char *, const char *, const char *, const char *>;
2978 auto make_quad = [](const char *a, const char *b, const char *c,
2979 const char *d) {
2980 return std::tuple<const char *, const char *, const char *, const char *>(
2981 a, b, c, d);
2982 };
2983 const std::vector<QuadType> Functions = {
2984 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
2985 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff", "clspv.fract.v2f"),
2986 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff", "clspv.fract.v3f"),
2987 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff", "clspv.fract.v4f"),
2988 };
2989
2990 for (auto& quad : Functions) {
2991 const StringRef fract_name(std::get<0>(quad));
2992
2993 // If we find a function with the matching name.
2994 if (auto F = M.getFunction(fract_name)) {
2995 if (F->use_begin() == F->use_end())
2996 continue;
2997
2998 // We have some uses.
2999 Changed = true;
3000
3001 auto& Context = M.getContext();
3002
3003 const StringRef floor_name(std::get<1>(quad));
3004 const StringRef fmin_name(std::get<2>(quad));
3005 const StringRef clspv_fract_name(std::get<3>(quad));
3006
3007 // This is either float or a float vector. All the float-like
3008 // types are this type.
3009 auto result_ty = F->getReturnType();
3010
3011 Function* fmin_fn = M.getFunction(fmin_name);
3012 if (!fmin_fn) {
3013 // Make the fmin function.
3014 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003015 fmin_fn =
3016 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003017 fmin_fn->addFnAttr(Attribute::ReadNone);
3018 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3019 }
3020
3021 Function* floor_fn = M.getFunction(floor_name);
3022 if (!floor_fn) {
3023 // Make the floor function.
3024 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003025 floor_fn = cast<Function>(
3026 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003027 floor_fn->addFnAttr(Attribute::ReadNone);
3028 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3029 }
3030
3031 Function* clspv_fract_fn = M.getFunction(clspv_fract_name);
3032 if (!clspv_fract_fn) {
3033 // Make the clspv_fract function.
3034 FunctionType* fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003035 clspv_fract_fn = cast<Function>(
3036 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003037 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3038 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3039 }
3040
3041 // Number of significant significand bits, whether represented or not.
3042 unsigned num_significand_bits;
3043 switch (result_ty->getScalarType()->getTypeID()) {
3044 case Type::HalfTyID:
3045 num_significand_bits = 11;
3046 break;
3047 case Type::FloatTyID:
3048 num_significand_bits = 24;
3049 break;
3050 case Type::DoubleTyID:
3051 num_significand_bits = 53;
3052 break;
3053 default:
3054 assert(false && "Unhandled float type when processing fract builtin");
3055 break;
3056 }
3057 // Beware that the disassembler displays this value as
3058 // OpConstant %float 1
3059 // which is not quite right.
3060 const double kJustUnderOneScalar =
3061 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3062
3063 Constant *just_under_one =
3064 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3065 if (result_ty->isVectorTy()) {
3066 just_under_one = ConstantVector::getSplat(
3067 result_ty->getVectorNumElements(), just_under_one);
3068 }
3069
3070 IRBuilder<> Builder(Context);
3071
3072 SmallVector<Instruction *, 4> ToRemoves;
3073
3074 // Walk the users of the function.
3075 for (auto &U : F->uses()) {
3076 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3077
3078 Builder.SetInsertPoint(CI);
3079 auto arg = CI->getArgOperand(0);
3080 auto ptr = CI->getArgOperand(1);
3081
3082 // Compute floor result and store it.
3083 auto floor = Builder.CreateCall(floor_fn, {arg});
3084 Builder.CreateStore(floor, ptr);
3085
3086 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
3087 auto fract_result = Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
3088
3089 CI->replaceAllUsesWith(fract_result);
3090
3091 // Lastly, remember to remove the user.
3092 ToRemoves.push_back(CI);
3093 }
3094 }
3095
3096 // And cleanup the calls we don't use anymore.
3097 for (auto V : ToRemoves) {
3098 V->eraseFromParent();
3099 }
3100
3101 // And remove the function we don't need either too.
3102 F->eraseFromParent();
3103 }
3104 }
3105
3106 return Changed;
3107}