blob: 00a738bb8b104ea6708b51456b291ca62de35438 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
Kévin Petit617a76d2019-04-04 13:54:16 +010032#include "SPIRVOp.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
David Neto22f144c2017-06-12 14:26:21 -040035using namespace llvm;
36
37#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
38
39namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000040
// Per-argument type information recovered from a mangled builtin name.
struct ArgTypeInfo {
  // Signedness of the argument's scalar (or vector element) type.
  // None is used for types that are not integers (the parser in this file
  // assigns it to 'f').
  enum class SignedNess { None, Unsigned, Signed };

  // Default to None so a default-constructed ArgTypeInfo never carries an
  // indeterminate value that could later be copied or compared.
  SignedNess signedness = SignedNess::None;
};
45
46struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000047 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000048 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000049
Kévin Petit91bc72e2019-04-08 15:17:46 +010050 bool isArgSigned(size_t arg) const {
51 assert(argTypeInfos.size() > arg);
52 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000053 }
54
Kévin Petit91bc72e2019-04-08 15:17:46 +010055 static FunctionInfo getFromMangledName(StringRef name) {
56 FunctionInfo fi;
57 if (!getFromMangledNameCheck(name, &fi)) {
58 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000059 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010060 return fi;
61 }
Kévin Petit8a560882019-03-21 15:24:34 +000062
Kévin Petit91bc72e2019-04-08 15:17:46 +010063 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
64 if (!name.consume_front("_Z")) {
65 return false;
66 }
67 size_t nameLen;
68 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000069 return false;
70 }
71
Kévin Petit91bc72e2019-04-08 15:17:46 +010072 finfo->name = name.take_front(nameLen);
73 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000074
Kévin Petit91bc72e2019-04-08 15:17:46 +010075 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 while (name.size() != 0) {
78
79 ArgTypeInfo ti;
80
81 // Try parsing a vector prefix
82 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040083 int numElems;
84 if (name.consumeInteger(10, numElems)) {
85 return false;
86 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010087
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040088 if (!name.consume_front("_")) {
89 return false;
90 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010091 }
92
93 // Parse the base type
94 char typeCode = name.front();
95 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040096 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010097 case 'c': // char
98 case 'a': // signed char
99 case 's': // short
100 case 'i': // int
101 case 'l': // long
102 ti.signedness = ArgTypeInfo::SignedNess::Signed;
103 break;
104 case 'h': // unsigned char
105 case 't': // unsigned short
106 case 'j': // unsigned int
107 case 'm': // unsigned long
108 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
109 break;
110 case 'f':
111 ti.signedness = ArgTypeInfo::SignedNess::None;
112 break;
113 case 'S':
114 ti = prev_ti;
115 if (!name.consume_front("_")) {
116 return false;
117 }
118 break;
119 default:
120 return false;
121 }
122
123 finfo->argTypeInfos.push_back(ti);
124
125 prev_ti = ti;
126 }
127
128 return true;
129 };
Kévin Petit8a560882019-03-21 15:24:34 +0000130};
131
// Returns the index of the highest set bit of |v| (i.e. floor(log2(v))),
// and 0 when |v| is 0. Note that, despite the name, this is not a count of
// leading zeros.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the top set bit: whenever anything is set above the
  // low |step| bits, discard those low bits and account for them.
  for (uint32_t step = 16; step >= 2; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      index |= step;
    }
  }

  // At this point v < 4, so v >> 1 is the final bit of the index.
  index |= (v >> 1);

  return index;
}
151
152Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
153 if (1 == elements) {
154 return Type::getInt1Ty(C);
155 } else {
156 return VectorType::get(Type::getInt1Ty(C), elements);
157 }
158}
159
160struct ReplaceOpenCLBuiltinPass final : public ModulePass {
161 static char ID;
162 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
163
164 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000165 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100166 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100167 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400168 bool replaceRecip(Module &M);
169 bool replaceDivide(Module &M);
170 bool replaceExp10(Module &M);
171 bool replaceLog10(Module &M);
172 bool replaceBarrier(Module &M);
173 bool replaceMemFence(Module &M);
174 bool replaceRelational(Module &M);
175 bool replaceIsInfAndIsNan(Module &M);
176 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000177 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000178 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000179 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000180 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000181 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000182 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000183 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400184 bool replaceSignbit(Module &M);
185 bool replaceMadandMad24andMul24(Module &M);
186 bool replaceVloadHalf(Module &M);
187 bool replaceVloadHalf2(Module &M);
188 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700189 bool replaceClspvVloadaHalf2(Module &M);
190 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400191 bool replaceVstoreHalf(Module &M);
192 bool replaceVstoreHalf2(Module &M);
193 bool replaceVstoreHalf4(Module &M);
194 bool replaceReadImageF(Module &M);
195 bool replaceAtomics(Module &M);
196 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400197 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700198 bool replaceVload(Module &M);
199 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400200};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100201} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400202
203char ReplaceOpenCLBuiltinPass::ID = 0;
204static RegisterPass<ReplaceOpenCLBuiltinPass> X("ReplaceOpenCLBuiltin",
205 "Replace OpenCL Builtins Pass");
206
207namespace clspv {
208ModulePass *createReplaceOpenCLBuiltinPass() {
209 return new ReplaceOpenCLBuiltinPass();
210}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400211} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400212
213bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
214 bool Changed = false;
215
Kévin Petit2444e9b2018-11-09 14:14:37 +0000216 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100217 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100218 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400219 Changed |= replaceRecip(M);
220 Changed |= replaceDivide(M);
221 Changed |= replaceExp10(M);
222 Changed |= replaceLog10(M);
223 Changed |= replaceBarrier(M);
224 Changed |= replaceMemFence(M);
225 Changed |= replaceRelational(M);
226 Changed |= replaceIsInfAndIsNan(M);
227 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000228 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000229 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000230 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000231 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000232 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000233 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000234 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400235 Changed |= replaceSignbit(M);
236 Changed |= replaceMadandMad24andMul24(M);
237 Changed |= replaceVloadHalf(M);
238 Changed |= replaceVloadHalf2(M);
239 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700240 Changed |= replaceClspvVloadaHalf2(M);
241 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400242 Changed |= replaceVstoreHalf(M);
243 Changed |= replaceVstoreHalf2(M);
244 Changed |= replaceVstoreHalf4(M);
245 Changed |= replaceReadImageF(M);
246 Changed |= replaceAtomics(M);
247 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400248 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700249 Changed |= replaceVload(M);
250 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400251
252 return Changed;
253}
254
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400255bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
256 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000257
Kévin Petite8edce32019-04-10 14:23:32 +0100258 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000259
260 for (auto Name : Names) {
261 // If we find a function with the matching name.
262 if (auto F = M.getFunction(Name)) {
263 SmallVector<Instruction *, 4> ToRemoves;
264
265 // Walk the users of the function.
266 for (auto &U : F->uses()) {
267 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000268
Kévin Petite8edce32019-04-10 14:23:32 +0100269 auto NewValue = Replacer(CI);
270
271 if (NewValue != nullptr) {
272 CI->replaceAllUsesWith(NewValue);
273 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000274
275 // Lastly, remember to remove the user.
276 ToRemoves.push_back(CI);
277 }
278 }
279
280 Changed = !ToRemoves.empty();
281
282 // And cleanup the calls we don't use anymore.
283 for (auto V : ToRemoves) {
284 V->eraseFromParent();
285 }
286
287 // And remove the function we don't need either too.
288 F->eraseFromParent();
289 }
290 }
291
292 return Changed;
293}
294
Kévin Petite8edce32019-04-10 14:23:32 +0100295bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100296
Kévin Petite8edce32019-04-10 14:23:32 +0100297 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400298 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
299 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
300 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
301 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100302 };
303
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400304 return replaceCallsWithValue(M, Names,
305 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100306}
307
308bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
309
310 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400311 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
312 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
313 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
314 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
315 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
316 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
317 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
318 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
319 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
320 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
321 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100322 };
323
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400324 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100325 auto XValue = CI->getOperand(0);
326 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100327
Kévin Petite8edce32019-04-10 14:23:32 +0100328 IRBuilder<> Builder(CI);
329 auto XmY = Builder.CreateSub(XValue, YValue);
330 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100331
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400332 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100333 auto F = CI->getCalledFunction();
334 auto finfo = FunctionInfo::getFromMangledName(F->getName());
335 if (finfo.isArgSigned(0)) {
336 Cmp = Builder.CreateICmpSGT(YValue, XValue);
337 } else {
338 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100339 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100340
Kévin Petite8edce32019-04-10 14:23:32 +0100341 return Builder.CreateSelect(Cmp, YmX, XmY);
342 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100343}
344
Kévin Petit8c1be282019-04-02 19:34:25 +0100345bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100346
Kévin Petite8edce32019-04-10 14:23:32 +0100347 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400348 "_Z8copysignff",
349 "_Z8copysignDv2_fS_",
350 "_Z8copysignDv3_fS_",
351 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100352 };
353
Kévin Petite8edce32019-04-10 14:23:32 +0100354 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
355 auto XValue = CI->getOperand(0);
356 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100357
Kévin Petite8edce32019-04-10 14:23:32 +0100358 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100359
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400360 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100361 if (Ty->isVectorTy()) {
362 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100363 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100364
Kévin Petite8edce32019-04-10 14:23:32 +0100365 // Return X with the sign of Y
366
367 // Sign bit masks
368 auto SignBit = IntTy->getScalarSizeInBits() - 1;
369 auto SignBitMask = 1 << SignBit;
370 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
371 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
372
373 IRBuilder<> Builder(CI);
374
375 // Extract sign of Y
376 auto YInt = Builder.CreateBitCast(YValue, IntTy);
377 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
378
379 // Clear sign bit in X
380 auto XInt = Builder.CreateBitCast(XValue, IntTy);
381 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
382
383 // Insert sign bit of Y into X
384 auto NewXInt = Builder.CreateOr(XInt, YSign);
385
386 // And cast back to floating-point
387 return Builder.CreateBitCast(NewXInt, Ty);
388 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100389}
390
David Neto22f144c2017-06-12 14:26:21 -0400391bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400392
Kévin Petite8edce32019-04-10 14:23:32 +0100393 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400394 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
395 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
396 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
397 };
398
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400399 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100400 // Recip has one arg.
401 auto Arg = CI->getOperand(0);
402 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
403 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
404 });
David Neto22f144c2017-06-12 14:26:21 -0400405}
406
407bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400408
Kévin Petite8edce32019-04-10 14:23:32 +0100409 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400410 "_Z11half_divideff", "_Z13native_divideff",
411 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
412 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
413 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
414 };
415
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400416 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100417 auto Op0 = CI->getOperand(0);
418 auto Op1 = CI->getOperand(1);
419 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
420 });
David Neto22f144c2017-06-12 14:26:21 -0400421}
422
423bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
424 bool Changed = false;
425
426 const std::map<const char *, const char *> Map = {
427 {"_Z5exp10f", "_Z3expf"},
428 {"_Z10half_exp10f", "_Z8half_expf"},
429 {"_Z12native_exp10f", "_Z10native_expf"},
430 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
431 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
432 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
433 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
434 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
435 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
436 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
437 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
438 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
439
440 for (auto Pair : Map) {
441 // If we find a function with the matching name.
442 if (auto F = M.getFunction(Pair.first)) {
443 SmallVector<Instruction *, 4> ToRemoves;
444
445 // Walk the users of the function.
446 for (auto &U : F->uses()) {
447 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
448 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
449
450 auto Arg = CI->getOperand(0);
451
452 // Constant of the natural log of 10 (ln(10)).
453 const double Ln10 =
454 2.302585092994045684017991454684364207601101488628772976033;
455
456 auto Mul = BinaryOperator::Create(
457 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
458 CI);
459
460 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
461
462 CI->replaceAllUsesWith(NewCI);
463
464 // Lastly, remember to remove the user.
465 ToRemoves.push_back(CI);
466 }
467 }
468
469 Changed = !ToRemoves.empty();
470
471 // And cleanup the calls we don't use anymore.
472 for (auto V : ToRemoves) {
473 V->eraseFromParent();
474 }
475
476 // And remove the function we don't need either too.
477 F->eraseFromParent();
478 }
479 }
480
481 return Changed;
482}
483
484bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
485 bool Changed = false;
486
487 const std::map<const char *, const char *> Map = {
488 {"_Z5log10f", "_Z3logf"},
489 {"_Z10half_log10f", "_Z8half_logf"},
490 {"_Z12native_log10f", "_Z10native_logf"},
491 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
492 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
493 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
494 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
495 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
496 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
497 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
498 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
499 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
500
501 for (auto Pair : Map) {
502 // If we find a function with the matching name.
503 if (auto F = M.getFunction(Pair.first)) {
504 SmallVector<Instruction *, 4> ToRemoves;
505
506 // Walk the users of the function.
507 for (auto &U : F->uses()) {
508 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
509 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
510
511 auto Arg = CI->getOperand(0);
512
513 // Constant of the reciprocal of the natural log of 10 (ln(10)).
514 const double Ln10 =
515 0.434294481903251827651128918916605082294397005803666566114;
516
517 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
518
519 auto Mul = BinaryOperator::Create(
520 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
521 "", CI);
522
523 CI->replaceAllUsesWith(Mul);
524
525 // Lastly, remember to remove the user.
526 ToRemoves.push_back(CI);
527 }
528 }
529
530 Changed = !ToRemoves.empty();
531
532 // And cleanup the calls we don't use anymore.
533 for (auto V : ToRemoves) {
534 V->eraseFromParent();
535 }
536
537 // And remove the function we don't need either too.
538 F->eraseFromParent();
539 }
540 }
541
542 return Changed;
543}
544
545bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
546 bool Changed = false;
547
548 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
549
550 const std::map<const char *, const char *> Map = {
551 {"_Z7barrierj", "__spirv_control_barrier"}};
552
553 for (auto Pair : Map) {
554 // If we find a function with the matching name.
555 if (auto F = M.getFunction(Pair.first)) {
556 SmallVector<Instruction *, 4> ToRemoves;
557
558 // Walk the users of the function.
559 for (auto &U : F->uses()) {
560 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
561 auto FType = F->getFunctionType();
562 SmallVector<Type *, 3> Params;
563 for (unsigned i = 0; i < 3; i++) {
564 Params.push_back(FType->getParamType(0));
565 }
566 auto NewFType =
567 FunctionType::get(FType->getReturnType(), Params, false);
568 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
569
570 auto Arg = CI->getOperand(0);
571
572 // We need to map the OpenCL constants to the SPIR-V equivalents.
573 const auto LocalMemFence =
574 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
575 const auto GlobalMemFence =
576 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
577 const auto ConstantSequentiallyConsistent = ConstantInt::get(
578 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
579 const auto ConstantScopeDevice =
580 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
581 const auto ConstantScopeWorkgroup =
582 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
583
584 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
585 const auto LocalMemFenceMask = BinaryOperator::Create(
586 Instruction::And, LocalMemFence, Arg, "", CI);
587 const auto WorkgroupShiftAmount =
588 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
589 clz(CLK_LOCAL_MEM_FENCE);
590 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
591 Instruction::Shl, LocalMemFenceMask,
592 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
593
594 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
595 const auto GlobalMemFenceMask = BinaryOperator::Create(
596 Instruction::And, GlobalMemFence, Arg, "", CI);
597 const auto UniformShiftAmount =
598 clz(spv::MemorySemanticsUniformMemoryMask) -
599 clz(CLK_GLOBAL_MEM_FENCE);
600 const auto MemorySemanticsUniform = BinaryOperator::Create(
601 Instruction::Shl, GlobalMemFenceMask,
602 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
603
604 // And combine the above together, also adding in
605 // MemorySemanticsSequentiallyConsistentMask.
606 auto MemorySemantics =
607 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
608 ConstantSequentiallyConsistent, "", CI);
609 MemorySemantics = BinaryOperator::Create(
610 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
611
612 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
613 // Device Scope, otherwise Workgroup Scope.
614 const auto Cmp =
615 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
616 GlobalMemFenceMask, GlobalMemFence, "", CI);
617 const auto MemoryScope = SelectInst::Create(
618 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
619
620 // Lastly, the Execution Scope is always Workgroup Scope.
621 const auto ExecutionScope = ConstantScopeWorkgroup;
622
623 auto NewCI = CallInst::Create(
624 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
625
626 CI->replaceAllUsesWith(NewCI);
627
628 // Lastly, remember to remove the user.
629 ToRemoves.push_back(CI);
630 }
631 }
632
633 Changed = !ToRemoves.empty();
634
635 // And cleanup the calls we don't use anymore.
636 for (auto V : ToRemoves) {
637 V->eraseFromParent();
638 }
639
640 // And remove the function we don't need either too.
641 F->eraseFromParent();
642 }
643 }
644
645 return Changed;
646}
647
648bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
649 bool Changed = false;
650
651 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
652
Neil Henning39672102017-09-29 14:33:13 +0100653 using Tuple = std::tuple<const char *, unsigned>;
654 const std::map<const char *, Tuple> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400655 {"_Z9mem_fencej", Tuple("__spirv_memory_barrier",
656 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100657 {"_Z14read_mem_fencej",
658 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
659 {"_Z15write_mem_fencej",
660 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400661
662 for (auto Pair : Map) {
663 // If we find a function with the matching name.
664 if (auto F = M.getFunction(Pair.first)) {
665 SmallVector<Instruction *, 4> ToRemoves;
666
667 // Walk the users of the function.
668 for (auto &U : F->uses()) {
669 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
670 auto FType = F->getFunctionType();
671 SmallVector<Type *, 2> Params;
672 for (unsigned i = 0; i < 2; i++) {
673 Params.push_back(FType->getParamType(0));
674 }
675 auto NewFType =
676 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100677 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400678
679 auto Arg = CI->getOperand(0);
680
681 // We need to map the OpenCL constants to the SPIR-V equivalents.
682 const auto LocalMemFence =
683 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
684 const auto GlobalMemFence =
685 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
686 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100687 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400688 const auto ConstantScopeDevice =
689 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
690
691 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
692 const auto LocalMemFenceMask = BinaryOperator::Create(
693 Instruction::And, LocalMemFence, Arg, "", CI);
694 const auto WorkgroupShiftAmount =
695 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
696 clz(CLK_LOCAL_MEM_FENCE);
697 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
698 Instruction::Shl, LocalMemFenceMask,
699 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
700
701 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
702 const auto GlobalMemFenceMask = BinaryOperator::Create(
703 Instruction::And, GlobalMemFence, Arg, "", CI);
704 const auto UniformShiftAmount =
705 clz(spv::MemorySemanticsUniformMemoryMask) -
706 clz(CLK_GLOBAL_MEM_FENCE);
707 const auto MemorySemanticsUniform = BinaryOperator::Create(
708 Instruction::Shl, GlobalMemFenceMask,
709 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
710
711 // And combine the above together, also adding in
712 // MemorySemanticsSequentiallyConsistentMask.
713 auto MemorySemantics =
714 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
715 ConstantMemorySemantics, "", CI);
716 MemorySemantics = BinaryOperator::Create(
717 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
718
719 // Memory Scope is always device.
720 const auto MemoryScope = ConstantScopeDevice;
721
722 auto NewCI =
723 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
724
725 CI->replaceAllUsesWith(NewCI);
726
727 // Lastly, remember to remove the user.
728 ToRemoves.push_back(CI);
729 }
730 }
731
732 Changed = !ToRemoves.empty();
733
734 // And cleanup the calls we don't use anymore.
735 for (auto V : ToRemoves) {
736 V->eraseFromParent();
737 }
738
739 // And remove the function we don't need either too.
740 F->eraseFromParent();
741 }
742 }
743
744 return Changed;
745}
746
747bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
748 bool Changed = false;
749
750 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
751 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
752 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
753 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
754 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
755 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
756 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
757 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
758 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
759 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
760 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
761 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
762 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
763 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
764 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
765 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
766 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
767 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
768 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
769 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
770 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
771 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
772 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
773 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
774 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
775 };
776
777 for (auto Pair : Map) {
778 // If we find a function with the matching name.
779 if (auto F = M.getFunction(Pair.first)) {
780 SmallVector<Instruction *, 4> ToRemoves;
781
782 // Walk the users of the function.
783 for (auto &U : F->uses()) {
784 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
785 // The predicate to use in the CmpInst.
786 auto Predicate = Pair.second.first;
787
788 // The value to return for true.
789 auto TrueValue =
790 ConstantInt::getSigned(CI->getType(), Pair.second.second);
791
792 // The value to return for false.
793 auto FalseValue = Constant::getNullValue(CI->getType());
794
795 auto Arg1 = CI->getOperand(0);
796 auto Arg2 = CI->getOperand(1);
797
798 const auto Cmp =
799 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
800
801 const auto Select =
802 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
803
804 CI->replaceAllUsesWith(Select);
805
806 // Lastly, remember to remove the user.
807 ToRemoves.push_back(CI);
808 }
809 }
810
811 Changed = !ToRemoves.empty();
812
813 // And cleanup the calls we don't use anymore.
814 for (auto V : ToRemoves) {
815 V->eraseFromParent();
816 }
817
818 // And remove the function we don't need either too.
819 F->eraseFromParent();
820 }
821 }
822
823 return Changed;
824}
825
826bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
827 bool Changed = false;
828
829 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
830 {"_Z5isinff", {"__spirv_isinff", 1}},
831 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
832 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
833 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
834 {"_Z5isnanf", {"__spirv_isnanf", 1}},
835 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
836 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
837 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
838 };
839
840 for (auto Pair : Map) {
841 // If we find a function with the matching name.
842 if (auto F = M.getFunction(Pair.first)) {
843 SmallVector<Instruction *, 4> ToRemoves;
844
845 // Walk the users of the function.
846 for (auto &U : F->uses()) {
847 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
848 const auto CITy = CI->getType();
849
850 // The fake SPIR-V intrinsic to generate.
851 auto SPIRVIntrinsic = Pair.second.first;
852
853 // The value to return for true.
854 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
855
856 // The value to return for false.
857 auto FalseValue = Constant::getNullValue(CITy);
858
859 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
860 M.getContext(),
861 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
862
863 auto NewFType =
864 FunctionType::get(CorrespondingBoolTy,
865 F->getFunctionType()->getParamType(0), false);
866
867 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
868
869 auto Arg = CI->getOperand(0);
870
871 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
872
873 const auto Select =
874 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
875
876 CI->replaceAllUsesWith(Select);
877
878 // Lastly, remember to remove the user.
879 ToRemoves.push_back(CI);
880 }
881 }
882
883 Changed = !ToRemoves.empty();
884
885 // And cleanup the calls we don't use anymore.
886 for (auto V : ToRemoves) {
887 V->eraseFromParent();
888 }
889
890 // And remove the function we don't need either too.
891 F->eraseFromParent();
892 }
893 }
894
895 return Changed;
896}
897
898bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
899 bool Changed = false;
900
901 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000902 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500903 {"_Z3allc", ""},
904 {"_Z3allDv2_c", "__spirv_allDv2_c"},
905 {"_Z3allDv3_c", "__spirv_allDv3_c"},
906 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000907 {"_Z3alls", ""},
908 {"_Z3allDv2_s", "__spirv_allDv2_s"},
909 {"_Z3allDv3_s", "__spirv_allDv3_s"},
910 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400911 {"_Z3alli", ""},
912 {"_Z3allDv2_i", "__spirv_allDv2_i"},
913 {"_Z3allDv3_i", "__spirv_allDv3_i"},
914 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000915 {"_Z3alll", ""},
916 {"_Z3allDv2_l", "__spirv_allDv2_l"},
917 {"_Z3allDv3_l", "__spirv_allDv3_l"},
918 {"_Z3allDv4_l", "__spirv_allDv4_l"},
919
920 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500921 {"_Z3anyc", ""},
922 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
923 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
924 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000925 {"_Z3anys", ""},
926 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
927 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
928 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400929 {"_Z3anyi", ""},
930 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
931 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
932 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000933 {"_Z3anyl", ""},
934 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
935 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
936 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400937 };
938
939 for (auto Pair : Map) {
940 // If we find a function with the matching name.
941 if (auto F = M.getFunction(Pair.first)) {
942 SmallVector<Instruction *, 4> ToRemoves;
943
944 // Walk the users of the function.
945 for (auto &U : F->uses()) {
946 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
947 // The fake SPIR-V intrinsic to generate.
948 auto SPIRVIntrinsic = Pair.second;
949
950 auto Arg = CI->getOperand(0);
951
952 Value *V;
953
Kévin Petitfd27cca2018-10-31 13:00:17 +0000954 // If the argument is a 32-bit int, just use a shift
955 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
956 V = BinaryOperator::Create(Instruction::LShr, Arg,
957 ConstantInt::get(Arg->getType(), 31), "",
958 CI);
959 } else {
David Neto22f144c2017-06-12 14:26:21 -0400960 // The value for zero to compare against.
961 const auto ZeroValue = Constant::getNullValue(Arg->getType());
962
David Neto22f144c2017-06-12 14:26:21 -0400963 // The value to return for true.
964 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
965
966 // The value to return for false.
967 const auto FalseValue = Constant::getNullValue(CI->getType());
968
Kévin Petitfd27cca2018-10-31 13:00:17 +0000969 const auto Cmp = CmpInst::Create(
970 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
971
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400972 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000973
974 // If we have a function to call, call it!
975 if (0 < strlen(SPIRVIntrinsic)) {
976
977 const auto NewFType = FunctionType::get(
978 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
979
980 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
981
982 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
983
984 SelectSource = NewCI;
985
986 } else {
987 SelectSource = Cmp;
988 }
989
990 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400991 }
992
993 CI->replaceAllUsesWith(V);
994
995 // Lastly, remember to remove the user.
996 ToRemoves.push_back(CI);
997 }
998 }
999
1000 Changed = !ToRemoves.empty();
1001
1002 // And cleanup the calls we don't use anymore.
1003 for (auto V : ToRemoves) {
1004 V->eraseFromParent();
1005 }
1006
1007 // And remove the function we don't need either too.
1008 F->eraseFromParent();
1009 }
1010 }
1011
1012 return Changed;
1013}
1014
Kévin Petitbf0036c2019-03-06 13:57:10 +00001015bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1016 bool Changed = false;
1017
1018 for (auto const &SymVal : M.getValueSymbolTable()) {
1019 // Skip symbols whose name doesn't match
1020 if (!SymVal.getKey().startswith("_Z8upsample")) {
1021 continue;
1022 }
1023 // Is there a function going by that name?
1024 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1025
1026 SmallVector<Instruction *, 4> ToRemoves;
1027
1028 // Walk the users of the function.
1029 for (auto &U : F->uses()) {
1030 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1031
1032 // Get arguments
1033 auto HiValue = CI->getOperand(0);
1034 auto LoValue = CI->getOperand(1);
1035
1036 // Don't touch overloads that aren't in OpenCL C
1037 auto HiType = HiValue->getType();
1038 auto LoType = LoValue->getType();
1039
1040 if (HiType != LoType) {
1041 continue;
1042 }
1043
1044 if (!HiType->isIntOrIntVectorTy()) {
1045 continue;
1046 }
1047
1048 if (HiType->getScalarSizeInBits() * 2 !=
1049 CI->getType()->getScalarSizeInBits()) {
1050 continue;
1051 }
1052
1053 if ((HiType->getScalarSizeInBits() != 8) &&
1054 (HiType->getScalarSizeInBits() != 16) &&
1055 (HiType->getScalarSizeInBits() != 32)) {
1056 continue;
1057 }
1058
1059 if (HiType->isVectorTy()) {
1060 if ((HiType->getVectorNumElements() != 2) &&
1061 (HiType->getVectorNumElements() != 3) &&
1062 (HiType->getVectorNumElements() != 4) &&
1063 (HiType->getVectorNumElements() != 8) &&
1064 (HiType->getVectorNumElements() != 16)) {
1065 continue;
1066 }
1067 }
1068
1069 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001070 auto HiCast =
1071 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1072 auto LoCast =
1073 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001074
1075 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001076 auto ShiftAmount =
1077 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001078 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1079 ShiftAmount, "", CI);
1080
1081 // OR both results
1082 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1083 "", CI);
1084
1085 // Replace call with the expression
1086 CI->replaceAllUsesWith(V);
1087
1088 // Lastly, remember to remove the user.
1089 ToRemoves.push_back(CI);
1090 }
1091 }
1092
1093 Changed = !ToRemoves.empty();
1094
1095 // And cleanup the calls we don't use anymore.
1096 for (auto V : ToRemoves) {
1097 V->eraseFromParent();
1098 }
1099
1100 // And remove the function we don't need either too.
1101 F->eraseFromParent();
1102 }
1103 }
1104
1105 return Changed;
1106}
1107
Kévin Petitd44eef52019-03-08 13:22:14 +00001108bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1109 bool Changed = false;
1110
1111 for (auto const &SymVal : M.getValueSymbolTable()) {
1112 // Skip symbols whose name doesn't match
1113 if (!SymVal.getKey().startswith("_Z6rotate")) {
1114 continue;
1115 }
1116 // Is there a function going by that name?
1117 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1118
1119 SmallVector<Instruction *, 4> ToRemoves;
1120
1121 // Walk the users of the function.
1122 for (auto &U : F->uses()) {
1123 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1124
1125 // Get arguments
1126 auto SrcValue = CI->getOperand(0);
1127 auto RotAmount = CI->getOperand(1);
1128
1129 // Don't touch overloads that aren't in OpenCL C
1130 auto SrcType = SrcValue->getType();
1131 auto RotType = RotAmount->getType();
1132
1133 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1134 continue;
1135 }
1136
1137 if (!SrcType->isIntOrIntVectorTy()) {
1138 continue;
1139 }
1140
1141 if ((SrcType->getScalarSizeInBits() != 8) &&
1142 (SrcType->getScalarSizeInBits() != 16) &&
1143 (SrcType->getScalarSizeInBits() != 32) &&
1144 (SrcType->getScalarSizeInBits() != 64)) {
1145 continue;
1146 }
1147
1148 if (SrcType->isVectorTy()) {
1149 if ((SrcType->getVectorNumElements() != 2) &&
1150 (SrcType->getVectorNumElements() != 3) &&
1151 (SrcType->getVectorNumElements() != 4) &&
1152 (SrcType->getVectorNumElements() != 8) &&
1153 (SrcType->getVectorNumElements() != 16)) {
1154 continue;
1155 }
1156 }
1157
1158 // The approach used is to shift the top bits down, the bottom bits up
1159 // and OR the two shifted values.
1160
1161 // The rotation amount is to be treated modulo the element size.
1162 // Since SPIR-V shift ops don't support this, let's apply the
1163 // modulo ahead of shifting. The element size is always a power of
1164 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001165 auto ModMask =
1166 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001167 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1168 ModMask, "", CI);
1169
1170 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001171 auto ScalarSize =
1172 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001173 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1174 RotAmount, "", CI);
1175
1176 // Now shift the bottom bits up and the top bits down
1177 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1178 RotAmount, "", CI);
1179 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1180 DownAmount, "", CI);
1181
1182 // Finally OR the two shifted values
1183 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1184 HiRotated, "", CI);
1185
1186 // Replace call with the expression
1187 CI->replaceAllUsesWith(V);
1188
1189 // Lastly, remember to remove the user.
1190 ToRemoves.push_back(CI);
1191 }
1192 }
1193
1194 Changed = !ToRemoves.empty();
1195
1196 // And cleanup the calls we don't use anymore.
1197 for (auto V : ToRemoves) {
1198 V->eraseFromParent();
1199 }
1200
1201 // And remove the function we don't need either too.
1202 F->eraseFromParent();
1203 }
1204 }
1205
1206 return Changed;
1207}
1208
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001209bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1210 bool Changed = false;
1211
1212 for (auto const &SymVal : M.getValueSymbolTable()) {
1213
1214 // Skip symbols whose name obviously doesn't match
1215 if (!SymVal.getKey().contains("convert_")) {
1216 continue;
1217 }
1218
1219 // Is there a function going by that name?
1220 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1221
1222 // Get info from the mangled name
1223 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001224 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001225
1226 // All functions of interest are handled by our mangled name parser
1227 if (!parsed) {
1228 continue;
1229 }
1230
1231 // Move on if this isn't a call to convert_
1232 if (!finfo.name.startswith("convert_")) {
1233 continue;
1234 }
1235
1236 // Extract the destination type from the function name
1237 StringRef DstTypeName = finfo.name;
1238 DstTypeName.consume_front("convert_");
1239
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001240 auto DstSignedNess =
1241 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1242 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1243 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1244 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1245 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1246 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1247 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1248 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1249 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1250 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001251
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001252 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001253 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001254
1255 SmallVector<Instruction *, 4> ToRemoves;
1256
1257 // Walk the users of the function.
1258 for (auto &U : F->uses()) {
1259 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1260
1261 // Get arguments
1262 auto SrcValue = CI->getOperand(0);
1263
1264 // Don't touch overloads that aren't in OpenCL C
1265 auto SrcType = SrcValue->getType();
1266 auto DstType = CI->getType();
1267
1268 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1269 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1270 continue;
1271 }
1272
1273 if (SrcType->isVectorTy()) {
1274
1275 if (SrcType->getVectorNumElements() !=
1276 DstType->getVectorNumElements()) {
1277 continue;
1278 }
1279
1280 if ((SrcType->getVectorNumElements() != 2) &&
1281 (SrcType->getVectorNumElements() != 3) &&
1282 (SrcType->getVectorNumElements() != 4) &&
1283 (SrcType->getVectorNumElements() != 8) &&
1284 (SrcType->getVectorNumElements() != 16)) {
1285 continue;
1286 }
1287 }
1288
1289 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1290 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1291
1292 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1293 bool DstIsInt = DstType->isIntOrIntVectorTy();
1294
1295 Value *V;
1296 if (SrcIsFloat && DstIsFloat) {
1297 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1298 } else if (SrcIsFloat && DstIsInt) {
1299 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001300 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1301 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001302 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001303 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1304 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001305 }
1306 } else if (SrcIsInt && DstIsFloat) {
1307 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001308 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1309 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001310 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001311 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1312 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001313 }
1314 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001315 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1316 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001317 } else {
1318 // Not something we're supposed to handle, just move on
1319 continue;
1320 }
1321
1322 // Replace call with the expression
1323 CI->replaceAllUsesWith(V);
1324
1325 // Lastly, remember to remove the user.
1326 ToRemoves.push_back(CI);
1327 }
1328 }
1329
1330 Changed = !ToRemoves.empty();
1331
1332 // And cleanup the calls we don't use anymore.
1333 for (auto V : ToRemoves) {
1334 V->eraseFromParent();
1335 }
1336
1337 // And remove the function we don't need either too.
1338 F->eraseFromParent();
1339 }
1340 }
1341
1342 return Changed;
1343}
1344
Kévin Petit8a560882019-03-21 15:24:34 +00001345bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1346 bool Changed = false;
1347
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001348 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001349
Kévin Petit617a76d2019-04-04 13:54:16 +01001350 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001351 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1352 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1353
1354 // Skip symbols whose name doesn't match
1355 if (!isMad && !isMul) {
1356 continue;
1357 }
1358
1359 // Is there a function going by that name?
1360 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001361 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001362 }
1363 }
1364
Kévin Petit617a76d2019-04-04 13:54:16 +01001365 for (auto F : FnWorklist) {
1366 SmallVector<Instruction *, 4> ToRemoves;
1367
1368 bool isMad = F->getName().startswith("_Z6mad_hi");
1369 // Walk the users of the function.
1370 for (auto &U : F->uses()) {
1371 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1372
1373 // Get arguments
1374 auto AValue = CI->getOperand(0);
1375 auto BValue = CI->getOperand(1);
1376 auto CValue = CI->getOperand(2);
1377
1378 // Don't touch overloads that aren't in OpenCL C
1379 auto AType = AValue->getType();
1380 auto BType = BValue->getType();
1381 auto CType = CValue->getType();
1382
1383 if ((AType != BType) || (CI->getType() != AType) ||
1384 (isMad && (AType != CType))) {
1385 continue;
1386 }
1387
1388 if (!AType->isIntOrIntVectorTy()) {
1389 continue;
1390 }
1391
1392 if ((AType->getScalarSizeInBits() != 8) &&
1393 (AType->getScalarSizeInBits() != 16) &&
1394 (AType->getScalarSizeInBits() != 32) &&
1395 (AType->getScalarSizeInBits() != 64)) {
1396 continue;
1397 }
1398
1399 if (AType->isVectorTy()) {
1400 if ((AType->getVectorNumElements() != 2) &&
1401 (AType->getVectorNumElements() != 3) &&
1402 (AType->getVectorNumElements() != 4) &&
1403 (AType->getVectorNumElements() != 8) &&
1404 (AType->getVectorNumElements() != 16)) {
1405 continue;
1406 }
1407 }
1408
1409 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001410 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001411
1412 // Select the appropriate signed/unsigned SPIR-V op
1413 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001414 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001415 opcode = spv::OpSMulExtended;
1416 } else {
1417 opcode = spv::OpUMulExtended;
1418 }
1419
1420 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001421 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001422 auto ExMulRetType = StructType::create(TwoValueType);
1423
1424 // Call the SPIR-V op
1425 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1426 ExMulRetType, {AValue, BValue});
1427
1428 // Get the high part of the result
1429 unsigned Idxs[] = {1};
1430 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1431
1432 // If we're handling a mad_hi, add the third argument to the result
1433 if (isMad) {
1434 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1435 }
1436
1437 // Replace call with the expression
1438 CI->replaceAllUsesWith(V);
1439
1440 // Lastly, remember to remove the user.
1441 ToRemoves.push_back(CI);
1442 }
1443 }
1444
1445 Changed = !ToRemoves.empty();
1446
1447 // And cleanup the calls we don't use anymore.
1448 for (auto V : ToRemoves) {
1449 V->eraseFromParent();
1450 }
1451
1452 // And remove the function we don't need either too.
1453 F->eraseFromParent();
1454 }
1455
Kévin Petit8a560882019-03-21 15:24:34 +00001456 return Changed;
1457}
1458
Kévin Petitf5b78a22018-10-25 14:32:17 +00001459bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1460 bool Changed = false;
1461
1462 for (auto const &SymVal : M.getValueSymbolTable()) {
1463 // Skip symbols whose name doesn't match
1464 if (!SymVal.getKey().startswith("_Z6select")) {
1465 continue;
1466 }
1467 // Is there a function going by that name?
1468 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1469
1470 SmallVector<Instruction *, 4> ToRemoves;
1471
1472 // Walk the users of the function.
1473 for (auto &U : F->uses()) {
1474 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1475
1476 // Get arguments
1477 auto FalseValue = CI->getOperand(0);
1478 auto TrueValue = CI->getOperand(1);
1479 auto PredicateValue = CI->getOperand(2);
1480
1481 // Don't touch overloads that aren't in OpenCL C
1482 auto FalseType = FalseValue->getType();
1483 auto TrueType = TrueValue->getType();
1484 auto PredicateType = PredicateValue->getType();
1485
1486 if (FalseType != TrueType) {
1487 continue;
1488 }
1489
1490 if (!PredicateType->isIntOrIntVectorTy()) {
1491 continue;
1492 }
1493
1494 if (!FalseType->isIntOrIntVectorTy() &&
1495 !FalseType->getScalarType()->isFloatingPointTy()) {
1496 continue;
1497 }
1498
1499 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1500 continue;
1501 }
1502
1503 if (FalseType->getScalarSizeInBits() !=
1504 PredicateType->getScalarSizeInBits()) {
1505 continue;
1506 }
1507
1508 if (FalseType->isVectorTy()) {
1509 if (FalseType->getVectorNumElements() !=
1510 PredicateType->getVectorNumElements()) {
1511 continue;
1512 }
1513
1514 if ((FalseType->getVectorNumElements() != 2) &&
1515 (FalseType->getVectorNumElements() != 3) &&
1516 (FalseType->getVectorNumElements() != 4) &&
1517 (FalseType->getVectorNumElements() != 8) &&
1518 (FalseType->getVectorNumElements() != 16)) {
1519 continue;
1520 }
1521 }
1522
1523 // Create constant
1524 const auto ZeroValue = Constant::getNullValue(PredicateType);
1525
1526 // Scalar and vector are to be treated differently
1527 CmpInst::Predicate Pred;
1528 if (PredicateType->isVectorTy()) {
1529 Pred = CmpInst::ICMP_SLT;
1530 } else {
1531 Pred = CmpInst::ICMP_NE;
1532 }
1533
1534 // Create comparison instruction
1535 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1536 ZeroValue, "", CI);
1537
1538 // Create select
1539 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1540
1541 // Replace call with the selection
1542 CI->replaceAllUsesWith(V);
1543
1544 // Lastly, remember to remove the user.
1545 ToRemoves.push_back(CI);
1546 }
1547 }
1548
1549 Changed = !ToRemoves.empty();
1550
1551 // And cleanup the calls we don't use anymore.
1552 for (auto V : ToRemoves) {
1553 V->eraseFromParent();
1554 }
1555
1556 // And remove the function we don't need either too.
1557 F->eraseFromParent();
1558 }
1559 }
1560
1561 return Changed;
1562}
1563
Kévin Petite7d0cce2018-10-31 12:38:56 +00001564bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1565 bool Changed = false;
1566
1567 for (auto const &SymVal : M.getValueSymbolTable()) {
1568 // Skip symbols whose name doesn't match
1569 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1570 continue;
1571 }
1572 // Is there a function going by that name?
1573 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1574
1575 SmallVector<Instruction *, 4> ToRemoves;
1576
1577 // Walk the users of the function.
1578 for (auto &U : F->uses()) {
1579 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1580
1581 if (CI->getNumOperands() != 4) {
1582 continue;
1583 }
1584
1585 // Get arguments
1586 auto FalseValue = CI->getOperand(0);
1587 auto TrueValue = CI->getOperand(1);
1588 auto PredicateValue = CI->getOperand(2);
1589
1590 // Don't touch overloads that aren't in OpenCL C
1591 auto FalseType = FalseValue->getType();
1592 auto TrueType = TrueValue->getType();
1593 auto PredicateType = PredicateValue->getType();
1594
1595 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1596 continue;
1597 }
1598
1599 if (TrueType->isVectorTy()) {
1600 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1601 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001602 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001603 }
1604 if ((TrueType->getVectorNumElements() != 2) &&
1605 (TrueType->getVectorNumElements() != 3) &&
1606 (TrueType->getVectorNumElements() != 4) &&
1607 (TrueType->getVectorNumElements() != 8) &&
1608 (TrueType->getVectorNumElements() != 16)) {
1609 continue;
1610 }
1611 }
1612
1613 // Remember the type of the operands
1614 auto OpType = TrueType;
1615
1616 // The actual bit selection will always be done on an integer type,
1617 // declare it here
1618 Type *BitType;
1619
1620 // If the operands are float, then bitcast them to int
1621 if (OpType->getScalarType()->isFloatingPointTy()) {
1622
1623 // First create the new type
1624 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1625 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1626 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001627 BitType =
1628 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001629 }
1630
1631 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001632 PredicateValue =
1633 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1634 FalseValue =
1635 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1636 TrueValue =
1637 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001638
1639 } else {
1640 // The operands have an integer type, use it directly
1641 BitType = OpType;
1642 }
1643
1644 // All the operands are now always integers
1645 // implement as (c & b) | (~c & a)
1646
1647 // Create our negated predicate value
1648 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001649 auto NotPredicateValue = BinaryOperator::Create(
1650 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001651
1652 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001653 auto BitsFalse = BinaryOperator::Create(
1654 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1655 auto BitsTrue = BinaryOperator::Create(
1656 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001657
1658 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1659 BitsTrue, "", CI);
1660
1661 // If we were dealing with a floating point type, we must bitcast
1662 // the result back to that
1663 if (OpType->getScalarType()->isFloatingPointTy()) {
1664 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1665 }
1666
1667 // Replace call with our new code
1668 CI->replaceAllUsesWith(V);
1669
1670 // Lastly, remember to remove the user.
1671 ToRemoves.push_back(CI);
1672 }
1673 }
1674
1675 Changed = !ToRemoves.empty();
1676
1677 // And cleanup the calls we don't use anymore.
1678 for (auto V : ToRemoves) {
1679 V->eraseFromParent();
1680 }
1681
1682 // And remove the function we don't need either too.
1683 F->eraseFromParent();
1684 }
1685 }
1686
1687 return Changed;
1688}
1689
Kévin Petit6b0a9532018-10-30 20:00:39 +00001690bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1691 bool Changed = false;
1692
1693 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001694 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1695 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1696 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1697 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1698 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1699 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001700 };
1701
1702 for (auto Pair : Map) {
1703 // If we find a function with the matching name.
1704 if (auto F = M.getFunction(Pair.first)) {
1705 SmallVector<Instruction *, 4> ToRemoves;
1706
1707 // Walk the users of the function.
1708 for (auto &U : F->uses()) {
1709 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1710
1711 auto ReplacementFn = Pair.second;
1712
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001713 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001714 Value *VectorArg;
1715
1716 // First figure out which function we're dealing with
1717 if (F->getName().startswith("_Z10smoothstep")) {
1718 ArgsToSplat.push_back(CI->getOperand(1));
1719 VectorArg = CI->getOperand(2);
1720 } else {
1721 VectorArg = CI->getOperand(1);
1722 }
1723
1724 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001725 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001726 auto VecType = VectorArg->getType();
1727
1728 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001729 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001730 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001731 auto index =
1732 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1733 NewVectorArg =
1734 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001735 }
1736 SplatArgs.push_back(NewVectorArg);
1737 }
1738
1739 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001740 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1741 const auto NewFType =
1742 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001743
1744 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1745
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001746 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001747 for (auto arg : SplatArgs) {
1748 NewArgs.push_back(arg);
1749 }
1750 NewArgs.push_back(VectorArg);
1751
1752 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1753
1754 CI->replaceAllUsesWith(NewCI);
1755
1756 // Lastly, remember to remove the user.
1757 ToRemoves.push_back(CI);
1758 }
1759 }
1760
1761 Changed = !ToRemoves.empty();
1762
1763 // And cleanup the calls we don't use anymore.
1764 for (auto V : ToRemoves) {
1765 V->eraseFromParent();
1766 }
1767
1768 // And remove the function we don't need either too.
1769 F->eraseFromParent();
1770 }
1771 }
1772
1773 return Changed;
1774}
1775
David Neto22f144c2017-06-12 14:26:21 -04001776bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1777 bool Changed = false;
1778
1779 const std::map<const char *, Instruction::BinaryOps> Map = {
1780 {"_Z7signbitf", Instruction::LShr},
1781 {"_Z7signbitDv2_f", Instruction::AShr},
1782 {"_Z7signbitDv3_f", Instruction::AShr},
1783 {"_Z7signbitDv4_f", Instruction::AShr},
1784 };
1785
1786 for (auto Pair : Map) {
1787 // If we find a function with the matching name.
1788 if (auto F = M.getFunction(Pair.first)) {
1789 SmallVector<Instruction *, 4> ToRemoves;
1790
1791 // Walk the users of the function.
1792 for (auto &U : F->uses()) {
1793 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1794 auto Arg = CI->getOperand(0);
1795
1796 auto Bitcast =
1797 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1798
1799 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1800 ConstantInt::get(CI->getType(), 31),
1801 "", CI);
1802
1803 CI->replaceAllUsesWith(Shr);
1804
1805 // Lastly, remember to remove the user.
1806 ToRemoves.push_back(CI);
1807 }
1808 }
1809
1810 Changed = !ToRemoves.empty();
1811
1812 // And cleanup the calls we don't use anymore.
1813 for (auto V : ToRemoves) {
1814 V->eraseFromParent();
1815 }
1816
1817 // And remove the function we don't need either too.
1818 F->eraseFromParent();
1819 }
1820 }
1821
1822 return Changed;
1823}
1824
1825bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1826 bool Changed = false;
1827
1828 const std::map<const char *,
1829 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1830 Map = {
1831 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1832 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1833 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1834 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1835 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1836 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1837 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1838 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1839 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1840 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1841 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1842 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1843 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1844 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1845 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1846 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1847 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1848 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1849 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1850 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1851 };
1852
1853 for (auto Pair : Map) {
1854 // If we find a function with the matching name.
1855 if (auto F = M.getFunction(Pair.first)) {
1856 SmallVector<Instruction *, 4> ToRemoves;
1857
1858 // Walk the users of the function.
1859 for (auto &U : F->uses()) {
1860 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1861 // The multiply instruction to use.
1862 auto MulInst = Pair.second.first;
1863
1864 // The add instruction to use.
1865 auto AddInst = Pair.second.second;
1866
1867 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1868
1869 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1870 CI->getArgOperand(1), "", CI);
1871
1872 if (Instruction::BinaryOpsEnd != AddInst) {
1873 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1874 CI);
1875 }
1876
1877 CI->replaceAllUsesWith(I);
1878
1879 // Lastly, remember to remove the user.
1880 ToRemoves.push_back(CI);
1881 }
1882 }
1883
1884 Changed = !ToRemoves.empty();
1885
1886 // And cleanup the calls we don't use anymore.
1887 for (auto V : ToRemoves) {
1888 V->eraseFromParent();
1889 }
1890
1891 // And remove the function we don't need either too.
1892 F->eraseFromParent();
1893 }
1894 }
1895
1896 return Changed;
1897}
1898
Derek Chowcfd368b2017-10-19 20:58:45 -07001899bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1900 bool Changed = false;
1901
1902 struct VectorStoreOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001903 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001904 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001905 Type *(*get_scalar_type_function)(LLVMContext &);
1906 } vector_store_ops[] = {// TODO(derekjchow): Expand this list.
1907 {"_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001908
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001909 for (const auto &Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001910 auto Name = Op.name;
1911 auto N = Op.n;
1912 auto TypeFn = Op.get_scalar_type_function;
1913 if (auto F = M.getFunction(Name)) {
1914 SmallVector<Instruction *, 4> ToRemoves;
1915
1916 // Walk the users of the function.
1917 for (auto &U : F->uses()) {
1918 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1919 // The value argument from vstoren.
1920 auto Arg0 = CI->getOperand(0);
1921
1922 // The index argument from vstoren.
1923 auto Arg1 = CI->getOperand(1);
1924
1925 // The pointer argument from vstoren.
1926 auto Arg2 = CI->getOperand(2);
1927
1928 // Get types.
1929 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1930 auto ScalarNPointerTy = PointerType::get(
1931 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1932
1933 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001934 auto Cast =
1935 CastInst::CreatePointerCast(Arg2, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001936 // Index to correct address
1937 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1938 // Store
1939 auto Store = new StoreInst(Arg0, Index, CI);
1940
1941 CI->replaceAllUsesWith(Store);
1942 ToRemoves.push_back(CI);
1943 }
1944 }
1945
1946 Changed = !ToRemoves.empty();
1947
1948 // And cleanup the calls we don't use anymore.
1949 for (auto V : ToRemoves) {
1950 V->eraseFromParent();
1951 }
1952
1953 // And remove the function we don't need either too.
1954 F->eraseFromParent();
1955 }
1956 }
1957
1958 return Changed;
1959}
1960
1961bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1962 bool Changed = false;
1963
1964 struct VectorLoadOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001965 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001966 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001967 Type *(*get_scalar_type_function)(LLVMContext &);
1968 } vector_load_ops[] = {// TODO(derekjchow): Expand this list.
1969 {"_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001970
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001971 for (const auto &Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001972 auto Name = Op.name;
1973 auto N = Op.n;
1974 auto TypeFn = Op.get_scalar_type_function;
1975 // If we find a function with the matching name.
1976 if (auto F = M.getFunction(Name)) {
1977 SmallVector<Instruction *, 4> ToRemoves;
1978
1979 // Walk the users of the function.
1980 for (auto &U : F->uses()) {
1981 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1982 // The index argument from vloadn.
1983 auto Arg0 = CI->getOperand(0);
1984
1985 // The pointer argument from vloadn.
1986 auto Arg1 = CI->getOperand(1);
1987
1988 // Get types.
1989 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1990 auto ScalarNPointerTy = PointerType::get(
1991 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1992
1993 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001994 auto Cast =
1995 CastInst::CreatePointerCast(Arg1, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001996 // Index to correct address
1997 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
1998 // Load
1999 auto Load = new LoadInst(Index, "", CI);
2000
2001 CI->replaceAllUsesWith(Load);
2002 ToRemoves.push_back(CI);
2003 }
2004 }
2005
2006 Changed = !ToRemoves.empty();
2007
2008 // And cleanup the calls we don't use anymore.
2009 for (auto V : ToRemoves) {
2010 V->eraseFromParent();
2011 }
2012
2013 // And remove the function we don't need either too.
2014 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002015 }
2016 }
2017
2018 return Changed;
2019}
2020
David Neto22f144c2017-06-12 14:26:21 -04002021bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2022 bool Changed = false;
2023
2024 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2025 "_Z10vload_halfjPU3AS2KDh"};
2026
2027 for (auto Name : Map) {
2028 // If we find a function with the matching name.
2029 if (auto F = M.getFunction(Name)) {
2030 SmallVector<Instruction *, 4> ToRemoves;
2031
2032 // Walk the users of the function.
2033 for (auto &U : F->uses()) {
2034 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2035 // The index argument from vload_half.
2036 auto Arg0 = CI->getOperand(0);
2037
2038 // The pointer argument from vload_half.
2039 auto Arg1 = CI->getOperand(1);
2040
David Neto22f144c2017-06-12 14:26:21 -04002041 auto IntTy = Type::getInt32Ty(M.getContext());
2042 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002043 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2044
David Neto22f144c2017-06-12 14:26:21 -04002045 // Our intrinsic to unpack a float2 from an int.
2046 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2047
2048 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2049
David Neto482550a2018-03-24 05:21:07 -07002050 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002051 auto ShortTy = Type::getInt16Ty(M.getContext());
2052 auto ShortPointerTy = PointerType::get(
2053 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002054
David Netoac825b82017-05-30 12:49:01 -04002055 // Cast the half* pointer to short*.
2056 auto Cast =
2057 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002058
David Netoac825b82017-05-30 12:49:01 -04002059 // Index into the correct address of the casted pointer.
2060 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2061
2062 // Load from the short* we casted to.
2063 auto Load = new LoadInst(Index, "", CI);
2064
2065 // ZExt the short -> int.
2066 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2067
2068 // Get our float2.
2069 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2070
2071 // Extract out the bottom element which is our float result.
2072 auto Extract = ExtractElementInst::Create(
2073 Call, ConstantInt::get(IntTy, 0), "", CI);
2074
2075 CI->replaceAllUsesWith(Extract);
2076 } else {
2077 // Assume the pointer argument points to storage aligned to 32bits
2078 // or more.
2079 // TODO(dneto): Do more analysis to make sure this is true?
2080 //
2081 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2082 // with:
2083 //
2084 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2085 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2086 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2087 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2088 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2089 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2090 // x float> %converted, %index_is_odd32
2091
2092 auto IntPointerTy = PointerType::get(
2093 IntTy, Arg1->getType()->getPointerAddressSpace());
2094
David Neto973e6a82017-05-30 13:48:18 -04002095 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002096 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002097 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002098 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2099
2100 auto One = ConstantInt::get(IntTy, 1);
2101 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2102 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2103
2104 // Index into the correct address of the casted pointer.
2105 auto Ptr =
2106 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2107
2108 // Load from the int* we casted to.
2109 auto Load = new LoadInst(Ptr, "", CI);
2110
2111 // Get our float2.
2112 auto Call = CallInst::Create(NewF, Load, "", CI);
2113
2114 // Extract out the float result, where the element number is
2115 // determined by whether the original index was even or odd.
2116 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2117
2118 CI->replaceAllUsesWith(Extract);
2119 }
David Neto22f144c2017-06-12 14:26:21 -04002120
2121 // Lastly, remember to remove the user.
2122 ToRemoves.push_back(CI);
2123 }
2124 }
2125
2126 Changed = !ToRemoves.empty();
2127
2128 // And cleanup the calls we don't use anymore.
2129 for (auto V : ToRemoves) {
2130 V->eraseFromParent();
2131 }
2132
2133 // And remove the function we don't need either too.
2134 F->eraseFromParent();
2135 }
2136 }
2137
2138 return Changed;
2139}
2140
2141bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002142
Kévin Petite8edce32019-04-10 14:23:32 +01002143 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002144 "_Z11vload_half2jPU3AS1KDh",
2145 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2146 "_Z11vload_half2jPU3AS2KDh",
2147 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2148 };
David Neto22f144c2017-06-12 14:26:21 -04002149
Kévin Petite8edce32019-04-10 14:23:32 +01002150 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2151 // The index argument from vload_half.
2152 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002153
Kévin Petite8edce32019-04-10 14:23:32 +01002154 // The pointer argument from vload_half.
2155 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002156
Kévin Petite8edce32019-04-10 14:23:32 +01002157 auto IntTy = Type::getInt32Ty(M.getContext());
2158 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002159 auto NewPointerTy =
2160 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002161 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002162
Kévin Petite8edce32019-04-10 14:23:32 +01002163 // Cast the half* pointer to int*.
2164 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002165
Kévin Petite8edce32019-04-10 14:23:32 +01002166 // Index into the correct address of the casted pointer.
2167 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002168
Kévin Petite8edce32019-04-10 14:23:32 +01002169 // Load from the int* we casted to.
2170 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002171
Kévin Petite8edce32019-04-10 14:23:32 +01002172 // Our intrinsic to unpack a float2 from an int.
2173 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002174
Kévin Petite8edce32019-04-10 14:23:32 +01002175 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002176
Kévin Petite8edce32019-04-10 14:23:32 +01002177 // Get our float2.
2178 return CallInst::Create(NewF, Load, "", CI);
2179 });
David Neto22f144c2017-06-12 14:26:21 -04002180}
2181
2182bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002183
Kévin Petite8edce32019-04-10 14:23:32 +01002184 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002185 "_Z11vload_half4jPU3AS1KDh",
2186 "_Z12vloada_half4jPU3AS1KDh",
2187 "_Z11vload_half4jPU3AS2KDh",
2188 "_Z12vloada_half4jPU3AS2KDh",
2189 };
David Neto22f144c2017-06-12 14:26:21 -04002190
Kévin Petite8edce32019-04-10 14:23:32 +01002191 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2192 // The index argument from vload_half.
2193 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002194
Kévin Petite8edce32019-04-10 14:23:32 +01002195 // The pointer argument from vload_half.
2196 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002197
Kévin Petite8edce32019-04-10 14:23:32 +01002198 auto IntTy = Type::getInt32Ty(M.getContext());
2199 auto Int2Ty = VectorType::get(IntTy, 2);
2200 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002201 auto NewPointerTy =
2202 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002203 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002204
Kévin Petite8edce32019-04-10 14:23:32 +01002205 // Cast the half* pointer to int2*.
2206 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002207
Kévin Petite8edce32019-04-10 14:23:32 +01002208 // Index into the correct address of the casted pointer.
2209 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002210
Kévin Petite8edce32019-04-10 14:23:32 +01002211 // Load from the int2* we casted to.
2212 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002213
Kévin Petite8edce32019-04-10 14:23:32 +01002214 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002215 auto X =
2216 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2217 auto Y =
2218 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002219
Kévin Petite8edce32019-04-10 14:23:32 +01002220 // Our intrinsic to unpack a float2 from an int.
2221 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002222
Kévin Petite8edce32019-04-10 14:23:32 +01002223 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002224
Kévin Petite8edce32019-04-10 14:23:32 +01002225 // Get the lower (x & y) components of our final float4.
2226 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002227
Kévin Petite8edce32019-04-10 14:23:32 +01002228 // Get the higher (z & w) components of our final float4.
2229 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002230
Kévin Petite8edce32019-04-10 14:23:32 +01002231 Constant *ShuffleMask[4] = {
2232 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2233 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002234
Kévin Petite8edce32019-04-10 14:23:32 +01002235 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002236 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2237 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002238 });
David Neto22f144c2017-06-12 14:26:21 -04002239}
2240
David Neto6ad93232018-06-07 15:42:58 -07002241bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002242
2243 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2244 //
2245 // %u = load i32 %ptr
2246 // %fxy = call <2 x float> Unpack2xHalf(u)
2247 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002248 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002249 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2250 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2251 "_Z20__clspv_vloada_half2jPKj", // private
2252 };
2253
Kévin Petite8edce32019-04-10 14:23:32 +01002254 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2255 auto Index = CI->getOperand(0);
2256 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002257
Kévin Petite8edce32019-04-10 14:23:32 +01002258 auto IntTy = Type::getInt32Ty(M.getContext());
2259 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2260 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002261
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002262 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002263 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002264
Kévin Petite8edce32019-04-10 14:23:32 +01002265 // Our intrinsic to unpack a float2 from an int.
2266 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002267
Kévin Petite8edce32019-04-10 14:23:32 +01002268 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002269
Kévin Petite8edce32019-04-10 14:23:32 +01002270 // Get our final float2.
2271 return CallInst::Create(NewF, Load, "", CI);
2272 });
David Neto6ad93232018-06-07 15:42:58 -07002273}
2274
2275bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002276
2277 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2278 //
2279 // %u2 = load <2 x i32> %ptr
2280 // %u2xy = extractelement %u2, 0
2281 // %u2zw = extractelement %u2, 1
2282 // %fxy = call <2 x float> Unpack2xHalf(uint)
2283 // %fzw = call <2 x float> Unpack2xHalf(uint)
2284 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002285 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002286 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2287 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2288 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2289 };
2290
Kévin Petite8edce32019-04-10 14:23:32 +01002291 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2292 auto Index = CI->getOperand(0);
2293 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002294
Kévin Petite8edce32019-04-10 14:23:32 +01002295 auto IntTy = Type::getInt32Ty(M.getContext());
2296 auto Int2Ty = VectorType::get(IntTy, 2);
2297 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2298 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002299
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002300 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002301 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002302
Kévin Petite8edce32019-04-10 14:23:32 +01002303 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002304 auto X =
2305 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2306 auto Y =
2307 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002308
Kévin Petite8edce32019-04-10 14:23:32 +01002309 // Our intrinsic to unpack a float2 from an int.
2310 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002311
Kévin Petite8edce32019-04-10 14:23:32 +01002312 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 // Get the lower (x & y) components of our final float4.
2315 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002316
Kévin Petite8edce32019-04-10 14:23:32 +01002317 // Get the higher (z & w) components of our final float4.
2318 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 Constant *ShuffleMask[4] = {
2321 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2322 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002323
Kévin Petite8edce32019-04-10 14:23:32 +01002324 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002325 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2326 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002327 });
David Neto6ad93232018-06-07 15:42:58 -07002328}
2329
David Neto22f144c2017-06-12 14:26:21 -04002330bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002331
Kévin Petite8edce32019-04-10 14:23:32 +01002332 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2333 "_Z15vstore_half_rtefjPU3AS1Dh",
2334 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002335
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002336 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002337 // The value to store.
2338 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002339
Kévin Petite8edce32019-04-10 14:23:32 +01002340 // The index argument from vstore_half.
2341 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002342
Kévin Petite8edce32019-04-10 14:23:32 +01002343 // The pointer argument from vstore_half.
2344 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002345
Kévin Petite8edce32019-04-10 14:23:32 +01002346 auto IntTy = Type::getInt32Ty(M.getContext());
2347 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2348 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2349 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002350
Kévin Petite8edce32019-04-10 14:23:32 +01002351 // Our intrinsic to pack a float2 to an int.
2352 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002353
Kévin Petite8edce32019-04-10 14:23:32 +01002354 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002355
Kévin Petite8edce32019-04-10 14:23:32 +01002356 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002357 auto TempVec = InsertElementInst::Create(
2358 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002359
Kévin Petite8edce32019-04-10 14:23:32 +01002360 // Pack the float2 -> half2 (in an int).
2361 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 Value *Ret;
2364 if (clspv::Option::F16BitStorage()) {
2365 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002366 auto ShortPointerTy =
2367 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002368
Kévin Petite8edce32019-04-10 14:23:32 +01002369 // Truncate our i32 to an i16.
2370 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002371
Kévin Petite8edce32019-04-10 14:23:32 +01002372 // Cast the half* pointer to short*.
2373 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 // Index into the correct address of the casted pointer.
2376 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002377
Kévin Petite8edce32019-04-10 14:23:32 +01002378 // Store to the int* we casted to.
2379 Ret = new StoreInst(Trunc, Index, CI);
2380 } else {
2381 // We can only write to 32-bit aligned words.
2382 //
2383 // Assuming base is aligned to 32-bits, replace the equivalent of
2384 // vstore_half(value, index, base)
2385 // with:
2386 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2387 // uint32_t write_to_upper_half = index & 1u;
2388 // uint32_t shift = write_to_upper_half << 4;
2389 //
2390 // // Pack the float value as a half number in bottom 16 bits
2391 // // of an i32.
2392 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2393 //
2394 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2395 // ^ ((packed & 0xffff) << shift)
2396 // // We only need relaxed consistency, but OpenCL 1.2 only has
2397 // // sequentially consistent atomics.
2398 // // TODO(dneto): Use relaxed consistency.
2399 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002400 auto IntPointerTy =
2401 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002402
Kévin Petite8edce32019-04-10 14:23:32 +01002403 auto Four = ConstantInt::get(IntTy, 4);
2404 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002405
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002406 auto IndexIsOdd =
2407 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002408 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002409 auto IndexIntoI32 =
2410 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2411 auto BaseI32Ptr =
2412 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2413 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2414 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002415 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2416 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002417 auto MaskBitsToWrite =
2418 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2419 auto MaskedCurrent = BinaryOperator::CreateAnd(
2420 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002421
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002422 auto XLowerBits =
2423 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2424 auto NewBitsToWrite =
2425 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2426 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2427 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002428
Kévin Petite8edce32019-04-10 14:23:32 +01002429 // Generate the call to atomi_xor.
2430 SmallVector<Type *, 5> ParamTypes;
2431 // The pointer type.
2432 ParamTypes.push_back(IntPointerTy);
2433 // The Types for memory scope, semantics, and value.
2434 ParamTypes.push_back(IntTy);
2435 ParamTypes.push_back(IntTy);
2436 ParamTypes.push_back(IntTy);
2437 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2438 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002439
Kévin Petite8edce32019-04-10 14:23:32 +01002440 const auto ConstantScopeDevice =
2441 ConstantInt::get(IntTy, spv::ScopeDevice);
2442 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2443 // (SPIR-V Workgroup).
2444 const auto AddrSpaceSemanticsBits =
2445 IntPointerTy->getPointerAddressSpace() == 1
2446 ? spv::MemorySemanticsUniformMemoryMask
2447 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 // We're using relaxed consistency here.
2450 const auto ConstantMemorySemantics =
2451 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2452 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002453
Kévin Petite8edce32019-04-10 14:23:32 +01002454 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2455 ConstantMemorySemantics, ValueToXor};
2456 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2457 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002458 }
David Neto22f144c2017-06-12 14:26:21 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 return Ret;
2461 });
David Neto22f144c2017-06-12 14:26:21 -04002462}
2463
2464bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002465
Kévin Petite8edce32019-04-10 14:23:32 +01002466 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002467 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2468 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2469 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2470 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2471 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2472 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2473 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2474 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2475 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2476 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2477 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2478 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2479 };
David Neto22f144c2017-06-12 14:26:21 -04002480
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002481 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002482 // The value to store.
2483 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002484
Kévin Petite8edce32019-04-10 14:23:32 +01002485 // The index argument from vstore_half.
2486 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002487
Kévin Petite8edce32019-04-10 14:23:32 +01002488 // The pointer argument from vstore_half.
2489 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002490
Kévin Petite8edce32019-04-10 14:23:32 +01002491 auto IntTy = Type::getInt32Ty(M.getContext());
2492 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002493 auto NewPointerTy =
2494 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002495 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002496
Kévin Petite8edce32019-04-10 14:23:32 +01002497 // Our intrinsic to pack a float2 to an int.
2498 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002499
Kévin Petite8edce32019-04-10 14:23:32 +01002500 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002501
Kévin Petite8edce32019-04-10 14:23:32 +01002502 // Turn the packed x & y into the final packing.
2503 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002504
Kévin Petite8edce32019-04-10 14:23:32 +01002505 // Cast the half* pointer to int*.
2506 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002507
Kévin Petite8edce32019-04-10 14:23:32 +01002508 // Index into the correct address of the casted pointer.
2509 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002510
Kévin Petite8edce32019-04-10 14:23:32 +01002511 // Store to the int* we casted to.
2512 return new StoreInst(X, Index, CI);
2513 });
David Neto22f144c2017-06-12 14:26:21 -04002514}
2515
2516bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002517
Kévin Petite8edce32019-04-10 14:23:32 +01002518 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002519 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2520 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2521 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2522 "_Z13vstorea_half4Dv4_fjPDh", // private
2523 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2524 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2525 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2526 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2527 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2528 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2529 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2530 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2531 };
David Neto22f144c2017-06-12 14:26:21 -04002532
Kévin Petite8edce32019-04-10 14:23:32 +01002533 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2534 // The value to store.
2535 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002536
Kévin Petite8edce32019-04-10 14:23:32 +01002537 // The index argument from vstore_half.
2538 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // The pointer argument from vstore_half.
2541 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 auto IntTy = Type::getInt32Ty(M.getContext());
2544 auto Int2Ty = VectorType::get(IntTy, 2);
2545 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002546 auto NewPointerTy =
2547 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002548 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002549
Kévin Petite8edce32019-04-10 14:23:32 +01002550 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2551 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002552
Kévin Petite8edce32019-04-10 14:23:32 +01002553 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002554 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2555 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002556
Kévin Petite8edce32019-04-10 14:23:32 +01002557 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2558 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002559
Kévin Petite8edce32019-04-10 14:23:32 +01002560 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002561 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2562 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002563
Kévin Petite8edce32019-04-10 14:23:32 +01002564 // Our intrinsic to pack a float2 to an int.
2565 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002566
Kévin Petite8edce32019-04-10 14:23:32 +01002567 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002568
Kévin Petite8edce32019-04-10 14:23:32 +01002569 // Turn the packed x & y into the final component of our int2.
2570 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002571
Kévin Petite8edce32019-04-10 14:23:32 +01002572 // Turn the packed z & w into the final component of our int2.
2573 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002574
Kévin Petite8edce32019-04-10 14:23:32 +01002575 auto Combine = InsertElementInst::Create(
2576 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002577 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2578 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002579
Kévin Petite8edce32019-04-10 14:23:32 +01002580 // Cast the half* pointer to int2*.
2581 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002582
Kévin Petite8edce32019-04-10 14:23:32 +01002583 // Index into the correct address of the casted pointer.
2584 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002585
Kévin Petite8edce32019-04-10 14:23:32 +01002586 // Store to the int2* we casted to.
2587 return new StoreInst(Combine, Index, CI);
2588 });
David Neto22f144c2017-06-12 14:26:21 -04002589}
2590
2591bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2592 bool Changed = false;
2593
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002594 const std::map<const char *, const char *> Map = {
2595 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2596 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2597 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2598 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002599
2600 for (auto Pair : Map) {
2601 // If we find a function with the matching name.
2602 if (auto F = M.getFunction(Pair.first)) {
2603 SmallVector<Instruction *, 4> ToRemoves;
2604
2605 // Walk the users of the function.
2606 for (auto &U : F->uses()) {
2607 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2608 // The image.
2609 auto Arg0 = CI->getOperand(0);
2610
2611 // The sampler.
2612 auto Arg1 = CI->getOperand(1);
2613
2614 // The coordinate (integer type that we can't handle).
2615 auto Arg2 = CI->getOperand(2);
2616
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002617 auto FloatVecTy =
2618 VectorType::get(Type::getFloatTy(M.getContext()),
2619 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002620
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002621 auto NewFType = FunctionType::get(
2622 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2623 false);
David Neto22f144c2017-06-12 14:26:21 -04002624
2625 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2626
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002627 auto Cast =
2628 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002629
2630 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2631
2632 CI->replaceAllUsesWith(NewCI);
2633
2634 // Lastly, remember to remove the user.
2635 ToRemoves.push_back(CI);
2636 }
2637 }
2638
2639 Changed = !ToRemoves.empty();
2640
2641 // And cleanup the calls we don't use anymore.
2642 for (auto V : ToRemoves) {
2643 V->eraseFromParent();
2644 }
2645
2646 // And remove the function we don't need either too.
2647 F->eraseFromParent();
2648 }
2649 }
2650
2651 return Changed;
2652}
2653
2654bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2655 bool Changed = false;
2656
2657 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002658 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002659 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002660 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002661 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002662 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002663 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002664 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002665 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002666 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002667 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002668 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002669 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002670 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002671 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002672 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002673 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002674 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002675 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002676 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002677 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002678 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002679 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2680 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2681 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002682
2683 for (auto Pair : Map) {
2684 // If we find a function with the matching name.
2685 if (auto F = M.getFunction(Pair.first)) {
2686 SmallVector<Instruction *, 4> ToRemoves;
2687
2688 // Walk the users of the function.
2689 for (auto &U : F->uses()) {
2690 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2691 auto FType = F->getFunctionType();
2692 SmallVector<Type *, 5> ParamTypes;
2693
2694 // The pointer type.
2695 ParamTypes.push_back(FType->getParamType(0));
2696
2697 auto IntTy = Type::getInt32Ty(M.getContext());
2698
2699 // The memory scope type.
2700 ParamTypes.push_back(IntTy);
2701
2702 // The memory semantics type.
2703 ParamTypes.push_back(IntTy);
2704
2705 if (2 < CI->getNumArgOperands()) {
2706 // The unequal memory semantics type.
2707 ParamTypes.push_back(IntTy);
2708
2709 // The value type.
2710 ParamTypes.push_back(FType->getParamType(2));
2711
2712 // The comparator type.
2713 ParamTypes.push_back(FType->getParamType(1));
2714 } else if (1 < CI->getNumArgOperands()) {
2715 // The value type.
2716 ParamTypes.push_back(FType->getParamType(1));
2717 }
2718
2719 auto NewFType =
2720 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2721 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2722
2723 // We need to map the OpenCL constants to the SPIR-V equivalents.
2724 const auto ConstantScopeDevice =
2725 ConstantInt::get(IntTy, spv::ScopeDevice);
2726 const auto ConstantMemorySemantics = ConstantInt::get(
2727 IntTy, spv::MemorySemanticsUniformMemoryMask |
2728 spv::MemorySemanticsSequentiallyConsistentMask);
2729
2730 SmallVector<Value *, 5> Params;
2731
2732 // The pointer.
2733 Params.push_back(CI->getArgOperand(0));
2734
2735 // The memory scope.
2736 Params.push_back(ConstantScopeDevice);
2737
2738 // The memory semantics.
2739 Params.push_back(ConstantMemorySemantics);
2740
2741 if (2 < CI->getNumArgOperands()) {
2742 // The unequal memory semantics.
2743 Params.push_back(ConstantMemorySemantics);
2744
2745 // The value.
2746 Params.push_back(CI->getArgOperand(2));
2747
2748 // The comparator.
2749 Params.push_back(CI->getArgOperand(1));
2750 } else if (1 < CI->getNumArgOperands()) {
2751 // The value.
2752 Params.push_back(CI->getArgOperand(1));
2753 }
2754
2755 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2756
2757 CI->replaceAllUsesWith(NewCI);
2758
2759 // Lastly, remember to remove the user.
2760 ToRemoves.push_back(CI);
2761 }
2762 }
2763
2764 Changed = !ToRemoves.empty();
2765
2766 // And cleanup the calls we don't use anymore.
2767 for (auto V : ToRemoves) {
2768 V->eraseFromParent();
2769 }
2770
2771 // And remove the function we don't need either too.
2772 F->eraseFromParent();
2773 }
2774 }
2775
Neil Henning39672102017-09-29 14:33:13 +01002776 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002777 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002778 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002779 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002780 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002781 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002782 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002783 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002784 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002785 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002786 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002787 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002788 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002789 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002790 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002791 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002792 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002793 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002794 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002795 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002796 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002797 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002798 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002799 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002800 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002801 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002802 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002803 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002804 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002805 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002806 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002807 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002808 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002809 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002810 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002811 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002812 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002813 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002814 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002815 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002816 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002817 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002818 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002819 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002820 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002821 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002822 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002823 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002824 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002825 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002826 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002827 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002828 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002829 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002830 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002831 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002832 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002833 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002834 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002835 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002836 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002837 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002838 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2839 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2840 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002841
2842 for (auto Pair : Map2) {
2843 // If we find a function with the matching name.
2844 if (auto F = M.getFunction(Pair.first)) {
2845 SmallVector<Instruction *, 4> ToRemoves;
2846
2847 // Walk the users of the function.
2848 for (auto &U : F->uses()) {
2849 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2850 auto AtomicOp = new AtomicRMWInst(
2851 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2852 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2853
2854 CI->replaceAllUsesWith(AtomicOp);
2855
2856 // Lastly, remember to remove the user.
2857 ToRemoves.push_back(CI);
2858 }
2859 }
2860
2861 Changed = !ToRemoves.empty();
2862
2863 // And cleanup the calls we don't use anymore.
2864 for (auto V : ToRemoves) {
2865 V->eraseFromParent();
2866 }
2867
2868 // And remove the function we don't need either too.
2869 F->eraseFromParent();
2870 }
2871 }
2872
David Neto22f144c2017-06-12 14:26:21 -04002873 return Changed;
2874}
2875
2876bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002877
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002878 std::vector<const char *> Names = {
2879 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002880 };
2881
2882 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002883 auto IntTy = Type::getInt32Ty(M.getContext());
2884 auto FloatTy = Type::getFloatTy(M.getContext());
2885
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002886 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2887 ConstantInt::get(IntTy, 1),
2888 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002889
2890 Constant *UpShuffleMask[4] = {
2891 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2892 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2893
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002894 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2895 UndefValue::get(FloatTy),
2896 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002897
Kévin Petite8edce32019-04-10 14:23:32 +01002898 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002899 auto Arg0 =
2900 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2901 ConstantVector::get(DownShuffleMask), "", CI);
2902 auto Arg1 =
2903 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2904 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002905 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002906
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002907 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002908
Kévin Petite8edce32019-04-10 14:23:32 +01002909 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002910
Kévin Petite8edce32019-04-10 14:23:32 +01002911 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002912
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002913 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2914 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002915 });
David Neto22f144c2017-06-12 14:26:21 -04002916}
David Neto62653202017-10-16 19:05:18 -04002917
2918bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2919 bool Changed = false;
2920
2921 // OpenCL's float result = fract(float x, float* ptr)
2922 //
2923 // In the LLVM domain:
2924 //
2925 // %floor_result = call spir_func float @floor(float %x)
2926 // store float %floor_result, float * %ptr
2927 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2928 // %result = call spir_func float
2929 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2930 //
2931 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2932 // and clspv.fract occur in the SPIR-V generator pass:
2933 //
2934 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2935 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2936 // ...
2937 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2938 // OpStore %ptr %floor_result
2939 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2940 // %fract_result = OpExtInst %float
2941 // %glsl_ext Fmin %fract_intermediate %just_under_1
2942
David Neto62653202017-10-16 19:05:18 -04002943 using std::string;
2944
2945 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2946 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002947 using QuadType =
2948 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04002949 auto make_quad = [](const char *a, const char *b, const char *c,
2950 const char *d) {
2951 return std::tuple<const char *, const char *, const char *, const char *>(
2952 a, b, c, d);
2953 };
2954 const std::vector<QuadType> Functions = {
2955 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002956 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
2957 "clspv.fract.v2f"),
2958 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
2959 "clspv.fract.v3f"),
2960 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
2961 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04002962 };
2963
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002964 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04002965 const StringRef fract_name(std::get<0>(quad));
2966
2967 // If we find a function with the matching name.
2968 if (auto F = M.getFunction(fract_name)) {
2969 if (F->use_begin() == F->use_end())
2970 continue;
2971
2972 // We have some uses.
2973 Changed = true;
2974
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002975 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04002976
2977 const StringRef floor_name(std::get<1>(quad));
2978 const StringRef fmin_name(std::get<2>(quad));
2979 const StringRef clspv_fract_name(std::get<3>(quad));
2980
2981 // This is either float or a float vector. All the float-like
2982 // types are this type.
2983 auto result_ty = F->getReturnType();
2984
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002985 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04002986 if (!fmin_fn) {
2987 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002988 FunctionType *fn_ty =
2989 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04002990 fmin_fn =
2991 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04002992 fmin_fn->addFnAttr(Attribute::ReadNone);
2993 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2994 }
2995
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002996 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04002997 if (!floor_fn) {
2998 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002999 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003000 floor_fn = cast<Function>(
3001 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003002 floor_fn->addFnAttr(Attribute::ReadNone);
3003 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3004 }
3005
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003006 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003007 if (!clspv_fract_fn) {
3008 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003009 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003010 clspv_fract_fn = cast<Function>(
3011 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003012 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3013 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3014 }
3015
3016 // Number of significant significand bits, whether represented or not.
3017 unsigned num_significand_bits;
3018 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003019 case Type::HalfTyID:
3020 num_significand_bits = 11;
3021 break;
3022 case Type::FloatTyID:
3023 num_significand_bits = 24;
3024 break;
3025 case Type::DoubleTyID:
3026 num_significand_bits = 53;
3027 break;
3028 default:
3029 assert(false && "Unhandled float type when processing fract builtin");
3030 break;
David Neto62653202017-10-16 19:05:18 -04003031 }
3032 // Beware that the disassembler displays this value as
3033 // OpConstant %float 1
3034 // which is not quite right.
3035 const double kJustUnderOneScalar =
3036 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3037
3038 Constant *just_under_one =
3039 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3040 if (result_ty->isVectorTy()) {
3041 just_under_one = ConstantVector::getSplat(
3042 result_ty->getVectorNumElements(), just_under_one);
3043 }
3044
3045 IRBuilder<> Builder(Context);
3046
3047 SmallVector<Instruction *, 4> ToRemoves;
3048
3049 // Walk the users of the function.
3050 for (auto &U : F->uses()) {
3051 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3052
3053 Builder.SetInsertPoint(CI);
3054 auto arg = CI->getArgOperand(0);
3055 auto ptr = CI->getArgOperand(1);
3056
3057 // Compute floor result and store it.
3058 auto floor = Builder.CreateCall(floor_fn, {arg});
3059 Builder.CreateStore(floor, ptr);
3060
3061 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003062 auto fract_result =
3063 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003064
3065 CI->replaceAllUsesWith(fract_result);
3066
3067 // Lastly, remember to remove the user.
3068 ToRemoves.push_back(CI);
3069 }
3070 }
3071
3072 // And cleanup the calls we don't use anymore.
3073 for (auto V : ToRemoves) {
3074 V->eraseFromParent();
3075 }
3076
3077 // And remove the function we don't need either too.
3078 F->eraseFromParent();
3079 }
3080 }
3081
3082 return Changed;
3083}