blob: 49b1706c11328cb4e85e7253f63f6ccec9a29c0f [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
#include <math.h>
#include <string>
#include <tuple>

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "spirv/1.0/spirv.hpp"

#include "clspv/Option.h"

#include "Passes.h"
#include "SPIRVOp.h"
#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037
David Neto22f144c2017-06-12 14:26:21 -040038using namespace llvm;
39
40#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
41
42namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000043
/// Type information for a single argument of a mangled builtin.
struct ArgTypeInfo {
  /// Signedness of the argument's scalar/element type. `None` is for types
  /// that carry no integer signedness.
  enum class SignedNess { None, Unsigned, Signed };

  /// Defaults to `None` so that a default-constructed ArgTypeInfo never holds
  /// an indeterminate value.
  SignedNess signedness = SignedNess::None;
};
48
49struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000050 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000051 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000052
Kévin Petit91bc72e2019-04-08 15:17:46 +010053 bool isArgSigned(size_t arg) const {
54 assert(argTypeInfos.size() > arg);
55 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000056 }
57
Kévin Petit91bc72e2019-04-08 15:17:46 +010058 static FunctionInfo getFromMangledName(StringRef name) {
59 FunctionInfo fi;
60 if (!getFromMangledNameCheck(name, &fi)) {
61 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000062 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010063 return fi;
64 }
Kévin Petit8a560882019-03-21 15:24:34 +000065
Kévin Petit91bc72e2019-04-08 15:17:46 +010066 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
67 if (!name.consume_front("_Z")) {
68 return false;
69 }
70 size_t nameLen;
71 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000072 return false;
73 }
74
Kévin Petit91bc72e2019-04-08 15:17:46 +010075 finfo->name = name.take_front(nameLen);
76 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000077
Kévin Petit91bc72e2019-04-08 15:17:46 +010078 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000079
Kévin Petit91bc72e2019-04-08 15:17:46 +010080 while (name.size() != 0) {
81
82 ArgTypeInfo ti;
83
84 // Try parsing a vector prefix
85 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040086 int numElems;
87 if (name.consumeInteger(10, numElems)) {
88 return false;
89 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010090
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040091 if (!name.consume_front("_")) {
92 return false;
93 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010094 }
95
96 // Parse the base type
97 char typeCode = name.front();
98 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040099 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100100 case 'c': // char
101 case 'a': // signed char
102 case 's': // short
103 case 'i': // int
104 case 'l': // long
105 ti.signedness = ArgTypeInfo::SignedNess::Signed;
106 break;
107 case 'h': // unsigned char
108 case 't': // unsigned short
109 case 'j': // unsigned int
110 case 'm': // unsigned long
111 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
112 break;
113 case 'f':
114 ti.signedness = ArgTypeInfo::SignedNess::None;
115 break;
116 case 'S':
117 ti = prev_ti;
118 if (!name.consume_front("_")) {
119 return false;
120 }
121 break;
122 default:
123 return false;
124 }
125
126 finfo->argTypeInfos.push_back(ti);
127
128 prev_ti = ti;
129 }
130
131 return true;
132 };
Kévin Petit8a560882019-03-21 15:24:34 +0000133};
134
// Returns the zero-based index of the highest set bit of |v| (0 when |v| is
// 0 or 1). Despite the name this is not a count of leading zeros; callers in
// this file only use the difference of two results as a shift distance.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search for the top bit: halve the window on every step.
  if (v > 0xFFFF) {
    v >>= 16;
    index |= 16;
  }
  if (v > 0xFF) {
    v >>= 8;
    index |= 8;
  }
  if (v > 0xF) {
    v >>= 4;
    index |= 4;
  }
  if (v > 0x3) {
    v >>= 2;
    index |= 2;
  }

  // |v| is now in [0, 3]; its bit 1 supplies the last bit of the index.
  return index | (v >> 1);
}
154
155Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
156 if (1 == elements) {
157 return Type::getInt1Ty(C);
158 } else {
159 return VectorType::get(Type::getInt1Ty(C), elements);
160 }
161}
162
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100163Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
164 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
165 if (Ty->isVectorTy()) {
166 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
167 }
168 return IntTy;
169}
170
David Neto22f144c2017-06-12 14:26:21 -0400171struct ReplaceOpenCLBuiltinPass final : public ModulePass {
172 static char ID;
173 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
174
175 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000176 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100177 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100178 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400179 bool replaceRecip(Module &M);
180 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100181 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400182 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100183 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400184 bool replaceLog10(Module &M);
185 bool replaceBarrier(Module &M);
186 bool replaceMemFence(Module &M);
187 bool replaceRelational(Module &M);
188 bool replaceIsInfAndIsNan(Module &M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100189 bool replaceIsFinite(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400190 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000191 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000192 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000193 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000194 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000195 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000196 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000197 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400198 bool replaceSignbit(Module &M);
199 bool replaceMadandMad24andMul24(Module &M);
200 bool replaceVloadHalf(Module &M);
201 bool replaceVloadHalf2(Module &M);
202 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700203 bool replaceClspvVloadaHalf2(Module &M);
204 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400205 bool replaceVstoreHalf(Module &M);
206 bool replaceVstoreHalf2(Module &M);
207 bool replaceVstoreHalf4(Module &M);
Kévin Petit06517a12019-12-09 19:40:31 +0000208 bool replaceSampledReadImageWithIntCoords(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400209 bool replaceAtomics(Module &M);
210 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400211 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700212 bool replaceVload(Module &M);
213 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400214};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100215} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400216
217char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400218INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
219 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400220
221namespace clspv {
222ModulePass *createReplaceOpenCLBuiltinPass() {
223 return new ReplaceOpenCLBuiltinPass();
224}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400225} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400226
227bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
228 bool Changed = false;
229
Kévin Petit2444e9b2018-11-09 14:14:37 +0000230 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100231 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100232 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400233 Changed |= replaceRecip(M);
234 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100235 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400236 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100237 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400238 Changed |= replaceLog10(M);
239 Changed |= replaceBarrier(M);
240 Changed |= replaceMemFence(M);
241 Changed |= replaceRelational(M);
242 Changed |= replaceIsInfAndIsNan(M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100243 Changed |= replaceIsFinite(M);
David Neto22f144c2017-06-12 14:26:21 -0400244 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000245 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000246 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000247 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000248 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000249 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000250 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000251 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400252 Changed |= replaceSignbit(M);
253 Changed |= replaceMadandMad24andMul24(M);
254 Changed |= replaceVloadHalf(M);
255 Changed |= replaceVloadHalf2(M);
256 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700257 Changed |= replaceClspvVloadaHalf2(M);
258 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400259 Changed |= replaceVstoreHalf(M);
260 Changed |= replaceVstoreHalf2(M);
261 Changed |= replaceVstoreHalf4(M);
Kévin Petit06517a12019-12-09 19:40:31 +0000262 Changed |= replaceSampledReadImageWithIntCoords(M);
David Neto22f144c2017-06-12 14:26:21 -0400263 Changed |= replaceAtomics(M);
264 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400265 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700266 Changed |= replaceVload(M);
267 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400268
269 return Changed;
270}
271
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400272bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
273 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000274
Kévin Petite8edce32019-04-10 14:23:32 +0100275 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000276
277 for (auto Name : Names) {
278 // If we find a function with the matching name.
279 if (auto F = M.getFunction(Name)) {
280 SmallVector<Instruction *, 4> ToRemoves;
281
282 // Walk the users of the function.
283 for (auto &U : F->uses()) {
284 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000285
Kévin Petite8edce32019-04-10 14:23:32 +0100286 auto NewValue = Replacer(CI);
287
288 if (NewValue != nullptr) {
289 CI->replaceAllUsesWith(NewValue);
290 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000291
292 // Lastly, remember to remove the user.
293 ToRemoves.push_back(CI);
294 }
295 }
296
297 Changed = !ToRemoves.empty();
298
299 // And cleanup the calls we don't use anymore.
300 for (auto V : ToRemoves) {
301 V->eraseFromParent();
302 }
303
304 // And remove the function we don't need either too.
305 F->eraseFromParent();
306 }
307 }
308
309 return Changed;
310}
311
Kévin Petite8edce32019-04-10 14:23:32 +0100312bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100313
Kévin Petite8edce32019-04-10 14:23:32 +0100314 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400315 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
316 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
317 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
318 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100319 };
320
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400321 return replaceCallsWithValue(M, Names,
322 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100323}
324
325bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
326
327 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400328 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
329 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
330 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
331 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
332 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
333 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
334 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
335 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
336 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
337 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
338 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100339 };
340
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400341 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100342 auto XValue = CI->getOperand(0);
343 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100344
Kévin Petite8edce32019-04-10 14:23:32 +0100345 IRBuilder<> Builder(CI);
346 auto XmY = Builder.CreateSub(XValue, YValue);
347 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100348
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400349 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100350 auto F = CI->getCalledFunction();
351 auto finfo = FunctionInfo::getFromMangledName(F->getName());
352 if (finfo.isArgSigned(0)) {
353 Cmp = Builder.CreateICmpSGT(YValue, XValue);
354 } else {
355 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100356 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100357
Kévin Petite8edce32019-04-10 14:23:32 +0100358 return Builder.CreateSelect(Cmp, YmX, XmY);
359 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100360}
361
Kévin Petit8c1be282019-04-02 19:34:25 +0100362bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100363
Kévin Petite8edce32019-04-10 14:23:32 +0100364 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400365 "_Z8copysignff",
366 "_Z8copysignDv2_fS_",
367 "_Z8copysignDv3_fS_",
368 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100369 };
370
Kévin Petite8edce32019-04-10 14:23:32 +0100371 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
372 auto XValue = CI->getOperand(0);
373 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100374
Kévin Petite8edce32019-04-10 14:23:32 +0100375 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100376
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400377 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100378 if (Ty->isVectorTy()) {
379 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100380 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100381
Kévin Petite8edce32019-04-10 14:23:32 +0100382 // Return X with the sign of Y
383
384 // Sign bit masks
385 auto SignBit = IntTy->getScalarSizeInBits() - 1;
386 auto SignBitMask = 1 << SignBit;
387 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
388 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
389
390 IRBuilder<> Builder(CI);
391
392 // Extract sign of Y
393 auto YInt = Builder.CreateBitCast(YValue, IntTy);
394 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
395
396 // Clear sign bit in X
397 auto XInt = Builder.CreateBitCast(XValue, IntTy);
398 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
399
400 // Insert sign bit of Y into X
401 auto NewXInt = Builder.CreateOr(XInt, YSign);
402
403 // And cast back to floating-point
404 return Builder.CreateBitCast(NewXInt, Ty);
405 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100406}
407
David Neto22f144c2017-06-12 14:26:21 -0400408bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400409
Kévin Petite8edce32019-04-10 14:23:32 +0100410 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400411 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
412 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
413 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
414 };
415
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400416 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100417 // Recip has one arg.
418 auto Arg = CI->getOperand(0);
419 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
420 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
421 });
David Neto22f144c2017-06-12 14:26:21 -0400422}
423
424bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400425
Kévin Petite8edce32019-04-10 14:23:32 +0100426 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400427 "_Z11half_divideff", "_Z13native_divideff",
428 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
429 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
430 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
431 };
432
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400433 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100434 auto Op0 = CI->getOperand(0);
435 auto Op1 = CI->getOperand(1);
436 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
437 });
David Neto22f144c2017-06-12 14:26:21 -0400438}
439
Kévin Petit1329a002019-06-15 05:54:05 +0100440bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
441
442 std::vector<const char *> Names = {
443 "_Z3dotff",
444 "_Z3dotDv2_fS_",
445 "_Z3dotDv3_fS_",
446 "_Z3dotDv4_fS_",
447 };
448
449 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
450 auto Op0 = CI->getOperand(0);
451 auto Op1 = CI->getOperand(1);
452
453 Value *V;
454 if (Op0->getType()->isVectorTy()) {
455 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
456 CI->getType(), {Op0, Op1});
457 } else {
458 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
459 }
460
461 return V;
462 });
463}
464
David Neto22f144c2017-06-12 14:26:21 -0400465bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
466 bool Changed = false;
467
468 const std::map<const char *, const char *> Map = {
469 {"_Z5exp10f", "_Z3expf"},
470 {"_Z10half_exp10f", "_Z8half_expf"},
471 {"_Z12native_exp10f", "_Z10native_expf"},
472 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
473 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
474 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
475 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
476 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
477 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
478 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
479 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
480 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
481
482 for (auto Pair : Map) {
483 // If we find a function with the matching name.
484 if (auto F = M.getFunction(Pair.first)) {
485 SmallVector<Instruction *, 4> ToRemoves;
486
487 // Walk the users of the function.
488 for (auto &U : F->uses()) {
489 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
490 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
491
492 auto Arg = CI->getOperand(0);
493
494 // Constant of the natural log of 10 (ln(10)).
495 const double Ln10 =
496 2.302585092994045684017991454684364207601101488628772976033;
497
498 auto Mul = BinaryOperator::Create(
499 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
500 CI);
501
502 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
503
504 CI->replaceAllUsesWith(NewCI);
505
506 // Lastly, remember to remove the user.
507 ToRemoves.push_back(CI);
508 }
509 }
510
511 Changed = !ToRemoves.empty();
512
513 // And cleanup the calls we don't use anymore.
514 for (auto V : ToRemoves) {
515 V->eraseFromParent();
516 }
517
518 // And remove the function we don't need either too.
519 F->eraseFromParent();
520 }
521 }
522
523 return Changed;
524}
525
Kévin Petit0644a9c2019-06-20 21:08:46 +0100526bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
527
528 std::vector<const char *> Names = {
529 "_Z4fmodff",
530 "_Z4fmodDv2_fS_",
531 "_Z4fmodDv3_fS_",
532 "_Z4fmodDv4_fS_",
533 };
534
535 // OpenCL fmod(x,y) is x - y * trunc(x/y)
536 // The sign for a non-zero result is taken from x.
537 // (Try an example.)
538 // So translate to FRem
539 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
540 auto Op0 = CI->getOperand(0);
541 auto Op1 = CI->getOperand(1);
542 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
543 });
544}
545
David Neto22f144c2017-06-12 14:26:21 -0400546bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
547 bool Changed = false;
548
549 const std::map<const char *, const char *> Map = {
550 {"_Z5log10f", "_Z3logf"},
551 {"_Z10half_log10f", "_Z8half_logf"},
552 {"_Z12native_log10f", "_Z10native_logf"},
553 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
554 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
555 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
556 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
557 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
558 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
559 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
560 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
561 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
562
563 for (auto Pair : Map) {
564 // If we find a function with the matching name.
565 if (auto F = M.getFunction(Pair.first)) {
566 SmallVector<Instruction *, 4> ToRemoves;
567
568 // Walk the users of the function.
569 for (auto &U : F->uses()) {
570 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
571 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
572
573 auto Arg = CI->getOperand(0);
574
575 // Constant of the reciprocal of the natural log of 10 (ln(10)).
576 const double Ln10 =
577 0.434294481903251827651128918916605082294397005803666566114;
578
579 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
580
581 auto Mul = BinaryOperator::Create(
582 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
583 "", CI);
584
585 CI->replaceAllUsesWith(Mul);
586
587 // Lastly, remember to remove the user.
588 ToRemoves.push_back(CI);
589 }
590 }
591
592 Changed = !ToRemoves.empty();
593
594 // And cleanup the calls we don't use anymore.
595 for (auto V : ToRemoves) {
596 V->eraseFromParent();
597 }
598
599 // And remove the function we don't need either too.
600 F->eraseFromParent();
601 }
602 }
603
604 return Changed;
605}
606
607bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400608
609 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
610
Kévin Petitc4643922019-06-17 19:32:05 +0100611 const std::vector<const char *> Names = {
alan-bakerf3bce4a2019-06-28 16:01:15 -0400612 "_Z7barrierj",
Kévin Petitc4643922019-06-17 19:32:05 +0100613 };
David Neto22f144c2017-06-12 14:26:21 -0400614
Kévin Petitc4643922019-06-17 19:32:05 +0100615 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
616 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400617
Kévin Petitc4643922019-06-17 19:32:05 +0100618 // We need to map the OpenCL constants to the SPIR-V equivalents.
619 const auto LocalMemFence =
620 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
621 const auto GlobalMemFence =
622 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
623 const auto ConstantSequentiallyConsistent = ConstantInt::get(
624 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
625 const auto ConstantScopeDevice =
626 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
627 const auto ConstantScopeWorkgroup =
628 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400629
Kévin Petitc4643922019-06-17 19:32:05 +0100630 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
631 const auto LocalMemFenceMask =
632 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
633 const auto WorkgroupShiftAmount =
634 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
635 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
636 Instruction::Shl, LocalMemFenceMask,
637 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400638
Kévin Petitc4643922019-06-17 19:32:05 +0100639 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
640 const auto GlobalMemFenceMask =
641 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
642 const auto UniformShiftAmount =
643 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
644 const auto MemorySemanticsUniform = BinaryOperator::Create(
645 Instruction::Shl, GlobalMemFenceMask,
646 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400647
Kévin Petitc4643922019-06-17 19:32:05 +0100648 // And combine the above together, also adding in
649 // MemorySemanticsSequentiallyConsistentMask.
650 auto MemorySemantics =
651 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
652 ConstantSequentiallyConsistent, "", CI);
653 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
654 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400655
Kévin Petitc4643922019-06-17 19:32:05 +0100656 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
657 // Device Scope, otherwise Workgroup Scope.
658 const auto Cmp =
659 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
660 GlobalMemFence, "", CI);
661 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
662 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400663
Kévin Petitc4643922019-06-17 19:32:05 +0100664 // Lastly, the Execution Scope is always Workgroup Scope.
665 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400666
Kévin Petitc4643922019-06-17 19:32:05 +0100667 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
668 {Attribute::NoDuplicate}, CI->getType(),
669 {ExecutionScope, MemoryScope, MemorySemantics});
670 });
David Neto22f144c2017-06-12 14:26:21 -0400671}
672
673bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
674 bool Changed = false;
675
676 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
677
Kévin Petitc4643922019-06-17 19:32:05 +0100678 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100679 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100680 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400681 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100682 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100683 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100684 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100685 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400686
687 for (auto Pair : Map) {
688 // If we find a function with the matching name.
689 if (auto F = M.getFunction(Pair.first)) {
690 SmallVector<Instruction *, 4> ToRemoves;
691
692 // Walk the users of the function.
693 for (auto &U : F->uses()) {
694 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400695
696 auto Arg = CI->getOperand(0);
697
698 // We need to map the OpenCL constants to the SPIR-V equivalents.
699 const auto LocalMemFence =
700 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
701 const auto GlobalMemFence =
702 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
703 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100704 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400705 const auto ConstantScopeDevice =
706 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
707
708 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
709 const auto LocalMemFenceMask = BinaryOperator::Create(
710 Instruction::And, LocalMemFence, Arg, "", CI);
711 const auto WorkgroupShiftAmount =
712 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
713 clz(CLK_LOCAL_MEM_FENCE);
714 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
715 Instruction::Shl, LocalMemFenceMask,
716 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
717
718 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
719 const auto GlobalMemFenceMask = BinaryOperator::Create(
720 Instruction::And, GlobalMemFence, Arg, "", CI);
721 const auto UniformShiftAmount =
722 clz(spv::MemorySemanticsUniformMemoryMask) -
723 clz(CLK_GLOBAL_MEM_FENCE);
724 const auto MemorySemanticsUniform = BinaryOperator::Create(
725 Instruction::Shl, GlobalMemFenceMask,
726 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
727
728 // And combine the above together, also adding in
729 // MemorySemanticsSequentiallyConsistentMask.
730 auto MemorySemantics =
731 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
732 ConstantMemorySemantics, "", CI);
733 MemorySemantics = BinaryOperator::Create(
734 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
735
736 // Memory Scope is always device.
737 const auto MemoryScope = ConstantScopeDevice;
738
Kévin Petitc4643922019-06-17 19:32:05 +0100739 const auto SPIRVOp = std::get<0>(Pair.second);
740 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
741 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400742
743 CI->replaceAllUsesWith(NewCI);
744
745 // Lastly, remember to remove the user.
746 ToRemoves.push_back(CI);
747 }
748 }
749
750 Changed = !ToRemoves.empty();
751
752 // And cleanup the calls we don't use anymore.
753 for (auto V : ToRemoves) {
754 V->eraseFromParent();
755 }
756
757 // And remove the function we don't need either too.
758 F->eraseFromParent();
759 }
760 }
761
762 return Changed;
763}
764
765bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
766 bool Changed = false;
767
768 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
769 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
770 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
771 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
772 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
773 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
774 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
775 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
776 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
777 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
778 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
779 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
780 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
781 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
782 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
783 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
784 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
785 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
786 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
787 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
788 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
789 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
790 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
791 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
792 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
793 };
794
795 for (auto Pair : Map) {
796 // If we find a function with the matching name.
797 if (auto F = M.getFunction(Pair.first)) {
798 SmallVector<Instruction *, 4> ToRemoves;
799
800 // Walk the users of the function.
801 for (auto &U : F->uses()) {
802 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
803 // The predicate to use in the CmpInst.
804 auto Predicate = Pair.second.first;
805
806 // The value to return for true.
807 auto TrueValue =
808 ConstantInt::getSigned(CI->getType(), Pair.second.second);
809
810 // The value to return for false.
811 auto FalseValue = Constant::getNullValue(CI->getType());
812
813 auto Arg1 = CI->getOperand(0);
814 auto Arg2 = CI->getOperand(1);
815
816 const auto Cmp =
817 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
818
819 const auto Select =
820 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
821
822 CI->replaceAllUsesWith(Select);
823
824 // Lastly, remember to remove the user.
825 ToRemoves.push_back(CI);
826 }
827 }
828
829 Changed = !ToRemoves.empty();
830
831 // And cleanup the calls we don't use anymore.
832 for (auto V : ToRemoves) {
833 V->eraseFromParent();
834 }
835
836 // And remove the function we don't need either too.
837 F->eraseFromParent();
838 }
839 }
840
841 return Changed;
842}
843
844bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
845 bool Changed = false;
846
Kévin Petitff03aee2019-06-12 19:39:03 +0100847 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
848 {"_Z5isinff", {spv::OpIsInf, 1}},
849 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
850 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
851 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
852 {"_Z5isnanf", {spv::OpIsNan, 1}},
853 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
854 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
855 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400856 };
857
858 for (auto Pair : Map) {
859 // If we find a function with the matching name.
860 if (auto F = M.getFunction(Pair.first)) {
861 SmallVector<Instruction *, 4> ToRemoves;
862
863 // Walk the users of the function.
864 for (auto &U : F->uses()) {
865 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
866 const auto CITy = CI->getType();
867
Kévin Petitff03aee2019-06-12 19:39:03 +0100868 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400869
870 // The value to return for true.
871 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
872
873 // The value to return for false.
874 auto FalseValue = Constant::getNullValue(CITy);
875
876 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
877 M.getContext(),
878 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
879
Kévin Petitff03aee2019-06-12 19:39:03 +0100880 auto NewCI =
881 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
882 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400883
884 const auto Select =
885 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
886
887 CI->replaceAllUsesWith(Select);
888
889 // Lastly, remember to remove the user.
890 ToRemoves.push_back(CI);
891 }
892 }
893
894 Changed = !ToRemoves.empty();
895
896 // And cleanup the calls we don't use anymore.
897 for (auto V : ToRemoves) {
898 V->eraseFromParent();
899 }
900
901 // And remove the function we don't need either too.
902 F->eraseFromParent();
903 }
904 }
905
906 return Changed;
907}
908
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100909bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Module &M) {
910 std::vector<const char *> Names = {
911 "_Z8isfiniteh", "_Z8isfiniteDv2_h", "_Z8isfiniteDv3_h",
912 "_Z8isfiniteDv4_h", "_Z8isfinitef", "_Z8isfiniteDv2_f",
913 "_Z8isfiniteDv3_f", "_Z8isfiniteDv4_f", "_Z8isfinited",
914 "_Z8isfiniteDv2_d", "_Z8isfiniteDv3_d", "_Z8isfiniteDv4_d",
915 };
916
917 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
918 auto &C = M.getContext();
919 auto Val = CI->getOperand(0);
920 auto ValTy = Val->getType();
921 auto RetTy = CI->getType();
922
923 // Get a suitable integer type to represent the number
924 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
925
926 // Create Mask
927 auto ScalarSize = ValTy->getScalarSizeInBits();
928 Value *InfMask;
929 switch (ScalarSize) {
930 case 16:
931 InfMask = ConstantInt::get(IntTy, 0x7C00U);
932 break;
933 case 32:
934 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
935 break;
936 case 64:
937 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
938 break;
939 default:
940 llvm_unreachable("Unsupported floating-point type");
941 }
942
943 IRBuilder<> Builder(CI);
944
945 // Bitcast to int
946 auto ValInt = Builder.CreateBitCast(Val, IntTy);
947
948 // Mask and compare
949 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
950 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
951
952 auto RetFalse = ConstantInt::get(RetTy, 0);
953 Value *RetTrue;
954 if (ValTy->isVectorTy()) {
955 RetTrue = ConstantInt::getSigned(RetTy, -1);
956 } else {
957 RetTrue = ConstantInt::get(RetTy, 1);
958 }
959 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
960 });
961}
962
David Neto22f144c2017-06-12 14:26:21 -0400963bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
964 bool Changed = false;
965
Kévin Petitff03aee2019-06-12 19:39:03 +0100966 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000967 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100968 {"_Z3allc", spv::OpNop},
969 {"_Z3allDv2_c", spv::OpAll},
970 {"_Z3allDv3_c", spv::OpAll},
971 {"_Z3allDv4_c", spv::OpAll},
972 {"_Z3alls", spv::OpNop},
973 {"_Z3allDv2_s", spv::OpAll},
974 {"_Z3allDv3_s", spv::OpAll},
975 {"_Z3allDv4_s", spv::OpAll},
976 {"_Z3alli", spv::OpNop},
977 {"_Z3allDv2_i", spv::OpAll},
978 {"_Z3allDv3_i", spv::OpAll},
979 {"_Z3allDv4_i", spv::OpAll},
980 {"_Z3alll", spv::OpNop},
981 {"_Z3allDv2_l", spv::OpAll},
982 {"_Z3allDv3_l", spv::OpAll},
983 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000984
985 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100986 {"_Z3anyc", spv::OpNop},
987 {"_Z3anyDv2_c", spv::OpAny},
988 {"_Z3anyDv3_c", spv::OpAny},
989 {"_Z3anyDv4_c", spv::OpAny},
990 {"_Z3anys", spv::OpNop},
991 {"_Z3anyDv2_s", spv::OpAny},
992 {"_Z3anyDv3_s", spv::OpAny},
993 {"_Z3anyDv4_s", spv::OpAny},
994 {"_Z3anyi", spv::OpNop},
995 {"_Z3anyDv2_i", spv::OpAny},
996 {"_Z3anyDv3_i", spv::OpAny},
997 {"_Z3anyDv4_i", spv::OpAny},
998 {"_Z3anyl", spv::OpNop},
999 {"_Z3anyDv2_l", spv::OpAny},
1000 {"_Z3anyDv3_l", spv::OpAny},
1001 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -04001002 };
1003
1004 for (auto Pair : Map) {
1005 // If we find a function with the matching name.
1006 if (auto F = M.getFunction(Pair.first)) {
1007 SmallVector<Instruction *, 4> ToRemoves;
1008
1009 // Walk the users of the function.
1010 for (auto &U : F->uses()) {
1011 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04001012
1013 auto Arg = CI->getOperand(0);
1014
1015 Value *V;
1016
Kévin Petitfd27cca2018-10-31 13:00:17 +00001017 // If the argument is a 32-bit int, just use a shift
1018 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1019 V = BinaryOperator::Create(Instruction::LShr, Arg,
1020 ConstantInt::get(Arg->getType(), 31), "",
1021 CI);
1022 } else {
David Neto22f144c2017-06-12 14:26:21 -04001023 // The value for zero to compare against.
1024 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1025
David Neto22f144c2017-06-12 14:26:21 -04001026 // The value to return for true.
1027 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1028
1029 // The value to return for false.
1030 const auto FalseValue = Constant::getNullValue(CI->getType());
1031
Kévin Petitfd27cca2018-10-31 13:00:17 +00001032 const auto Cmp = CmpInst::Create(
1033 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1034
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001035 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001036
1037 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +01001038 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001039
Kévin Petitff03aee2019-06-12 19:39:03 +01001040 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001041
Kévin Petitff03aee2019-06-12 19:39:03 +01001042 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001043
Kévin Petitff03aee2019-06-12 19:39:03 +01001044 const auto NewCI = clspv::InsertSPIRVOp(
1045 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001046 SelectSource = NewCI;
1047
1048 } else {
1049 SelectSource = Cmp;
1050 }
1051
1052 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001053 }
1054
1055 CI->replaceAllUsesWith(V);
1056
1057 // Lastly, remember to remove the user.
1058 ToRemoves.push_back(CI);
1059 }
1060 }
1061
1062 Changed = !ToRemoves.empty();
1063
1064 // And cleanup the calls we don't use anymore.
1065 for (auto V : ToRemoves) {
1066 V->eraseFromParent();
1067 }
1068
1069 // And remove the function we don't need either too.
1070 F->eraseFromParent();
1071 }
1072 }
1073
1074 return Changed;
1075}
1076
Kévin Petitbf0036c2019-03-06 13:57:10 +00001077bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1078 bool Changed = false;
1079
1080 for (auto const &SymVal : M.getValueSymbolTable()) {
1081 // Skip symbols whose name doesn't match
1082 if (!SymVal.getKey().startswith("_Z8upsample")) {
1083 continue;
1084 }
1085 // Is there a function going by that name?
1086 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1087
1088 SmallVector<Instruction *, 4> ToRemoves;
1089
1090 // Walk the users of the function.
1091 for (auto &U : F->uses()) {
1092 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1093
1094 // Get arguments
1095 auto HiValue = CI->getOperand(0);
1096 auto LoValue = CI->getOperand(1);
1097
1098 // Don't touch overloads that aren't in OpenCL C
1099 auto HiType = HiValue->getType();
1100 auto LoType = LoValue->getType();
1101
1102 if (HiType != LoType) {
1103 continue;
1104 }
1105
1106 if (!HiType->isIntOrIntVectorTy()) {
1107 continue;
1108 }
1109
1110 if (HiType->getScalarSizeInBits() * 2 !=
1111 CI->getType()->getScalarSizeInBits()) {
1112 continue;
1113 }
1114
1115 if ((HiType->getScalarSizeInBits() != 8) &&
1116 (HiType->getScalarSizeInBits() != 16) &&
1117 (HiType->getScalarSizeInBits() != 32)) {
1118 continue;
1119 }
1120
1121 if (HiType->isVectorTy()) {
1122 if ((HiType->getVectorNumElements() != 2) &&
1123 (HiType->getVectorNumElements() != 3) &&
1124 (HiType->getVectorNumElements() != 4) &&
1125 (HiType->getVectorNumElements() != 8) &&
1126 (HiType->getVectorNumElements() != 16)) {
1127 continue;
1128 }
1129 }
1130
1131 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001132 auto HiCast =
1133 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1134 auto LoCast =
1135 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001136
1137 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001138 auto ShiftAmount =
1139 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001140 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1141 ShiftAmount, "", CI);
1142
1143 // OR both results
1144 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1145 "", CI);
1146
1147 // Replace call with the expression
1148 CI->replaceAllUsesWith(V);
1149
1150 // Lastly, remember to remove the user.
1151 ToRemoves.push_back(CI);
1152 }
1153 }
1154
1155 Changed = !ToRemoves.empty();
1156
1157 // And cleanup the calls we don't use anymore.
1158 for (auto V : ToRemoves) {
1159 V->eraseFromParent();
1160 }
1161
1162 // And remove the function we don't need either too.
1163 F->eraseFromParent();
1164 }
1165 }
1166
1167 return Changed;
1168}
1169
Kévin Petitd44eef52019-03-08 13:22:14 +00001170bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1171 bool Changed = false;
1172
1173 for (auto const &SymVal : M.getValueSymbolTable()) {
1174 // Skip symbols whose name doesn't match
1175 if (!SymVal.getKey().startswith("_Z6rotate")) {
1176 continue;
1177 }
1178 // Is there a function going by that name?
1179 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1180
1181 SmallVector<Instruction *, 4> ToRemoves;
1182
1183 // Walk the users of the function.
1184 for (auto &U : F->uses()) {
1185 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1186
1187 // Get arguments
1188 auto SrcValue = CI->getOperand(0);
1189 auto RotAmount = CI->getOperand(1);
1190
1191 // Don't touch overloads that aren't in OpenCL C
1192 auto SrcType = SrcValue->getType();
1193 auto RotType = RotAmount->getType();
1194
1195 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1196 continue;
1197 }
1198
1199 if (!SrcType->isIntOrIntVectorTy()) {
1200 continue;
1201 }
1202
1203 if ((SrcType->getScalarSizeInBits() != 8) &&
1204 (SrcType->getScalarSizeInBits() != 16) &&
1205 (SrcType->getScalarSizeInBits() != 32) &&
1206 (SrcType->getScalarSizeInBits() != 64)) {
1207 continue;
1208 }
1209
1210 if (SrcType->isVectorTy()) {
1211 if ((SrcType->getVectorNumElements() != 2) &&
1212 (SrcType->getVectorNumElements() != 3) &&
1213 (SrcType->getVectorNumElements() != 4) &&
1214 (SrcType->getVectorNumElements() != 8) &&
1215 (SrcType->getVectorNumElements() != 16)) {
1216 continue;
1217 }
1218 }
1219
1220 // The approach used is to shift the top bits down, the bottom bits up
1221 // and OR the two shifted values.
1222
1223 // The rotation amount is to be treated modulo the element size.
1224 // Since SPIR-V shift ops don't support this, let's apply the
1225 // modulo ahead of shifting. The element size is always a power of
1226 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001227 auto ModMask =
1228 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001229 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1230 ModMask, "", CI);
1231
1232 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001233 auto ScalarSize =
1234 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001235 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1236 RotAmount, "", CI);
1237
1238 // Now shift the bottom bits up and the top bits down
1239 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1240 RotAmount, "", CI);
1241 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1242 DownAmount, "", CI);
1243
1244 // Finally OR the two shifted values
1245 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1246 HiRotated, "", CI);
1247
1248 // Replace call with the expression
1249 CI->replaceAllUsesWith(V);
1250
1251 // Lastly, remember to remove the user.
1252 ToRemoves.push_back(CI);
1253 }
1254 }
1255
1256 Changed = !ToRemoves.empty();
1257
1258 // And cleanup the calls we don't use anymore.
1259 for (auto V : ToRemoves) {
1260 V->eraseFromParent();
1261 }
1262
1263 // And remove the function we don't need either too.
1264 F->eraseFromParent();
1265 }
1266 }
1267
1268 return Changed;
1269}
1270
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001271bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1272 bool Changed = false;
1273
1274 for (auto const &SymVal : M.getValueSymbolTable()) {
1275
1276 // Skip symbols whose name obviously doesn't match
1277 if (!SymVal.getKey().contains("convert_")) {
1278 continue;
1279 }
1280
1281 // Is there a function going by that name?
1282 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1283
1284 // Get info from the mangled name
1285 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001286 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001287
1288 // All functions of interest are handled by our mangled name parser
1289 if (!parsed) {
1290 continue;
1291 }
1292
1293 // Move on if this isn't a call to convert_
1294 if (!finfo.name.startswith("convert_")) {
1295 continue;
1296 }
1297
1298 // Extract the destination type from the function name
1299 StringRef DstTypeName = finfo.name;
1300 DstTypeName.consume_front("convert_");
1301
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001302 auto DstSignedNess =
1303 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1304 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1305 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1306 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1307 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1308 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1309 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1310 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1311 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1312 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001313
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001314 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001315 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001316
1317 SmallVector<Instruction *, 4> ToRemoves;
1318
1319 // Walk the users of the function.
1320 for (auto &U : F->uses()) {
1321 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1322
1323 // Get arguments
1324 auto SrcValue = CI->getOperand(0);
1325
1326 // Don't touch overloads that aren't in OpenCL C
1327 auto SrcType = SrcValue->getType();
1328 auto DstType = CI->getType();
1329
1330 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1331 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1332 continue;
1333 }
1334
1335 if (SrcType->isVectorTy()) {
1336
1337 if (SrcType->getVectorNumElements() !=
1338 DstType->getVectorNumElements()) {
1339 continue;
1340 }
1341
1342 if ((SrcType->getVectorNumElements() != 2) &&
1343 (SrcType->getVectorNumElements() != 3) &&
1344 (SrcType->getVectorNumElements() != 4) &&
1345 (SrcType->getVectorNumElements() != 8) &&
1346 (SrcType->getVectorNumElements() != 16)) {
1347 continue;
1348 }
1349 }
1350
1351 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1352 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1353
1354 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1355 bool DstIsInt = DstType->isIntOrIntVectorTy();
1356
1357 Value *V;
1358 if (SrcIsFloat && DstIsFloat) {
1359 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1360 } else if (SrcIsFloat && DstIsInt) {
1361 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001362 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1363 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001364 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001365 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1366 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001367 }
1368 } else if (SrcIsInt && DstIsFloat) {
1369 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001370 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1371 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001372 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001373 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1374 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001375 }
1376 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001377 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1378 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001379 } else {
1380 // Not something we're supposed to handle, just move on
1381 continue;
1382 }
1383
1384 // Replace call with the expression
1385 CI->replaceAllUsesWith(V);
1386
1387 // Lastly, remember to remove the user.
1388 ToRemoves.push_back(CI);
1389 }
1390 }
1391
1392 Changed = !ToRemoves.empty();
1393
1394 // And cleanup the calls we don't use anymore.
1395 for (auto V : ToRemoves) {
1396 V->eraseFromParent();
1397 }
1398
1399 // And remove the function we don't need either too.
1400 F->eraseFromParent();
1401 }
1402 }
1403
1404 return Changed;
1405}
1406
Kévin Petit8a560882019-03-21 15:24:34 +00001407bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1408 bool Changed = false;
1409
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001410 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001411
Kévin Petit617a76d2019-04-04 13:54:16 +01001412 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001413 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1414 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1415
1416 // Skip symbols whose name doesn't match
1417 if (!isMad && !isMul) {
1418 continue;
1419 }
1420
1421 // Is there a function going by that name?
1422 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001423 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001424 }
1425 }
1426
Kévin Petit617a76d2019-04-04 13:54:16 +01001427 for (auto F : FnWorklist) {
1428 SmallVector<Instruction *, 4> ToRemoves;
1429
1430 bool isMad = F->getName().startswith("_Z6mad_hi");
1431 // Walk the users of the function.
1432 for (auto &U : F->uses()) {
1433 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1434
1435 // Get arguments
1436 auto AValue = CI->getOperand(0);
1437 auto BValue = CI->getOperand(1);
1438 auto CValue = CI->getOperand(2);
1439
1440 // Don't touch overloads that aren't in OpenCL C
1441 auto AType = AValue->getType();
1442 auto BType = BValue->getType();
1443 auto CType = CValue->getType();
1444
1445 if ((AType != BType) || (CI->getType() != AType) ||
1446 (isMad && (AType != CType))) {
1447 continue;
1448 }
1449
1450 if (!AType->isIntOrIntVectorTy()) {
1451 continue;
1452 }
1453
1454 if ((AType->getScalarSizeInBits() != 8) &&
1455 (AType->getScalarSizeInBits() != 16) &&
1456 (AType->getScalarSizeInBits() != 32) &&
1457 (AType->getScalarSizeInBits() != 64)) {
1458 continue;
1459 }
1460
1461 if (AType->isVectorTy()) {
1462 if ((AType->getVectorNumElements() != 2) &&
1463 (AType->getVectorNumElements() != 3) &&
1464 (AType->getVectorNumElements() != 4) &&
1465 (AType->getVectorNumElements() != 8) &&
1466 (AType->getVectorNumElements() != 16)) {
1467 continue;
1468 }
1469 }
1470
1471 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001472 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001473
1474 // Select the appropriate signed/unsigned SPIR-V op
1475 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001476 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001477 opcode = spv::OpSMulExtended;
1478 } else {
1479 opcode = spv::OpUMulExtended;
1480 }
1481
1482 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001483 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001484 auto ExMulRetType = StructType::create(TwoValueType);
1485
1486 // Call the SPIR-V op
1487 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1488 ExMulRetType, {AValue, BValue});
1489
1490 // Get the high part of the result
1491 unsigned Idxs[] = {1};
1492 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1493
1494 // If we're handling a mad_hi, add the third argument to the result
1495 if (isMad) {
1496 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1497 }
1498
1499 // Replace call with the expression
1500 CI->replaceAllUsesWith(V);
1501
1502 // Lastly, remember to remove the user.
1503 ToRemoves.push_back(CI);
1504 }
1505 }
1506
1507 Changed = !ToRemoves.empty();
1508
1509 // And cleanup the calls we don't use anymore.
1510 for (auto V : ToRemoves) {
1511 V->eraseFromParent();
1512 }
1513
1514 // And remove the function we don't need either too.
1515 F->eraseFromParent();
1516 }
1517
Kévin Petit8a560882019-03-21 15:24:34 +00001518 return Changed;
1519}
1520
Kévin Petitf5b78a22018-10-25 14:32:17 +00001521bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1522 bool Changed = false;
1523
1524 for (auto const &SymVal : M.getValueSymbolTable()) {
1525 // Skip symbols whose name doesn't match
1526 if (!SymVal.getKey().startswith("_Z6select")) {
1527 continue;
1528 }
1529 // Is there a function going by that name?
1530 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1531
1532 SmallVector<Instruction *, 4> ToRemoves;
1533
1534 // Walk the users of the function.
1535 for (auto &U : F->uses()) {
1536 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1537
1538 // Get arguments
1539 auto FalseValue = CI->getOperand(0);
1540 auto TrueValue = CI->getOperand(1);
1541 auto PredicateValue = CI->getOperand(2);
1542
1543 // Don't touch overloads that aren't in OpenCL C
1544 auto FalseType = FalseValue->getType();
1545 auto TrueType = TrueValue->getType();
1546 auto PredicateType = PredicateValue->getType();
1547
1548 if (FalseType != TrueType) {
1549 continue;
1550 }
1551
1552 if (!PredicateType->isIntOrIntVectorTy()) {
1553 continue;
1554 }
1555
1556 if (!FalseType->isIntOrIntVectorTy() &&
1557 !FalseType->getScalarType()->isFloatingPointTy()) {
1558 continue;
1559 }
1560
1561 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1562 continue;
1563 }
1564
1565 if (FalseType->getScalarSizeInBits() !=
1566 PredicateType->getScalarSizeInBits()) {
1567 continue;
1568 }
1569
1570 if (FalseType->isVectorTy()) {
1571 if (FalseType->getVectorNumElements() !=
1572 PredicateType->getVectorNumElements()) {
1573 continue;
1574 }
1575
1576 if ((FalseType->getVectorNumElements() != 2) &&
1577 (FalseType->getVectorNumElements() != 3) &&
1578 (FalseType->getVectorNumElements() != 4) &&
1579 (FalseType->getVectorNumElements() != 8) &&
1580 (FalseType->getVectorNumElements() != 16)) {
1581 continue;
1582 }
1583 }
1584
1585 // Create constant
1586 const auto ZeroValue = Constant::getNullValue(PredicateType);
1587
1588 // Scalar and vector are to be treated differently
1589 CmpInst::Predicate Pred;
1590 if (PredicateType->isVectorTy()) {
1591 Pred = CmpInst::ICMP_SLT;
1592 } else {
1593 Pred = CmpInst::ICMP_NE;
1594 }
1595
1596 // Create comparison instruction
1597 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1598 ZeroValue, "", CI);
1599
1600 // Create select
1601 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1602
1603 // Replace call with the selection
1604 CI->replaceAllUsesWith(V);
1605
1606 // Lastly, remember to remove the user.
1607 ToRemoves.push_back(CI);
1608 }
1609 }
1610
1611 Changed = !ToRemoves.empty();
1612
1613 // And cleanup the calls we don't use anymore.
1614 for (auto V : ToRemoves) {
1615 V->eraseFromParent();
1616 }
1617
1618 // And remove the function we don't need either too.
1619 F->eraseFromParent();
1620 }
1621 }
1622
1623 return Changed;
1624}
1625
Kévin Petite7d0cce2018-10-31 12:38:56 +00001626bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1627 bool Changed = false;
1628
1629 for (auto const &SymVal : M.getValueSymbolTable()) {
1630 // Skip symbols whose name doesn't match
1631 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1632 continue;
1633 }
1634 // Is there a function going by that name?
1635 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1636
1637 SmallVector<Instruction *, 4> ToRemoves;
1638
1639 // Walk the users of the function.
1640 for (auto &U : F->uses()) {
1641 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1642
1643 if (CI->getNumOperands() != 4) {
1644 continue;
1645 }
1646
1647 // Get arguments
1648 auto FalseValue = CI->getOperand(0);
1649 auto TrueValue = CI->getOperand(1);
1650 auto PredicateValue = CI->getOperand(2);
1651
1652 // Don't touch overloads that aren't in OpenCL C
1653 auto FalseType = FalseValue->getType();
1654 auto TrueType = TrueValue->getType();
1655 auto PredicateType = PredicateValue->getType();
1656
1657 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1658 continue;
1659 }
1660
1661 if (TrueType->isVectorTy()) {
1662 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1663 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001664 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001665 }
1666 if ((TrueType->getVectorNumElements() != 2) &&
1667 (TrueType->getVectorNumElements() != 3) &&
1668 (TrueType->getVectorNumElements() != 4) &&
1669 (TrueType->getVectorNumElements() != 8) &&
1670 (TrueType->getVectorNumElements() != 16)) {
1671 continue;
1672 }
1673 }
1674
1675 // Remember the type of the operands
1676 auto OpType = TrueType;
1677
1678 // The actual bit selection will always be done on an integer type,
1679 // declare it here
1680 Type *BitType;
1681
1682 // If the operands are float, then bitcast them to int
1683 if (OpType->getScalarType()->isFloatingPointTy()) {
1684
1685 // First create the new type
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001686 BitType = getIntOrIntVectorTyForCast(M.getContext(), OpType);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001687
1688 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001689 PredicateValue =
1690 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1691 FalseValue =
1692 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1693 TrueValue =
1694 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001695
1696 } else {
1697 // The operands have an integer type, use it directly
1698 BitType = OpType;
1699 }
1700
1701 // All the operands are now always integers
1702 // implement as (c & b) | (~c & a)
1703
1704 // Create our negated predicate value
1705 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001706 auto NotPredicateValue = BinaryOperator::Create(
1707 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001708
1709 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001710 auto BitsFalse = BinaryOperator::Create(
1711 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1712 auto BitsTrue = BinaryOperator::Create(
1713 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001714
1715 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1716 BitsTrue, "", CI);
1717
1718 // If we were dealing with a floating point type, we must bitcast
1719 // the result back to that
1720 if (OpType->getScalarType()->isFloatingPointTy()) {
1721 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1722 }
1723
1724 // Replace call with our new code
1725 CI->replaceAllUsesWith(V);
1726
1727 // Lastly, remember to remove the user.
1728 ToRemoves.push_back(CI);
1729 }
1730 }
1731
1732 Changed = !ToRemoves.empty();
1733
1734 // And cleanup the calls we don't use anymore.
1735 for (auto V : ToRemoves) {
1736 V->eraseFromParent();
1737 }
1738
1739 // And remove the function we don't need either too.
1740 F->eraseFromParent();
1741 }
1742 }
1743
1744 return Changed;
1745}
1746
Kévin Petit6b0a9532018-10-30 20:00:39 +00001747bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1748 bool Changed = false;
1749
1750 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001751 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1752 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1753 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1754 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1755 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1756 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001757 };
1758
1759 for (auto Pair : Map) {
1760 // If we find a function with the matching name.
1761 if (auto F = M.getFunction(Pair.first)) {
1762 SmallVector<Instruction *, 4> ToRemoves;
1763
1764 // Walk the users of the function.
1765 for (auto &U : F->uses()) {
1766 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1767
1768 auto ReplacementFn = Pair.second;
1769
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001770 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001771 Value *VectorArg;
1772
1773 // First figure out which function we're dealing with
1774 if (F->getName().startswith("_Z10smoothstep")) {
1775 ArgsToSplat.push_back(CI->getOperand(1));
1776 VectorArg = CI->getOperand(2);
1777 } else {
1778 VectorArg = CI->getOperand(1);
1779 }
1780
1781 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001782 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001783 auto VecType = VectorArg->getType();
1784
1785 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001786 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001787 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001788 auto index =
1789 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1790 NewVectorArg =
1791 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001792 }
1793 SplatArgs.push_back(NewVectorArg);
1794 }
1795
1796 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001797 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1798 const auto NewFType =
1799 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001800
1801 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1802
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001803 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001804 for (auto arg : SplatArgs) {
1805 NewArgs.push_back(arg);
1806 }
1807 NewArgs.push_back(VectorArg);
1808
1809 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1810
1811 CI->replaceAllUsesWith(NewCI);
1812
1813 // Lastly, remember to remove the user.
1814 ToRemoves.push_back(CI);
1815 }
1816 }
1817
1818 Changed = !ToRemoves.empty();
1819
1820 // And cleanup the calls we don't use anymore.
1821 for (auto V : ToRemoves) {
1822 V->eraseFromParent();
1823 }
1824
1825 // And remove the function we don't need either too.
1826 F->eraseFromParent();
1827 }
1828 }
1829
1830 return Changed;
1831}
1832
David Neto22f144c2017-06-12 14:26:21 -04001833bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1834 bool Changed = false;
1835
1836 const std::map<const char *, Instruction::BinaryOps> Map = {
1837 {"_Z7signbitf", Instruction::LShr},
1838 {"_Z7signbitDv2_f", Instruction::AShr},
1839 {"_Z7signbitDv3_f", Instruction::AShr},
1840 {"_Z7signbitDv4_f", Instruction::AShr},
1841 };
1842
1843 for (auto Pair : Map) {
1844 // If we find a function with the matching name.
1845 if (auto F = M.getFunction(Pair.first)) {
1846 SmallVector<Instruction *, 4> ToRemoves;
1847
1848 // Walk the users of the function.
1849 for (auto &U : F->uses()) {
1850 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1851 auto Arg = CI->getOperand(0);
1852
1853 auto Bitcast =
1854 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1855
1856 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1857 ConstantInt::get(CI->getType(), 31),
1858 "", CI);
1859
1860 CI->replaceAllUsesWith(Shr);
1861
1862 // Lastly, remember to remove the user.
1863 ToRemoves.push_back(CI);
1864 }
1865 }
1866
1867 Changed = !ToRemoves.empty();
1868
1869 // And cleanup the calls we don't use anymore.
1870 for (auto V : ToRemoves) {
1871 V->eraseFromParent();
1872 }
1873
1874 // And remove the function we don't need either too.
1875 F->eraseFromParent();
1876 }
1877 }
1878
1879 return Changed;
1880}
1881
1882bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1883 bool Changed = false;
1884
1885 const std::map<const char *,
1886 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1887 Map = {
1888 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1889 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1890 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1891 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1892 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1893 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1894 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1895 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1896 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1897 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1898 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1899 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1900 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1901 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1902 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1903 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1904 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1905 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1906 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1907 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1908 };
1909
1910 for (auto Pair : Map) {
1911 // If we find a function with the matching name.
1912 if (auto F = M.getFunction(Pair.first)) {
1913 SmallVector<Instruction *, 4> ToRemoves;
1914
1915 // Walk the users of the function.
1916 for (auto &U : F->uses()) {
1917 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1918 // The multiply instruction to use.
1919 auto MulInst = Pair.second.first;
1920
1921 // The add instruction to use.
1922 auto AddInst = Pair.second.second;
1923
1924 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1925
1926 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1927 CI->getArgOperand(1), "", CI);
1928
1929 if (Instruction::BinaryOpsEnd != AddInst) {
1930 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1931 CI);
1932 }
1933
1934 CI->replaceAllUsesWith(I);
1935
1936 // Lastly, remember to remove the user.
1937 ToRemoves.push_back(CI);
1938 }
1939 }
1940
1941 Changed = !ToRemoves.empty();
1942
1943 // And cleanup the calls we don't use anymore.
1944 for (auto V : ToRemoves) {
1945 V->eraseFromParent();
1946 }
1947
1948 // And remove the function we don't need either too.
1949 F->eraseFromParent();
1950 }
1951 }
1952
1953 return Changed;
1954}
1955
Derek Chowcfd368b2017-10-19 20:58:45 -07001956bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1957 bool Changed = false;
1958
alan-bakerf795f392019-06-11 18:24:34 -04001959 for (auto const &SymVal : M.getValueSymbolTable()) {
1960 if (!SymVal.getKey().contains("vstore"))
1961 continue;
1962 if (SymVal.getKey().contains("vstore_"))
1963 continue;
1964 if (SymVal.getKey().contains("vstorea"))
1965 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001966
alan-bakerf795f392019-06-11 18:24:34 -04001967 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001968 SmallVector<Instruction *, 4> ToRemoves;
1969
alan-bakerf795f392019-06-11 18:24:34 -04001970 auto fname = F->getName();
1971 if (!fname.consume_front("_Z"))
1972 continue;
1973 size_t name_len;
1974 if (fname.consumeInteger(10, name_len))
1975 continue;
1976 std::string name = fname.take_front(name_len);
1977
1978 bool ok = StringSwitch<bool>(name)
1979 .Case("vstore2", true)
1980 .Case("vstore3", true)
1981 .Case("vstore4", true)
1982 .Case("vstore8", true)
1983 .Case("vstore16", true)
1984 .Default(false);
1985 if (!ok)
1986 continue;
1987
Derek Chowcfd368b2017-10-19 20:58:45 -07001988 for (auto &U : F->uses()) {
1989 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04001990 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001991
alan-bakerf795f392019-06-11 18:24:34 -04001992 auto data_type = data->getType();
1993 if (!data_type->isVectorTy())
1994 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001995
alan-bakerf795f392019-06-11 18:24:34 -04001996 auto elems = data_type->getVectorNumElements();
1997 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
1998 elems != 16)
1999 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002000
alan-bakerf795f392019-06-11 18:24:34 -04002001 auto offset = CI->getOperand(1);
2002 auto ptr = CI->getOperand(2);
2003 auto ptr_type = ptr->getType();
2004 auto pointee_type = ptr_type->getPointerElementType();
2005 if (pointee_type != data_type->getVectorElementType())
2006 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002007
alan-bakerf795f392019-06-11 18:24:34 -04002008 // Avoid pointer casts. Instead generate the correct number of stores
2009 // and rely on drivers to coalesce appropriately.
2010 IRBuilder<> builder(CI);
2011 auto elems_const = builder.getInt32(elems);
2012 auto adjust = builder.CreateMul(offset, elems_const);
2013 for (auto i = 0; i < elems; ++i) {
2014 auto idx = builder.getInt32(i);
2015 auto add = builder.CreateAdd(adjust, idx);
2016 auto gep = builder.CreateGEP(ptr, add);
2017 auto extract = builder.CreateExtractElement(data, i);
2018 auto store = builder.CreateStore(extract, gep);
2019 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002020
Derek Chowcfd368b2017-10-19 20:58:45 -07002021 ToRemoves.push_back(CI);
2022 }
2023 }
2024
2025 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002026 for (auto V : ToRemoves) {
2027 V->eraseFromParent();
2028 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002029 F->eraseFromParent();
2030 }
2031 }
2032
2033 return Changed;
2034}
2035
2036bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2037 bool Changed = false;
2038
alan-bakerf795f392019-06-11 18:24:34 -04002039 for (auto const &SymVal : M.getValueSymbolTable()) {
2040 if (!SymVal.getKey().contains("vload"))
2041 continue;
2042 if (SymVal.getKey().contains("vload_"))
2043 continue;
2044 if (SymVal.getKey().contains("vloada"))
2045 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002046
alan-bakerf795f392019-06-11 18:24:34 -04002047 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002048 SmallVector<Instruction *, 4> ToRemoves;
2049
alan-bakerf795f392019-06-11 18:24:34 -04002050 auto fname = F->getName();
2051 if (!fname.consume_front("_Z"))
2052 continue;
2053 size_t name_len;
2054 if (fname.consumeInteger(10, name_len))
2055 continue;
2056 std::string name = fname.take_front(name_len);
2057
2058 bool ok = StringSwitch<bool>(name)
2059 .Case("vload2", true)
2060 .Case("vload3", true)
2061 .Case("vload4", true)
2062 .Case("vload8", true)
2063 .Case("vload16", true)
2064 .Default(false);
2065 if (!ok)
2066 continue;
2067
Derek Chowcfd368b2017-10-19 20:58:45 -07002068 for (auto &U : F->uses()) {
2069 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002070 auto ret_type = F->getReturnType();
2071 if (!ret_type->isVectorTy())
2072 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002073
alan-bakerf795f392019-06-11 18:24:34 -04002074 auto elems = ret_type->getVectorNumElements();
2075 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2076 elems != 16)
2077 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002078
alan-bakerf795f392019-06-11 18:24:34 -04002079 auto offset = CI->getOperand(0);
2080 auto ptr = CI->getOperand(1);
2081 auto ptr_type = ptr->getType();
2082 auto pointee_type = ptr_type->getPointerElementType();
2083 if (pointee_type != ret_type->getVectorElementType())
2084 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002085
alan-bakerf795f392019-06-11 18:24:34 -04002086 // Avoid pointer casts. Instead generate the correct number of loads
2087 // and rely on drivers to coalesce appropriately.
2088 IRBuilder<> builder(CI);
2089 auto elems_const = builder.getInt32(elems);
2090 Value *insert = UndefValue::get(ret_type);
2091 auto adjust = builder.CreateMul(offset, elems_const);
2092 for (auto i = 0; i < elems; ++i) {
2093 auto idx = builder.getInt32(i);
2094 auto add = builder.CreateAdd(adjust, idx);
2095 auto gep = builder.CreateGEP(ptr, add);
2096 auto load = builder.CreateLoad(gep);
2097 insert = builder.CreateInsertElement(insert, load, i);
2098 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002099
alan-bakerf795f392019-06-11 18:24:34 -04002100 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002101 ToRemoves.push_back(CI);
2102 }
2103 }
2104
2105 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002106 for (auto V : ToRemoves) {
2107 V->eraseFromParent();
2108 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002109 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002110 }
2111 }
2112
2113 return Changed;
2114}
2115
David Neto22f144c2017-06-12 14:26:21 -04002116bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2117 bool Changed = false;
2118
2119 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2120 "_Z10vload_halfjPU3AS2KDh"};
2121
2122 for (auto Name : Map) {
2123 // If we find a function with the matching name.
2124 if (auto F = M.getFunction(Name)) {
2125 SmallVector<Instruction *, 4> ToRemoves;
2126
2127 // Walk the users of the function.
2128 for (auto &U : F->uses()) {
2129 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2130 // The index argument from vload_half.
2131 auto Arg0 = CI->getOperand(0);
2132
2133 // The pointer argument from vload_half.
2134 auto Arg1 = CI->getOperand(1);
2135
David Neto22f144c2017-06-12 14:26:21 -04002136 auto IntTy = Type::getInt32Ty(M.getContext());
2137 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002138 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2139
David Neto22f144c2017-06-12 14:26:21 -04002140 // Our intrinsic to unpack a float2 from an int.
2141 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2142
2143 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2144
David Neto482550a2018-03-24 05:21:07 -07002145 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002146 auto ShortTy = Type::getInt16Ty(M.getContext());
2147 auto ShortPointerTy = PointerType::get(
2148 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002149
David Netoac825b82017-05-30 12:49:01 -04002150 // Cast the half* pointer to short*.
2151 auto Cast =
2152 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002153
David Netoac825b82017-05-30 12:49:01 -04002154 // Index into the correct address of the casted pointer.
2155 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2156
2157 // Load from the short* we casted to.
2158 auto Load = new LoadInst(Index, "", CI);
2159
2160 // ZExt the short -> int.
2161 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2162
2163 // Get our float2.
2164 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2165
2166 // Extract out the bottom element which is our float result.
2167 auto Extract = ExtractElementInst::Create(
2168 Call, ConstantInt::get(IntTy, 0), "", CI);
2169
2170 CI->replaceAllUsesWith(Extract);
2171 } else {
2172 // Assume the pointer argument points to storage aligned to 32bits
2173 // or more.
2174 // TODO(dneto): Do more analysis to make sure this is true?
2175 //
2176 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2177 // with:
2178 //
2179 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2180 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2181 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2182 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2183 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2184 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2185 // x float> %converted, %index_is_odd32
2186
2187 auto IntPointerTy = PointerType::get(
2188 IntTy, Arg1->getType()->getPointerAddressSpace());
2189
David Neto973e6a82017-05-30 13:48:18 -04002190 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002191 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002192 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002193 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2194
2195 auto One = ConstantInt::get(IntTy, 1);
2196 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2197 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2198
2199 // Index into the correct address of the casted pointer.
2200 auto Ptr =
2201 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2202
2203 // Load from the int* we casted to.
2204 auto Load = new LoadInst(Ptr, "", CI);
2205
2206 // Get our float2.
2207 auto Call = CallInst::Create(NewF, Load, "", CI);
2208
2209 // Extract out the float result, where the element number is
2210 // determined by whether the original index was even or odd.
2211 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2212
2213 CI->replaceAllUsesWith(Extract);
2214 }
David Neto22f144c2017-06-12 14:26:21 -04002215
2216 // Lastly, remember to remove the user.
2217 ToRemoves.push_back(CI);
2218 }
2219 }
2220
2221 Changed = !ToRemoves.empty();
2222
2223 // And cleanup the calls we don't use anymore.
2224 for (auto V : ToRemoves) {
2225 V->eraseFromParent();
2226 }
2227
2228 // And remove the function we don't need either too.
2229 F->eraseFromParent();
2230 }
2231 }
2232
2233 return Changed;
2234}
2235
2236bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002237
Kévin Petite8edce32019-04-10 14:23:32 +01002238 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002239 "_Z11vload_half2jPU3AS1KDh",
2240 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2241 "_Z11vload_half2jPU3AS2KDh",
2242 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2243 };
David Neto22f144c2017-06-12 14:26:21 -04002244
Kévin Petite8edce32019-04-10 14:23:32 +01002245 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2246 // The index argument from vload_half.
2247 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002248
Kévin Petite8edce32019-04-10 14:23:32 +01002249 // The pointer argument from vload_half.
2250 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002251
Kévin Petite8edce32019-04-10 14:23:32 +01002252 auto IntTy = Type::getInt32Ty(M.getContext());
2253 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002254 auto NewPointerTy =
2255 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002256 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002257
Kévin Petite8edce32019-04-10 14:23:32 +01002258 // Cast the half* pointer to int*.
2259 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002260
Kévin Petite8edce32019-04-10 14:23:32 +01002261 // Index into the correct address of the casted pointer.
2262 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002263
Kévin Petite8edce32019-04-10 14:23:32 +01002264 // Load from the int* we casted to.
2265 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002266
Kévin Petite8edce32019-04-10 14:23:32 +01002267 // Our intrinsic to unpack a float2 from an int.
2268 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002269
Kévin Petite8edce32019-04-10 14:23:32 +01002270 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 // Get our float2.
2273 return CallInst::Create(NewF, Load, "", CI);
2274 });
David Neto22f144c2017-06-12 14:26:21 -04002275}
2276
2277bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002278
Kévin Petite8edce32019-04-10 14:23:32 +01002279 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002280 "_Z11vload_half4jPU3AS1KDh",
2281 "_Z12vloada_half4jPU3AS1KDh",
2282 "_Z11vload_half4jPU3AS2KDh",
2283 "_Z12vloada_half4jPU3AS2KDh",
2284 };
David Neto22f144c2017-06-12 14:26:21 -04002285
Kévin Petite8edce32019-04-10 14:23:32 +01002286 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2287 // The index argument from vload_half.
2288 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002289
Kévin Petite8edce32019-04-10 14:23:32 +01002290 // The pointer argument from vload_half.
2291 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002292
Kévin Petite8edce32019-04-10 14:23:32 +01002293 auto IntTy = Type::getInt32Ty(M.getContext());
2294 auto Int2Ty = VectorType::get(IntTy, 2);
2295 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002296 auto NewPointerTy =
2297 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002298 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002299
Kévin Petite8edce32019-04-10 14:23:32 +01002300 // Cast the half* pointer to int2*.
2301 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002302
Kévin Petite8edce32019-04-10 14:23:32 +01002303 // Index into the correct address of the casted pointer.
2304 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002305
Kévin Petite8edce32019-04-10 14:23:32 +01002306 // Load from the int2* we casted to.
2307 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002308
Kévin Petite8edce32019-04-10 14:23:32 +01002309 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002310 auto X =
2311 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2312 auto Y =
2313 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002314
Kévin Petite8edce32019-04-10 14:23:32 +01002315 // Our intrinsic to unpack a float2 from an int.
2316 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002317
Kévin Petite8edce32019-04-10 14:23:32 +01002318 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 // Get the lower (x & y) components of our final float4.
2321 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 // Get the higher (z & w) components of our final float4.
2324 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 Constant *ShuffleMask[4] = {
2327 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2328 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002329
Kévin Petite8edce32019-04-10 14:23:32 +01002330 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002331 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2332 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002333 });
David Neto22f144c2017-06-12 14:26:21 -04002334}
2335
David Neto6ad93232018-06-07 15:42:58 -07002336bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002337
2338 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2339 //
2340 // %u = load i32 %ptr
2341 // %fxy = call <2 x float> Unpack2xHalf(u)
2342 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002343 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002344 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2345 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2346 "_Z20__clspv_vloada_half2jPKj", // private
2347 };
2348
Kévin Petite8edce32019-04-10 14:23:32 +01002349 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2350 auto Index = CI->getOperand(0);
2351 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002352
Kévin Petite8edce32019-04-10 14:23:32 +01002353 auto IntTy = Type::getInt32Ty(M.getContext());
2354 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2355 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002356
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002357 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002358 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002359
Kévin Petite8edce32019-04-10 14:23:32 +01002360 // Our intrinsic to unpack a float2 from an int.
2361 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002364
Kévin Petite8edce32019-04-10 14:23:32 +01002365 // Get our final float2.
2366 return CallInst::Create(NewF, Load, "", CI);
2367 });
David Neto6ad93232018-06-07 15:42:58 -07002368}
2369
2370bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002371
2372 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2373 //
2374 // %u2 = load <2 x i32> %ptr
2375 // %u2xy = extractelement %u2, 0
2376 // %u2zw = extractelement %u2, 1
2377 // %fxy = call <2 x float> Unpack2xHalf(uint)
2378 // %fzw = call <2 x float> Unpack2xHalf(uint)
2379 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002380 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002381 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2382 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2383 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2384 };
2385
Kévin Petite8edce32019-04-10 14:23:32 +01002386 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2387 auto Index = CI->getOperand(0);
2388 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002389
Kévin Petite8edce32019-04-10 14:23:32 +01002390 auto IntTy = Type::getInt32Ty(M.getContext());
2391 auto Int2Ty = VectorType::get(IntTy, 2);
2392 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2393 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002394
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002395 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002396 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002397
Kévin Petite8edce32019-04-10 14:23:32 +01002398 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002399 auto X =
2400 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2401 auto Y =
2402 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002403
Kévin Petite8edce32019-04-10 14:23:32 +01002404 // Our intrinsic to unpack a float2 from an int.
2405 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002406
Kévin Petite8edce32019-04-10 14:23:32 +01002407 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002408
Kévin Petite8edce32019-04-10 14:23:32 +01002409 // Get the lower (x & y) components of our final float4.
2410 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002411
Kévin Petite8edce32019-04-10 14:23:32 +01002412 // Get the higher (z & w) components of our final float4.
2413 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002414
Kévin Petite8edce32019-04-10 14:23:32 +01002415 Constant *ShuffleMask[4] = {
2416 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2417 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002418
Kévin Petite8edce32019-04-10 14:23:32 +01002419 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002420 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2421 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002422 });
David Neto6ad93232018-06-07 15:42:58 -07002423}
2424
David Neto22f144c2017-06-12 14:26:21 -04002425bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002426
Kévin Petite8edce32019-04-10 14:23:32 +01002427 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2428 "_Z15vstore_half_rtefjPU3AS1Dh",
2429 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002430
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002431 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002432 // The value to store.
2433 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002434
Kévin Petite8edce32019-04-10 14:23:32 +01002435 // The index argument from vstore_half.
2436 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002437
Kévin Petite8edce32019-04-10 14:23:32 +01002438 // The pointer argument from vstore_half.
2439 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002440
Kévin Petite8edce32019-04-10 14:23:32 +01002441 auto IntTy = Type::getInt32Ty(M.getContext());
2442 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2443 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2444 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002445
Kévin Petite8edce32019-04-10 14:23:32 +01002446 // Our intrinsic to pack a float2 to an int.
2447 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002450
Kévin Petite8edce32019-04-10 14:23:32 +01002451 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002452 auto TempVec = InsertElementInst::Create(
2453 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002454
Kévin Petite8edce32019-04-10 14:23:32 +01002455 // Pack the float2 -> half2 (in an int).
2456 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002457
Kévin Petite8edce32019-04-10 14:23:32 +01002458 Value *Ret;
2459 if (clspv::Option::F16BitStorage()) {
2460 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002461 auto ShortPointerTy =
2462 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002463
Kévin Petite8edce32019-04-10 14:23:32 +01002464 // Truncate our i32 to an i16.
2465 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002466
Kévin Petite8edce32019-04-10 14:23:32 +01002467 // Cast the half* pointer to short*.
2468 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002469
Kévin Petite8edce32019-04-10 14:23:32 +01002470 // Index into the correct address of the casted pointer.
2471 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002472
Kévin Petite8edce32019-04-10 14:23:32 +01002473 // Store to the int* we casted to.
2474 Ret = new StoreInst(Trunc, Index, CI);
2475 } else {
2476 // We can only write to 32-bit aligned words.
2477 //
2478 // Assuming base is aligned to 32-bits, replace the equivalent of
2479 // vstore_half(value, index, base)
2480 // with:
2481 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2482 // uint32_t write_to_upper_half = index & 1u;
2483 // uint32_t shift = write_to_upper_half << 4;
2484 //
2485 // // Pack the float value as a half number in bottom 16 bits
2486 // // of an i32.
2487 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2488 //
2489 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2490 // ^ ((packed & 0xffff) << shift)
2491 // // We only need relaxed consistency, but OpenCL 1.2 only has
2492 // // sequentially consistent atomics.
2493 // // TODO(dneto): Use relaxed consistency.
2494 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002495 auto IntPointerTy =
2496 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002497
Kévin Petite8edce32019-04-10 14:23:32 +01002498 auto Four = ConstantInt::get(IntTy, 4);
2499 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002500
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002501 auto IndexIsOdd =
2502 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002503 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002504 auto IndexIntoI32 =
2505 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2506 auto BaseI32Ptr =
2507 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2508 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2509 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002510 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2511 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002512 auto MaskBitsToWrite =
2513 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2514 auto MaskedCurrent = BinaryOperator::CreateAnd(
2515 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002516
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002517 auto XLowerBits =
2518 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2519 auto NewBitsToWrite =
2520 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2521 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2522 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002523
Kévin Petite8edce32019-04-10 14:23:32 +01002524 // Generate the call to atomi_xor.
2525 SmallVector<Type *, 5> ParamTypes;
2526 // The pointer type.
2527 ParamTypes.push_back(IntPointerTy);
2528 // The Types for memory scope, semantics, and value.
2529 ParamTypes.push_back(IntTy);
2530 ParamTypes.push_back(IntTy);
2531 ParamTypes.push_back(IntTy);
2532 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2533 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002534
Kévin Petite8edce32019-04-10 14:23:32 +01002535 const auto ConstantScopeDevice =
2536 ConstantInt::get(IntTy, spv::ScopeDevice);
2537 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2538 // (SPIR-V Workgroup).
2539 const auto AddrSpaceSemanticsBits =
2540 IntPointerTy->getPointerAddressSpace() == 1
2541 ? spv::MemorySemanticsUniformMemoryMask
2542 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002543
Kévin Petite8edce32019-04-10 14:23:32 +01002544 // We're using relaxed consistency here.
2545 const auto ConstantMemorySemantics =
2546 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2547 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002548
Kévin Petite8edce32019-04-10 14:23:32 +01002549 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2550 ConstantMemorySemantics, ValueToXor};
2551 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2552 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002553 }
David Neto22f144c2017-06-12 14:26:21 -04002554
Kévin Petite8edce32019-04-10 14:23:32 +01002555 return Ret;
2556 });
David Neto22f144c2017-06-12 14:26:21 -04002557}
2558
2559bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002560
Kévin Petite8edce32019-04-10 14:23:32 +01002561 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002562 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2563 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2564 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2565 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2566 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2567 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2568 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2569 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2570 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2571 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2572 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2573 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2574 };
David Neto22f144c2017-06-12 14:26:21 -04002575
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002576 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002577 // The value to store.
2578 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002579
Kévin Petite8edce32019-04-10 14:23:32 +01002580 // The index argument from vstore_half.
2581 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002582
Kévin Petite8edce32019-04-10 14:23:32 +01002583 // The pointer argument from vstore_half.
2584 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002585
Kévin Petite8edce32019-04-10 14:23:32 +01002586 auto IntTy = Type::getInt32Ty(M.getContext());
2587 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002588 auto NewPointerTy =
2589 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002590 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002591
Kévin Petite8edce32019-04-10 14:23:32 +01002592 // Our intrinsic to pack a float2 to an int.
2593 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002594
Kévin Petite8edce32019-04-10 14:23:32 +01002595 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002596
Kévin Petite8edce32019-04-10 14:23:32 +01002597 // Turn the packed x & y into the final packing.
2598 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002599
Kévin Petite8edce32019-04-10 14:23:32 +01002600 // Cast the half* pointer to int*.
2601 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002602
Kévin Petite8edce32019-04-10 14:23:32 +01002603 // Index into the correct address of the casted pointer.
2604 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002605
Kévin Petite8edce32019-04-10 14:23:32 +01002606 // Store to the int* we casted to.
2607 return new StoreInst(X, Index, CI);
2608 });
David Neto22f144c2017-06-12 14:26:21 -04002609}
2610
2611bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002612
Kévin Petite8edce32019-04-10 14:23:32 +01002613 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002614 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2615 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2616 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2617 "_Z13vstorea_half4Dv4_fjPDh", // private
2618 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2619 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2620 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2621 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2622 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2623 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2624 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2625 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2626 };
David Neto22f144c2017-06-12 14:26:21 -04002627
Kévin Petite8edce32019-04-10 14:23:32 +01002628 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2629 // The value to store.
2630 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002631
Kévin Petite8edce32019-04-10 14:23:32 +01002632 // The index argument from vstore_half.
2633 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002634
Kévin Petite8edce32019-04-10 14:23:32 +01002635 // The pointer argument from vstore_half.
2636 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002637
Kévin Petite8edce32019-04-10 14:23:32 +01002638 auto IntTy = Type::getInt32Ty(M.getContext());
2639 auto Int2Ty = VectorType::get(IntTy, 2);
2640 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002641 auto NewPointerTy =
2642 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002643 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002644
Kévin Petite8edce32019-04-10 14:23:32 +01002645 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2646 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002647
Kévin Petite8edce32019-04-10 14:23:32 +01002648 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002649 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2650 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002651
Kévin Petite8edce32019-04-10 14:23:32 +01002652 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2653 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002654
Kévin Petite8edce32019-04-10 14:23:32 +01002655 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002656 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2657 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002658
Kévin Petite8edce32019-04-10 14:23:32 +01002659 // Our intrinsic to pack a float2 to an int.
2660 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002661
Kévin Petite8edce32019-04-10 14:23:32 +01002662 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002663
Kévin Petite8edce32019-04-10 14:23:32 +01002664 // Turn the packed x & y into the final component of our int2.
2665 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002666
Kévin Petite8edce32019-04-10 14:23:32 +01002667 // Turn the packed z & w into the final component of our int2.
2668 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002669
Kévin Petite8edce32019-04-10 14:23:32 +01002670 auto Combine = InsertElementInst::Create(
2671 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002672 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2673 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002674
Kévin Petite8edce32019-04-10 14:23:32 +01002675 // Cast the half* pointer to int2*.
2676 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002677
Kévin Petite8edce32019-04-10 14:23:32 +01002678 // Index into the correct address of the casted pointer.
2679 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002680
Kévin Petite8edce32019-04-10 14:23:32 +01002681 // Store to the int2* we casted to.
2682 return new StoreInst(Combine, Index, CI);
2683 });
David Neto22f144c2017-06-12 14:26:21 -04002684}
2685
Kévin Petit06517a12019-12-09 19:40:31 +00002686bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002687 bool Changed = false;
2688
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002689 const std::map<const char *, const char *> Map = {
alan-bakerf906d2b2019-12-10 11:26:23 -05002690 // 1D
2691 {"_Z11read_imagei14ocl_image1d_ro11ocl_sampleri",
2692 "_Z11read_imagei14ocl_image1d_ro11ocl_samplerf"},
2693 {"_Z12read_imageui14ocl_image1d_ro11ocl_sampleri",
2694 "_Z12read_imageui14ocl_image1d_ro11ocl_samplerf"},
2695 {"_Z11read_imagef14ocl_image1d_ro11ocl_sampleri",
2696 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2697 // TODO 1Darray
Kévin Petit06517a12019-12-09 19:40:31 +00002698 // 2D
2699 {"_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i",
2700 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f"},
2701 {"_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i",
2702 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_f"},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002703 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2704 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002705 // TODO 2D array
2706 // 3D
2707 {"_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i",
2708 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_f"},
2709 {"_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i",
2710 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_f"},
2711 {"_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i",
2712 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002713
2714 for (auto Pair : Map) {
2715 // If we find a function with the matching name.
2716 if (auto F = M.getFunction(Pair.first)) {
2717 SmallVector<Instruction *, 4> ToRemoves;
2718
2719 // Walk the users of the function.
2720 for (auto &U : F->uses()) {
2721 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2722 // The image.
2723 auto Arg0 = CI->getOperand(0);
2724
2725 // The sampler.
2726 auto Arg1 = CI->getOperand(1);
2727
2728 // The coordinate (integer type that we can't handle).
2729 auto Arg2 = CI->getOperand(2);
2730
alan-bakerf906d2b2019-12-10 11:26:23 -05002731 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2732 // TODO(alan-baker): when arrayed images are supported fix component
2733 // calculation.
2734 uint32_t components = dim;
2735 Type *float_ty = nullptr;
2736 if (components == 1) {
2737 float_ty = Type::getFloatTy(M.getContext());
2738 } else {
2739 float_ty = VectorType::get(Type::getFloatTy(M.getContext()),
2740 Arg2->getType()->getVectorNumElements());
2741 }
David Neto22f144c2017-06-12 14:26:21 -04002742
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002743 auto NewFType = FunctionType::get(
alan-bakerf906d2b2019-12-10 11:26:23 -05002744 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002745 false);
David Neto22f144c2017-06-12 14:26:21 -04002746
2747 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2748
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002749 auto Cast =
alan-bakerf906d2b2019-12-10 11:26:23 -05002750 CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002751
2752 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2753
2754 CI->replaceAllUsesWith(NewCI);
2755
2756 // Lastly, remember to remove the user.
2757 ToRemoves.push_back(CI);
2758 }
2759 }
2760
2761 Changed = !ToRemoves.empty();
2762
2763 // And cleanup the calls we don't use anymore.
2764 for (auto V : ToRemoves) {
2765 V->eraseFromParent();
2766 }
2767
2768 // And remove the function we don't need either too.
2769 F->eraseFromParent();
2770 }
2771 }
2772
2773 return Changed;
2774}
2775
2776bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2777 bool Changed = false;
2778
Kévin Petit9b340262019-06-19 18:31:11 +01002779 const std::map<const char *, spv::Op> Map = {
2780 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
2781 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
2782 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
2783 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
2784 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
2785 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
2786 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
2787 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
2788 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2789 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2790 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2791 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
2792 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
2793 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
2794 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
2795 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
2796 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
2797 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
2798 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
2799 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
2800 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2801 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2802 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2803 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04002804
2805 for (auto Pair : Map) {
2806 // If we find a function with the matching name.
2807 if (auto F = M.getFunction(Pair.first)) {
2808 SmallVector<Instruction *, 4> ToRemoves;
2809
2810 // Walk the users of the function.
2811 for (auto &U : F->uses()) {
2812 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04002813
2814 auto IntTy = Type::getInt32Ty(M.getContext());
2815
David Neto22f144c2017-06-12 14:26:21 -04002816 // We need to map the OpenCL constants to the SPIR-V equivalents.
2817 const auto ConstantScopeDevice =
2818 ConstantInt::get(IntTy, spv::ScopeDevice);
2819 const auto ConstantMemorySemantics = ConstantInt::get(
2820 IntTy, spv::MemorySemanticsUniformMemoryMask |
2821 spv::MemorySemanticsSequentiallyConsistentMask);
2822
2823 SmallVector<Value *, 5> Params;
2824
2825 // The pointer.
2826 Params.push_back(CI->getArgOperand(0));
2827
2828 // The memory scope.
2829 Params.push_back(ConstantScopeDevice);
2830
2831 // The memory semantics.
2832 Params.push_back(ConstantMemorySemantics);
2833
2834 if (2 < CI->getNumArgOperands()) {
2835 // The unequal memory semantics.
2836 Params.push_back(ConstantMemorySemantics);
2837
2838 // The value.
2839 Params.push_back(CI->getArgOperand(2));
2840
2841 // The comparator.
2842 Params.push_back(CI->getArgOperand(1));
2843 } else if (1 < CI->getNumArgOperands()) {
2844 // The value.
2845 Params.push_back(CI->getArgOperand(1));
2846 }
2847
Kévin Petit9b340262019-06-19 18:31:11 +01002848 auto NewCI =
2849 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04002850
2851 CI->replaceAllUsesWith(NewCI);
2852
2853 // Lastly, remember to remove the user.
2854 ToRemoves.push_back(CI);
2855 }
2856 }
2857
2858 Changed = !ToRemoves.empty();
2859
2860 // And cleanup the calls we don't use anymore.
2861 for (auto V : ToRemoves) {
2862 V->eraseFromParent();
2863 }
2864
2865 // And remove the function we don't need either too.
2866 F->eraseFromParent();
2867 }
2868 }
2869
Neil Henning39672102017-09-29 14:33:13 +01002870 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002871 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002872 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002873 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002874 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002875 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002876 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002877 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002878 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002879 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002880 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002881 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002882 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002883 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002884 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002885 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002886 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002887 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002888 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002889 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002890 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002891 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002892 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002893 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002894 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002895 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002896 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002897 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002898 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002899 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002900 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002901 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002902 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002903 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002904 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002905 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002906 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002907 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002908 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002909 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002910 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002911 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002912 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002913 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002914 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002915 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002916 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002917 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002918 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002919 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002920 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002921 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002922 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002923 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002924 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002925 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002926 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002927 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002928 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002929 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002930 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002931 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002932 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2933 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2934 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002935
2936 for (auto Pair : Map2) {
2937 // If we find a function with the matching name.
2938 if (auto F = M.getFunction(Pair.first)) {
2939 SmallVector<Instruction *, 4> ToRemoves;
2940
2941 // Walk the users of the function.
2942 for (auto &U : F->uses()) {
2943 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2944 auto AtomicOp = new AtomicRMWInst(
2945 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2946 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2947
2948 CI->replaceAllUsesWith(AtomicOp);
2949
2950 // Lastly, remember to remove the user.
2951 ToRemoves.push_back(CI);
2952 }
2953 }
2954
2955 Changed = !ToRemoves.empty();
2956
2957 // And cleanup the calls we don't use anymore.
2958 for (auto V : ToRemoves) {
2959 V->eraseFromParent();
2960 }
2961
2962 // And remove the function we don't need either too.
2963 F->eraseFromParent();
2964 }
2965 }
2966
David Neto22f144c2017-06-12 14:26:21 -04002967 return Changed;
2968}
2969
2970bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002971
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002972 std::vector<const char *> Names = {
2973 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002974 };
2975
2976 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002977 auto IntTy = Type::getInt32Ty(M.getContext());
2978 auto FloatTy = Type::getFloatTy(M.getContext());
2979
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002980 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2981 ConstantInt::get(IntTy, 1),
2982 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002983
2984 Constant *UpShuffleMask[4] = {
2985 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2986 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2987
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002988 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2989 UndefValue::get(FloatTy),
2990 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002991
Kévin Petite8edce32019-04-10 14:23:32 +01002992 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002993 auto Arg0 =
2994 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2995 ConstantVector::get(DownShuffleMask), "", CI);
2996 auto Arg1 =
2997 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2998 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002999 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003000
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003001 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04003002
Kévin Petite8edce32019-04-10 14:23:32 +01003003 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003004
Kévin Petite8edce32019-04-10 14:23:32 +01003005 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003006
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003007 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3008 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003009 });
David Neto22f144c2017-06-12 14:26:21 -04003010}
David Neto62653202017-10-16 19:05:18 -04003011
3012bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3013 bool Changed = false;
3014
3015 // OpenCL's float result = fract(float x, float* ptr)
3016 //
3017 // In the LLVM domain:
3018 //
3019 // %floor_result = call spir_func float @floor(float %x)
3020 // store float %floor_result, float * %ptr
3021 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3022 // %result = call spir_func float
3023 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3024 //
3025 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3026 // and clspv.fract occur in the SPIR-V generator pass:
3027 //
3028 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3029 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3030 // ...
3031 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3032 // OpStore %ptr %floor_result
3033 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3034 // %fract_result = OpExtInst %float
3035 // %glsl_ext Fmin %fract_intermediate %just_under_1
3036
David Neto62653202017-10-16 19:05:18 -04003037 using std::string;
3038
3039 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3040 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003041 using QuadType =
3042 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003043 auto make_quad = [](const char *a, const char *b, const char *c,
3044 const char *d) {
3045 return std::tuple<const char *, const char *, const char *, const char *>(
3046 a, b, c, d);
3047 };
3048 const std::vector<QuadType> Functions = {
3049 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003050 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3051 "clspv.fract.v2f"),
3052 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3053 "clspv.fract.v3f"),
3054 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3055 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003056 };
3057
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003058 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003059 const StringRef fract_name(std::get<0>(quad));
3060
3061 // If we find a function with the matching name.
3062 if (auto F = M.getFunction(fract_name)) {
3063 if (F->use_begin() == F->use_end())
3064 continue;
3065
3066 // We have some uses.
3067 Changed = true;
3068
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003069 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003070
3071 const StringRef floor_name(std::get<1>(quad));
3072 const StringRef fmin_name(std::get<2>(quad));
3073 const StringRef clspv_fract_name(std::get<3>(quad));
3074
3075 // This is either float or a float vector. All the float-like
3076 // types are this type.
3077 auto result_ty = F->getReturnType();
3078
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003079 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003080 if (!fmin_fn) {
3081 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003082 FunctionType *fn_ty =
3083 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003084 fmin_fn =
3085 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003086 fmin_fn->addFnAttr(Attribute::ReadNone);
3087 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3088 }
3089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003090 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003091 if (!floor_fn) {
3092 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003093 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003094 floor_fn = cast<Function>(
3095 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003096 floor_fn->addFnAttr(Attribute::ReadNone);
3097 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3098 }
3099
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003100 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003101 if (!clspv_fract_fn) {
3102 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003103 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003104 clspv_fract_fn = cast<Function>(
3105 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003106 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3107 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3108 }
3109
3110 // Number of significant significand bits, whether represented or not.
3111 unsigned num_significand_bits;
3112 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003113 case Type::HalfTyID:
3114 num_significand_bits = 11;
3115 break;
3116 case Type::FloatTyID:
3117 num_significand_bits = 24;
3118 break;
3119 case Type::DoubleTyID:
3120 num_significand_bits = 53;
3121 break;
3122 default:
3123 assert(false && "Unhandled float type when processing fract builtin");
3124 break;
David Neto62653202017-10-16 19:05:18 -04003125 }
3126 // Beware that the disassembler displays this value as
3127 // OpConstant %float 1
3128 // which is not quite right.
3129 const double kJustUnderOneScalar =
3130 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3131
3132 Constant *just_under_one =
3133 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3134 if (result_ty->isVectorTy()) {
3135 just_under_one = ConstantVector::getSplat(
3136 result_ty->getVectorNumElements(), just_under_one);
3137 }
3138
3139 IRBuilder<> Builder(Context);
3140
3141 SmallVector<Instruction *, 4> ToRemoves;
3142
3143 // Walk the users of the function.
3144 for (auto &U : F->uses()) {
3145 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3146
3147 Builder.SetInsertPoint(CI);
3148 auto arg = CI->getArgOperand(0);
3149 auto ptr = CI->getArgOperand(1);
3150
3151 // Compute floor result and store it.
3152 auto floor = Builder.CreateCall(floor_fn, {arg});
3153 Builder.CreateStore(floor, ptr);
3154
3155 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003156 auto fract_result =
3157 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003158
3159 CI->replaceAllUsesWith(fract_result);
3160
3161 // Lastly, remember to remove the user.
3162 ToRemoves.push_back(CI);
3163 }
3164 }
3165
3166 // And cleanup the calls we don't use anymore.
3167 for (auto V : ToRemoves) {
3168 V->eraseFromParent();
3169 }
3170
3171 // And remove the function we don't need either too.
3172 F->eraseFromParent();
3173 }
3174 }
3175
3176 return Changed;
3177}