blob: 0ea1dc6bf73e6f5ac3593b397fb877866fcd367a [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
alan-baker931d18a2019-12-12 08:21:32 -050035#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040036#include "Passes.h"
37#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050038#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039
David Neto22f144c2017-06-12 14:26:21 -040040using namespace llvm;
41
42#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
43
44namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000045
// What the mangled name of a builtin tells us about one of its arguments.
struct ArgTypeInfo {
  // Signedness of the argument's scalar element type. The name parser in this
  // file uses None for floating-point arguments.
  enum class SignedNess { None, Unsigned, Signed };

  // Defaults to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value.
  SignedNess signedness = SignedNess::None;
};
50
51struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000052 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000053 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000054
Kévin Petit91bc72e2019-04-08 15:17:46 +010055 bool isArgSigned(size_t arg) const {
56 assert(argTypeInfos.size() > arg);
57 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000058 }
59
Kévin Petit91bc72e2019-04-08 15:17:46 +010060 static FunctionInfo getFromMangledName(StringRef name) {
61 FunctionInfo fi;
62 if (!getFromMangledNameCheck(name, &fi)) {
63 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000064 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 return fi;
66 }
Kévin Petit8a560882019-03-21 15:24:34 +000067
Kévin Petit91bc72e2019-04-08 15:17:46 +010068 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
69 if (!name.consume_front("_Z")) {
70 return false;
71 }
72 size_t nameLen;
73 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000074 return false;
75 }
76
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 finfo->name = name.take_front(nameLen);
78 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000079
Kévin Petit91bc72e2019-04-08 15:17:46 +010080 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000081
Kévin Petit91bc72e2019-04-08 15:17:46 +010082 while (name.size() != 0) {
83
84 ArgTypeInfo ti;
85
86 // Try parsing a vector prefix
87 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040088 int numElems;
89 if (name.consumeInteger(10, numElems)) {
90 return false;
91 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010092
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040093 if (!name.consume_front("_")) {
94 return false;
95 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010096 }
97
98 // Parse the base type
99 char typeCode = name.front();
100 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400101 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100102 case 'c': // char
103 case 'a': // signed char
104 case 's': // short
105 case 'i': // int
106 case 'l': // long
107 ti.signedness = ArgTypeInfo::SignedNess::Signed;
108 break;
109 case 'h': // unsigned char
110 case 't': // unsigned short
111 case 'j': // unsigned int
112 case 'm': // unsigned long
113 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
114 break;
115 case 'f':
116 ti.signedness = ArgTypeInfo::SignedNess::None;
117 break;
118 case 'S':
119 ti = prev_ti;
120 if (!name.consume_front("_")) {
121 return false;
122 }
123 break;
124 default:
125 return false;
126 }
127
128 finfo->argTypeInfos.push_back(ti);
129
130 prev_ti = ti;
131 }
132
133 return true;
134 };
Kévin Petit8a560882019-03-21 15:24:34 +0000135};
136
// Despite its name, this returns the zero-based index of the highest set bit
// of |v| (floor(log2(v))), and 0 when |v| is 0. Callers in this file subtract
// two results to obtain the shift distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t topBit = 0;

  // Binary search for the highest set bit: at each step, if anything is set
  // above the lower |width| bits, discard those lower bits and record the
  // distance moved.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      topBit |= width;
    }
  }

  return topBit;
}
156
157Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
158 if (1 == elements) {
159 return Type::getInt1Ty(C);
160 } else {
161 return VectorType::get(Type::getInt1Ty(C), elements);
162 }
163}
164
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100165Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
166 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
167 if (Ty->isVectorTy()) {
168 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
169 }
170 return IntTy;
171}
172
David Neto22f144c2017-06-12 14:26:21 -0400173struct ReplaceOpenCLBuiltinPass final : public ModulePass {
174 static char ID;
175 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
176
177 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000178 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100179 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100180 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400181 bool replaceRecip(Module &M);
182 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100183 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400184 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100185 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceLog10(Module &M);
187 bool replaceBarrier(Module &M);
188 bool replaceMemFence(Module &M);
189 bool replaceRelational(Module &M);
190 bool replaceIsInfAndIsNan(Module &M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100191 bool replaceIsFinite(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400192 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000193 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000194 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000195 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000196 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000197 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000198 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000199 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400200 bool replaceSignbit(Module &M);
201 bool replaceMadandMad24andMul24(Module &M);
202 bool replaceVloadHalf(Module &M);
203 bool replaceVloadHalf2(Module &M);
204 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700205 bool replaceClspvVloadaHalf2(Module &M);
206 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400207 bool replaceVstoreHalf(Module &M);
208 bool replaceVstoreHalf2(Module &M);
209 bool replaceVstoreHalf4(Module &M);
alan-baker931d18a2019-12-12 08:21:32 -0500210 bool replaceUnsampledReadImage(Module &M);
Kévin Petit06517a12019-12-09 19:40:31 +0000211 bool replaceSampledReadImageWithIntCoords(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400212 bool replaceAtomics(Module &M);
213 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400214 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700215 bool replaceVload(Module &M);
216 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400217};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100218} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400219
220char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400221INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
222 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400223
224namespace clspv {
225ModulePass *createReplaceOpenCLBuiltinPass() {
226 return new ReplaceOpenCLBuiltinPass();
227}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400228} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400229
230bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
231 bool Changed = false;
232
Kévin Petit2444e9b2018-11-09 14:14:37 +0000233 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100234 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100235 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400236 Changed |= replaceRecip(M);
237 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100238 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400239 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100240 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400241 Changed |= replaceLog10(M);
242 Changed |= replaceBarrier(M);
243 Changed |= replaceMemFence(M);
244 Changed |= replaceRelational(M);
245 Changed |= replaceIsInfAndIsNan(M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100246 Changed |= replaceIsFinite(M);
David Neto22f144c2017-06-12 14:26:21 -0400247 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000248 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000249 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000250 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000251 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000252 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000253 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000254 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400255 Changed |= replaceSignbit(M);
256 Changed |= replaceMadandMad24andMul24(M);
257 Changed |= replaceVloadHalf(M);
258 Changed |= replaceVloadHalf2(M);
259 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700260 Changed |= replaceClspvVloadaHalf2(M);
261 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400262 Changed |= replaceVstoreHalf(M);
263 Changed |= replaceVstoreHalf2(M);
264 Changed |= replaceVstoreHalf4(M);
alan-baker931d18a2019-12-12 08:21:32 -0500265 // Replace unsampled reads before converting sampled read coordinates.
266 Changed |= replaceUnsampledReadImage(M);
Kévin Petit06517a12019-12-09 19:40:31 +0000267 Changed |= replaceSampledReadImageWithIntCoords(M);
David Neto22f144c2017-06-12 14:26:21 -0400268 Changed |= replaceAtomics(M);
269 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400270 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700271 Changed |= replaceVload(M);
272 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400273
274 return Changed;
275}
276
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400277bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
278 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000279
Kévin Petite8edce32019-04-10 14:23:32 +0100280 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000281
282 for (auto Name : Names) {
283 // If we find a function with the matching name.
284 if (auto F = M.getFunction(Name)) {
285 SmallVector<Instruction *, 4> ToRemoves;
286
287 // Walk the users of the function.
288 for (auto &U : F->uses()) {
289 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000290
Kévin Petite8edce32019-04-10 14:23:32 +0100291 auto NewValue = Replacer(CI);
292
293 if (NewValue != nullptr) {
294 CI->replaceAllUsesWith(NewValue);
295 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000296
297 // Lastly, remember to remove the user.
298 ToRemoves.push_back(CI);
299 }
300 }
301
302 Changed = !ToRemoves.empty();
303
304 // And cleanup the calls we don't use anymore.
305 for (auto V : ToRemoves) {
306 V->eraseFromParent();
307 }
308
309 // And remove the function we don't need either too.
310 F->eraseFromParent();
311 }
312 }
313
314 return Changed;
315}
316
Kévin Petite8edce32019-04-10 14:23:32 +0100317bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100318
Kévin Petite8edce32019-04-10 14:23:32 +0100319 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400320 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
321 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
322 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
323 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100324 };
325
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400326 return replaceCallsWithValue(M, Names,
327 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100328}
329
330bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
331
332 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400333 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
334 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
335 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
336 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
337 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
338 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
339 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
340 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
341 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
342 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
343 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100344 };
345
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400346 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100347 auto XValue = CI->getOperand(0);
348 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100349
Kévin Petite8edce32019-04-10 14:23:32 +0100350 IRBuilder<> Builder(CI);
351 auto XmY = Builder.CreateSub(XValue, YValue);
352 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100353
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400354 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100355 auto F = CI->getCalledFunction();
356 auto finfo = FunctionInfo::getFromMangledName(F->getName());
357 if (finfo.isArgSigned(0)) {
358 Cmp = Builder.CreateICmpSGT(YValue, XValue);
359 } else {
360 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100361 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100362
Kévin Petite8edce32019-04-10 14:23:32 +0100363 return Builder.CreateSelect(Cmp, YmX, XmY);
364 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100365}
366
Kévin Petit8c1be282019-04-02 19:34:25 +0100367bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100368
Kévin Petite8edce32019-04-10 14:23:32 +0100369 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400370 "_Z8copysignff",
371 "_Z8copysignDv2_fS_",
372 "_Z8copysignDv3_fS_",
373 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100374 };
375
Kévin Petite8edce32019-04-10 14:23:32 +0100376 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
377 auto XValue = CI->getOperand(0);
378 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100379
Kévin Petite8edce32019-04-10 14:23:32 +0100380 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100381
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400382 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100383 if (Ty->isVectorTy()) {
384 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100385 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100386
Kévin Petite8edce32019-04-10 14:23:32 +0100387 // Return X with the sign of Y
388
389 // Sign bit masks
390 auto SignBit = IntTy->getScalarSizeInBits() - 1;
391 auto SignBitMask = 1 << SignBit;
392 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
393 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
394
395 IRBuilder<> Builder(CI);
396
397 // Extract sign of Y
398 auto YInt = Builder.CreateBitCast(YValue, IntTy);
399 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
400
401 // Clear sign bit in X
402 auto XInt = Builder.CreateBitCast(XValue, IntTy);
403 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
404
405 // Insert sign bit of Y into X
406 auto NewXInt = Builder.CreateOr(XInt, YSign);
407
408 // And cast back to floating-point
409 return Builder.CreateBitCast(NewXInt, Ty);
410 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100411}
412
David Neto22f144c2017-06-12 14:26:21 -0400413bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400414
Kévin Petite8edce32019-04-10 14:23:32 +0100415 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400416 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
417 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
418 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
419 };
420
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400421 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100422 // Recip has one arg.
423 auto Arg = CI->getOperand(0);
424 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
425 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
426 });
David Neto22f144c2017-06-12 14:26:21 -0400427}
428
429bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400430
Kévin Petite8edce32019-04-10 14:23:32 +0100431 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400432 "_Z11half_divideff", "_Z13native_divideff",
433 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
434 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
435 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
436 };
437
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400438 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100439 auto Op0 = CI->getOperand(0);
440 auto Op1 = CI->getOperand(1);
441 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
442 });
David Neto22f144c2017-06-12 14:26:21 -0400443}
444
Kévin Petit1329a002019-06-15 05:54:05 +0100445bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
446
447 std::vector<const char *> Names = {
448 "_Z3dotff",
449 "_Z3dotDv2_fS_",
450 "_Z3dotDv3_fS_",
451 "_Z3dotDv4_fS_",
452 };
453
454 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
455 auto Op0 = CI->getOperand(0);
456 auto Op1 = CI->getOperand(1);
457
458 Value *V;
459 if (Op0->getType()->isVectorTy()) {
460 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
461 CI->getType(), {Op0, Op1});
462 } else {
463 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
464 }
465
466 return V;
467 });
468}
469
David Neto22f144c2017-06-12 14:26:21 -0400470bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
471 bool Changed = false;
472
473 const std::map<const char *, const char *> Map = {
474 {"_Z5exp10f", "_Z3expf"},
475 {"_Z10half_exp10f", "_Z8half_expf"},
476 {"_Z12native_exp10f", "_Z10native_expf"},
477 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
478 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
479 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
480 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
481 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
482 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
483 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
484 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
485 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
486
487 for (auto Pair : Map) {
488 // If we find a function with the matching name.
489 if (auto F = M.getFunction(Pair.first)) {
490 SmallVector<Instruction *, 4> ToRemoves;
491
492 // Walk the users of the function.
493 for (auto &U : F->uses()) {
494 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
495 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
496
497 auto Arg = CI->getOperand(0);
498
499 // Constant of the natural log of 10 (ln(10)).
500 const double Ln10 =
501 2.302585092994045684017991454684364207601101488628772976033;
502
503 auto Mul = BinaryOperator::Create(
504 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
505 CI);
506
507 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
508
509 CI->replaceAllUsesWith(NewCI);
510
511 // Lastly, remember to remove the user.
512 ToRemoves.push_back(CI);
513 }
514 }
515
516 Changed = !ToRemoves.empty();
517
518 // And cleanup the calls we don't use anymore.
519 for (auto V : ToRemoves) {
520 V->eraseFromParent();
521 }
522
523 // And remove the function we don't need either too.
524 F->eraseFromParent();
525 }
526 }
527
528 return Changed;
529}
530
Kévin Petit0644a9c2019-06-20 21:08:46 +0100531bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
532
533 std::vector<const char *> Names = {
534 "_Z4fmodff",
535 "_Z4fmodDv2_fS_",
536 "_Z4fmodDv3_fS_",
537 "_Z4fmodDv4_fS_",
538 };
539
540 // OpenCL fmod(x,y) is x - y * trunc(x/y)
541 // The sign for a non-zero result is taken from x.
542 // (Try an example.)
543 // So translate to FRem
544 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
545 auto Op0 = CI->getOperand(0);
546 auto Op1 = CI->getOperand(1);
547 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
548 });
549}
550
David Neto22f144c2017-06-12 14:26:21 -0400551bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
552 bool Changed = false;
553
554 const std::map<const char *, const char *> Map = {
555 {"_Z5log10f", "_Z3logf"},
556 {"_Z10half_log10f", "_Z8half_logf"},
557 {"_Z12native_log10f", "_Z10native_logf"},
558 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
559 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
560 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
561 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
562 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
563 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
564 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
565 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
566 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
567
568 for (auto Pair : Map) {
569 // If we find a function with the matching name.
570 if (auto F = M.getFunction(Pair.first)) {
571 SmallVector<Instruction *, 4> ToRemoves;
572
573 // Walk the users of the function.
574 for (auto &U : F->uses()) {
575 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
576 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
577
578 auto Arg = CI->getOperand(0);
579
580 // Constant of the reciprocal of the natural log of 10 (ln(10)).
581 const double Ln10 =
582 0.434294481903251827651128918916605082294397005803666566114;
583
584 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
585
586 auto Mul = BinaryOperator::Create(
587 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
588 "", CI);
589
590 CI->replaceAllUsesWith(Mul);
591
592 // Lastly, remember to remove the user.
593 ToRemoves.push_back(CI);
594 }
595 }
596
597 Changed = !ToRemoves.empty();
598
599 // And cleanup the calls we don't use anymore.
600 for (auto V : ToRemoves) {
601 V->eraseFromParent();
602 }
603
604 // And remove the function we don't need either too.
605 F->eraseFromParent();
606 }
607 }
608
609 return Changed;
610}
611
612bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400613
614 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
615
Kévin Petitc4643922019-06-17 19:32:05 +0100616 const std::vector<const char *> Names = {
alan-bakerf3bce4a2019-06-28 16:01:15 -0400617 "_Z7barrierj",
Kévin Petitc4643922019-06-17 19:32:05 +0100618 };
David Neto22f144c2017-06-12 14:26:21 -0400619
Kévin Petitc4643922019-06-17 19:32:05 +0100620 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
621 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400622
Kévin Petitc4643922019-06-17 19:32:05 +0100623 // We need to map the OpenCL constants to the SPIR-V equivalents.
624 const auto LocalMemFence =
625 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
626 const auto GlobalMemFence =
627 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
628 const auto ConstantSequentiallyConsistent = ConstantInt::get(
629 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
630 const auto ConstantScopeDevice =
631 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
632 const auto ConstantScopeWorkgroup =
633 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400634
Kévin Petitc4643922019-06-17 19:32:05 +0100635 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
636 const auto LocalMemFenceMask =
637 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
638 const auto WorkgroupShiftAmount =
639 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
640 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
641 Instruction::Shl, LocalMemFenceMask,
642 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400643
Kévin Petitc4643922019-06-17 19:32:05 +0100644 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
645 const auto GlobalMemFenceMask =
646 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
647 const auto UniformShiftAmount =
648 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
649 const auto MemorySemanticsUniform = BinaryOperator::Create(
650 Instruction::Shl, GlobalMemFenceMask,
651 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400652
Kévin Petitc4643922019-06-17 19:32:05 +0100653 // And combine the above together, also adding in
654 // MemorySemanticsSequentiallyConsistentMask.
655 auto MemorySemantics =
656 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
657 ConstantSequentiallyConsistent, "", CI);
658 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
659 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400660
Kévin Petitc4643922019-06-17 19:32:05 +0100661 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
662 // Device Scope, otherwise Workgroup Scope.
663 const auto Cmp =
664 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
665 GlobalMemFence, "", CI);
666 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
667 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400668
Kévin Petitc4643922019-06-17 19:32:05 +0100669 // Lastly, the Execution Scope is always Workgroup Scope.
670 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400671
Kévin Petitc4643922019-06-17 19:32:05 +0100672 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
673 {Attribute::NoDuplicate}, CI->getType(),
674 {ExecutionScope, MemoryScope, MemorySemantics});
675 });
David Neto22f144c2017-06-12 14:26:21 -0400676}
677
678bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
679 bool Changed = false;
680
681 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
682
Kévin Petitc4643922019-06-17 19:32:05 +0100683 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100684 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100685 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400686 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100687 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100688 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100689 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100690 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400691
692 for (auto Pair : Map) {
693 // If we find a function with the matching name.
694 if (auto F = M.getFunction(Pair.first)) {
695 SmallVector<Instruction *, 4> ToRemoves;
696
697 // Walk the users of the function.
698 for (auto &U : F->uses()) {
699 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400700
701 auto Arg = CI->getOperand(0);
702
703 // We need to map the OpenCL constants to the SPIR-V equivalents.
704 const auto LocalMemFence =
705 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
706 const auto GlobalMemFence =
707 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
708 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100709 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400710 const auto ConstantScopeDevice =
711 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
712
713 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
714 const auto LocalMemFenceMask = BinaryOperator::Create(
715 Instruction::And, LocalMemFence, Arg, "", CI);
716 const auto WorkgroupShiftAmount =
717 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
718 clz(CLK_LOCAL_MEM_FENCE);
719 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
720 Instruction::Shl, LocalMemFenceMask,
721 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
722
723 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
724 const auto GlobalMemFenceMask = BinaryOperator::Create(
725 Instruction::And, GlobalMemFence, Arg, "", CI);
726 const auto UniformShiftAmount =
727 clz(spv::MemorySemanticsUniformMemoryMask) -
728 clz(CLK_GLOBAL_MEM_FENCE);
729 const auto MemorySemanticsUniform = BinaryOperator::Create(
730 Instruction::Shl, GlobalMemFenceMask,
731 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
732
733 // And combine the above together, also adding in
734 // MemorySemanticsSequentiallyConsistentMask.
735 auto MemorySemantics =
736 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
737 ConstantMemorySemantics, "", CI);
738 MemorySemantics = BinaryOperator::Create(
739 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
740
741 // Memory Scope is always device.
742 const auto MemoryScope = ConstantScopeDevice;
743
Kévin Petitc4643922019-06-17 19:32:05 +0100744 const auto SPIRVOp = std::get<0>(Pair.second);
745 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
746 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400747
748 CI->replaceAllUsesWith(NewCI);
749
750 // Lastly, remember to remove the user.
751 ToRemoves.push_back(CI);
752 }
753 }
754
755 Changed = !ToRemoves.empty();
756
757 // And cleanup the calls we don't use anymore.
758 for (auto V : ToRemoves) {
759 V->eraseFromParent();
760 }
761
762 // And remove the function we don't need either too.
763 F->eraseFromParent();
764 }
765 }
766
767 return Changed;
768}
769
770bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
771 bool Changed = false;
772
773 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
774 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
775 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
776 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
777 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
778 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
779 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
780 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
781 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
782 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
783 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
784 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
785 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
786 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
787 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
788 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
789 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
790 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
791 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
792 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
793 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
794 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
795 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
796 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
797 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
798 };
799
800 for (auto Pair : Map) {
801 // If we find a function with the matching name.
802 if (auto F = M.getFunction(Pair.first)) {
803 SmallVector<Instruction *, 4> ToRemoves;
804
805 // Walk the users of the function.
806 for (auto &U : F->uses()) {
807 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
808 // The predicate to use in the CmpInst.
809 auto Predicate = Pair.second.first;
810
811 // The value to return for true.
812 auto TrueValue =
813 ConstantInt::getSigned(CI->getType(), Pair.second.second);
814
815 // The value to return for false.
816 auto FalseValue = Constant::getNullValue(CI->getType());
817
818 auto Arg1 = CI->getOperand(0);
819 auto Arg2 = CI->getOperand(1);
820
821 const auto Cmp =
822 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
823
824 const auto Select =
825 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
826
827 CI->replaceAllUsesWith(Select);
828
829 // Lastly, remember to remove the user.
830 ToRemoves.push_back(CI);
831 }
832 }
833
834 Changed = !ToRemoves.empty();
835
836 // And cleanup the calls we don't use anymore.
837 for (auto V : ToRemoves) {
838 V->eraseFromParent();
839 }
840
841 // And remove the function we don't need either too.
842 F->eraseFromParent();
843 }
844 }
845
846 return Changed;
847}
848
849bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
850 bool Changed = false;
851
Kévin Petitff03aee2019-06-12 19:39:03 +0100852 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
853 {"_Z5isinff", {spv::OpIsInf, 1}},
854 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
855 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
856 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
857 {"_Z5isnanf", {spv::OpIsNan, 1}},
858 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
859 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
860 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400861 };
862
863 for (auto Pair : Map) {
864 // If we find a function with the matching name.
865 if (auto F = M.getFunction(Pair.first)) {
866 SmallVector<Instruction *, 4> ToRemoves;
867
868 // Walk the users of the function.
869 for (auto &U : F->uses()) {
870 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
871 const auto CITy = CI->getType();
872
Kévin Petitff03aee2019-06-12 19:39:03 +0100873 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400874
875 // The value to return for true.
876 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
877
878 // The value to return for false.
879 auto FalseValue = Constant::getNullValue(CITy);
880
881 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
882 M.getContext(),
883 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
884
Kévin Petitff03aee2019-06-12 19:39:03 +0100885 auto NewCI =
886 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
887 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400888
889 const auto Select =
890 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
891
892 CI->replaceAllUsesWith(Select);
893
894 // Lastly, remember to remove the user.
895 ToRemoves.push_back(CI);
896 }
897 }
898
899 Changed = !ToRemoves.empty();
900
901 // And cleanup the calls we don't use anymore.
902 for (auto V : ToRemoves) {
903 V->eraseFromParent();
904 }
905
906 // And remove the function we don't need either too.
907 F->eraseFromParent();
908 }
909 }
910
911 return Changed;
912}
913
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100914bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Module &M) {
915 std::vector<const char *> Names = {
916 "_Z8isfiniteh", "_Z8isfiniteDv2_h", "_Z8isfiniteDv3_h",
917 "_Z8isfiniteDv4_h", "_Z8isfinitef", "_Z8isfiniteDv2_f",
918 "_Z8isfiniteDv3_f", "_Z8isfiniteDv4_f", "_Z8isfinited",
919 "_Z8isfiniteDv2_d", "_Z8isfiniteDv3_d", "_Z8isfiniteDv4_d",
920 };
921
922 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
923 auto &C = M.getContext();
924 auto Val = CI->getOperand(0);
925 auto ValTy = Val->getType();
926 auto RetTy = CI->getType();
927
928 // Get a suitable integer type to represent the number
929 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
930
931 // Create Mask
932 auto ScalarSize = ValTy->getScalarSizeInBits();
933 Value *InfMask;
934 switch (ScalarSize) {
935 case 16:
936 InfMask = ConstantInt::get(IntTy, 0x7C00U);
937 break;
938 case 32:
939 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
940 break;
941 case 64:
942 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
943 break;
944 default:
945 llvm_unreachable("Unsupported floating-point type");
946 }
947
948 IRBuilder<> Builder(CI);
949
950 // Bitcast to int
951 auto ValInt = Builder.CreateBitCast(Val, IntTy);
952
953 // Mask and compare
954 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
955 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
956
957 auto RetFalse = ConstantInt::get(RetTy, 0);
958 Value *RetTrue;
959 if (ValTy->isVectorTy()) {
960 RetTrue = ConstantInt::getSigned(RetTy, -1);
961 } else {
962 RetTrue = ConstantInt::get(RetTy, 1);
963 }
964 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
965 });
966}
967
David Neto22f144c2017-06-12 14:26:21 -0400968bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
969 bool Changed = false;
970
Kévin Petitff03aee2019-06-12 19:39:03 +0100971 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000972 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100973 {"_Z3allc", spv::OpNop},
974 {"_Z3allDv2_c", spv::OpAll},
975 {"_Z3allDv3_c", spv::OpAll},
976 {"_Z3allDv4_c", spv::OpAll},
977 {"_Z3alls", spv::OpNop},
978 {"_Z3allDv2_s", spv::OpAll},
979 {"_Z3allDv3_s", spv::OpAll},
980 {"_Z3allDv4_s", spv::OpAll},
981 {"_Z3alli", spv::OpNop},
982 {"_Z3allDv2_i", spv::OpAll},
983 {"_Z3allDv3_i", spv::OpAll},
984 {"_Z3allDv4_i", spv::OpAll},
985 {"_Z3alll", spv::OpNop},
986 {"_Z3allDv2_l", spv::OpAll},
987 {"_Z3allDv3_l", spv::OpAll},
988 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000989
990 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100991 {"_Z3anyc", spv::OpNop},
992 {"_Z3anyDv2_c", spv::OpAny},
993 {"_Z3anyDv3_c", spv::OpAny},
994 {"_Z3anyDv4_c", spv::OpAny},
995 {"_Z3anys", spv::OpNop},
996 {"_Z3anyDv2_s", spv::OpAny},
997 {"_Z3anyDv3_s", spv::OpAny},
998 {"_Z3anyDv4_s", spv::OpAny},
999 {"_Z3anyi", spv::OpNop},
1000 {"_Z3anyDv2_i", spv::OpAny},
1001 {"_Z3anyDv3_i", spv::OpAny},
1002 {"_Z3anyDv4_i", spv::OpAny},
1003 {"_Z3anyl", spv::OpNop},
1004 {"_Z3anyDv2_l", spv::OpAny},
1005 {"_Z3anyDv3_l", spv::OpAny},
1006 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -04001007 };
1008
1009 for (auto Pair : Map) {
1010 // If we find a function with the matching name.
1011 if (auto F = M.getFunction(Pair.first)) {
1012 SmallVector<Instruction *, 4> ToRemoves;
1013
1014 // Walk the users of the function.
1015 for (auto &U : F->uses()) {
1016 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04001017
1018 auto Arg = CI->getOperand(0);
1019
1020 Value *V;
1021
Kévin Petitfd27cca2018-10-31 13:00:17 +00001022 // If the argument is a 32-bit int, just use a shift
1023 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1024 V = BinaryOperator::Create(Instruction::LShr, Arg,
1025 ConstantInt::get(Arg->getType(), 31), "",
1026 CI);
1027 } else {
David Neto22f144c2017-06-12 14:26:21 -04001028 // The value for zero to compare against.
1029 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1030
David Neto22f144c2017-06-12 14:26:21 -04001031 // The value to return for true.
1032 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1033
1034 // The value to return for false.
1035 const auto FalseValue = Constant::getNullValue(CI->getType());
1036
Kévin Petitfd27cca2018-10-31 13:00:17 +00001037 const auto Cmp = CmpInst::Create(
1038 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1039
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001040 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001041
1042 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +01001043 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001044
Kévin Petitff03aee2019-06-12 19:39:03 +01001045 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001046
Kévin Petitff03aee2019-06-12 19:39:03 +01001047 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001048
Kévin Petitff03aee2019-06-12 19:39:03 +01001049 const auto NewCI = clspv::InsertSPIRVOp(
1050 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001051 SelectSource = NewCI;
1052
1053 } else {
1054 SelectSource = Cmp;
1055 }
1056
1057 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001058 }
1059
1060 CI->replaceAllUsesWith(V);
1061
1062 // Lastly, remember to remove the user.
1063 ToRemoves.push_back(CI);
1064 }
1065 }
1066
1067 Changed = !ToRemoves.empty();
1068
1069 // And cleanup the calls we don't use anymore.
1070 for (auto V : ToRemoves) {
1071 V->eraseFromParent();
1072 }
1073
1074 // And remove the function we don't need either too.
1075 F->eraseFromParent();
1076 }
1077 }
1078
1079 return Changed;
1080}
1081
Kévin Petitbf0036c2019-03-06 13:57:10 +00001082bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1083 bool Changed = false;
1084
1085 for (auto const &SymVal : M.getValueSymbolTable()) {
1086 // Skip symbols whose name doesn't match
1087 if (!SymVal.getKey().startswith("_Z8upsample")) {
1088 continue;
1089 }
1090 // Is there a function going by that name?
1091 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1092
1093 SmallVector<Instruction *, 4> ToRemoves;
1094
1095 // Walk the users of the function.
1096 for (auto &U : F->uses()) {
1097 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1098
1099 // Get arguments
1100 auto HiValue = CI->getOperand(0);
1101 auto LoValue = CI->getOperand(1);
1102
1103 // Don't touch overloads that aren't in OpenCL C
1104 auto HiType = HiValue->getType();
1105 auto LoType = LoValue->getType();
1106
1107 if (HiType != LoType) {
1108 continue;
1109 }
1110
1111 if (!HiType->isIntOrIntVectorTy()) {
1112 continue;
1113 }
1114
1115 if (HiType->getScalarSizeInBits() * 2 !=
1116 CI->getType()->getScalarSizeInBits()) {
1117 continue;
1118 }
1119
1120 if ((HiType->getScalarSizeInBits() != 8) &&
1121 (HiType->getScalarSizeInBits() != 16) &&
1122 (HiType->getScalarSizeInBits() != 32)) {
1123 continue;
1124 }
1125
1126 if (HiType->isVectorTy()) {
1127 if ((HiType->getVectorNumElements() != 2) &&
1128 (HiType->getVectorNumElements() != 3) &&
1129 (HiType->getVectorNumElements() != 4) &&
1130 (HiType->getVectorNumElements() != 8) &&
1131 (HiType->getVectorNumElements() != 16)) {
1132 continue;
1133 }
1134 }
1135
1136 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001137 auto HiCast =
1138 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1139 auto LoCast =
1140 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001141
1142 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001143 auto ShiftAmount =
1144 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001145 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1146 ShiftAmount, "", CI);
1147
1148 // OR both results
1149 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1150 "", CI);
1151
1152 // Replace call with the expression
1153 CI->replaceAllUsesWith(V);
1154
1155 // Lastly, remember to remove the user.
1156 ToRemoves.push_back(CI);
1157 }
1158 }
1159
1160 Changed = !ToRemoves.empty();
1161
1162 // And cleanup the calls we don't use anymore.
1163 for (auto V : ToRemoves) {
1164 V->eraseFromParent();
1165 }
1166
1167 // And remove the function we don't need either too.
1168 F->eraseFromParent();
1169 }
1170 }
1171
1172 return Changed;
1173}
1174
Kévin Petitd44eef52019-03-08 13:22:14 +00001175bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1176 bool Changed = false;
1177
1178 for (auto const &SymVal : M.getValueSymbolTable()) {
1179 // Skip symbols whose name doesn't match
1180 if (!SymVal.getKey().startswith("_Z6rotate")) {
1181 continue;
1182 }
1183 // Is there a function going by that name?
1184 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1185
1186 SmallVector<Instruction *, 4> ToRemoves;
1187
1188 // Walk the users of the function.
1189 for (auto &U : F->uses()) {
1190 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1191
1192 // Get arguments
1193 auto SrcValue = CI->getOperand(0);
1194 auto RotAmount = CI->getOperand(1);
1195
1196 // Don't touch overloads that aren't in OpenCL C
1197 auto SrcType = SrcValue->getType();
1198 auto RotType = RotAmount->getType();
1199
1200 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1201 continue;
1202 }
1203
1204 if (!SrcType->isIntOrIntVectorTy()) {
1205 continue;
1206 }
1207
1208 if ((SrcType->getScalarSizeInBits() != 8) &&
1209 (SrcType->getScalarSizeInBits() != 16) &&
1210 (SrcType->getScalarSizeInBits() != 32) &&
1211 (SrcType->getScalarSizeInBits() != 64)) {
1212 continue;
1213 }
1214
1215 if (SrcType->isVectorTy()) {
1216 if ((SrcType->getVectorNumElements() != 2) &&
1217 (SrcType->getVectorNumElements() != 3) &&
1218 (SrcType->getVectorNumElements() != 4) &&
1219 (SrcType->getVectorNumElements() != 8) &&
1220 (SrcType->getVectorNumElements() != 16)) {
1221 continue;
1222 }
1223 }
1224
1225 // The approach used is to shift the top bits down, the bottom bits up
1226 // and OR the two shifted values.
1227
1228 // The rotation amount is to be treated modulo the element size.
1229 // Since SPIR-V shift ops don't support this, let's apply the
1230 // modulo ahead of shifting. The element size is always a power of
1231 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001232 auto ModMask =
1233 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001234 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1235 ModMask, "", CI);
1236
1237 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001238 auto ScalarSize =
1239 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001240 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1241 RotAmount, "", CI);
1242
1243 // Now shift the bottom bits up and the top bits down
1244 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1245 RotAmount, "", CI);
1246 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1247 DownAmount, "", CI);
1248
1249 // Finally OR the two shifted values
1250 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1251 HiRotated, "", CI);
1252
1253 // Replace call with the expression
1254 CI->replaceAllUsesWith(V);
1255
1256 // Lastly, remember to remove the user.
1257 ToRemoves.push_back(CI);
1258 }
1259 }
1260
1261 Changed = !ToRemoves.empty();
1262
1263 // And cleanup the calls we don't use anymore.
1264 for (auto V : ToRemoves) {
1265 V->eraseFromParent();
1266 }
1267
1268 // And remove the function we don't need either too.
1269 F->eraseFromParent();
1270 }
1271 }
1272
1273 return Changed;
1274}
1275
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001276bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1277 bool Changed = false;
1278
1279 for (auto const &SymVal : M.getValueSymbolTable()) {
1280
1281 // Skip symbols whose name obviously doesn't match
1282 if (!SymVal.getKey().contains("convert_")) {
1283 continue;
1284 }
1285
1286 // Is there a function going by that name?
1287 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1288
1289 // Get info from the mangled name
1290 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001291 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001292
1293 // All functions of interest are handled by our mangled name parser
1294 if (!parsed) {
1295 continue;
1296 }
1297
1298 // Move on if this isn't a call to convert_
1299 if (!finfo.name.startswith("convert_")) {
1300 continue;
1301 }
1302
1303 // Extract the destination type from the function name
1304 StringRef DstTypeName = finfo.name;
1305 DstTypeName.consume_front("convert_");
1306
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001307 auto DstSignedNess =
1308 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1309 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1310 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1311 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1312 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1313 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1314 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1315 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1316 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1317 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001318
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001319 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001320 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001321
1322 SmallVector<Instruction *, 4> ToRemoves;
1323
1324 // Walk the users of the function.
1325 for (auto &U : F->uses()) {
1326 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1327
1328 // Get arguments
1329 auto SrcValue = CI->getOperand(0);
1330
1331 // Don't touch overloads that aren't in OpenCL C
1332 auto SrcType = SrcValue->getType();
1333 auto DstType = CI->getType();
1334
1335 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1336 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1337 continue;
1338 }
1339
1340 if (SrcType->isVectorTy()) {
1341
1342 if (SrcType->getVectorNumElements() !=
1343 DstType->getVectorNumElements()) {
1344 continue;
1345 }
1346
1347 if ((SrcType->getVectorNumElements() != 2) &&
1348 (SrcType->getVectorNumElements() != 3) &&
1349 (SrcType->getVectorNumElements() != 4) &&
1350 (SrcType->getVectorNumElements() != 8) &&
1351 (SrcType->getVectorNumElements() != 16)) {
1352 continue;
1353 }
1354 }
1355
1356 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1357 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1358
1359 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1360 bool DstIsInt = DstType->isIntOrIntVectorTy();
1361
1362 Value *V;
1363 if (SrcIsFloat && DstIsFloat) {
1364 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1365 } else if (SrcIsFloat && DstIsInt) {
1366 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001367 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1368 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001369 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001370 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1371 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001372 }
1373 } else if (SrcIsInt && DstIsFloat) {
1374 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001375 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1376 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001377 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001378 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1379 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001380 }
1381 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001382 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1383 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001384 } else {
1385 // Not something we're supposed to handle, just move on
1386 continue;
1387 }
1388
1389 // Replace call with the expression
1390 CI->replaceAllUsesWith(V);
1391
1392 // Lastly, remember to remove the user.
1393 ToRemoves.push_back(CI);
1394 }
1395 }
1396
1397 Changed = !ToRemoves.empty();
1398
1399 // And cleanup the calls we don't use anymore.
1400 for (auto V : ToRemoves) {
1401 V->eraseFromParent();
1402 }
1403
1404 // And remove the function we don't need either too.
1405 F->eraseFromParent();
1406 }
1407 }
1408
1409 return Changed;
1410}
1411
Kévin Petit8a560882019-03-21 15:24:34 +00001412bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1413 bool Changed = false;
1414
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001415 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001416
Kévin Petit617a76d2019-04-04 13:54:16 +01001417 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001418 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1419 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1420
1421 // Skip symbols whose name doesn't match
1422 if (!isMad && !isMul) {
1423 continue;
1424 }
1425
1426 // Is there a function going by that name?
1427 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001428 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001429 }
1430 }
1431
Kévin Petit617a76d2019-04-04 13:54:16 +01001432 for (auto F : FnWorklist) {
1433 SmallVector<Instruction *, 4> ToRemoves;
1434
1435 bool isMad = F->getName().startswith("_Z6mad_hi");
1436 // Walk the users of the function.
1437 for (auto &U : F->uses()) {
1438 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1439
1440 // Get arguments
1441 auto AValue = CI->getOperand(0);
1442 auto BValue = CI->getOperand(1);
1443 auto CValue = CI->getOperand(2);
1444
1445 // Don't touch overloads that aren't in OpenCL C
1446 auto AType = AValue->getType();
1447 auto BType = BValue->getType();
1448 auto CType = CValue->getType();
1449
1450 if ((AType != BType) || (CI->getType() != AType) ||
1451 (isMad && (AType != CType))) {
1452 continue;
1453 }
1454
1455 if (!AType->isIntOrIntVectorTy()) {
1456 continue;
1457 }
1458
1459 if ((AType->getScalarSizeInBits() != 8) &&
1460 (AType->getScalarSizeInBits() != 16) &&
1461 (AType->getScalarSizeInBits() != 32) &&
1462 (AType->getScalarSizeInBits() != 64)) {
1463 continue;
1464 }
1465
1466 if (AType->isVectorTy()) {
1467 if ((AType->getVectorNumElements() != 2) &&
1468 (AType->getVectorNumElements() != 3) &&
1469 (AType->getVectorNumElements() != 4) &&
1470 (AType->getVectorNumElements() != 8) &&
1471 (AType->getVectorNumElements() != 16)) {
1472 continue;
1473 }
1474 }
1475
1476 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001477 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001478
1479 // Select the appropriate signed/unsigned SPIR-V op
1480 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001481 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001482 opcode = spv::OpSMulExtended;
1483 } else {
1484 opcode = spv::OpUMulExtended;
1485 }
1486
1487 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001488 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001489 auto ExMulRetType = StructType::create(TwoValueType);
1490
1491 // Call the SPIR-V op
1492 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1493 ExMulRetType, {AValue, BValue});
1494
1495 // Get the high part of the result
1496 unsigned Idxs[] = {1};
1497 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1498
1499 // If we're handling a mad_hi, add the third argument to the result
1500 if (isMad) {
1501 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1502 }
1503
1504 // Replace call with the expression
1505 CI->replaceAllUsesWith(V);
1506
1507 // Lastly, remember to remove the user.
1508 ToRemoves.push_back(CI);
1509 }
1510 }
1511
1512 Changed = !ToRemoves.empty();
1513
1514 // And cleanup the calls we don't use anymore.
1515 for (auto V : ToRemoves) {
1516 V->eraseFromParent();
1517 }
1518
1519 // And remove the function we don't need either too.
1520 F->eraseFromParent();
1521 }
1522
Kévin Petit8a560882019-03-21 15:24:34 +00001523 return Changed;
1524}
1525
Kévin Petitf5b78a22018-10-25 14:32:17 +00001526bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1527 bool Changed = false;
1528
1529 for (auto const &SymVal : M.getValueSymbolTable()) {
1530 // Skip symbols whose name doesn't match
1531 if (!SymVal.getKey().startswith("_Z6select")) {
1532 continue;
1533 }
1534 // Is there a function going by that name?
1535 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1536
1537 SmallVector<Instruction *, 4> ToRemoves;
1538
1539 // Walk the users of the function.
1540 for (auto &U : F->uses()) {
1541 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1542
1543 // Get arguments
1544 auto FalseValue = CI->getOperand(0);
1545 auto TrueValue = CI->getOperand(1);
1546 auto PredicateValue = CI->getOperand(2);
1547
1548 // Don't touch overloads that aren't in OpenCL C
1549 auto FalseType = FalseValue->getType();
1550 auto TrueType = TrueValue->getType();
1551 auto PredicateType = PredicateValue->getType();
1552
1553 if (FalseType != TrueType) {
1554 continue;
1555 }
1556
1557 if (!PredicateType->isIntOrIntVectorTy()) {
1558 continue;
1559 }
1560
1561 if (!FalseType->isIntOrIntVectorTy() &&
1562 !FalseType->getScalarType()->isFloatingPointTy()) {
1563 continue;
1564 }
1565
1566 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1567 continue;
1568 }
1569
1570 if (FalseType->getScalarSizeInBits() !=
1571 PredicateType->getScalarSizeInBits()) {
1572 continue;
1573 }
1574
1575 if (FalseType->isVectorTy()) {
1576 if (FalseType->getVectorNumElements() !=
1577 PredicateType->getVectorNumElements()) {
1578 continue;
1579 }
1580
1581 if ((FalseType->getVectorNumElements() != 2) &&
1582 (FalseType->getVectorNumElements() != 3) &&
1583 (FalseType->getVectorNumElements() != 4) &&
1584 (FalseType->getVectorNumElements() != 8) &&
1585 (FalseType->getVectorNumElements() != 16)) {
1586 continue;
1587 }
1588 }
1589
1590 // Create constant
1591 const auto ZeroValue = Constant::getNullValue(PredicateType);
1592
1593 // Scalar and vector are to be treated differently
1594 CmpInst::Predicate Pred;
1595 if (PredicateType->isVectorTy()) {
1596 Pred = CmpInst::ICMP_SLT;
1597 } else {
1598 Pred = CmpInst::ICMP_NE;
1599 }
1600
1601 // Create comparison instruction
1602 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1603 ZeroValue, "", CI);
1604
1605 // Create select
1606 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1607
1608 // Replace call with the selection
1609 CI->replaceAllUsesWith(V);
1610
1611 // Lastly, remember to remove the user.
1612 ToRemoves.push_back(CI);
1613 }
1614 }
1615
1616 Changed = !ToRemoves.empty();
1617
1618 // And cleanup the calls we don't use anymore.
1619 for (auto V : ToRemoves) {
1620 V->eraseFromParent();
1621 }
1622
1623 // And remove the function we don't need either too.
1624 F->eraseFromParent();
1625 }
1626 }
1627
1628 return Changed;
1629}
1630
Kévin Petite7d0cce2018-10-31 12:38:56 +00001631bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1632 bool Changed = false;
1633
1634 for (auto const &SymVal : M.getValueSymbolTable()) {
1635 // Skip symbols whose name doesn't match
1636 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1637 continue;
1638 }
1639 // Is there a function going by that name?
1640 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1641
1642 SmallVector<Instruction *, 4> ToRemoves;
1643
1644 // Walk the users of the function.
1645 for (auto &U : F->uses()) {
1646 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1647
1648 if (CI->getNumOperands() != 4) {
1649 continue;
1650 }
1651
1652 // Get arguments
1653 auto FalseValue = CI->getOperand(0);
1654 auto TrueValue = CI->getOperand(1);
1655 auto PredicateValue = CI->getOperand(2);
1656
1657 // Don't touch overloads that aren't in OpenCL C
1658 auto FalseType = FalseValue->getType();
1659 auto TrueType = TrueValue->getType();
1660 auto PredicateType = PredicateValue->getType();
1661
1662 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1663 continue;
1664 }
1665
1666 if (TrueType->isVectorTy()) {
1667 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1668 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001669 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001670 }
1671 if ((TrueType->getVectorNumElements() != 2) &&
1672 (TrueType->getVectorNumElements() != 3) &&
1673 (TrueType->getVectorNumElements() != 4) &&
1674 (TrueType->getVectorNumElements() != 8) &&
1675 (TrueType->getVectorNumElements() != 16)) {
1676 continue;
1677 }
1678 }
1679
1680 // Remember the type of the operands
1681 auto OpType = TrueType;
1682
1683 // The actual bit selection will always be done on an integer type,
1684 // declare it here
1685 Type *BitType;
1686
1687 // If the operands are float, then bitcast them to int
1688 if (OpType->getScalarType()->isFloatingPointTy()) {
1689
1690 // First create the new type
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001691 BitType = getIntOrIntVectorTyForCast(M.getContext(), OpType);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001692
1693 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001694 PredicateValue =
1695 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1696 FalseValue =
1697 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1698 TrueValue =
1699 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001700
1701 } else {
1702 // The operands have an integer type, use it directly
1703 BitType = OpType;
1704 }
1705
1706 // All the operands are now always integers
1707 // implement as (c & b) | (~c & a)
1708
1709 // Create our negated predicate value
1710 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001711 auto NotPredicateValue = BinaryOperator::Create(
1712 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001713
1714 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001715 auto BitsFalse = BinaryOperator::Create(
1716 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1717 auto BitsTrue = BinaryOperator::Create(
1718 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001719
1720 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1721 BitsTrue, "", CI);
1722
1723 // If we were dealing with a floating point type, we must bitcast
1724 // the result back to that
1725 if (OpType->getScalarType()->isFloatingPointTy()) {
1726 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1727 }
1728
1729 // Replace call with our new code
1730 CI->replaceAllUsesWith(V);
1731
1732 // Lastly, remember to remove the user.
1733 ToRemoves.push_back(CI);
1734 }
1735 }
1736
1737 Changed = !ToRemoves.empty();
1738
1739 // And cleanup the calls we don't use anymore.
1740 for (auto V : ToRemoves) {
1741 V->eraseFromParent();
1742 }
1743
1744 // And remove the function we don't need either too.
1745 F->eraseFromParent();
1746 }
1747 }
1748
1749 return Changed;
1750}
1751
Kévin Petit6b0a9532018-10-30 20:00:39 +00001752bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1753 bool Changed = false;
1754
1755 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001756 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1757 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1758 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1759 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1760 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1761 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001762 };
1763
1764 for (auto Pair : Map) {
1765 // If we find a function with the matching name.
1766 if (auto F = M.getFunction(Pair.first)) {
1767 SmallVector<Instruction *, 4> ToRemoves;
1768
1769 // Walk the users of the function.
1770 for (auto &U : F->uses()) {
1771 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1772
1773 auto ReplacementFn = Pair.second;
1774
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001775 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001776 Value *VectorArg;
1777
1778 // First figure out which function we're dealing with
1779 if (F->getName().startswith("_Z10smoothstep")) {
1780 ArgsToSplat.push_back(CI->getOperand(1));
1781 VectorArg = CI->getOperand(2);
1782 } else {
1783 VectorArg = CI->getOperand(1);
1784 }
1785
1786 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001787 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001788 auto VecType = VectorArg->getType();
1789
1790 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001791 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001792 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001793 auto index =
1794 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1795 NewVectorArg =
1796 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001797 }
1798 SplatArgs.push_back(NewVectorArg);
1799 }
1800
1801 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001802 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1803 const auto NewFType =
1804 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001805
1806 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1807
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001808 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001809 for (auto arg : SplatArgs) {
1810 NewArgs.push_back(arg);
1811 }
1812 NewArgs.push_back(VectorArg);
1813
1814 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1815
1816 CI->replaceAllUsesWith(NewCI);
1817
1818 // Lastly, remember to remove the user.
1819 ToRemoves.push_back(CI);
1820 }
1821 }
1822
1823 Changed = !ToRemoves.empty();
1824
1825 // And cleanup the calls we don't use anymore.
1826 for (auto V : ToRemoves) {
1827 V->eraseFromParent();
1828 }
1829
1830 // And remove the function we don't need either too.
1831 F->eraseFromParent();
1832 }
1833 }
1834
1835 return Changed;
1836}
1837
David Neto22f144c2017-06-12 14:26:21 -04001838bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1839 bool Changed = false;
1840
1841 const std::map<const char *, Instruction::BinaryOps> Map = {
1842 {"_Z7signbitf", Instruction::LShr},
1843 {"_Z7signbitDv2_f", Instruction::AShr},
1844 {"_Z7signbitDv3_f", Instruction::AShr},
1845 {"_Z7signbitDv4_f", Instruction::AShr},
1846 };
1847
1848 for (auto Pair : Map) {
1849 // If we find a function with the matching name.
1850 if (auto F = M.getFunction(Pair.first)) {
1851 SmallVector<Instruction *, 4> ToRemoves;
1852
1853 // Walk the users of the function.
1854 for (auto &U : F->uses()) {
1855 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1856 auto Arg = CI->getOperand(0);
1857
1858 auto Bitcast =
1859 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1860
1861 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1862 ConstantInt::get(CI->getType(), 31),
1863 "", CI);
1864
1865 CI->replaceAllUsesWith(Shr);
1866
1867 // Lastly, remember to remove the user.
1868 ToRemoves.push_back(CI);
1869 }
1870 }
1871
1872 Changed = !ToRemoves.empty();
1873
1874 // And cleanup the calls we don't use anymore.
1875 for (auto V : ToRemoves) {
1876 V->eraseFromParent();
1877 }
1878
1879 // And remove the function we don't need either too.
1880 F->eraseFromParent();
1881 }
1882 }
1883
1884 return Changed;
1885}
1886
1887bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1888 bool Changed = false;
1889
1890 const std::map<const char *,
1891 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1892 Map = {
1893 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1894 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1895 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1896 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1897 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1898 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1899 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1900 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1901 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1902 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1903 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1904 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1905 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1906 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1907 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1908 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1909 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1910 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1911 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1912 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1913 };
1914
1915 for (auto Pair : Map) {
1916 // If we find a function with the matching name.
1917 if (auto F = M.getFunction(Pair.first)) {
1918 SmallVector<Instruction *, 4> ToRemoves;
1919
1920 // Walk the users of the function.
1921 for (auto &U : F->uses()) {
1922 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1923 // The multiply instruction to use.
1924 auto MulInst = Pair.second.first;
1925
1926 // The add instruction to use.
1927 auto AddInst = Pair.second.second;
1928
1929 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1930
1931 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1932 CI->getArgOperand(1), "", CI);
1933
1934 if (Instruction::BinaryOpsEnd != AddInst) {
1935 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1936 CI);
1937 }
1938
1939 CI->replaceAllUsesWith(I);
1940
1941 // Lastly, remember to remove the user.
1942 ToRemoves.push_back(CI);
1943 }
1944 }
1945
1946 Changed = !ToRemoves.empty();
1947
1948 // And cleanup the calls we don't use anymore.
1949 for (auto V : ToRemoves) {
1950 V->eraseFromParent();
1951 }
1952
1953 // And remove the function we don't need either too.
1954 F->eraseFromParent();
1955 }
1956 }
1957
1958 return Changed;
1959}
1960
Derek Chowcfd368b2017-10-19 20:58:45 -07001961bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1962 bool Changed = false;
1963
alan-bakerf795f392019-06-11 18:24:34 -04001964 for (auto const &SymVal : M.getValueSymbolTable()) {
1965 if (!SymVal.getKey().contains("vstore"))
1966 continue;
1967 if (SymVal.getKey().contains("vstore_"))
1968 continue;
1969 if (SymVal.getKey().contains("vstorea"))
1970 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001971
alan-bakerf795f392019-06-11 18:24:34 -04001972 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001973 SmallVector<Instruction *, 4> ToRemoves;
1974
alan-bakerf795f392019-06-11 18:24:34 -04001975 auto fname = F->getName();
1976 if (!fname.consume_front("_Z"))
1977 continue;
1978 size_t name_len;
1979 if (fname.consumeInteger(10, name_len))
1980 continue;
1981 std::string name = fname.take_front(name_len);
1982
1983 bool ok = StringSwitch<bool>(name)
1984 .Case("vstore2", true)
1985 .Case("vstore3", true)
1986 .Case("vstore4", true)
1987 .Case("vstore8", true)
1988 .Case("vstore16", true)
1989 .Default(false);
1990 if (!ok)
1991 continue;
1992
Derek Chowcfd368b2017-10-19 20:58:45 -07001993 for (auto &U : F->uses()) {
1994 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04001995 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001996
alan-bakerf795f392019-06-11 18:24:34 -04001997 auto data_type = data->getType();
1998 if (!data_type->isVectorTy())
1999 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002000
alan-bakerf795f392019-06-11 18:24:34 -04002001 auto elems = data_type->getVectorNumElements();
2002 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2003 elems != 16)
2004 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002005
alan-bakerf795f392019-06-11 18:24:34 -04002006 auto offset = CI->getOperand(1);
2007 auto ptr = CI->getOperand(2);
2008 auto ptr_type = ptr->getType();
2009 auto pointee_type = ptr_type->getPointerElementType();
2010 if (pointee_type != data_type->getVectorElementType())
2011 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002012
alan-bakerf795f392019-06-11 18:24:34 -04002013 // Avoid pointer casts. Instead generate the correct number of stores
2014 // and rely on drivers to coalesce appropriately.
2015 IRBuilder<> builder(CI);
2016 auto elems_const = builder.getInt32(elems);
2017 auto adjust = builder.CreateMul(offset, elems_const);
2018 for (auto i = 0; i < elems; ++i) {
2019 auto idx = builder.getInt32(i);
2020 auto add = builder.CreateAdd(adjust, idx);
2021 auto gep = builder.CreateGEP(ptr, add);
2022 auto extract = builder.CreateExtractElement(data, i);
2023 auto store = builder.CreateStore(extract, gep);
2024 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002025
Derek Chowcfd368b2017-10-19 20:58:45 -07002026 ToRemoves.push_back(CI);
2027 }
2028 }
2029
2030 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002031 for (auto V : ToRemoves) {
2032 V->eraseFromParent();
2033 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002034 F->eraseFromParent();
2035 }
2036 }
2037
2038 return Changed;
2039}
2040
2041bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2042 bool Changed = false;
2043
alan-bakerf795f392019-06-11 18:24:34 -04002044 for (auto const &SymVal : M.getValueSymbolTable()) {
2045 if (!SymVal.getKey().contains("vload"))
2046 continue;
2047 if (SymVal.getKey().contains("vload_"))
2048 continue;
2049 if (SymVal.getKey().contains("vloada"))
2050 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002051
alan-bakerf795f392019-06-11 18:24:34 -04002052 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002053 SmallVector<Instruction *, 4> ToRemoves;
2054
alan-bakerf795f392019-06-11 18:24:34 -04002055 auto fname = F->getName();
2056 if (!fname.consume_front("_Z"))
2057 continue;
2058 size_t name_len;
2059 if (fname.consumeInteger(10, name_len))
2060 continue;
2061 std::string name = fname.take_front(name_len);
2062
2063 bool ok = StringSwitch<bool>(name)
2064 .Case("vload2", true)
2065 .Case("vload3", true)
2066 .Case("vload4", true)
2067 .Case("vload8", true)
2068 .Case("vload16", true)
2069 .Default(false);
2070 if (!ok)
2071 continue;
2072
Derek Chowcfd368b2017-10-19 20:58:45 -07002073 for (auto &U : F->uses()) {
2074 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002075 auto ret_type = F->getReturnType();
2076 if (!ret_type->isVectorTy())
2077 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002078
alan-bakerf795f392019-06-11 18:24:34 -04002079 auto elems = ret_type->getVectorNumElements();
2080 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2081 elems != 16)
2082 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002083
alan-bakerf795f392019-06-11 18:24:34 -04002084 auto offset = CI->getOperand(0);
2085 auto ptr = CI->getOperand(1);
2086 auto ptr_type = ptr->getType();
2087 auto pointee_type = ptr_type->getPointerElementType();
2088 if (pointee_type != ret_type->getVectorElementType())
2089 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002090
alan-bakerf795f392019-06-11 18:24:34 -04002091 // Avoid pointer casts. Instead generate the correct number of loads
2092 // and rely on drivers to coalesce appropriately.
2093 IRBuilder<> builder(CI);
2094 auto elems_const = builder.getInt32(elems);
2095 Value *insert = UndefValue::get(ret_type);
2096 auto adjust = builder.CreateMul(offset, elems_const);
2097 for (auto i = 0; i < elems; ++i) {
2098 auto idx = builder.getInt32(i);
2099 auto add = builder.CreateAdd(adjust, idx);
2100 auto gep = builder.CreateGEP(ptr, add);
2101 auto load = builder.CreateLoad(gep);
2102 insert = builder.CreateInsertElement(insert, load, i);
2103 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002104
alan-bakerf795f392019-06-11 18:24:34 -04002105 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002106 ToRemoves.push_back(CI);
2107 }
2108 }
2109
2110 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002111 for (auto V : ToRemoves) {
2112 V->eraseFromParent();
2113 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002114 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002115 }
2116 }
2117
2118 return Changed;
2119}
2120
David Neto22f144c2017-06-12 14:26:21 -04002121bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2122 bool Changed = false;
2123
2124 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2125 "_Z10vload_halfjPU3AS2KDh"};
2126
2127 for (auto Name : Map) {
2128 // If we find a function with the matching name.
2129 if (auto F = M.getFunction(Name)) {
2130 SmallVector<Instruction *, 4> ToRemoves;
2131
2132 // Walk the users of the function.
2133 for (auto &U : F->uses()) {
2134 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2135 // The index argument from vload_half.
2136 auto Arg0 = CI->getOperand(0);
2137
2138 // The pointer argument from vload_half.
2139 auto Arg1 = CI->getOperand(1);
2140
David Neto22f144c2017-06-12 14:26:21 -04002141 auto IntTy = Type::getInt32Ty(M.getContext());
2142 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002143 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2144
David Neto22f144c2017-06-12 14:26:21 -04002145 // Our intrinsic to unpack a float2 from an int.
2146 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2147
2148 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2149
David Neto482550a2018-03-24 05:21:07 -07002150 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002151 auto ShortTy = Type::getInt16Ty(M.getContext());
2152 auto ShortPointerTy = PointerType::get(
2153 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002154
David Netoac825b82017-05-30 12:49:01 -04002155 // Cast the half* pointer to short*.
2156 auto Cast =
2157 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002158
David Netoac825b82017-05-30 12:49:01 -04002159 // Index into the correct address of the casted pointer.
2160 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2161
2162 // Load from the short* we casted to.
2163 auto Load = new LoadInst(Index, "", CI);
2164
2165 // ZExt the short -> int.
2166 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2167
2168 // Get our float2.
2169 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2170
2171 // Extract out the bottom element which is our float result.
2172 auto Extract = ExtractElementInst::Create(
2173 Call, ConstantInt::get(IntTy, 0), "", CI);
2174
2175 CI->replaceAllUsesWith(Extract);
2176 } else {
2177 // Assume the pointer argument points to storage aligned to 32bits
2178 // or more.
2179 // TODO(dneto): Do more analysis to make sure this is true?
2180 //
2181 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2182 // with:
2183 //
2184 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2185 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2186 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2187 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2188 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2189 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2190 // x float> %converted, %index_is_odd32
2191
2192 auto IntPointerTy = PointerType::get(
2193 IntTy, Arg1->getType()->getPointerAddressSpace());
2194
David Neto973e6a82017-05-30 13:48:18 -04002195 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002196 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002197 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002198 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2199
2200 auto One = ConstantInt::get(IntTy, 1);
2201 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2202 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2203
2204 // Index into the correct address of the casted pointer.
2205 auto Ptr =
2206 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2207
2208 // Load from the int* we casted to.
2209 auto Load = new LoadInst(Ptr, "", CI);
2210
2211 // Get our float2.
2212 auto Call = CallInst::Create(NewF, Load, "", CI);
2213
2214 // Extract out the float result, where the element number is
2215 // determined by whether the original index was even or odd.
2216 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2217
2218 CI->replaceAllUsesWith(Extract);
2219 }
David Neto22f144c2017-06-12 14:26:21 -04002220
2221 // Lastly, remember to remove the user.
2222 ToRemoves.push_back(CI);
2223 }
2224 }
2225
2226 Changed = !ToRemoves.empty();
2227
2228 // And cleanup the calls we don't use anymore.
2229 for (auto V : ToRemoves) {
2230 V->eraseFromParent();
2231 }
2232
2233 // And remove the function we don't need either too.
2234 F->eraseFromParent();
2235 }
2236 }
2237
2238 return Changed;
2239}
2240
2241bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002242
Kévin Petite8edce32019-04-10 14:23:32 +01002243 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002244 "_Z11vload_half2jPU3AS1KDh",
2245 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2246 "_Z11vload_half2jPU3AS2KDh",
2247 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2248 };
David Neto22f144c2017-06-12 14:26:21 -04002249
Kévin Petite8edce32019-04-10 14:23:32 +01002250 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2251 // The index argument from vload_half.
2252 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002253
Kévin Petite8edce32019-04-10 14:23:32 +01002254 // The pointer argument from vload_half.
2255 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002256
Kévin Petite8edce32019-04-10 14:23:32 +01002257 auto IntTy = Type::getInt32Ty(M.getContext());
2258 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002259 auto NewPointerTy =
2260 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002261 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002262
Kévin Petite8edce32019-04-10 14:23:32 +01002263 // Cast the half* pointer to int*.
2264 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002265
Kévin Petite8edce32019-04-10 14:23:32 +01002266 // Index into the correct address of the casted pointer.
2267 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002268
Kévin Petite8edce32019-04-10 14:23:32 +01002269 // Load from the int* we casted to.
2270 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 // Our intrinsic to unpack a float2 from an int.
2273 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002274
Kévin Petite8edce32019-04-10 14:23:32 +01002275 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002276
Kévin Petite8edce32019-04-10 14:23:32 +01002277 // Get our float2.
2278 return CallInst::Create(NewF, Load, "", CI);
2279 });
David Neto22f144c2017-06-12 14:26:21 -04002280}
2281
2282bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002283
Kévin Petite8edce32019-04-10 14:23:32 +01002284 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002285 "_Z11vload_half4jPU3AS1KDh",
2286 "_Z12vloada_half4jPU3AS1KDh",
2287 "_Z11vload_half4jPU3AS2KDh",
2288 "_Z12vloada_half4jPU3AS2KDh",
2289 };
David Neto22f144c2017-06-12 14:26:21 -04002290
Kévin Petite8edce32019-04-10 14:23:32 +01002291 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2292 // The index argument from vload_half.
2293 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002294
Kévin Petite8edce32019-04-10 14:23:32 +01002295 // The pointer argument from vload_half.
2296 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002297
Kévin Petite8edce32019-04-10 14:23:32 +01002298 auto IntTy = Type::getInt32Ty(M.getContext());
2299 auto Int2Ty = VectorType::get(IntTy, 2);
2300 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002301 auto NewPointerTy =
2302 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002304
Kévin Petite8edce32019-04-10 14:23:32 +01002305 // Cast the half* pointer to int2*.
2306 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002307
Kévin Petite8edce32019-04-10 14:23:32 +01002308 // Index into the correct address of the casted pointer.
2309 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002310
Kévin Petite8edce32019-04-10 14:23:32 +01002311 // Load from the int2* we casted to.
2312 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002315 auto X =
2316 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2317 auto Y =
2318 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 // Our intrinsic to unpack a float2 from an int.
2321 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002324
Kévin Petite8edce32019-04-10 14:23:32 +01002325 // Get the lower (x & y) components of our final float4.
2326 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 // Get the higher (z & w) components of our final float4.
2329 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002330
Kévin Petite8edce32019-04-10 14:23:32 +01002331 Constant *ShuffleMask[4] = {
2332 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2333 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002334
Kévin Petite8edce32019-04-10 14:23:32 +01002335 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002336 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2337 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002338 });
David Neto22f144c2017-06-12 14:26:21 -04002339}
2340
David Neto6ad93232018-06-07 15:42:58 -07002341bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002342
2343 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2344 //
2345 // %u = load i32 %ptr
2346 // %fxy = call <2 x float> Unpack2xHalf(u)
2347 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002348 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002349 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2350 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2351 "_Z20__clspv_vloada_half2jPKj", // private
2352 };
2353
Kévin Petite8edce32019-04-10 14:23:32 +01002354 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2355 auto Index = CI->getOperand(0);
2356 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002357
Kévin Petite8edce32019-04-10 14:23:32 +01002358 auto IntTy = Type::getInt32Ty(M.getContext());
2359 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2360 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002361
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002362 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002363 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002364
Kévin Petite8edce32019-04-10 14:23:32 +01002365 // Our intrinsic to unpack a float2 from an int.
2366 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002367
Kévin Petite8edce32019-04-10 14:23:32 +01002368 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002369
Kévin Petite8edce32019-04-10 14:23:32 +01002370 // Get our final float2.
2371 return CallInst::Create(NewF, Load, "", CI);
2372 });
David Neto6ad93232018-06-07 15:42:58 -07002373}
2374
2375bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002376
2377 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2378 //
2379 // %u2 = load <2 x i32> %ptr
2380 // %u2xy = extractelement %u2, 0
2381 // %u2zw = extractelement %u2, 1
2382 // %fxy = call <2 x float> Unpack2xHalf(uint)
2383 // %fzw = call <2 x float> Unpack2xHalf(uint)
2384 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002385 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002386 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2387 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2388 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2389 };
2390
Kévin Petite8edce32019-04-10 14:23:32 +01002391 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2392 auto Index = CI->getOperand(0);
2393 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002394
Kévin Petite8edce32019-04-10 14:23:32 +01002395 auto IntTy = Type::getInt32Ty(M.getContext());
2396 auto Int2Ty = VectorType::get(IntTy, 2);
2397 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2398 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002399
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002400 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002401 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002402
Kévin Petite8edce32019-04-10 14:23:32 +01002403 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002404 auto X =
2405 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2406 auto Y =
2407 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002408
Kévin Petite8edce32019-04-10 14:23:32 +01002409 // Our intrinsic to unpack a float2 from an int.
2410 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002411
Kévin Petite8edce32019-04-10 14:23:32 +01002412 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 // Get the lower (x & y) components of our final float4.
2415 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002416
Kévin Petite8edce32019-04-10 14:23:32 +01002417 // Get the higher (z & w) components of our final float4.
2418 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002419
Kévin Petite8edce32019-04-10 14:23:32 +01002420 Constant *ShuffleMask[4] = {
2421 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2422 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002423
Kévin Petite8edce32019-04-10 14:23:32 +01002424 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002425 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2426 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002427 });
David Neto6ad93232018-06-07 15:42:58 -07002428}
2429
David Neto22f144c2017-06-12 14:26:21 -04002430bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002431
Kévin Petite8edce32019-04-10 14:23:32 +01002432 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2433 "_Z15vstore_half_rtefjPU3AS1Dh",
2434 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002435
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002436 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002437 // The value to store.
2438 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002439
Kévin Petite8edce32019-04-10 14:23:32 +01002440 // The index argument from vstore_half.
2441 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002442
Kévin Petite8edce32019-04-10 14:23:32 +01002443 // The pointer argument from vstore_half.
2444 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002445
Kévin Petite8edce32019-04-10 14:23:32 +01002446 auto IntTy = Type::getInt32Ty(M.getContext());
2447 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2448 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2449 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002450
Kévin Petite8edce32019-04-10 14:23:32 +01002451 // Our intrinsic to pack a float2 to an int.
2452 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002453
Kévin Petite8edce32019-04-10 14:23:32 +01002454 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002455
Kévin Petite8edce32019-04-10 14:23:32 +01002456 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002457 auto TempVec = InsertElementInst::Create(
2458 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 // Pack the float2 -> half2 (in an int).
2461 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002462
Kévin Petite8edce32019-04-10 14:23:32 +01002463 Value *Ret;
2464 if (clspv::Option::F16BitStorage()) {
2465 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002466 auto ShortPointerTy =
2467 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002468
Kévin Petite8edce32019-04-10 14:23:32 +01002469 // Truncate our i32 to an i16.
2470 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002471
Kévin Petite8edce32019-04-10 14:23:32 +01002472 // Cast the half* pointer to short*.
2473 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002474
Kévin Petite8edce32019-04-10 14:23:32 +01002475 // Index into the correct address of the casted pointer.
2476 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002477
Kévin Petite8edce32019-04-10 14:23:32 +01002478 // Store to the int* we casted to.
2479 Ret = new StoreInst(Trunc, Index, CI);
2480 } else {
2481 // We can only write to 32-bit aligned words.
2482 //
2483 // Assuming base is aligned to 32-bits, replace the equivalent of
2484 // vstore_half(value, index, base)
2485 // with:
2486 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2487 // uint32_t write_to_upper_half = index & 1u;
2488 // uint32_t shift = write_to_upper_half << 4;
2489 //
2490 // // Pack the float value as a half number in bottom 16 bits
2491 // // of an i32.
2492 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2493 //
2494 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2495 // ^ ((packed & 0xffff) << shift)
2496 // // We only need relaxed consistency, but OpenCL 1.2 only has
2497 // // sequentially consistent atomics.
2498 // // TODO(dneto): Use relaxed consistency.
2499 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002500 auto IntPointerTy =
2501 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002502
Kévin Petite8edce32019-04-10 14:23:32 +01002503 auto Four = ConstantInt::get(IntTy, 4);
2504 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002505
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002506 auto IndexIsOdd =
2507 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002508 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002509 auto IndexIntoI32 =
2510 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2511 auto BaseI32Ptr =
2512 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2513 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2514 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002515 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2516 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002517 auto MaskBitsToWrite =
2518 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2519 auto MaskedCurrent = BinaryOperator::CreateAnd(
2520 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002521
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002522 auto XLowerBits =
2523 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2524 auto NewBitsToWrite =
2525 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2526 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2527 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002528
Kévin Petite8edce32019-04-10 14:23:32 +01002529 // Generate the call to atomi_xor.
2530 SmallVector<Type *, 5> ParamTypes;
2531 // The pointer type.
2532 ParamTypes.push_back(IntPointerTy);
2533 // The Types for memory scope, semantics, and value.
2534 ParamTypes.push_back(IntTy);
2535 ParamTypes.push_back(IntTy);
2536 ParamTypes.push_back(IntTy);
2537 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2538 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 const auto ConstantScopeDevice =
2541 ConstantInt::get(IntTy, spv::ScopeDevice);
2542 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2543 // (SPIR-V Workgroup).
2544 const auto AddrSpaceSemanticsBits =
2545 IntPointerTy->getPointerAddressSpace() == 1
2546 ? spv::MemorySemanticsUniformMemoryMask
2547 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002548
Kévin Petite8edce32019-04-10 14:23:32 +01002549 // We're using relaxed consistency here.
2550 const auto ConstantMemorySemantics =
2551 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2552 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002553
Kévin Petite8edce32019-04-10 14:23:32 +01002554 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2555 ConstantMemorySemantics, ValueToXor};
2556 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2557 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002558 }
David Neto22f144c2017-06-12 14:26:21 -04002559
Kévin Petite8edce32019-04-10 14:23:32 +01002560 return Ret;
2561 });
David Neto22f144c2017-06-12 14:26:21 -04002562}
2563
2564bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002565
Kévin Petite8edce32019-04-10 14:23:32 +01002566 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002567 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2568 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2569 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2570 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2571 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2572 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2573 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2574 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2575 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2576 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2577 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2578 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2579 };
David Neto22f144c2017-06-12 14:26:21 -04002580
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002581 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002582 // The value to store.
2583 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002584
Kévin Petite8edce32019-04-10 14:23:32 +01002585 // The index argument from vstore_half.
2586 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002587
Kévin Petite8edce32019-04-10 14:23:32 +01002588 // The pointer argument from vstore_half.
2589 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002590
Kévin Petite8edce32019-04-10 14:23:32 +01002591 auto IntTy = Type::getInt32Ty(M.getContext());
2592 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002593 auto NewPointerTy =
2594 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002595 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002596
Kévin Petite8edce32019-04-10 14:23:32 +01002597 // Our intrinsic to pack a float2 to an int.
2598 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002599
Kévin Petite8edce32019-04-10 14:23:32 +01002600 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002601
Kévin Petite8edce32019-04-10 14:23:32 +01002602 // Turn the packed x & y into the final packing.
2603 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002604
Kévin Petite8edce32019-04-10 14:23:32 +01002605 // Cast the half* pointer to int*.
2606 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002607
Kévin Petite8edce32019-04-10 14:23:32 +01002608 // Index into the correct address of the casted pointer.
2609 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002610
Kévin Petite8edce32019-04-10 14:23:32 +01002611 // Store to the int* we casted to.
2612 return new StoreInst(X, Index, CI);
2613 });
David Neto22f144c2017-06-12 14:26:21 -04002614}
2615
2616bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002617
Kévin Petite8edce32019-04-10 14:23:32 +01002618 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002619 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2620 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2621 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2622 "_Z13vstorea_half4Dv4_fjPDh", // private
2623 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2624 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2625 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2626 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2627 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2628 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2629 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2630 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2631 };
David Neto22f144c2017-06-12 14:26:21 -04002632
Kévin Petite8edce32019-04-10 14:23:32 +01002633 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2634 // The value to store.
2635 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002636
Kévin Petite8edce32019-04-10 14:23:32 +01002637 // The index argument from vstore_half.
2638 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002639
Kévin Petite8edce32019-04-10 14:23:32 +01002640 // The pointer argument from vstore_half.
2641 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002642
Kévin Petite8edce32019-04-10 14:23:32 +01002643 auto IntTy = Type::getInt32Ty(M.getContext());
2644 auto Int2Ty = VectorType::get(IntTy, 2);
2645 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002646 auto NewPointerTy =
2647 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002648 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002649
Kévin Petite8edce32019-04-10 14:23:32 +01002650 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2651 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002652
Kévin Petite8edce32019-04-10 14:23:32 +01002653 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002654 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2655 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002656
Kévin Petite8edce32019-04-10 14:23:32 +01002657 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2658 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002659
Kévin Petite8edce32019-04-10 14:23:32 +01002660 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002661 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2662 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002663
Kévin Petite8edce32019-04-10 14:23:32 +01002664 // Our intrinsic to pack a float2 to an int.
2665 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002666
Kévin Petite8edce32019-04-10 14:23:32 +01002667 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002668
Kévin Petite8edce32019-04-10 14:23:32 +01002669 // Turn the packed x & y into the final component of our int2.
2670 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002671
Kévin Petite8edce32019-04-10 14:23:32 +01002672 // Turn the packed z & w into the final component of our int2.
2673 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002674
Kévin Petite8edce32019-04-10 14:23:32 +01002675 auto Combine = InsertElementInst::Create(
2676 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002677 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2678 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002679
Kévin Petite8edce32019-04-10 14:23:32 +01002680 // Cast the half* pointer to int2*.
2681 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002682
Kévin Petite8edce32019-04-10 14:23:32 +01002683 // Index into the correct address of the casted pointer.
2684 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002685
Kévin Petite8edce32019-04-10 14:23:32 +01002686 // Store to the int2* we casted to.
2687 return new StoreInst(Combine, Index, CI);
2688 });
David Neto22f144c2017-06-12 14:26:21 -04002689}
2690
alan-baker931d18a2019-12-12 08:21:32 -05002691bool ReplaceOpenCLBuiltinPass::replaceUnsampledReadImage(Module &M) {
2692 bool Changed = false;
2693 const std::map<const char *, const char *> Map = {
2694 // 1D
2695 {"_Z11read_imagef14ocl_image1d_roi",
2696 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2697 {"_Z11read_imagei14ocl_image1d_roi",
2698 "_Z11read_imagei14ocl_image1d_ro11ocl_sampleri"},
2699 {"_Z12read_imageui14ocl_image1d_roi",
2700 "_Z12read_imageui14ocl_image1d_ro11ocl_sampleri"},
2701 // TODO 1D array
2702 // 2D
2703 {"_Z11read_imagef14ocl_image2d_roDv2_i",
2704 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2705 {"_Z11read_imagei14ocl_image2d_roDv2_i",
2706 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i"},
2707 {"_Z12read_imageui14ocl_image2d_roDv2_i",
2708 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i"},
2709 // TODO 2D array
2710 // 3D
2711 {"_Z11read_imagef14ocl_image3d_roDv4_i",
2712 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2713 {"_Z11read_imagei14ocl_image3d_roDv4_i",
2714 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i"},
2715 {"_Z12read_imageui14ocl_image3d_roDv4_i",
2716 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i"}};
2717
2718 Function *translate_sampler =
2719 M.getFunction(clspv::TranslateSamplerInitializerFunction());
2720 Type *sampler_type = M.getTypeByName("opencl.sampler_t");
2721 for (auto Pair : Map) {
2722 // If we find a function with the matching name.
2723 if (auto F = M.getFunction(Pair.first)) {
2724 SmallVector<Instruction *, 4> ToRemoves;
2725
2726 // Walk the users of the function.
2727 for (auto &U : F->uses()) {
2728 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2729 // The image.
2730 auto Image = CI->getOperand(0);
2731
2732 // The coordinate.
2733 auto Coord = CI->getOperand(1);
2734
2735 // Create the sampler translation function if necessary.
2736 if (!translate_sampler) {
2737 // Create the sampler type if necessary.
2738 if (!sampler_type) {
2739 sampler_type =
2740 StructType::create(M.getContext(), "opencl.sampler_t");
2741 sampler_type =
2742 sampler_type->getPointerTo(clspv::AddressSpace::Constant);
2743 }
2744 auto fn_type = FunctionType::get(
2745 sampler_type, {Type::getInt32Ty(M.getContext())}, false);
2746 auto callee = M.getOrInsertFunction(
2747 clspv::TranslateSamplerInitializerFunction(), fn_type);
2748 translate_sampler = cast<Function>(callee.getCallee());
2749 }
2750
2751 auto NewFType = FunctionType::get(
2752 CI->getType(), {Image->getType(), sampler_type, Coord->getType()},
2753 false);
2754
2755 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2756
2757 // Sampler is:
2758 // CLK_ADDRESS_NONE = 0
2759 // CLK_FILTER_NEAREST = 0x10
2760 // CLK_NORMALIZED_COORDS_FALSE = 0
2761 const uint64_t data_mask = 0x10;
2762 auto NewSamplerCI = CallInst::Create(
2763 translate_sampler,
2764 {ConstantInt::get(Type::getInt32Ty(M.getContext()), data_mask)},
2765 "", CI);
2766 auto NewCI =
2767 CallInst::Create(NewF, {Image, NewSamplerCI, Coord}, "", CI);
2768
2769 CI->replaceAllUsesWith(NewCI);
2770
2771 // Lastly, remember to remove the user.
2772 ToRemoves.push_back(CI);
2773 }
2774 }
2775
2776 Changed = !ToRemoves.empty();
2777
2778 // And cleanup the calls we don't use anymore.
2779 for (auto V : ToRemoves) {
2780 V->eraseFromParent();
2781 }
2782
2783 // And remove the function we don't need either too.
2784 F->eraseFromParent();
2785 }
2786 }
2787
2788 return Changed;
2789}
2790
Kévin Petit06517a12019-12-09 19:40:31 +00002791bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002792 bool Changed = false;
2793
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002794 const std::map<const char *, const char *> Map = {
alan-bakerf906d2b2019-12-10 11:26:23 -05002795 // 1D
2796 {"_Z11read_imagei14ocl_image1d_ro11ocl_sampleri",
2797 "_Z11read_imagei14ocl_image1d_ro11ocl_samplerf"},
2798 {"_Z12read_imageui14ocl_image1d_ro11ocl_sampleri",
2799 "_Z12read_imageui14ocl_image1d_ro11ocl_samplerf"},
2800 {"_Z11read_imagef14ocl_image1d_ro11ocl_sampleri",
2801 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2802 // TODO 1Darray
Kévin Petit06517a12019-12-09 19:40:31 +00002803 // 2D
2804 {"_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i",
2805 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f"},
2806 {"_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i",
2807 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_f"},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002808 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2809 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002810 // TODO 2D array
2811 // 3D
2812 {"_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i",
2813 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_f"},
2814 {"_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i",
2815 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_f"},
2816 {"_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i",
2817 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002818
2819 for (auto Pair : Map) {
2820 // If we find a function with the matching name.
2821 if (auto F = M.getFunction(Pair.first)) {
2822 SmallVector<Instruction *, 4> ToRemoves;
2823
2824 // Walk the users of the function.
2825 for (auto &U : F->uses()) {
2826 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2827 // The image.
2828 auto Arg0 = CI->getOperand(0);
2829
2830 // The sampler.
2831 auto Arg1 = CI->getOperand(1);
2832
2833 // The coordinate (integer type that we can't handle).
2834 auto Arg2 = CI->getOperand(2);
2835
alan-bakerf906d2b2019-12-10 11:26:23 -05002836 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2837 // TODO(alan-baker): when arrayed images are supported fix component
2838 // calculation.
2839 uint32_t components = dim;
2840 Type *float_ty = nullptr;
2841 if (components == 1) {
2842 float_ty = Type::getFloatTy(M.getContext());
2843 } else {
2844 float_ty = VectorType::get(Type::getFloatTy(M.getContext()),
2845 Arg2->getType()->getVectorNumElements());
2846 }
David Neto22f144c2017-06-12 14:26:21 -04002847
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002848 auto NewFType = FunctionType::get(
alan-bakerf906d2b2019-12-10 11:26:23 -05002849 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002850 false);
David Neto22f144c2017-06-12 14:26:21 -04002851
2852 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2853
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002854 auto Cast =
alan-bakerf906d2b2019-12-10 11:26:23 -05002855 CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002856
2857 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2858
2859 CI->replaceAllUsesWith(NewCI);
2860
2861 // Lastly, remember to remove the user.
2862 ToRemoves.push_back(CI);
2863 }
2864 }
2865
2866 Changed = !ToRemoves.empty();
2867
2868 // And cleanup the calls we don't use anymore.
2869 for (auto V : ToRemoves) {
2870 V->eraseFromParent();
2871 }
2872
2873 // And remove the function we don't need either too.
2874 F->eraseFromParent();
2875 }
2876 }
2877
2878 return Changed;
2879}
2880
2881bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2882 bool Changed = false;
2883
Kévin Petit9b340262019-06-19 18:31:11 +01002884 const std::map<const char *, spv::Op> Map = {
2885 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
2886 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
2887 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
2888 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
2889 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
2890 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
2891 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
2892 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
2893 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2894 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2895 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2896 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
2897 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
2898 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
2899 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
2900 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
2901 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
2902 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
2903 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
2904 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
2905 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2906 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2907 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2908 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04002909
2910 for (auto Pair : Map) {
2911 // If we find a function with the matching name.
2912 if (auto F = M.getFunction(Pair.first)) {
2913 SmallVector<Instruction *, 4> ToRemoves;
2914
2915 // Walk the users of the function.
2916 for (auto &U : F->uses()) {
2917 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04002918
2919 auto IntTy = Type::getInt32Ty(M.getContext());
2920
David Neto22f144c2017-06-12 14:26:21 -04002921 // We need to map the OpenCL constants to the SPIR-V equivalents.
2922 const auto ConstantScopeDevice =
2923 ConstantInt::get(IntTy, spv::ScopeDevice);
2924 const auto ConstantMemorySemantics = ConstantInt::get(
2925 IntTy, spv::MemorySemanticsUniformMemoryMask |
2926 spv::MemorySemanticsSequentiallyConsistentMask);
2927
2928 SmallVector<Value *, 5> Params;
2929
2930 // The pointer.
2931 Params.push_back(CI->getArgOperand(0));
2932
2933 // The memory scope.
2934 Params.push_back(ConstantScopeDevice);
2935
2936 // The memory semantics.
2937 Params.push_back(ConstantMemorySemantics);
2938
2939 if (2 < CI->getNumArgOperands()) {
2940 // The unequal memory semantics.
2941 Params.push_back(ConstantMemorySemantics);
2942
2943 // The value.
2944 Params.push_back(CI->getArgOperand(2));
2945
2946 // The comparator.
2947 Params.push_back(CI->getArgOperand(1));
2948 } else if (1 < CI->getNumArgOperands()) {
2949 // The value.
2950 Params.push_back(CI->getArgOperand(1));
2951 }
2952
Kévin Petit9b340262019-06-19 18:31:11 +01002953 auto NewCI =
2954 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04002955
2956 CI->replaceAllUsesWith(NewCI);
2957
2958 // Lastly, remember to remove the user.
2959 ToRemoves.push_back(CI);
2960 }
2961 }
2962
2963 Changed = !ToRemoves.empty();
2964
2965 // And cleanup the calls we don't use anymore.
2966 for (auto V : ToRemoves) {
2967 V->eraseFromParent();
2968 }
2969
2970 // And remove the function we don't need either too.
2971 F->eraseFromParent();
2972 }
2973 }
2974
Neil Henning39672102017-09-29 14:33:13 +01002975 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002976 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002977 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002978 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002979 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002980 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002981 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002982 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002983 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002984 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002985 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002986 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002987 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002988 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002989 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002990 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002991 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002992 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002993 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002994 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002995 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002996 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002997 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002998 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002999 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003000 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003001 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003002 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003003 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003004 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003005 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003006 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003007 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003008 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003009 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003010 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003011 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003012 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003013 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003014 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003015 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003016 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003017 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003018 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003019 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003020 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003021 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003022 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003023 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003024 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003025 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003026 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003027 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003028 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003029 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003030 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003031 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003032 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003033 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003034 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003035 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003036 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003037 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3038 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3039 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003040
3041 for (auto Pair : Map2) {
3042 // If we find a function with the matching name.
3043 if (auto F = M.getFunction(Pair.first)) {
3044 SmallVector<Instruction *, 4> ToRemoves;
3045
3046 // Walk the users of the function.
3047 for (auto &U : F->uses()) {
3048 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3049 auto AtomicOp = new AtomicRMWInst(
3050 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3051 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3052
3053 CI->replaceAllUsesWith(AtomicOp);
3054
3055 // Lastly, remember to remove the user.
3056 ToRemoves.push_back(CI);
3057 }
3058 }
3059
3060 Changed = !ToRemoves.empty();
3061
3062 // And cleanup the calls we don't use anymore.
3063 for (auto V : ToRemoves) {
3064 V->eraseFromParent();
3065 }
3066
3067 // And remove the function we don't need either too.
3068 F->eraseFromParent();
3069 }
3070 }
3071
David Neto22f144c2017-06-12 14:26:21 -04003072 return Changed;
3073}
3074
3075bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04003076
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003077 std::vector<const char *> Names = {
3078 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01003079 };
3080
3081 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003082 auto IntTy = Type::getInt32Ty(M.getContext());
3083 auto FloatTy = Type::getFloatTy(M.getContext());
3084
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003085 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3086 ConstantInt::get(IntTy, 1),
3087 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003088
3089 Constant *UpShuffleMask[4] = {
3090 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3091 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3092
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003093 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3094 UndefValue::get(FloatTy),
3095 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003096
Kévin Petite8edce32019-04-10 14:23:32 +01003097 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003098 auto Arg0 =
3099 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3100 ConstantVector::get(DownShuffleMask), "", CI);
3101 auto Arg1 =
3102 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3103 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003104 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003105
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003106 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04003107
Kévin Petite8edce32019-04-10 14:23:32 +01003108 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003109
Kévin Petite8edce32019-04-10 14:23:32 +01003110 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003111
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003112 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3113 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003114 });
David Neto22f144c2017-06-12 14:26:21 -04003115}
David Neto62653202017-10-16 19:05:18 -04003116
3117bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3118 bool Changed = false;
3119
3120 // OpenCL's float result = fract(float x, float* ptr)
3121 //
3122 // In the LLVM domain:
3123 //
3124 // %floor_result = call spir_func float @floor(float %x)
3125 // store float %floor_result, float * %ptr
3126 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3127 // %result = call spir_func float
3128 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3129 //
3130 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3131 // and clspv.fract occur in the SPIR-V generator pass:
3132 //
3133 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3134 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3135 // ...
3136 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3137 // OpStore %ptr %floor_result
3138 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3139 // %fract_result = OpExtInst %float
3140 // %glsl_ext Fmin %fract_intermediate %just_under_1
3141
David Neto62653202017-10-16 19:05:18 -04003142 using std::string;
3143
3144 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3145 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003146 using QuadType =
3147 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003148 auto make_quad = [](const char *a, const char *b, const char *c,
3149 const char *d) {
3150 return std::tuple<const char *, const char *, const char *, const char *>(
3151 a, b, c, d);
3152 };
3153 const std::vector<QuadType> Functions = {
3154 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003155 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3156 "clspv.fract.v2f"),
3157 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3158 "clspv.fract.v3f"),
3159 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3160 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003161 };
3162
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003163 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003164 const StringRef fract_name(std::get<0>(quad));
3165
3166 // If we find a function with the matching name.
3167 if (auto F = M.getFunction(fract_name)) {
3168 if (F->use_begin() == F->use_end())
3169 continue;
3170
3171 // We have some uses.
3172 Changed = true;
3173
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003174 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003175
3176 const StringRef floor_name(std::get<1>(quad));
3177 const StringRef fmin_name(std::get<2>(quad));
3178 const StringRef clspv_fract_name(std::get<3>(quad));
3179
3180 // This is either float or a float vector. All the float-like
3181 // types are this type.
3182 auto result_ty = F->getReturnType();
3183
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003184 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003185 if (!fmin_fn) {
3186 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003187 FunctionType *fn_ty =
3188 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003189 fmin_fn =
3190 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003191 fmin_fn->addFnAttr(Attribute::ReadNone);
3192 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3193 }
3194
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003195 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003196 if (!floor_fn) {
3197 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003198 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003199 floor_fn = cast<Function>(
3200 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003201 floor_fn->addFnAttr(Attribute::ReadNone);
3202 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3203 }
3204
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003205 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003206 if (!clspv_fract_fn) {
3207 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003208 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003209 clspv_fract_fn = cast<Function>(
3210 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003211 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3212 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3213 }
3214
3215 // Number of significant significand bits, whether represented or not.
3216 unsigned num_significand_bits;
3217 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003218 case Type::HalfTyID:
3219 num_significand_bits = 11;
3220 break;
3221 case Type::FloatTyID:
3222 num_significand_bits = 24;
3223 break;
3224 case Type::DoubleTyID:
3225 num_significand_bits = 53;
3226 break;
3227 default:
3228 assert(false && "Unhandled float type when processing fract builtin");
3229 break;
David Neto62653202017-10-16 19:05:18 -04003230 }
3231 // Beware that the disassembler displays this value as
3232 // OpConstant %float 1
3233 // which is not quite right.
3234 const double kJustUnderOneScalar =
3235 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3236
3237 Constant *just_under_one =
3238 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3239 if (result_ty->isVectorTy()) {
3240 just_under_one = ConstantVector::getSplat(
3241 result_ty->getVectorNumElements(), just_under_one);
3242 }
3243
3244 IRBuilder<> Builder(Context);
3245
3246 SmallVector<Instruction *, 4> ToRemoves;
3247
3248 // Walk the users of the function.
3249 for (auto &U : F->uses()) {
3250 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3251
3252 Builder.SetInsertPoint(CI);
3253 auto arg = CI->getArgOperand(0);
3254 auto ptr = CI->getArgOperand(1);
3255
3256 // Compute floor result and store it.
3257 auto floor = Builder.CreateCall(floor_fn, {arg});
3258 Builder.CreateStore(floor, ptr);
3259
3260 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003261 auto fract_result =
3262 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003263
3264 CI->replaceAllUsesWith(fract_result);
3265
3266 // Lastly, remember to remove the user.
3267 ToRemoves.push_back(CI);
3268 }
3269 }
3270
3271 // And cleanup the calls we don't use anymore.
3272 for (auto V : ToRemoves) {
3273 V->eraseFromParent();
3274 }
3275
3276 // And remove the function we don't need either too.
3277 F->eraseFromParent();
3278 }
3279 }
3280
3281 return Changed;
3282}