blob: 86d5526da86b3dc6b9c915619f45803ec20584a9 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
alan-baker931d18a2019-12-12 08:21:32 -050035#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040036#include "Passes.h"
37#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050038#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040039
David Neto22f144c2017-06-12 14:26:21 -040040using namespace llvm;
41
42#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
43
44namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000045
/// Type information recovered for one argument of a mangled builtin name.
struct ArgTypeInfo {
  /// Signedness of the argument's element type. The mangled-name parser in
  /// this file assigns None to floating-point ('f') arguments.
  enum class SignedNess { None, Unsigned, Signed };

  // Default to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value that could later be copied or compared.
  SignedNess signedness = SignedNess::None;
};
50
51struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000052 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000053 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000054
Kévin Petit91bc72e2019-04-08 15:17:46 +010055 bool isArgSigned(size_t arg) const {
56 assert(argTypeInfos.size() > arg);
57 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000058 }
59
Kévin Petit91bc72e2019-04-08 15:17:46 +010060 static FunctionInfo getFromMangledName(StringRef name) {
61 FunctionInfo fi;
62 if (!getFromMangledNameCheck(name, &fi)) {
63 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000064 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 return fi;
66 }
Kévin Petit8a560882019-03-21 15:24:34 +000067
Kévin Petit91bc72e2019-04-08 15:17:46 +010068 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
69 if (!name.consume_front("_Z")) {
70 return false;
71 }
72 size_t nameLen;
73 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000074 return false;
75 }
76
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 finfo->name = name.take_front(nameLen);
78 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000079
Kévin Petit91bc72e2019-04-08 15:17:46 +010080 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000081
Kévin Petit91bc72e2019-04-08 15:17:46 +010082 while (name.size() != 0) {
83
84 ArgTypeInfo ti;
85
86 // Try parsing a vector prefix
87 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040088 int numElems;
89 if (name.consumeInteger(10, numElems)) {
90 return false;
91 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010092
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040093 if (!name.consume_front("_")) {
94 return false;
95 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010096 }
97
98 // Parse the base type
99 char typeCode = name.front();
100 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400101 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100102 case 'c': // char
103 case 'a': // signed char
104 case 's': // short
105 case 'i': // int
106 case 'l': // long
107 ti.signedness = ArgTypeInfo::SignedNess::Signed;
108 break;
109 case 'h': // unsigned char
110 case 't': // unsigned short
111 case 'j': // unsigned int
112 case 'm': // unsigned long
113 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
114 break;
115 case 'f':
116 ti.signedness = ArgTypeInfo::SignedNess::None;
117 break;
118 case 'S':
119 ti = prev_ti;
120 if (!name.consume_front("_")) {
121 return false;
122 }
123 break;
124 default:
125 return false;
126 }
127
128 finfo->argTypeInfos.push_back(ti);
129
130 prev_ti = ti;
131 }
132
133 return true;
134 };
Kévin Petit8a560882019-03-21 15:24:34 +0000135};
136
/// Returns the zero-based index of the most significant set bit of `v`
/// (i.e. floor(log2(v))), or 0 when `v` is 0.
///
/// Despite the name this is not a leading-zero count. Callers in this file
/// subtract two results to obtain the shift distance between two single-bit
/// masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Binary search: at each step check whether any bit is set above the low
  // `width` bits; if so, discard the low part and record the distance.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      highestBit |= width;
    }
  }

  return highestBit;
}
156
157Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
158 if (1 == elements) {
159 return Type::getInt1Ty(C);
160 } else {
161 return VectorType::get(Type::getInt1Ty(C), elements);
162 }
163}
164
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100165Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
166 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
167 if (Ty->isVectorTy()) {
168 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
169 }
170 return IntTy;
171}
172
David Neto22f144c2017-06-12 14:26:21 -0400173struct ReplaceOpenCLBuiltinPass final : public ModulePass {
174 static char ID;
175 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
176
177 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000178 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100179 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100180 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400181 bool replaceRecip(Module &M);
182 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100183 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400184 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100185 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceLog10(Module &M);
187 bool replaceBarrier(Module &M);
188 bool replaceMemFence(Module &M);
189 bool replaceRelational(Module &M);
190 bool replaceIsInfAndIsNan(Module &M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100191 bool replaceIsFinite(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400192 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000193 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000194 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000195 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000196 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000197 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000198 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000199 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400200 bool replaceSignbit(Module &M);
201 bool replaceMadandMad24andMul24(Module &M);
202 bool replaceVloadHalf(Module &M);
203 bool replaceVloadHalf2(Module &M);
204 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700205 bool replaceClspvVloadaHalf2(Module &M);
206 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400207 bool replaceVstoreHalf(Module &M);
208 bool replaceVstoreHalf2(Module &M);
209 bool replaceVstoreHalf4(Module &M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500210 bool replaceHalfReadImage(Module &M);
211 bool replaceHalfWriteImage(Module &M);
alan-baker931d18a2019-12-12 08:21:32 -0500212 bool replaceUnsampledReadImage(Module &M);
Kévin Petit06517a12019-12-09 19:40:31 +0000213 bool replaceSampledReadImageWithIntCoords(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400214 bool replaceAtomics(Module &M);
215 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400216 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700217 bool replaceVload(Module &M);
218 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400219};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100220} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400221
222char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400223INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
224 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400225
226namespace clspv {
227ModulePass *createReplaceOpenCLBuiltinPass() {
228 return new ReplaceOpenCLBuiltinPass();
229}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400230} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400231
232bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
233 bool Changed = false;
234
Kévin Petit2444e9b2018-11-09 14:14:37 +0000235 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100236 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100237 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400238 Changed |= replaceRecip(M);
239 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100240 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400241 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100242 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400243 Changed |= replaceLog10(M);
244 Changed |= replaceBarrier(M);
245 Changed |= replaceMemFence(M);
246 Changed |= replaceRelational(M);
247 Changed |= replaceIsInfAndIsNan(M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100248 Changed |= replaceIsFinite(M);
David Neto22f144c2017-06-12 14:26:21 -0400249 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000250 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000251 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000252 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000253 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000254 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000255 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000256 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400257 Changed |= replaceSignbit(M);
258 Changed |= replaceMadandMad24andMul24(M);
259 Changed |= replaceVloadHalf(M);
260 Changed |= replaceVloadHalf2(M);
261 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700262 Changed |= replaceClspvVloadaHalf2(M);
263 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400264 Changed |= replaceVstoreHalf(M);
265 Changed |= replaceVstoreHalf2(M);
266 Changed |= replaceVstoreHalf4(M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500267 // Replace the half image builtins before handling other image builtins.
268 Changed |= replaceHalfReadImage(M);
269 Changed |= replaceHalfWriteImage(M);
alan-baker931d18a2019-12-12 08:21:32 -0500270 // Replace unsampled reads before converting sampled read coordinates.
271 Changed |= replaceUnsampledReadImage(M);
Kévin Petit06517a12019-12-09 19:40:31 +0000272 Changed |= replaceSampledReadImageWithIntCoords(M);
David Neto22f144c2017-06-12 14:26:21 -0400273 Changed |= replaceAtomics(M);
274 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400275 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700276 Changed |= replaceVload(M);
277 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400278
279 return Changed;
280}
281
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400282bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
283 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000284
Kévin Petite8edce32019-04-10 14:23:32 +0100285 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000286
287 for (auto Name : Names) {
288 // If we find a function with the matching name.
289 if (auto F = M.getFunction(Name)) {
290 SmallVector<Instruction *, 4> ToRemoves;
291
292 // Walk the users of the function.
293 for (auto &U : F->uses()) {
294 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000295
Kévin Petite8edce32019-04-10 14:23:32 +0100296 auto NewValue = Replacer(CI);
297
298 if (NewValue != nullptr) {
299 CI->replaceAllUsesWith(NewValue);
300 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000301
302 // Lastly, remember to remove the user.
303 ToRemoves.push_back(CI);
304 }
305 }
306
307 Changed = !ToRemoves.empty();
308
309 // And cleanup the calls we don't use anymore.
310 for (auto V : ToRemoves) {
311 V->eraseFromParent();
312 }
313
314 // And remove the function we don't need either too.
315 F->eraseFromParent();
316 }
317 }
318
319 return Changed;
320}
321
Kévin Petite8edce32019-04-10 14:23:32 +0100322bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100323
Kévin Petite8edce32019-04-10 14:23:32 +0100324 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400325 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
326 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
327 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
328 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100329 };
330
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400331 return replaceCallsWithValue(M, Names,
332 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100333}
334
335bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
336
337 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400338 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
339 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
340 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
341 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
342 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
343 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
344 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
345 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
346 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
347 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
348 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100349 };
350
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400351 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100352 auto XValue = CI->getOperand(0);
353 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100354
Kévin Petite8edce32019-04-10 14:23:32 +0100355 IRBuilder<> Builder(CI);
356 auto XmY = Builder.CreateSub(XValue, YValue);
357 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100358
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400359 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100360 auto F = CI->getCalledFunction();
361 auto finfo = FunctionInfo::getFromMangledName(F->getName());
362 if (finfo.isArgSigned(0)) {
363 Cmp = Builder.CreateICmpSGT(YValue, XValue);
364 } else {
365 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100366 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100367
Kévin Petite8edce32019-04-10 14:23:32 +0100368 return Builder.CreateSelect(Cmp, YmX, XmY);
369 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100370}
371
Kévin Petit8c1be282019-04-02 19:34:25 +0100372bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100373
Kévin Petite8edce32019-04-10 14:23:32 +0100374 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400375 "_Z8copysignff",
376 "_Z8copysignDv2_fS_",
377 "_Z8copysignDv3_fS_",
378 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100379 };
380
Kévin Petite8edce32019-04-10 14:23:32 +0100381 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
382 auto XValue = CI->getOperand(0);
383 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100384
Kévin Petite8edce32019-04-10 14:23:32 +0100385 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100386
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400387 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100388 if (Ty->isVectorTy()) {
389 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100390 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100391
Kévin Petite8edce32019-04-10 14:23:32 +0100392 // Return X with the sign of Y
393
394 // Sign bit masks
395 auto SignBit = IntTy->getScalarSizeInBits() - 1;
396 auto SignBitMask = 1 << SignBit;
397 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
398 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
399
400 IRBuilder<> Builder(CI);
401
402 // Extract sign of Y
403 auto YInt = Builder.CreateBitCast(YValue, IntTy);
404 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
405
406 // Clear sign bit in X
407 auto XInt = Builder.CreateBitCast(XValue, IntTy);
408 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
409
410 // Insert sign bit of Y into X
411 auto NewXInt = Builder.CreateOr(XInt, YSign);
412
413 // And cast back to floating-point
414 return Builder.CreateBitCast(NewXInt, Ty);
415 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100416}
417
David Neto22f144c2017-06-12 14:26:21 -0400418bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400419
Kévin Petite8edce32019-04-10 14:23:32 +0100420 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400421 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
422 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
423 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
424 };
425
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400426 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100427 // Recip has one arg.
428 auto Arg = CI->getOperand(0);
429 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
430 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
431 });
David Neto22f144c2017-06-12 14:26:21 -0400432}
433
434bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400435
Kévin Petite8edce32019-04-10 14:23:32 +0100436 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400437 "_Z11half_divideff", "_Z13native_divideff",
438 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
439 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
440 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
441 };
442
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400443 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100444 auto Op0 = CI->getOperand(0);
445 auto Op1 = CI->getOperand(1);
446 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
447 });
David Neto22f144c2017-06-12 14:26:21 -0400448}
449
Kévin Petit1329a002019-06-15 05:54:05 +0100450bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
451
452 std::vector<const char *> Names = {
453 "_Z3dotff",
454 "_Z3dotDv2_fS_",
455 "_Z3dotDv3_fS_",
456 "_Z3dotDv4_fS_",
457 };
458
459 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
460 auto Op0 = CI->getOperand(0);
461 auto Op1 = CI->getOperand(1);
462
463 Value *V;
464 if (Op0->getType()->isVectorTy()) {
465 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
466 CI->getType(), {Op0, Op1});
467 } else {
468 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
469 }
470
471 return V;
472 });
473}
474
David Neto22f144c2017-06-12 14:26:21 -0400475bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
476 bool Changed = false;
477
478 const std::map<const char *, const char *> Map = {
479 {"_Z5exp10f", "_Z3expf"},
480 {"_Z10half_exp10f", "_Z8half_expf"},
481 {"_Z12native_exp10f", "_Z10native_expf"},
482 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
483 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
484 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
485 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
486 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
487 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
488 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
489 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
490 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
491
492 for (auto Pair : Map) {
493 // If we find a function with the matching name.
494 if (auto F = M.getFunction(Pair.first)) {
495 SmallVector<Instruction *, 4> ToRemoves;
496
497 // Walk the users of the function.
498 for (auto &U : F->uses()) {
499 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
500 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
501
502 auto Arg = CI->getOperand(0);
503
504 // Constant of the natural log of 10 (ln(10)).
505 const double Ln10 =
506 2.302585092994045684017991454684364207601101488628772976033;
507
508 auto Mul = BinaryOperator::Create(
509 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
510 CI);
511
512 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
513
514 CI->replaceAllUsesWith(NewCI);
515
516 // Lastly, remember to remove the user.
517 ToRemoves.push_back(CI);
518 }
519 }
520
521 Changed = !ToRemoves.empty();
522
523 // And cleanup the calls we don't use anymore.
524 for (auto V : ToRemoves) {
525 V->eraseFromParent();
526 }
527
528 // And remove the function we don't need either too.
529 F->eraseFromParent();
530 }
531 }
532
533 return Changed;
534}
535
Kévin Petit0644a9c2019-06-20 21:08:46 +0100536bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
537
538 std::vector<const char *> Names = {
539 "_Z4fmodff",
540 "_Z4fmodDv2_fS_",
541 "_Z4fmodDv3_fS_",
542 "_Z4fmodDv4_fS_",
543 };
544
545 // OpenCL fmod(x,y) is x - y * trunc(x/y)
546 // The sign for a non-zero result is taken from x.
547 // (Try an example.)
548 // So translate to FRem
549 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
550 auto Op0 = CI->getOperand(0);
551 auto Op1 = CI->getOperand(1);
552 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
553 });
554}
555
David Neto22f144c2017-06-12 14:26:21 -0400556bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
557 bool Changed = false;
558
559 const std::map<const char *, const char *> Map = {
560 {"_Z5log10f", "_Z3logf"},
561 {"_Z10half_log10f", "_Z8half_logf"},
562 {"_Z12native_log10f", "_Z10native_logf"},
563 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
564 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
565 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
566 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
567 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
568 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
569 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
570 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
571 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
572
573 for (auto Pair : Map) {
574 // If we find a function with the matching name.
575 if (auto F = M.getFunction(Pair.first)) {
576 SmallVector<Instruction *, 4> ToRemoves;
577
578 // Walk the users of the function.
579 for (auto &U : F->uses()) {
580 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
581 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
582
583 auto Arg = CI->getOperand(0);
584
585 // Constant of the reciprocal of the natural log of 10 (ln(10)).
586 const double Ln10 =
587 0.434294481903251827651128918916605082294397005803666566114;
588
589 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
590
591 auto Mul = BinaryOperator::Create(
592 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
593 "", CI);
594
595 CI->replaceAllUsesWith(Mul);
596
597 // Lastly, remember to remove the user.
598 ToRemoves.push_back(CI);
599 }
600 }
601
602 Changed = !ToRemoves.empty();
603
604 // And cleanup the calls we don't use anymore.
605 for (auto V : ToRemoves) {
606 V->eraseFromParent();
607 }
608
609 // And remove the function we don't need either too.
610 F->eraseFromParent();
611 }
612 }
613
614 return Changed;
615}
616
617bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400618
619 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
620
alan-bakerb60b1fc2019-12-13 19:09:38 -0500621 const std::vector<const char *> Names = {"_Z7barrierj",
622 // OpenCL 2.0 alias for barrier.
623 "_Z18work_group_barrierj"};
David Neto22f144c2017-06-12 14:26:21 -0400624
Kévin Petitc4643922019-06-17 19:32:05 +0100625 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
626 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400627
Kévin Petitc4643922019-06-17 19:32:05 +0100628 // We need to map the OpenCL constants to the SPIR-V equivalents.
629 const auto LocalMemFence =
630 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
631 const auto GlobalMemFence =
632 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
633 const auto ConstantSequentiallyConsistent = ConstantInt::get(
634 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
635 const auto ConstantScopeDevice =
636 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
637 const auto ConstantScopeWorkgroup =
638 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400639
Kévin Petitc4643922019-06-17 19:32:05 +0100640 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
641 const auto LocalMemFenceMask =
642 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
643 const auto WorkgroupShiftAmount =
644 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
645 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
646 Instruction::Shl, LocalMemFenceMask,
647 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400648
Kévin Petitc4643922019-06-17 19:32:05 +0100649 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
650 const auto GlobalMemFenceMask =
651 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
652 const auto UniformShiftAmount =
653 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
654 const auto MemorySemanticsUniform = BinaryOperator::Create(
655 Instruction::Shl, GlobalMemFenceMask,
656 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400657
Kévin Petitc4643922019-06-17 19:32:05 +0100658 // And combine the above together, also adding in
659 // MemorySemanticsSequentiallyConsistentMask.
660 auto MemorySemantics =
661 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
662 ConstantSequentiallyConsistent, "", CI);
663 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
664 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400665
Kévin Petitc4643922019-06-17 19:32:05 +0100666 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
667 // Device Scope, otherwise Workgroup Scope.
668 const auto Cmp =
669 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
670 GlobalMemFence, "", CI);
671 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
672 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400673
Kévin Petitc4643922019-06-17 19:32:05 +0100674 // Lastly, the Execution Scope is always Workgroup Scope.
675 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400676
Kévin Petitc4643922019-06-17 19:32:05 +0100677 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
678 {Attribute::NoDuplicate}, CI->getType(),
679 {ExecutionScope, MemoryScope, MemorySemantics});
680 });
David Neto22f144c2017-06-12 14:26:21 -0400681}
682
683bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
684 bool Changed = false;
685
686 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
687
Kévin Petitc4643922019-06-17 19:32:05 +0100688 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100689 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100690 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400691 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100692 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100693 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100694 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100695 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400696
697 for (auto Pair : Map) {
698 // If we find a function with the matching name.
699 if (auto F = M.getFunction(Pair.first)) {
700 SmallVector<Instruction *, 4> ToRemoves;
701
702 // Walk the users of the function.
703 for (auto &U : F->uses()) {
704 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400705
706 auto Arg = CI->getOperand(0);
707
708 // We need to map the OpenCL constants to the SPIR-V equivalents.
709 const auto LocalMemFence =
710 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
711 const auto GlobalMemFence =
712 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
713 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100714 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400715 const auto ConstantScopeDevice =
716 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
717
718 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
719 const auto LocalMemFenceMask = BinaryOperator::Create(
720 Instruction::And, LocalMemFence, Arg, "", CI);
721 const auto WorkgroupShiftAmount =
722 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
723 clz(CLK_LOCAL_MEM_FENCE);
724 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
725 Instruction::Shl, LocalMemFenceMask,
726 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
727
728 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
729 const auto GlobalMemFenceMask = BinaryOperator::Create(
730 Instruction::And, GlobalMemFence, Arg, "", CI);
731 const auto UniformShiftAmount =
732 clz(spv::MemorySemanticsUniformMemoryMask) -
733 clz(CLK_GLOBAL_MEM_FENCE);
734 const auto MemorySemanticsUniform = BinaryOperator::Create(
735 Instruction::Shl, GlobalMemFenceMask,
736 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
737
738 // And combine the above together, also adding in
739 // MemorySemanticsSequentiallyConsistentMask.
740 auto MemorySemantics =
741 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
742 ConstantMemorySemantics, "", CI);
743 MemorySemantics = BinaryOperator::Create(
744 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
745
746 // Memory Scope is always device.
747 const auto MemoryScope = ConstantScopeDevice;
748
Kévin Petitc4643922019-06-17 19:32:05 +0100749 const auto SPIRVOp = std::get<0>(Pair.second);
750 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
751 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400752
753 CI->replaceAllUsesWith(NewCI);
754
755 // Lastly, remember to remove the user.
756 ToRemoves.push_back(CI);
757 }
758 }
759
760 Changed = !ToRemoves.empty();
761
762 // And cleanup the calls we don't use anymore.
763 for (auto V : ToRemoves) {
764 V->eraseFromParent();
765 }
766
767 // And remove the function we don't need either too.
768 F->eraseFromParent();
769 }
770 }
771
772 return Changed;
773}
774
775bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
776 bool Changed = false;
777
778 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
779 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
780 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
781 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
782 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
783 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
784 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
785 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
786 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
787 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
788 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
789 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
790 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
791 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
792 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
793 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
794 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
795 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
796 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
797 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
798 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
799 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
800 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
801 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
802 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
803 };
804
805 for (auto Pair : Map) {
806 // If we find a function with the matching name.
807 if (auto F = M.getFunction(Pair.first)) {
808 SmallVector<Instruction *, 4> ToRemoves;
809
810 // Walk the users of the function.
811 for (auto &U : F->uses()) {
812 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
813 // The predicate to use in the CmpInst.
814 auto Predicate = Pair.second.first;
815
816 // The value to return for true.
817 auto TrueValue =
818 ConstantInt::getSigned(CI->getType(), Pair.second.second);
819
820 // The value to return for false.
821 auto FalseValue = Constant::getNullValue(CI->getType());
822
823 auto Arg1 = CI->getOperand(0);
824 auto Arg2 = CI->getOperand(1);
825
826 const auto Cmp =
827 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
828
829 const auto Select =
830 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
831
832 CI->replaceAllUsesWith(Select);
833
834 // Lastly, remember to remove the user.
835 ToRemoves.push_back(CI);
836 }
837 }
838
839 Changed = !ToRemoves.empty();
840
841 // And cleanup the calls we don't use anymore.
842 for (auto V : ToRemoves) {
843 V->eraseFromParent();
844 }
845
846 // And remove the function we don't need either too.
847 F->eraseFromParent();
848 }
849 }
850
851 return Changed;
852}
853
854bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
855 bool Changed = false;
856
Kévin Petitff03aee2019-06-12 19:39:03 +0100857 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
858 {"_Z5isinff", {spv::OpIsInf, 1}},
859 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
860 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
861 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
862 {"_Z5isnanf", {spv::OpIsNan, 1}},
863 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
864 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
865 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400866 };
867
868 for (auto Pair : Map) {
869 // If we find a function with the matching name.
870 if (auto F = M.getFunction(Pair.first)) {
871 SmallVector<Instruction *, 4> ToRemoves;
872
873 // Walk the users of the function.
874 for (auto &U : F->uses()) {
875 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
876 const auto CITy = CI->getType();
877
Kévin Petitff03aee2019-06-12 19:39:03 +0100878 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400879
880 // The value to return for true.
881 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
882
883 // The value to return for false.
884 auto FalseValue = Constant::getNullValue(CITy);
885
886 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
887 M.getContext(),
888 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
889
Kévin Petitff03aee2019-06-12 19:39:03 +0100890 auto NewCI =
891 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
892 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400893
894 const auto Select =
895 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
896
897 CI->replaceAllUsesWith(Select);
898
899 // Lastly, remember to remove the user.
900 ToRemoves.push_back(CI);
901 }
902 }
903
904 Changed = !ToRemoves.empty();
905
906 // And cleanup the calls we don't use anymore.
907 for (auto V : ToRemoves) {
908 V->eraseFromParent();
909 }
910
911 // And remove the function we don't need either too.
912 F->eraseFromParent();
913 }
914 }
915
916 return Changed;
917}
918
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100919bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Module &M) {
920 std::vector<const char *> Names = {
921 "_Z8isfiniteh", "_Z8isfiniteDv2_h", "_Z8isfiniteDv3_h",
922 "_Z8isfiniteDv4_h", "_Z8isfinitef", "_Z8isfiniteDv2_f",
923 "_Z8isfiniteDv3_f", "_Z8isfiniteDv4_f", "_Z8isfinited",
924 "_Z8isfiniteDv2_d", "_Z8isfiniteDv3_d", "_Z8isfiniteDv4_d",
925 };
926
927 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
928 auto &C = M.getContext();
929 auto Val = CI->getOperand(0);
930 auto ValTy = Val->getType();
931 auto RetTy = CI->getType();
932
933 // Get a suitable integer type to represent the number
934 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
935
936 // Create Mask
937 auto ScalarSize = ValTy->getScalarSizeInBits();
938 Value *InfMask;
939 switch (ScalarSize) {
940 case 16:
941 InfMask = ConstantInt::get(IntTy, 0x7C00U);
942 break;
943 case 32:
944 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
945 break;
946 case 64:
947 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
948 break;
949 default:
950 llvm_unreachable("Unsupported floating-point type");
951 }
952
953 IRBuilder<> Builder(CI);
954
955 // Bitcast to int
956 auto ValInt = Builder.CreateBitCast(Val, IntTy);
957
958 // Mask and compare
959 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
960 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
961
962 auto RetFalse = ConstantInt::get(RetTy, 0);
963 Value *RetTrue;
964 if (ValTy->isVectorTy()) {
965 RetTrue = ConstantInt::getSigned(RetTy, -1);
966 } else {
967 RetTrue = ConstantInt::get(RetTy, 1);
968 }
969 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
970 });
971}
972
David Neto22f144c2017-06-12 14:26:21 -0400973bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
974 bool Changed = false;
975
Kévin Petitff03aee2019-06-12 19:39:03 +0100976 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000977 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100978 {"_Z3allc", spv::OpNop},
979 {"_Z3allDv2_c", spv::OpAll},
980 {"_Z3allDv3_c", spv::OpAll},
981 {"_Z3allDv4_c", spv::OpAll},
982 {"_Z3alls", spv::OpNop},
983 {"_Z3allDv2_s", spv::OpAll},
984 {"_Z3allDv3_s", spv::OpAll},
985 {"_Z3allDv4_s", spv::OpAll},
986 {"_Z3alli", spv::OpNop},
987 {"_Z3allDv2_i", spv::OpAll},
988 {"_Z3allDv3_i", spv::OpAll},
989 {"_Z3allDv4_i", spv::OpAll},
990 {"_Z3alll", spv::OpNop},
991 {"_Z3allDv2_l", spv::OpAll},
992 {"_Z3allDv3_l", spv::OpAll},
993 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000994
995 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100996 {"_Z3anyc", spv::OpNop},
997 {"_Z3anyDv2_c", spv::OpAny},
998 {"_Z3anyDv3_c", spv::OpAny},
999 {"_Z3anyDv4_c", spv::OpAny},
1000 {"_Z3anys", spv::OpNop},
1001 {"_Z3anyDv2_s", spv::OpAny},
1002 {"_Z3anyDv3_s", spv::OpAny},
1003 {"_Z3anyDv4_s", spv::OpAny},
1004 {"_Z3anyi", spv::OpNop},
1005 {"_Z3anyDv2_i", spv::OpAny},
1006 {"_Z3anyDv3_i", spv::OpAny},
1007 {"_Z3anyDv4_i", spv::OpAny},
1008 {"_Z3anyl", spv::OpNop},
1009 {"_Z3anyDv2_l", spv::OpAny},
1010 {"_Z3anyDv3_l", spv::OpAny},
1011 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -04001012 };
1013
1014 for (auto Pair : Map) {
1015 // If we find a function with the matching name.
1016 if (auto F = M.getFunction(Pair.first)) {
1017 SmallVector<Instruction *, 4> ToRemoves;
1018
1019 // Walk the users of the function.
1020 for (auto &U : F->uses()) {
1021 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04001022
1023 auto Arg = CI->getOperand(0);
1024
1025 Value *V;
1026
Kévin Petitfd27cca2018-10-31 13:00:17 +00001027 // If the argument is a 32-bit int, just use a shift
1028 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1029 V = BinaryOperator::Create(Instruction::LShr, Arg,
1030 ConstantInt::get(Arg->getType(), 31), "",
1031 CI);
1032 } else {
David Neto22f144c2017-06-12 14:26:21 -04001033 // The value for zero to compare against.
1034 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1035
David Neto22f144c2017-06-12 14:26:21 -04001036 // The value to return for true.
1037 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1038
1039 // The value to return for false.
1040 const auto FalseValue = Constant::getNullValue(CI->getType());
1041
Kévin Petitfd27cca2018-10-31 13:00:17 +00001042 const auto Cmp = CmpInst::Create(
1043 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1044
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001045 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001046
1047 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +01001048 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001049
Kévin Petitff03aee2019-06-12 19:39:03 +01001050 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001051
Kévin Petitff03aee2019-06-12 19:39:03 +01001052 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001053
Kévin Petitff03aee2019-06-12 19:39:03 +01001054 const auto NewCI = clspv::InsertSPIRVOp(
1055 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001056 SelectSource = NewCI;
1057
1058 } else {
1059 SelectSource = Cmp;
1060 }
1061
1062 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001063 }
1064
1065 CI->replaceAllUsesWith(V);
1066
1067 // Lastly, remember to remove the user.
1068 ToRemoves.push_back(CI);
1069 }
1070 }
1071
1072 Changed = !ToRemoves.empty();
1073
1074 // And cleanup the calls we don't use anymore.
1075 for (auto V : ToRemoves) {
1076 V->eraseFromParent();
1077 }
1078
1079 // And remove the function we don't need either too.
1080 F->eraseFromParent();
1081 }
1082 }
1083
1084 return Changed;
1085}
1086
Kévin Petitbf0036c2019-03-06 13:57:10 +00001087bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1088 bool Changed = false;
1089
1090 for (auto const &SymVal : M.getValueSymbolTable()) {
1091 // Skip symbols whose name doesn't match
1092 if (!SymVal.getKey().startswith("_Z8upsample")) {
1093 continue;
1094 }
1095 // Is there a function going by that name?
1096 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1097
1098 SmallVector<Instruction *, 4> ToRemoves;
1099
1100 // Walk the users of the function.
1101 for (auto &U : F->uses()) {
1102 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1103
1104 // Get arguments
1105 auto HiValue = CI->getOperand(0);
1106 auto LoValue = CI->getOperand(1);
1107
1108 // Don't touch overloads that aren't in OpenCL C
1109 auto HiType = HiValue->getType();
1110 auto LoType = LoValue->getType();
1111
1112 if (HiType != LoType) {
1113 continue;
1114 }
1115
1116 if (!HiType->isIntOrIntVectorTy()) {
1117 continue;
1118 }
1119
1120 if (HiType->getScalarSizeInBits() * 2 !=
1121 CI->getType()->getScalarSizeInBits()) {
1122 continue;
1123 }
1124
1125 if ((HiType->getScalarSizeInBits() != 8) &&
1126 (HiType->getScalarSizeInBits() != 16) &&
1127 (HiType->getScalarSizeInBits() != 32)) {
1128 continue;
1129 }
1130
1131 if (HiType->isVectorTy()) {
1132 if ((HiType->getVectorNumElements() != 2) &&
1133 (HiType->getVectorNumElements() != 3) &&
1134 (HiType->getVectorNumElements() != 4) &&
1135 (HiType->getVectorNumElements() != 8) &&
1136 (HiType->getVectorNumElements() != 16)) {
1137 continue;
1138 }
1139 }
1140
1141 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001142 auto HiCast =
1143 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1144 auto LoCast =
1145 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001146
1147 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001148 auto ShiftAmount =
1149 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001150 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1151 ShiftAmount, "", CI);
1152
1153 // OR both results
1154 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1155 "", CI);
1156
1157 // Replace call with the expression
1158 CI->replaceAllUsesWith(V);
1159
1160 // Lastly, remember to remove the user.
1161 ToRemoves.push_back(CI);
1162 }
1163 }
1164
1165 Changed = !ToRemoves.empty();
1166
1167 // And cleanup the calls we don't use anymore.
1168 for (auto V : ToRemoves) {
1169 V->eraseFromParent();
1170 }
1171
1172 // And remove the function we don't need either too.
1173 F->eraseFromParent();
1174 }
1175 }
1176
1177 return Changed;
1178}
1179
Kévin Petitd44eef52019-03-08 13:22:14 +00001180bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1181 bool Changed = false;
1182
1183 for (auto const &SymVal : M.getValueSymbolTable()) {
1184 // Skip symbols whose name doesn't match
1185 if (!SymVal.getKey().startswith("_Z6rotate")) {
1186 continue;
1187 }
1188 // Is there a function going by that name?
1189 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1190
1191 SmallVector<Instruction *, 4> ToRemoves;
1192
1193 // Walk the users of the function.
1194 for (auto &U : F->uses()) {
1195 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1196
1197 // Get arguments
1198 auto SrcValue = CI->getOperand(0);
1199 auto RotAmount = CI->getOperand(1);
1200
1201 // Don't touch overloads that aren't in OpenCL C
1202 auto SrcType = SrcValue->getType();
1203 auto RotType = RotAmount->getType();
1204
1205 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1206 continue;
1207 }
1208
1209 if (!SrcType->isIntOrIntVectorTy()) {
1210 continue;
1211 }
1212
1213 if ((SrcType->getScalarSizeInBits() != 8) &&
1214 (SrcType->getScalarSizeInBits() != 16) &&
1215 (SrcType->getScalarSizeInBits() != 32) &&
1216 (SrcType->getScalarSizeInBits() != 64)) {
1217 continue;
1218 }
1219
1220 if (SrcType->isVectorTy()) {
1221 if ((SrcType->getVectorNumElements() != 2) &&
1222 (SrcType->getVectorNumElements() != 3) &&
1223 (SrcType->getVectorNumElements() != 4) &&
1224 (SrcType->getVectorNumElements() != 8) &&
1225 (SrcType->getVectorNumElements() != 16)) {
1226 continue;
1227 }
1228 }
1229
1230 // The approach used is to shift the top bits down, the bottom bits up
1231 // and OR the two shifted values.
1232
1233 // The rotation amount is to be treated modulo the element size.
1234 // Since SPIR-V shift ops don't support this, let's apply the
1235 // modulo ahead of shifting. The element size is always a power of
1236 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001237 auto ModMask =
1238 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001239 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1240 ModMask, "", CI);
1241
1242 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001243 auto ScalarSize =
1244 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001245 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1246 RotAmount, "", CI);
1247
1248 // Now shift the bottom bits up and the top bits down
1249 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1250 RotAmount, "", CI);
1251 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1252 DownAmount, "", CI);
1253
1254 // Finally OR the two shifted values
1255 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1256 HiRotated, "", CI);
1257
1258 // Replace call with the expression
1259 CI->replaceAllUsesWith(V);
1260
1261 // Lastly, remember to remove the user.
1262 ToRemoves.push_back(CI);
1263 }
1264 }
1265
1266 Changed = !ToRemoves.empty();
1267
1268 // And cleanup the calls we don't use anymore.
1269 for (auto V : ToRemoves) {
1270 V->eraseFromParent();
1271 }
1272
1273 // And remove the function we don't need either too.
1274 F->eraseFromParent();
1275 }
1276 }
1277
1278 return Changed;
1279}
1280
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001281bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1282 bool Changed = false;
1283
1284 for (auto const &SymVal : M.getValueSymbolTable()) {
1285
1286 // Skip symbols whose name obviously doesn't match
1287 if (!SymVal.getKey().contains("convert_")) {
1288 continue;
1289 }
1290
1291 // Is there a function going by that name?
1292 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1293
1294 // Get info from the mangled name
1295 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001296 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001297
1298 // All functions of interest are handled by our mangled name parser
1299 if (!parsed) {
1300 continue;
1301 }
1302
1303 // Move on if this isn't a call to convert_
1304 if (!finfo.name.startswith("convert_")) {
1305 continue;
1306 }
1307
1308 // Extract the destination type from the function name
1309 StringRef DstTypeName = finfo.name;
1310 DstTypeName.consume_front("convert_");
1311
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001312 auto DstSignedNess =
1313 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1314 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1315 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1316 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1317 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1318 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1319 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1320 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1321 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1322 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001323
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001324 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001325 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001326
1327 SmallVector<Instruction *, 4> ToRemoves;
1328
1329 // Walk the users of the function.
1330 for (auto &U : F->uses()) {
1331 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1332
1333 // Get arguments
1334 auto SrcValue = CI->getOperand(0);
1335
1336 // Don't touch overloads that aren't in OpenCL C
1337 auto SrcType = SrcValue->getType();
1338 auto DstType = CI->getType();
1339
1340 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1341 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1342 continue;
1343 }
1344
1345 if (SrcType->isVectorTy()) {
1346
1347 if (SrcType->getVectorNumElements() !=
1348 DstType->getVectorNumElements()) {
1349 continue;
1350 }
1351
1352 if ((SrcType->getVectorNumElements() != 2) &&
1353 (SrcType->getVectorNumElements() != 3) &&
1354 (SrcType->getVectorNumElements() != 4) &&
1355 (SrcType->getVectorNumElements() != 8) &&
1356 (SrcType->getVectorNumElements() != 16)) {
1357 continue;
1358 }
1359 }
1360
1361 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1362 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1363
1364 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1365 bool DstIsInt = DstType->isIntOrIntVectorTy();
1366
1367 Value *V;
1368 if (SrcIsFloat && DstIsFloat) {
1369 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1370 } else if (SrcIsFloat && DstIsInt) {
1371 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001372 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1373 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001374 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001375 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1376 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001377 }
1378 } else if (SrcIsInt && DstIsFloat) {
1379 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001380 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1381 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001382 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001383 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1384 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001385 }
1386 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001387 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1388 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001389 } else {
1390 // Not something we're supposed to handle, just move on
1391 continue;
1392 }
1393
1394 // Replace call with the expression
1395 CI->replaceAllUsesWith(V);
1396
1397 // Lastly, remember to remove the user.
1398 ToRemoves.push_back(CI);
1399 }
1400 }
1401
1402 Changed = !ToRemoves.empty();
1403
1404 // And cleanup the calls we don't use anymore.
1405 for (auto V : ToRemoves) {
1406 V->eraseFromParent();
1407 }
1408
1409 // And remove the function we don't need either too.
1410 F->eraseFromParent();
1411 }
1412 }
1413
1414 return Changed;
1415}
1416
Kévin Petit8a560882019-03-21 15:24:34 +00001417bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1418 bool Changed = false;
1419
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001420 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001421
Kévin Petit617a76d2019-04-04 13:54:16 +01001422 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001423 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1424 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1425
1426 // Skip symbols whose name doesn't match
1427 if (!isMad && !isMul) {
1428 continue;
1429 }
1430
1431 // Is there a function going by that name?
1432 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001433 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001434 }
1435 }
1436
Kévin Petit617a76d2019-04-04 13:54:16 +01001437 for (auto F : FnWorklist) {
1438 SmallVector<Instruction *, 4> ToRemoves;
1439
1440 bool isMad = F->getName().startswith("_Z6mad_hi");
1441 // Walk the users of the function.
1442 for (auto &U : F->uses()) {
1443 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1444
1445 // Get arguments
1446 auto AValue = CI->getOperand(0);
1447 auto BValue = CI->getOperand(1);
1448 auto CValue = CI->getOperand(2);
1449
1450 // Don't touch overloads that aren't in OpenCL C
1451 auto AType = AValue->getType();
1452 auto BType = BValue->getType();
1453 auto CType = CValue->getType();
1454
1455 if ((AType != BType) || (CI->getType() != AType) ||
1456 (isMad && (AType != CType))) {
1457 continue;
1458 }
1459
1460 if (!AType->isIntOrIntVectorTy()) {
1461 continue;
1462 }
1463
1464 if ((AType->getScalarSizeInBits() != 8) &&
1465 (AType->getScalarSizeInBits() != 16) &&
1466 (AType->getScalarSizeInBits() != 32) &&
1467 (AType->getScalarSizeInBits() != 64)) {
1468 continue;
1469 }
1470
1471 if (AType->isVectorTy()) {
1472 if ((AType->getVectorNumElements() != 2) &&
1473 (AType->getVectorNumElements() != 3) &&
1474 (AType->getVectorNumElements() != 4) &&
1475 (AType->getVectorNumElements() != 8) &&
1476 (AType->getVectorNumElements() != 16)) {
1477 continue;
1478 }
1479 }
1480
1481 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001482 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001483
1484 // Select the appropriate signed/unsigned SPIR-V op
1485 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001486 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001487 opcode = spv::OpSMulExtended;
1488 } else {
1489 opcode = spv::OpUMulExtended;
1490 }
1491
1492 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001493 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001494 auto ExMulRetType = StructType::create(TwoValueType);
1495
1496 // Call the SPIR-V op
1497 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1498 ExMulRetType, {AValue, BValue});
1499
1500 // Get the high part of the result
1501 unsigned Idxs[] = {1};
1502 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1503
1504 // If we're handling a mad_hi, add the third argument to the result
1505 if (isMad) {
1506 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1507 }
1508
1509 // Replace call with the expression
1510 CI->replaceAllUsesWith(V);
1511
1512 // Lastly, remember to remove the user.
1513 ToRemoves.push_back(CI);
1514 }
1515 }
1516
1517 Changed = !ToRemoves.empty();
1518
1519 // And cleanup the calls we don't use anymore.
1520 for (auto V : ToRemoves) {
1521 V->eraseFromParent();
1522 }
1523
1524 // And remove the function we don't need either too.
1525 F->eraseFromParent();
1526 }
1527
Kévin Petit8a560882019-03-21 15:24:34 +00001528 return Changed;
1529}
1530
Kévin Petitf5b78a22018-10-25 14:32:17 +00001531bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1532 bool Changed = false;
1533
1534 for (auto const &SymVal : M.getValueSymbolTable()) {
1535 // Skip symbols whose name doesn't match
1536 if (!SymVal.getKey().startswith("_Z6select")) {
1537 continue;
1538 }
1539 // Is there a function going by that name?
1540 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1541
1542 SmallVector<Instruction *, 4> ToRemoves;
1543
1544 // Walk the users of the function.
1545 for (auto &U : F->uses()) {
1546 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1547
1548 // Get arguments
1549 auto FalseValue = CI->getOperand(0);
1550 auto TrueValue = CI->getOperand(1);
1551 auto PredicateValue = CI->getOperand(2);
1552
1553 // Don't touch overloads that aren't in OpenCL C
1554 auto FalseType = FalseValue->getType();
1555 auto TrueType = TrueValue->getType();
1556 auto PredicateType = PredicateValue->getType();
1557
1558 if (FalseType != TrueType) {
1559 continue;
1560 }
1561
1562 if (!PredicateType->isIntOrIntVectorTy()) {
1563 continue;
1564 }
1565
1566 if (!FalseType->isIntOrIntVectorTy() &&
1567 !FalseType->getScalarType()->isFloatingPointTy()) {
1568 continue;
1569 }
1570
1571 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1572 continue;
1573 }
1574
1575 if (FalseType->getScalarSizeInBits() !=
1576 PredicateType->getScalarSizeInBits()) {
1577 continue;
1578 }
1579
1580 if (FalseType->isVectorTy()) {
1581 if (FalseType->getVectorNumElements() !=
1582 PredicateType->getVectorNumElements()) {
1583 continue;
1584 }
1585
1586 if ((FalseType->getVectorNumElements() != 2) &&
1587 (FalseType->getVectorNumElements() != 3) &&
1588 (FalseType->getVectorNumElements() != 4) &&
1589 (FalseType->getVectorNumElements() != 8) &&
1590 (FalseType->getVectorNumElements() != 16)) {
1591 continue;
1592 }
1593 }
1594
1595 // Create constant
1596 const auto ZeroValue = Constant::getNullValue(PredicateType);
1597
1598 // Scalar and vector are to be treated differently
1599 CmpInst::Predicate Pred;
1600 if (PredicateType->isVectorTy()) {
1601 Pred = CmpInst::ICMP_SLT;
1602 } else {
1603 Pred = CmpInst::ICMP_NE;
1604 }
1605
1606 // Create comparison instruction
1607 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1608 ZeroValue, "", CI);
1609
1610 // Create select
1611 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1612
1613 // Replace call with the selection
1614 CI->replaceAllUsesWith(V);
1615
1616 // Lastly, remember to remove the user.
1617 ToRemoves.push_back(CI);
1618 }
1619 }
1620
1621 Changed = !ToRemoves.empty();
1622
1623 // And cleanup the calls we don't use anymore.
1624 for (auto V : ToRemoves) {
1625 V->eraseFromParent();
1626 }
1627
1628 // And remove the function we don't need either too.
1629 F->eraseFromParent();
1630 }
1631 }
1632
1633 return Changed;
1634}
1635
Kévin Petite7d0cce2018-10-31 12:38:56 +00001636bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1637 bool Changed = false;
1638
1639 for (auto const &SymVal : M.getValueSymbolTable()) {
1640 // Skip symbols whose name doesn't match
1641 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1642 continue;
1643 }
1644 // Is there a function going by that name?
1645 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1646
1647 SmallVector<Instruction *, 4> ToRemoves;
1648
1649 // Walk the users of the function.
1650 for (auto &U : F->uses()) {
1651 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1652
1653 if (CI->getNumOperands() != 4) {
1654 continue;
1655 }
1656
1657 // Get arguments
1658 auto FalseValue = CI->getOperand(0);
1659 auto TrueValue = CI->getOperand(1);
1660 auto PredicateValue = CI->getOperand(2);
1661
1662 // Don't touch overloads that aren't in OpenCL C
1663 auto FalseType = FalseValue->getType();
1664 auto TrueType = TrueValue->getType();
1665 auto PredicateType = PredicateValue->getType();
1666
1667 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1668 continue;
1669 }
1670
1671 if (TrueType->isVectorTy()) {
1672 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1673 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001674 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001675 }
1676 if ((TrueType->getVectorNumElements() != 2) &&
1677 (TrueType->getVectorNumElements() != 3) &&
1678 (TrueType->getVectorNumElements() != 4) &&
1679 (TrueType->getVectorNumElements() != 8) &&
1680 (TrueType->getVectorNumElements() != 16)) {
1681 continue;
1682 }
1683 }
1684
1685 // Remember the type of the operands
1686 auto OpType = TrueType;
1687
1688 // The actual bit selection will always be done on an integer type,
1689 // declare it here
1690 Type *BitType;
1691
1692 // If the operands are float, then bitcast them to int
1693 if (OpType->getScalarType()->isFloatingPointTy()) {
1694
1695 // First create the new type
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001696 BitType = getIntOrIntVectorTyForCast(M.getContext(), OpType);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001697
1698 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001699 PredicateValue =
1700 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1701 FalseValue =
1702 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1703 TrueValue =
1704 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001705
1706 } else {
1707 // The operands have an integer type, use it directly
1708 BitType = OpType;
1709 }
1710
1711 // All the operands are now always integers
1712 // implement as (c & b) | (~c & a)
1713
1714 // Create our negated predicate value
1715 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001716 auto NotPredicateValue = BinaryOperator::Create(
1717 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001718
1719 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001720 auto BitsFalse = BinaryOperator::Create(
1721 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1722 auto BitsTrue = BinaryOperator::Create(
1723 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001724
1725 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1726 BitsTrue, "", CI);
1727
1728 // If we were dealing with a floating point type, we must bitcast
1729 // the result back to that
1730 if (OpType->getScalarType()->isFloatingPointTy()) {
1731 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1732 }
1733
1734 // Replace call with our new code
1735 CI->replaceAllUsesWith(V);
1736
1737 // Lastly, remember to remove the user.
1738 ToRemoves.push_back(CI);
1739 }
1740 }
1741
1742 Changed = !ToRemoves.empty();
1743
1744 // And cleanup the calls we don't use anymore.
1745 for (auto V : ToRemoves) {
1746 V->eraseFromParent();
1747 }
1748
1749 // And remove the function we don't need either too.
1750 F->eraseFromParent();
1751 }
1752 }
1753
1754 return Changed;
1755}
1756
Kévin Petit6b0a9532018-10-30 20:00:39 +00001757bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1758 bool Changed = false;
1759
1760 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001761 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1762 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1763 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1764 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1765 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1766 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001767 };
1768
1769 for (auto Pair : Map) {
1770 // If we find a function with the matching name.
1771 if (auto F = M.getFunction(Pair.first)) {
1772 SmallVector<Instruction *, 4> ToRemoves;
1773
1774 // Walk the users of the function.
1775 for (auto &U : F->uses()) {
1776 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1777
1778 auto ReplacementFn = Pair.second;
1779
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001780 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001781 Value *VectorArg;
1782
1783 // First figure out which function we're dealing with
1784 if (F->getName().startswith("_Z10smoothstep")) {
1785 ArgsToSplat.push_back(CI->getOperand(1));
1786 VectorArg = CI->getOperand(2);
1787 } else {
1788 VectorArg = CI->getOperand(1);
1789 }
1790
1791 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001792 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001793 auto VecType = VectorArg->getType();
1794
1795 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001796 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001797 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001798 auto index =
1799 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1800 NewVectorArg =
1801 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001802 }
1803 SplatArgs.push_back(NewVectorArg);
1804 }
1805
1806 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001807 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1808 const auto NewFType =
1809 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001810
1811 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1812
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001813 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001814 for (auto arg : SplatArgs) {
1815 NewArgs.push_back(arg);
1816 }
1817 NewArgs.push_back(VectorArg);
1818
1819 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1820
1821 CI->replaceAllUsesWith(NewCI);
1822
1823 // Lastly, remember to remove the user.
1824 ToRemoves.push_back(CI);
1825 }
1826 }
1827
1828 Changed = !ToRemoves.empty();
1829
1830 // And cleanup the calls we don't use anymore.
1831 for (auto V : ToRemoves) {
1832 V->eraseFromParent();
1833 }
1834
1835 // And remove the function we don't need either too.
1836 F->eraseFromParent();
1837 }
1838 }
1839
1840 return Changed;
1841}
1842
David Neto22f144c2017-06-12 14:26:21 -04001843bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1844 bool Changed = false;
1845
1846 const std::map<const char *, Instruction::BinaryOps> Map = {
1847 {"_Z7signbitf", Instruction::LShr},
1848 {"_Z7signbitDv2_f", Instruction::AShr},
1849 {"_Z7signbitDv3_f", Instruction::AShr},
1850 {"_Z7signbitDv4_f", Instruction::AShr},
1851 };
1852
1853 for (auto Pair : Map) {
1854 // If we find a function with the matching name.
1855 if (auto F = M.getFunction(Pair.first)) {
1856 SmallVector<Instruction *, 4> ToRemoves;
1857
1858 // Walk the users of the function.
1859 for (auto &U : F->uses()) {
1860 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1861 auto Arg = CI->getOperand(0);
1862
1863 auto Bitcast =
1864 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1865
1866 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1867 ConstantInt::get(CI->getType(), 31),
1868 "", CI);
1869
1870 CI->replaceAllUsesWith(Shr);
1871
1872 // Lastly, remember to remove the user.
1873 ToRemoves.push_back(CI);
1874 }
1875 }
1876
1877 Changed = !ToRemoves.empty();
1878
1879 // And cleanup the calls we don't use anymore.
1880 for (auto V : ToRemoves) {
1881 V->eraseFromParent();
1882 }
1883
1884 // And remove the function we don't need either too.
1885 F->eraseFromParent();
1886 }
1887 }
1888
1889 return Changed;
1890}
1891
1892bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1893 bool Changed = false;
1894
1895 const std::map<const char *,
1896 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1897 Map = {
1898 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1899 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1900 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1901 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1902 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1903 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1904 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1905 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1906 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1907 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1908 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1909 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1910 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1911 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1912 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1913 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1914 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1915 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1916 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1917 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1918 };
1919
1920 for (auto Pair : Map) {
1921 // If we find a function with the matching name.
1922 if (auto F = M.getFunction(Pair.first)) {
1923 SmallVector<Instruction *, 4> ToRemoves;
1924
1925 // Walk the users of the function.
1926 for (auto &U : F->uses()) {
1927 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1928 // The multiply instruction to use.
1929 auto MulInst = Pair.second.first;
1930
1931 // The add instruction to use.
1932 auto AddInst = Pair.second.second;
1933
1934 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1935
1936 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1937 CI->getArgOperand(1), "", CI);
1938
1939 if (Instruction::BinaryOpsEnd != AddInst) {
1940 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1941 CI);
1942 }
1943
1944 CI->replaceAllUsesWith(I);
1945
1946 // Lastly, remember to remove the user.
1947 ToRemoves.push_back(CI);
1948 }
1949 }
1950
1951 Changed = !ToRemoves.empty();
1952
1953 // And cleanup the calls we don't use anymore.
1954 for (auto V : ToRemoves) {
1955 V->eraseFromParent();
1956 }
1957
1958 // And remove the function we don't need either too.
1959 F->eraseFromParent();
1960 }
1961 }
1962
1963 return Changed;
1964}
1965
Derek Chowcfd368b2017-10-19 20:58:45 -07001966bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1967 bool Changed = false;
1968
alan-bakerf795f392019-06-11 18:24:34 -04001969 for (auto const &SymVal : M.getValueSymbolTable()) {
1970 if (!SymVal.getKey().contains("vstore"))
1971 continue;
1972 if (SymVal.getKey().contains("vstore_"))
1973 continue;
1974 if (SymVal.getKey().contains("vstorea"))
1975 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001976
alan-bakerf795f392019-06-11 18:24:34 -04001977 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001978 SmallVector<Instruction *, 4> ToRemoves;
1979
alan-bakerf795f392019-06-11 18:24:34 -04001980 auto fname = F->getName();
1981 if (!fname.consume_front("_Z"))
1982 continue;
1983 size_t name_len;
1984 if (fname.consumeInteger(10, name_len))
1985 continue;
1986 std::string name = fname.take_front(name_len);
1987
1988 bool ok = StringSwitch<bool>(name)
1989 .Case("vstore2", true)
1990 .Case("vstore3", true)
1991 .Case("vstore4", true)
1992 .Case("vstore8", true)
1993 .Case("vstore16", true)
1994 .Default(false);
1995 if (!ok)
1996 continue;
1997
Derek Chowcfd368b2017-10-19 20:58:45 -07001998 for (auto &U : F->uses()) {
1999 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002000 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07002001
alan-bakerf795f392019-06-11 18:24:34 -04002002 auto data_type = data->getType();
2003 if (!data_type->isVectorTy())
2004 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002005
alan-bakerf795f392019-06-11 18:24:34 -04002006 auto elems = data_type->getVectorNumElements();
2007 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2008 elems != 16)
2009 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002010
alan-bakerf795f392019-06-11 18:24:34 -04002011 auto offset = CI->getOperand(1);
2012 auto ptr = CI->getOperand(2);
2013 auto ptr_type = ptr->getType();
2014 auto pointee_type = ptr_type->getPointerElementType();
2015 if (pointee_type != data_type->getVectorElementType())
2016 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002017
alan-bakerf795f392019-06-11 18:24:34 -04002018 // Avoid pointer casts. Instead generate the correct number of stores
2019 // and rely on drivers to coalesce appropriately.
2020 IRBuilder<> builder(CI);
2021 auto elems_const = builder.getInt32(elems);
2022 auto adjust = builder.CreateMul(offset, elems_const);
2023 for (auto i = 0; i < elems; ++i) {
2024 auto idx = builder.getInt32(i);
2025 auto add = builder.CreateAdd(adjust, idx);
2026 auto gep = builder.CreateGEP(ptr, add);
2027 auto extract = builder.CreateExtractElement(data, i);
2028 auto store = builder.CreateStore(extract, gep);
2029 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002030
Derek Chowcfd368b2017-10-19 20:58:45 -07002031 ToRemoves.push_back(CI);
2032 }
2033 }
2034
2035 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002036 for (auto V : ToRemoves) {
2037 V->eraseFromParent();
2038 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002039 F->eraseFromParent();
2040 }
2041 }
2042
2043 return Changed;
2044}
2045
2046bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2047 bool Changed = false;
2048
alan-bakerf795f392019-06-11 18:24:34 -04002049 for (auto const &SymVal : M.getValueSymbolTable()) {
2050 if (!SymVal.getKey().contains("vload"))
2051 continue;
2052 if (SymVal.getKey().contains("vload_"))
2053 continue;
2054 if (SymVal.getKey().contains("vloada"))
2055 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002056
alan-bakerf795f392019-06-11 18:24:34 -04002057 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002058 SmallVector<Instruction *, 4> ToRemoves;
2059
alan-bakerf795f392019-06-11 18:24:34 -04002060 auto fname = F->getName();
2061 if (!fname.consume_front("_Z"))
2062 continue;
2063 size_t name_len;
2064 if (fname.consumeInteger(10, name_len))
2065 continue;
2066 std::string name = fname.take_front(name_len);
2067
2068 bool ok = StringSwitch<bool>(name)
2069 .Case("vload2", true)
2070 .Case("vload3", true)
2071 .Case("vload4", true)
2072 .Case("vload8", true)
2073 .Case("vload16", true)
2074 .Default(false);
2075 if (!ok)
2076 continue;
2077
Derek Chowcfd368b2017-10-19 20:58:45 -07002078 for (auto &U : F->uses()) {
2079 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002080 auto ret_type = F->getReturnType();
2081 if (!ret_type->isVectorTy())
2082 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002083
alan-bakerf795f392019-06-11 18:24:34 -04002084 auto elems = ret_type->getVectorNumElements();
2085 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2086 elems != 16)
2087 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002088
alan-bakerf795f392019-06-11 18:24:34 -04002089 auto offset = CI->getOperand(0);
2090 auto ptr = CI->getOperand(1);
2091 auto ptr_type = ptr->getType();
2092 auto pointee_type = ptr_type->getPointerElementType();
2093 if (pointee_type != ret_type->getVectorElementType())
2094 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002095
alan-bakerf795f392019-06-11 18:24:34 -04002096 // Avoid pointer casts. Instead generate the correct number of loads
2097 // and rely on drivers to coalesce appropriately.
2098 IRBuilder<> builder(CI);
2099 auto elems_const = builder.getInt32(elems);
2100 Value *insert = UndefValue::get(ret_type);
2101 auto adjust = builder.CreateMul(offset, elems_const);
2102 for (auto i = 0; i < elems; ++i) {
2103 auto idx = builder.getInt32(i);
2104 auto add = builder.CreateAdd(adjust, idx);
2105 auto gep = builder.CreateGEP(ptr, add);
2106 auto load = builder.CreateLoad(gep);
2107 insert = builder.CreateInsertElement(insert, load, i);
2108 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002109
alan-bakerf795f392019-06-11 18:24:34 -04002110 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002111 ToRemoves.push_back(CI);
2112 }
2113 }
2114
2115 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002116 for (auto V : ToRemoves) {
2117 V->eraseFromParent();
2118 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002119 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002120 }
2121 }
2122
2123 return Changed;
2124}
2125
David Neto22f144c2017-06-12 14:26:21 -04002126bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2127 bool Changed = false;
2128
2129 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2130 "_Z10vload_halfjPU3AS2KDh"};
2131
2132 for (auto Name : Map) {
2133 // If we find a function with the matching name.
2134 if (auto F = M.getFunction(Name)) {
2135 SmallVector<Instruction *, 4> ToRemoves;
2136
2137 // Walk the users of the function.
2138 for (auto &U : F->uses()) {
2139 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2140 // The index argument from vload_half.
2141 auto Arg0 = CI->getOperand(0);
2142
2143 // The pointer argument from vload_half.
2144 auto Arg1 = CI->getOperand(1);
2145
David Neto22f144c2017-06-12 14:26:21 -04002146 auto IntTy = Type::getInt32Ty(M.getContext());
2147 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002148 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2149
David Neto22f144c2017-06-12 14:26:21 -04002150 // Our intrinsic to unpack a float2 from an int.
2151 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2152
2153 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2154
David Neto482550a2018-03-24 05:21:07 -07002155 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002156 auto ShortTy = Type::getInt16Ty(M.getContext());
2157 auto ShortPointerTy = PointerType::get(
2158 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002159
David Netoac825b82017-05-30 12:49:01 -04002160 // Cast the half* pointer to short*.
2161 auto Cast =
2162 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002163
David Netoac825b82017-05-30 12:49:01 -04002164 // Index into the correct address of the casted pointer.
2165 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2166
2167 // Load from the short* we casted to.
2168 auto Load = new LoadInst(Index, "", CI);
2169
2170 // ZExt the short -> int.
2171 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2172
2173 // Get our float2.
2174 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2175
2176 // Extract out the bottom element which is our float result.
2177 auto Extract = ExtractElementInst::Create(
2178 Call, ConstantInt::get(IntTy, 0), "", CI);
2179
2180 CI->replaceAllUsesWith(Extract);
2181 } else {
2182 // Assume the pointer argument points to storage aligned to 32bits
2183 // or more.
2184 // TODO(dneto): Do more analysis to make sure this is true?
2185 //
2186 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2187 // with:
2188 //
2189 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2190 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2191 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2192 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2193 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2194 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2195 // x float> %converted, %index_is_odd32
2196
2197 auto IntPointerTy = PointerType::get(
2198 IntTy, Arg1->getType()->getPointerAddressSpace());
2199
David Neto973e6a82017-05-30 13:48:18 -04002200 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002201 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002202 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002203 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2204
2205 auto One = ConstantInt::get(IntTy, 1);
2206 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2207 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2208
2209 // Index into the correct address of the casted pointer.
2210 auto Ptr =
2211 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2212
2213 // Load from the int* we casted to.
2214 auto Load = new LoadInst(Ptr, "", CI);
2215
2216 // Get our float2.
2217 auto Call = CallInst::Create(NewF, Load, "", CI);
2218
2219 // Extract out the float result, where the element number is
2220 // determined by whether the original index was even or odd.
2221 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2222
2223 CI->replaceAllUsesWith(Extract);
2224 }
David Neto22f144c2017-06-12 14:26:21 -04002225
2226 // Lastly, remember to remove the user.
2227 ToRemoves.push_back(CI);
2228 }
2229 }
2230
2231 Changed = !ToRemoves.empty();
2232
2233 // And cleanup the calls we don't use anymore.
2234 for (auto V : ToRemoves) {
2235 V->eraseFromParent();
2236 }
2237
2238 // And remove the function we don't need either too.
2239 F->eraseFromParent();
2240 }
2241 }
2242
2243 return Changed;
2244}
2245
2246bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002247
Kévin Petite8edce32019-04-10 14:23:32 +01002248 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002249 "_Z11vload_half2jPU3AS1KDh",
2250 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2251 "_Z11vload_half2jPU3AS2KDh",
2252 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2253 };
David Neto22f144c2017-06-12 14:26:21 -04002254
Kévin Petite8edce32019-04-10 14:23:32 +01002255 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2256 // The index argument from vload_half.
2257 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002258
Kévin Petite8edce32019-04-10 14:23:32 +01002259 // The pointer argument from vload_half.
2260 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002261
Kévin Petite8edce32019-04-10 14:23:32 +01002262 auto IntTy = Type::getInt32Ty(M.getContext());
2263 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002264 auto NewPointerTy =
2265 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002266 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002267
Kévin Petite8edce32019-04-10 14:23:32 +01002268 // Cast the half* pointer to int*.
2269 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002270
Kévin Petite8edce32019-04-10 14:23:32 +01002271 // Index into the correct address of the casted pointer.
2272 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002273
Kévin Petite8edce32019-04-10 14:23:32 +01002274 // Load from the int* we casted to.
2275 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002276
Kévin Petite8edce32019-04-10 14:23:32 +01002277 // Our intrinsic to unpack a float2 from an int.
2278 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002279
Kévin Petite8edce32019-04-10 14:23:32 +01002280 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002281
Kévin Petite8edce32019-04-10 14:23:32 +01002282 // Get our float2.
2283 return CallInst::Create(NewF, Load, "", CI);
2284 });
David Neto22f144c2017-06-12 14:26:21 -04002285}
2286
2287bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002290 "_Z11vload_half4jPU3AS1KDh",
2291 "_Z12vloada_half4jPU3AS1KDh",
2292 "_Z11vload_half4jPU3AS2KDh",
2293 "_Z12vloada_half4jPU3AS2KDh",
2294 };
David Neto22f144c2017-06-12 14:26:21 -04002295
Kévin Petite8edce32019-04-10 14:23:32 +01002296 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2297 // The index argument from vload_half.
2298 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002299
Kévin Petite8edce32019-04-10 14:23:32 +01002300 // The pointer argument from vload_half.
2301 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002302
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto IntTy = Type::getInt32Ty(M.getContext());
2304 auto Int2Ty = VectorType::get(IntTy, 2);
2305 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002306 auto NewPointerTy =
2307 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002308 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002309
Kévin Petite8edce32019-04-10 14:23:32 +01002310 // Cast the half* pointer to int2*.
2311 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002312
Kévin Petite8edce32019-04-10 14:23:32 +01002313 // Index into the correct address of the casted pointer.
2314 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002315
Kévin Petite8edce32019-04-10 14:23:32 +01002316 // Load from the int2* we casted to.
2317 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002318
Kévin Petite8edce32019-04-10 14:23:32 +01002319 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002320 auto X =
2321 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2322 auto Y =
2323 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002324
Kévin Petite8edce32019-04-10 14:23:32 +01002325 // Our intrinsic to unpack a float2 from an int.
2326 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002329
Kévin Petite8edce32019-04-10 14:23:32 +01002330 // Get the lower (x & y) components of our final float4.
2331 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002332
Kévin Petite8edce32019-04-10 14:23:32 +01002333 // Get the higher (z & w) components of our final float4.
2334 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002335
Kévin Petite8edce32019-04-10 14:23:32 +01002336 Constant *ShuffleMask[4] = {
2337 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2338 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002339
Kévin Petite8edce32019-04-10 14:23:32 +01002340 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002341 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2342 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002343 });
David Neto22f144c2017-06-12 14:26:21 -04002344}
2345
David Neto6ad93232018-06-07 15:42:58 -07002346bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002347
2348 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2349 //
2350 // %u = load i32 %ptr
2351 // %fxy = call <2 x float> Unpack2xHalf(u)
2352 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002353 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002354 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2355 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2356 "_Z20__clspv_vloada_half2jPKj", // private
2357 };
2358
Kévin Petite8edce32019-04-10 14:23:32 +01002359 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2360 auto Index = CI->getOperand(0);
2361 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 auto IntTy = Type::getInt32Ty(M.getContext());
2364 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2365 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002366
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002367 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002368 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002369
Kévin Petite8edce32019-04-10 14:23:32 +01002370 // Our intrinsic to unpack a float2 from an int.
2371 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002372
Kévin Petite8edce32019-04-10 14:23:32 +01002373 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 // Get our final float2.
2376 return CallInst::Create(NewF, Load, "", CI);
2377 });
David Neto6ad93232018-06-07 15:42:58 -07002378}
2379
2380bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002381
2382 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2383 //
2384 // %u2 = load <2 x i32> %ptr
2385 // %u2xy = extractelement %u2, 0
2386 // %u2zw = extractelement %u2, 1
2387 // %fxy = call <2 x float> Unpack2xHalf(uint)
2388 // %fzw = call <2 x float> Unpack2xHalf(uint)
2389 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002390 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002391 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2392 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2393 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2394 };
2395
Kévin Petite8edce32019-04-10 14:23:32 +01002396 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2397 auto Index = CI->getOperand(0);
2398 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002399
Kévin Petite8edce32019-04-10 14:23:32 +01002400 auto IntTy = Type::getInt32Ty(M.getContext());
2401 auto Int2Ty = VectorType::get(IntTy, 2);
2402 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2403 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002404
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002405 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002406 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002407
Kévin Petite8edce32019-04-10 14:23:32 +01002408 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002409 auto X =
2410 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2411 auto Y =
2412 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 // Our intrinsic to unpack a float2 from an int.
2415 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002416
Kévin Petite8edce32019-04-10 14:23:32 +01002417 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002418
Kévin Petite8edce32019-04-10 14:23:32 +01002419 // Get the lower (x & y) components of our final float4.
2420 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002421
Kévin Petite8edce32019-04-10 14:23:32 +01002422 // Get the higher (z & w) components of our final float4.
2423 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002424
Kévin Petite8edce32019-04-10 14:23:32 +01002425 Constant *ShuffleMask[4] = {
2426 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2427 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002428
Kévin Petite8edce32019-04-10 14:23:32 +01002429 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002430 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2431 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002432 });
David Neto6ad93232018-06-07 15:42:58 -07002433}
2434
David Neto22f144c2017-06-12 14:26:21 -04002435bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002436
Kévin Petite8edce32019-04-10 14:23:32 +01002437 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2438 "_Z15vstore_half_rtefjPU3AS1Dh",
2439 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002440
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002441 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002442 // The value to store.
2443 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002444
Kévin Petite8edce32019-04-10 14:23:32 +01002445 // The index argument from vstore_half.
2446 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002447
Kévin Petite8edce32019-04-10 14:23:32 +01002448 // The pointer argument from vstore_half.
2449 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002450
Kévin Petite8edce32019-04-10 14:23:32 +01002451 auto IntTy = Type::getInt32Ty(M.getContext());
2452 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2453 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2454 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002455
Kévin Petite8edce32019-04-10 14:23:32 +01002456 // Our intrinsic to pack a float2 to an int.
2457 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002458
Kévin Petite8edce32019-04-10 14:23:32 +01002459 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002460
Kévin Petite8edce32019-04-10 14:23:32 +01002461 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002462 auto TempVec = InsertElementInst::Create(
2463 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002464
Kévin Petite8edce32019-04-10 14:23:32 +01002465 // Pack the float2 -> half2 (in an int).
2466 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002467
Kévin Petite8edce32019-04-10 14:23:32 +01002468 Value *Ret;
2469 if (clspv::Option::F16BitStorage()) {
2470 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002471 auto ShortPointerTy =
2472 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002473
Kévin Petite8edce32019-04-10 14:23:32 +01002474 // Truncate our i32 to an i16.
2475 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002476
Kévin Petite8edce32019-04-10 14:23:32 +01002477 // Cast the half* pointer to short*.
2478 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002479
Kévin Petite8edce32019-04-10 14:23:32 +01002480 // Index into the correct address of the casted pointer.
2481 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002482
Kévin Petite8edce32019-04-10 14:23:32 +01002483 // Store to the int* we casted to.
2484 Ret = new StoreInst(Trunc, Index, CI);
2485 } else {
2486 // We can only write to 32-bit aligned words.
2487 //
2488 // Assuming base is aligned to 32-bits, replace the equivalent of
2489 // vstore_half(value, index, base)
2490 // with:
2491 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2492 // uint32_t write_to_upper_half = index & 1u;
2493 // uint32_t shift = write_to_upper_half << 4;
2494 //
2495 // // Pack the float value as a half number in bottom 16 bits
2496 // // of an i32.
2497 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2498 //
2499 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2500 // ^ ((packed & 0xffff) << shift)
2501 // // We only need relaxed consistency, but OpenCL 1.2 only has
2502 // // sequentially consistent atomics.
2503 // // TODO(dneto): Use relaxed consistency.
2504 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002505 auto IntPointerTy =
2506 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002507
Kévin Petite8edce32019-04-10 14:23:32 +01002508 auto Four = ConstantInt::get(IntTy, 4);
2509 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002510
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002511 auto IndexIsOdd =
2512 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002513 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002514 auto IndexIntoI32 =
2515 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2516 auto BaseI32Ptr =
2517 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2518 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2519 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002520 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2521 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002522 auto MaskBitsToWrite =
2523 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2524 auto MaskedCurrent = BinaryOperator::CreateAnd(
2525 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002526
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002527 auto XLowerBits =
2528 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2529 auto NewBitsToWrite =
2530 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2531 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2532 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002533
Kévin Petite8edce32019-04-10 14:23:32 +01002534 // Generate the call to atomi_xor.
2535 SmallVector<Type *, 5> ParamTypes;
2536 // The pointer type.
2537 ParamTypes.push_back(IntPointerTy);
2538 // The Types for memory scope, semantics, and value.
2539 ParamTypes.push_back(IntTy);
2540 ParamTypes.push_back(IntTy);
2541 ParamTypes.push_back(IntTy);
2542 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2543 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002544
Kévin Petite8edce32019-04-10 14:23:32 +01002545 const auto ConstantScopeDevice =
2546 ConstantInt::get(IntTy, spv::ScopeDevice);
2547 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2548 // (SPIR-V Workgroup).
2549 const auto AddrSpaceSemanticsBits =
2550 IntPointerTy->getPointerAddressSpace() == 1
2551 ? spv::MemorySemanticsUniformMemoryMask
2552 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002553
Kévin Petite8edce32019-04-10 14:23:32 +01002554 // We're using relaxed consistency here.
2555 const auto ConstantMemorySemantics =
2556 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2557 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002558
Kévin Petite8edce32019-04-10 14:23:32 +01002559 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2560 ConstantMemorySemantics, ValueToXor};
2561 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2562 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002563 }
David Neto22f144c2017-06-12 14:26:21 -04002564
Kévin Petite8edce32019-04-10 14:23:32 +01002565 return Ret;
2566 });
David Neto22f144c2017-06-12 14:26:21 -04002567}
2568
2569bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002570
Kévin Petite8edce32019-04-10 14:23:32 +01002571 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002572 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2573 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2574 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2575 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2576 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2577 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2578 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2579 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2580 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2581 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2582 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2583 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2584 };
David Neto22f144c2017-06-12 14:26:21 -04002585
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002586 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002587 // The value to store.
2588 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002589
Kévin Petite8edce32019-04-10 14:23:32 +01002590 // The index argument from vstore_half.
2591 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002592
Kévin Petite8edce32019-04-10 14:23:32 +01002593 // The pointer argument from vstore_half.
2594 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002595
Kévin Petite8edce32019-04-10 14:23:32 +01002596 auto IntTy = Type::getInt32Ty(M.getContext());
2597 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002598 auto NewPointerTy =
2599 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002600 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002601
Kévin Petite8edce32019-04-10 14:23:32 +01002602 // Our intrinsic to pack a float2 to an int.
2603 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002604
Kévin Petite8edce32019-04-10 14:23:32 +01002605 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002606
Kévin Petite8edce32019-04-10 14:23:32 +01002607 // Turn the packed x & y into the final packing.
2608 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002609
Kévin Petite8edce32019-04-10 14:23:32 +01002610 // Cast the half* pointer to int*.
2611 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002612
Kévin Petite8edce32019-04-10 14:23:32 +01002613 // Index into the correct address of the casted pointer.
2614 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002615
Kévin Petite8edce32019-04-10 14:23:32 +01002616 // Store to the int* we casted to.
2617 return new StoreInst(X, Index, CI);
2618 });
David Neto22f144c2017-06-12 14:26:21 -04002619}
2620
2621bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002622
Kévin Petite8edce32019-04-10 14:23:32 +01002623 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002624 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2625 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2626 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2627 "_Z13vstorea_half4Dv4_fjPDh", // private
2628 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2629 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2630 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2631 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2632 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2633 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2634 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2635 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2636 };
David Neto22f144c2017-06-12 14:26:21 -04002637
Kévin Petite8edce32019-04-10 14:23:32 +01002638 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2639 // The value to store.
2640 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002641
Kévin Petite8edce32019-04-10 14:23:32 +01002642 // The index argument from vstore_half.
2643 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002644
Kévin Petite8edce32019-04-10 14:23:32 +01002645 // The pointer argument from vstore_half.
2646 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002647
Kévin Petite8edce32019-04-10 14:23:32 +01002648 auto IntTy = Type::getInt32Ty(M.getContext());
2649 auto Int2Ty = VectorType::get(IntTy, 2);
2650 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002651 auto NewPointerTy =
2652 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002653 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002654
Kévin Petite8edce32019-04-10 14:23:32 +01002655 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2656 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002657
Kévin Petite8edce32019-04-10 14:23:32 +01002658 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002659 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2660 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002661
Kévin Petite8edce32019-04-10 14:23:32 +01002662 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2663 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002664
Kévin Petite8edce32019-04-10 14:23:32 +01002665 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002666 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2667 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002668
Kévin Petite8edce32019-04-10 14:23:32 +01002669 // Our intrinsic to pack a float2 to an int.
2670 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002671
Kévin Petite8edce32019-04-10 14:23:32 +01002672 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002673
Kévin Petite8edce32019-04-10 14:23:32 +01002674 // Turn the packed x & y into the final component of our int2.
2675 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002676
Kévin Petite8edce32019-04-10 14:23:32 +01002677 // Turn the packed z & w into the final component of our int2.
2678 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002679
Kévin Petite8edce32019-04-10 14:23:32 +01002680 auto Combine = InsertElementInst::Create(
2681 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002682 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2683 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002684
Kévin Petite8edce32019-04-10 14:23:32 +01002685 // Cast the half* pointer to int2*.
2686 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002687
Kévin Petite8edce32019-04-10 14:23:32 +01002688 // Index into the correct address of the casted pointer.
2689 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002690
Kévin Petite8edce32019-04-10 14:23:32 +01002691 // Store to the int2* we casted to.
2692 return new StoreInst(Combine, Index, CI);
2693 });
David Neto22f144c2017-06-12 14:26:21 -04002694}
2695
alan-bakerf7e17cb2020-01-02 07:29:59 -05002696bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Module &M) {
2697 bool Changed = false;
2698 const std::map<const char *, const char *> Map = {
2699 // 1D
2700 {"_Z11read_imageh14ocl_image1d_roi", "_Z11read_imagef14ocl_image1d_roi"},
2701 {"_Z11read_imageh14ocl_image1d_ro11ocl_sampleri",
2702 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2703 {"_Z11read_imageh14ocl_image1d_ro11ocl_samplerf",
2704 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2705 // TODO 1D array
2706 // 2D
2707 {"_Z11read_imageh14ocl_image2d_roDv2_i",
2708 "_Z11read_imagef14ocl_image2d_roDv2_i"},
2709 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_i",
2710 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2711 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_f",
2712 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2713 // TODO 2D array
2714 // 3D
2715 {"_Z11read_imageh14ocl_image3d_roDv4_i",
2716 "_Z11read_imagef14ocl_image3d_roDv4_i"},
2717 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_i",
2718 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2719 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_f",
2720 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
2721
2722 for (auto Pair : Map) {
2723 // If we find a function with the matching name.
2724 if (auto F = M.getFunction(Pair.first)) {
2725 SmallVector<Instruction *, 4> ToRemoves;
2726
2727 // Walk the users of the function.
2728 for (auto &U : F->uses()) {
2729 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2730 SmallVector<Type *, 3> types;
2731 SmallVector<Value *, 3> args;
2732 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2733 types.push_back(CI->getArgOperand(i)->getType());
2734 args.push_back(CI->getArgOperand(i));
2735 }
2736
2737 auto NewFType = FunctionType::get(
2738 VectorType::get(Type::getFloatTy(M.getContext()),
2739 CI->getType()->getVectorNumElements()),
2740 types, false);
2741
2742 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2743
2744 auto NewCI = CallInst::Create(NewF, args, "", CI);
2745
2746 // Convert to the half type.
2747 auto Cast = CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2748
2749 CI->replaceAllUsesWith(Cast);
2750
2751 // Lastly, remember to remove the user.
2752 ToRemoves.push_back(CI);
2753 }
2754 }
2755
2756 Changed = !ToRemoves.empty();
2757
2758 // And cleanup the calls we don't use anymore.
2759 for (auto V : ToRemoves) {
2760 V->eraseFromParent();
2761 }
2762
2763 // And remove the function we don't need either too.
2764 F->eraseFromParent();
2765 }
2766 }
2767
2768 return Changed;
2769}
2770
2771bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Module &M) {
2772 bool Changed = false;
2773 const std::map<const char *, const char *> Map = {
2774 // 1D
2775 {"_Z12write_imageh14ocl_image1d_woiDv4_Dh",
2776 "_Z12write_imagef14ocl_image1d_woiDv4_f"},
2777 // TODO 1D array
2778 // 2D
2779 {"_Z12write_imageh14ocl_image2d_woDv2_iDv4_Dh",
2780 "_Z12write_imagef14ocl_image2d_woDv2_iDv4_f"},
2781 // TODO 2D array
2782 // 3D
2783 {"_Z12write_imageh14ocl_image3d_woDv4_iDv4_Dh",
2784 "_Z12write_imagef14ocl_image3d_woDv4_iDv4_f"}};
2785
2786 for (auto Pair : Map) {
2787 // If we find a function with the matching name.
2788 if (auto F = M.getFunction(Pair.first)) {
2789 SmallVector<Instruction *, 4> ToRemoves;
2790
2791 // Walk the users of the function.
2792 for (auto &U : F->uses()) {
2793 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2794 SmallVector<Type *, 3> types(3);
2795 SmallVector<Value *, 3> args(3);
2796
2797 // Image
2798 types[0] = CI->getArgOperand(0)->getType();
2799 args[0] = CI->getArgOperand(0);
2800
2801 // Coord
2802 types[1] = CI->getArgOperand(1)->getType();
2803 args[1] = CI->getArgOperand(1);
2804
2805 // Data
2806 types[2] = VectorType::get(
2807 Type::getFloatTy(M.getContext()),
2808 CI->getArgOperand(2)->getType()->getVectorNumElements());
2809
2810 auto NewFType =
2811 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
2812
2813 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2814
2815 // Convert data to the float type.
2816 auto Cast =
2817 CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2818 args[2] = Cast;
2819
2820 auto NewCI = CallInst::Create(NewF, args, "", CI);
2821
2822 // Lastly, remember to remove the user.
2823 ToRemoves.push_back(CI);
2824 }
2825 }
2826
2827 Changed = !ToRemoves.empty();
2828
2829 // And cleanup the calls we don't use anymore.
2830 for (auto V : ToRemoves) {
2831 V->eraseFromParent();
2832 }
2833
2834 // And remove the function we don't need either too.
2835 F->eraseFromParent();
2836 }
2837 }
2838
2839 return Changed;
2840}
2841
alan-baker931d18a2019-12-12 08:21:32 -05002842bool ReplaceOpenCLBuiltinPass::replaceUnsampledReadImage(Module &M) {
2843 bool Changed = false;
2844 const std::map<const char *, const char *> Map = {
2845 // 1D
2846 {"_Z11read_imagef14ocl_image1d_roi",
2847 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2848 {"_Z11read_imagei14ocl_image1d_roi",
2849 "_Z11read_imagei14ocl_image1d_ro11ocl_sampleri"},
2850 {"_Z12read_imageui14ocl_image1d_roi",
2851 "_Z12read_imageui14ocl_image1d_ro11ocl_sampleri"},
2852 // TODO 1D array
2853 // 2D
2854 {"_Z11read_imagef14ocl_image2d_roDv2_i",
2855 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2856 {"_Z11read_imagei14ocl_image2d_roDv2_i",
2857 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i"},
2858 {"_Z12read_imageui14ocl_image2d_roDv2_i",
2859 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i"},
2860 // TODO 2D array
2861 // 3D
2862 {"_Z11read_imagef14ocl_image3d_roDv4_i",
2863 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2864 {"_Z11read_imagei14ocl_image3d_roDv4_i",
2865 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i"},
2866 {"_Z12read_imageui14ocl_image3d_roDv4_i",
2867 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i"}};
2868
2869 Function *translate_sampler =
2870 M.getFunction(clspv::TranslateSamplerInitializerFunction());
2871 Type *sampler_type = M.getTypeByName("opencl.sampler_t");
alan-bakerf7e17cb2020-01-02 07:29:59 -05002872 if (sampler_type) {
2873 sampler_type = sampler_type->getPointerTo(clspv::AddressSpace::Constant);
2874 }
alan-baker931d18a2019-12-12 08:21:32 -05002875 for (auto Pair : Map) {
2876 // If we find a function with the matching name.
2877 if (auto F = M.getFunction(Pair.first)) {
2878 SmallVector<Instruction *, 4> ToRemoves;
2879
2880 // Walk the users of the function.
2881 for (auto &U : F->uses()) {
2882 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2883 // The image.
2884 auto Image = CI->getOperand(0);
2885
2886 // The coordinate.
2887 auto Coord = CI->getOperand(1);
2888
2889 // Create the sampler translation function if necessary.
2890 if (!translate_sampler) {
2891 // Create the sampler type if necessary.
2892 if (!sampler_type) {
2893 sampler_type =
2894 StructType::create(M.getContext(), "opencl.sampler_t");
2895 sampler_type =
2896 sampler_type->getPointerTo(clspv::AddressSpace::Constant);
2897 }
2898 auto fn_type = FunctionType::get(
2899 sampler_type, {Type::getInt32Ty(M.getContext())}, false);
2900 auto callee = M.getOrInsertFunction(
2901 clspv::TranslateSamplerInitializerFunction(), fn_type);
2902 translate_sampler = cast<Function>(callee.getCallee());
2903 }
2904
2905 auto NewFType = FunctionType::get(
2906 CI->getType(), {Image->getType(), sampler_type, Coord->getType()},
2907 false);
2908
2909 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2910
2911 // Sampler is:
2912 // CLK_ADDRESS_NONE = 0
2913 // CLK_FILTER_NEAREST = 0x10
2914 // CLK_NORMALIZED_COORDS_FALSE = 0
2915 const uint64_t data_mask = 0x10;
2916 auto NewSamplerCI = CallInst::Create(
2917 translate_sampler,
2918 {ConstantInt::get(Type::getInt32Ty(M.getContext()), data_mask)},
2919 "", CI);
2920 auto NewCI =
2921 CallInst::Create(NewF, {Image, NewSamplerCI, Coord}, "", CI);
2922
2923 CI->replaceAllUsesWith(NewCI);
2924
2925 // Lastly, remember to remove the user.
2926 ToRemoves.push_back(CI);
2927 }
2928 }
2929
2930 Changed = !ToRemoves.empty();
2931
2932 // And cleanup the calls we don't use anymore.
2933 for (auto V : ToRemoves) {
2934 V->eraseFromParent();
2935 }
2936
2937 // And remove the function we don't need either too.
2938 F->eraseFromParent();
2939 }
2940 }
2941
2942 return Changed;
2943}
2944
Kévin Petit06517a12019-12-09 19:40:31 +00002945bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002946 bool Changed = false;
2947
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002948 const std::map<const char *, const char *> Map = {
alan-bakerf906d2b2019-12-10 11:26:23 -05002949 // 1D
2950 {"_Z11read_imagei14ocl_image1d_ro11ocl_sampleri",
2951 "_Z11read_imagei14ocl_image1d_ro11ocl_samplerf"},
2952 {"_Z12read_imageui14ocl_image1d_ro11ocl_sampleri",
2953 "_Z12read_imageui14ocl_image1d_ro11ocl_samplerf"},
2954 {"_Z11read_imagef14ocl_image1d_ro11ocl_sampleri",
2955 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2956 // TODO 1Darray
Kévin Petit06517a12019-12-09 19:40:31 +00002957 // 2D
2958 {"_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i",
2959 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f"},
2960 {"_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i",
2961 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_f"},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002962 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2963 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002964 // TODO 2D array
2965 // 3D
2966 {"_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i",
2967 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_f"},
2968 {"_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i",
2969 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_f"},
2970 {"_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i",
2971 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002972
2973 for (auto Pair : Map) {
2974 // If we find a function with the matching name.
2975 if (auto F = M.getFunction(Pair.first)) {
2976 SmallVector<Instruction *, 4> ToRemoves;
2977
2978 // Walk the users of the function.
2979 for (auto &U : F->uses()) {
2980 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2981 // The image.
2982 auto Arg0 = CI->getOperand(0);
2983
2984 // The sampler.
2985 auto Arg1 = CI->getOperand(1);
2986
2987 // The coordinate (integer type that we can't handle).
2988 auto Arg2 = CI->getOperand(2);
2989
alan-bakerf906d2b2019-12-10 11:26:23 -05002990 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2991 // TODO(alan-baker): when arrayed images are supported fix component
2992 // calculation.
2993 uint32_t components = dim;
2994 Type *float_ty = nullptr;
2995 if (components == 1) {
2996 float_ty = Type::getFloatTy(M.getContext());
2997 } else {
2998 float_ty = VectorType::get(Type::getFloatTy(M.getContext()),
2999 Arg2->getType()->getVectorNumElements());
3000 }
David Neto22f144c2017-06-12 14:26:21 -04003001
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003002 auto NewFType = FunctionType::get(
alan-bakerf906d2b2019-12-10 11:26:23 -05003003 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003004 false);
David Neto22f144c2017-06-12 14:26:21 -04003005
3006 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
3007
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003008 auto Cast =
alan-bakerf906d2b2019-12-10 11:26:23 -05003009 CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003010
3011 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
3012
3013 CI->replaceAllUsesWith(NewCI);
3014
3015 // Lastly, remember to remove the user.
3016 ToRemoves.push_back(CI);
3017 }
3018 }
3019
3020 Changed = !ToRemoves.empty();
3021
3022 // And cleanup the calls we don't use anymore.
3023 for (auto V : ToRemoves) {
3024 V->eraseFromParent();
3025 }
3026
3027 // And remove the function we don't need either too.
3028 F->eraseFromParent();
3029 }
3030 }
3031
3032 return Changed;
3033}
3034
3035bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
3036 bool Changed = false;
3037
Kévin Petit9b340262019-06-19 18:31:11 +01003038 const std::map<const char *, spv::Op> Map = {
3039 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
3040 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
3041 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
3042 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
3043 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
3044 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
3045 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
3046 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
3047 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
3048 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
3049 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
3050 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
3051 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
3052 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
3053 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
3054 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
3055 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
3056 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
3057 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
3058 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
3059 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
3060 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
3061 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
3062 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04003063
3064 for (auto Pair : Map) {
3065 // If we find a function with the matching name.
3066 if (auto F = M.getFunction(Pair.first)) {
3067 SmallVector<Instruction *, 4> ToRemoves;
3068
3069 // Walk the users of the function.
3070 for (auto &U : F->uses()) {
3071 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04003072
3073 auto IntTy = Type::getInt32Ty(M.getContext());
3074
David Neto22f144c2017-06-12 14:26:21 -04003075 // We need to map the OpenCL constants to the SPIR-V equivalents.
3076 const auto ConstantScopeDevice =
3077 ConstantInt::get(IntTy, spv::ScopeDevice);
3078 const auto ConstantMemorySemantics = ConstantInt::get(
3079 IntTy, spv::MemorySemanticsUniformMemoryMask |
3080 spv::MemorySemanticsSequentiallyConsistentMask);
3081
3082 SmallVector<Value *, 5> Params;
3083
3084 // The pointer.
3085 Params.push_back(CI->getArgOperand(0));
3086
3087 // The memory scope.
3088 Params.push_back(ConstantScopeDevice);
3089
3090 // The memory semantics.
3091 Params.push_back(ConstantMemorySemantics);
3092
3093 if (2 < CI->getNumArgOperands()) {
3094 // The unequal memory semantics.
3095 Params.push_back(ConstantMemorySemantics);
3096
3097 // The value.
3098 Params.push_back(CI->getArgOperand(2));
3099
3100 // The comparator.
3101 Params.push_back(CI->getArgOperand(1));
3102 } else if (1 < CI->getNumArgOperands()) {
3103 // The value.
3104 Params.push_back(CI->getArgOperand(1));
3105 }
3106
Kévin Petit9b340262019-06-19 18:31:11 +01003107 auto NewCI =
3108 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04003109
3110 CI->replaceAllUsesWith(NewCI);
3111
3112 // Lastly, remember to remove the user.
3113 ToRemoves.push_back(CI);
3114 }
3115 }
3116
3117 Changed = !ToRemoves.empty();
3118
3119 // And cleanup the calls we don't use anymore.
3120 for (auto V : ToRemoves) {
3121 V->eraseFromParent();
3122 }
3123
3124 // And remove the function we don't need either too.
3125 F->eraseFromParent();
3126 }
3127 }
3128
Neil Henning39672102017-09-29 14:33:13 +01003129 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003130 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003131 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003132 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003133 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003134 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003135 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003136 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003137 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003138 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003139 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003140 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003141 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003142 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003143 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003144 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003145 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003146 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003147 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003148 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003149 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003150 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003151 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003152 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003153 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003154 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003155 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003156 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003157 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003158 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003159 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003160 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003161 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003162 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003163 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003164 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003165 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003166 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003167 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003168 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003169 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003170 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003171 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003172 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003173 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003174 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003175 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003176 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003177 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003178 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003179 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003180 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003181 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003182 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003183 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003184 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003185 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003186 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003187 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003188 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003189 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003190 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003191 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3192 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3193 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003194
3195 for (auto Pair : Map2) {
3196 // If we find a function with the matching name.
3197 if (auto F = M.getFunction(Pair.first)) {
3198 SmallVector<Instruction *, 4> ToRemoves;
3199
3200 // Walk the users of the function.
3201 for (auto &U : F->uses()) {
3202 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3203 auto AtomicOp = new AtomicRMWInst(
3204 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3205 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3206
3207 CI->replaceAllUsesWith(AtomicOp);
3208
3209 // Lastly, remember to remove the user.
3210 ToRemoves.push_back(CI);
3211 }
3212 }
3213
3214 Changed = !ToRemoves.empty();
3215
3216 // And cleanup the calls we don't use anymore.
3217 for (auto V : ToRemoves) {
3218 V->eraseFromParent();
3219 }
3220
3221 // And remove the function we don't need either too.
3222 F->eraseFromParent();
3223 }
3224 }
3225
David Neto22f144c2017-06-12 14:26:21 -04003226 return Changed;
3227}
3228
3229bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04003230
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003231 std::vector<const char *> Names = {
3232 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01003233 };
3234
3235 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003236 auto IntTy = Type::getInt32Ty(M.getContext());
3237 auto FloatTy = Type::getFloatTy(M.getContext());
3238
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003239 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3240 ConstantInt::get(IntTy, 1),
3241 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003242
3243 Constant *UpShuffleMask[4] = {
3244 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3245 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3246
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003247 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3248 UndefValue::get(FloatTy),
3249 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003250
Kévin Petite8edce32019-04-10 14:23:32 +01003251 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003252 auto Arg0 =
3253 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3254 ConstantVector::get(DownShuffleMask), "", CI);
3255 auto Arg1 =
3256 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3257 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003258 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003259
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003260 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04003261
Kévin Petite8edce32019-04-10 14:23:32 +01003262 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003263
Kévin Petite8edce32019-04-10 14:23:32 +01003264 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003265
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003266 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3267 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003268 });
David Neto22f144c2017-06-12 14:26:21 -04003269}
David Neto62653202017-10-16 19:05:18 -04003270
3271bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3272 bool Changed = false;
3273
3274 // OpenCL's float result = fract(float x, float* ptr)
3275 //
3276 // In the LLVM domain:
3277 //
3278 // %floor_result = call spir_func float @floor(float %x)
3279 // store float %floor_result, float * %ptr
3280 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3281 // %result = call spir_func float
3282 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3283 //
3284 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3285 // and clspv.fract occur in the SPIR-V generator pass:
3286 //
3287 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3288 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3289 // ...
3290 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3291 // OpStore %ptr %floor_result
3292 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3293 // %fract_result = OpExtInst %float
3294 // %glsl_ext Fmin %fract_intermediate %just_under_1
3295
David Neto62653202017-10-16 19:05:18 -04003296 using std::string;
3297
3298 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3299 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003300 using QuadType =
3301 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003302 auto make_quad = [](const char *a, const char *b, const char *c,
3303 const char *d) {
3304 return std::tuple<const char *, const char *, const char *, const char *>(
3305 a, b, c, d);
3306 };
3307 const std::vector<QuadType> Functions = {
3308 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003309 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3310 "clspv.fract.v2f"),
3311 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3312 "clspv.fract.v3f"),
3313 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3314 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003315 };
3316
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003317 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003318 const StringRef fract_name(std::get<0>(quad));
3319
3320 // If we find a function with the matching name.
3321 if (auto F = M.getFunction(fract_name)) {
3322 if (F->use_begin() == F->use_end())
3323 continue;
3324
3325 // We have some uses.
3326 Changed = true;
3327
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003328 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003329
3330 const StringRef floor_name(std::get<1>(quad));
3331 const StringRef fmin_name(std::get<2>(quad));
3332 const StringRef clspv_fract_name(std::get<3>(quad));
3333
3334 // This is either float or a float vector. All the float-like
3335 // types are this type.
3336 auto result_ty = F->getReturnType();
3337
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003338 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003339 if (!fmin_fn) {
3340 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003341 FunctionType *fn_ty =
3342 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003343 fmin_fn =
3344 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003345 fmin_fn->addFnAttr(Attribute::ReadNone);
3346 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3347 }
3348
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003349 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003350 if (!floor_fn) {
3351 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003352 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003353 floor_fn = cast<Function>(
3354 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003355 floor_fn->addFnAttr(Attribute::ReadNone);
3356 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3357 }
3358
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003359 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003360 if (!clspv_fract_fn) {
3361 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003362 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003363 clspv_fract_fn = cast<Function>(
3364 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003365 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3366 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3367 }
3368
3369 // Number of significant significand bits, whether represented or not.
3370 unsigned num_significand_bits;
3371 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003372 case Type::HalfTyID:
3373 num_significand_bits = 11;
3374 break;
3375 case Type::FloatTyID:
3376 num_significand_bits = 24;
3377 break;
3378 case Type::DoubleTyID:
3379 num_significand_bits = 53;
3380 break;
3381 default:
3382 assert(false && "Unhandled float type when processing fract builtin");
3383 break;
David Neto62653202017-10-16 19:05:18 -04003384 }
3385 // Beware that the disassembler displays this value as
3386 // OpConstant %float 1
3387 // which is not quite right.
3388 const double kJustUnderOneScalar =
3389 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3390
3391 Constant *just_under_one =
3392 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3393 if (result_ty->isVectorTy()) {
3394 just_under_one = ConstantVector::getSplat(
3395 result_ty->getVectorNumElements(), just_under_one);
3396 }
3397
3398 IRBuilder<> Builder(Context);
3399
3400 SmallVector<Instruction *, 4> ToRemoves;
3401
3402 // Walk the users of the function.
3403 for (auto &U : F->uses()) {
3404 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3405
3406 Builder.SetInsertPoint(CI);
3407 auto arg = CI->getArgOperand(0);
3408 auto ptr = CI->getArgOperand(1);
3409
3410 // Compute floor result and store it.
3411 auto floor = Builder.CreateCall(floor_fn, {arg});
3412 Builder.CreateStore(floor, ptr);
3413
3414 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003415 auto fract_result =
3416 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003417
3418 CI->replaceAllUsesWith(fract_result);
3419
3420 // Lastly, remember to remove the user.
3421 ToRemoves.push_back(CI);
3422 }
3423 }
3424
3425 // And cleanup the calls we don't use anymore.
3426 for (auto V : ToRemoves) {
3427 V->eraseFromParent();
3428 }
3429
3430 // And remove the function we don't need either too.
3431 F->eraseFromParent();
3432 }
3433 }
3434
3435 return Changed;
3436}