blob: c762d54c2da1f6f7da17ba8e2226060d05cabf93 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
James Pricec05f6052020-01-14 13:37:20 -050033#include "clspv/DescriptorMap.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040034#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070035
alan-baker931d18a2019-12-12 08:21:32 -050036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
38#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050039#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040
David Neto22f144c2017-06-12 14:26:21 -040041using namespace llvm;
42
43#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
44
45namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000046
// Describes what the mangled-name parser recorded about one argument type.
struct ArgTypeInfo {
  // Signedness of the argument's scalar element type. The parser in this file
  // uses None for the floating-point type codes.
  enum class SignedNess { None, Unsigned, Signed };

  // Default-initialized so that a freshly constructed ArgTypeInfo never holds
  // an indeterminate value.
  SignedNess signedness = SignedNess::None;
};
51
52struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000053 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000054 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000055
Kévin Petit91bc72e2019-04-08 15:17:46 +010056 bool isArgSigned(size_t arg) const {
57 assert(argTypeInfos.size() > arg);
58 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000059 }
60
Kévin Petit91bc72e2019-04-08 15:17:46 +010061 static FunctionInfo getFromMangledName(StringRef name) {
62 FunctionInfo fi;
63 if (!getFromMangledNameCheck(name, &fi)) {
64 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000065 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010066 return fi;
67 }
Kévin Petit8a560882019-03-21 15:24:34 +000068
Kévin Petit91bc72e2019-04-08 15:17:46 +010069 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
70 if (!name.consume_front("_Z")) {
71 return false;
72 }
73 size_t nameLen;
74 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000075 return false;
76 }
77
Kévin Petit91bc72e2019-04-08 15:17:46 +010078 finfo->name = name.take_front(nameLen);
79 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000080
Kévin Petit91bc72e2019-04-08 15:17:46 +010081 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000082
Kévin Petit91bc72e2019-04-08 15:17:46 +010083 while (name.size() != 0) {
84
85 ArgTypeInfo ti;
86
87 // Try parsing a vector prefix
88 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040089 int numElems;
90 if (name.consumeInteger(10, numElems)) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040094 if (!name.consume_front("_")) {
95 return false;
96 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010097 }
98
99 // Parse the base type
alan-baker4092cc52020-01-15 16:42:57 -0500100 if (name.consume_front("Dh")) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100101 ti.signedness = ArgTypeInfo::SignedNess::None;
alan-baker4092cc52020-01-15 16:42:57 -0500102 } else {
103 char typeCode = name.front();
104 name = name.drop_front(1);
105 switch (typeCode) {
106 case 'c': // char
107 case 'a': // signed char
108 case 's': // short
109 case 'i': // int
110 case 'l': // long
111 ti.signedness = ArgTypeInfo::SignedNess::Signed;
112 break;
113 case 'h': // unsigned char
114 case 't': // unsigned short
115 case 'j': // unsigned int
116 case 'm': // unsigned long
117 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
118 break;
119 case 'f':
120 ti.signedness = ArgTypeInfo::SignedNess::None;
121 break;
122 case 'S':
123 ti = prev_ti;
124 if (!name.consume_front("_")) {
125 return false;
126 }
127 break;
128 default:
Kévin Petit91bc72e2019-04-08 15:17:46 +0100129 return false;
130 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100131 }
132
133 finfo->argTypeInfos.push_back(ti);
134
135 prev_ti = ti;
136 }
137
138 return true;
139 };
Kévin Petit8a560882019-03-21 15:24:34 +0000140};
141
// Despite its name, this returns the bit index of the most significant set
// bit of |v|, i.e. floor(log2(v)), and 0 when |v| is 0. Callers in this file
// subtract two results to get the shift distance between two single-bit
// masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Binary search: test the upper half of the remaining window, and if any
  // bit is set there, discard the lower half and record the distance moved.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      highestBit |= width;
    }
  }

  return highestBit;
}
161
162Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
163 if (1 == elements) {
164 return Type::getInt1Ty(C);
165 } else {
166 return VectorType::get(Type::getInt1Ty(C), elements);
167 }
168}
169
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100170Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
171 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
172 if (Ty->isVectorTy()) {
173 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
174 }
175 return IntTy;
176}
177
David Neto22f144c2017-06-12 14:26:21 -0400178struct ReplaceOpenCLBuiltinPass final : public ModulePass {
179 static char ID;
180 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
181
182 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000183 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100184 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100185 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceRecip(Module &M);
187 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100188 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400189 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100190 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400191 bool replaceLog10(Module &M);
192 bool replaceBarrier(Module &M);
193 bool replaceMemFence(Module &M);
194 bool replaceRelational(Module &M);
195 bool replaceIsInfAndIsNan(Module &M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100196 bool replaceIsFinite(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400197 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000198 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000199 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000200 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000201 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000202 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000203 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000204 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400205 bool replaceSignbit(Module &M);
206 bool replaceMadandMad24andMul24(Module &M);
207 bool replaceVloadHalf(Module &M);
208 bool replaceVloadHalf2(Module &M);
209 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700210 bool replaceClspvVloadaHalf2(Module &M);
211 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400212 bool replaceVstoreHalf(Module &M);
213 bool replaceVstoreHalf2(Module &M);
214 bool replaceVstoreHalf4(Module &M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500215 bool replaceHalfReadImage(Module &M);
216 bool replaceHalfWriteImage(Module &M);
alan-baker931d18a2019-12-12 08:21:32 -0500217 bool replaceUnsampledReadImage(Module &M);
Kévin Petit06517a12019-12-09 19:40:31 +0000218 bool replaceSampledReadImageWithIntCoords(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400219 bool replaceAtomics(Module &M);
220 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400221 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700222 bool replaceVload(Module &M);
223 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400224};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100225} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400226
227char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400228INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
229 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400230
231namespace clspv {
232ModulePass *createReplaceOpenCLBuiltinPass() {
233 return new ReplaceOpenCLBuiltinPass();
234}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400235} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400236
237bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
238 bool Changed = false;
239
Kévin Petit2444e9b2018-11-09 14:14:37 +0000240 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100241 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100242 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400243 Changed |= replaceRecip(M);
244 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100245 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400246 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100247 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400248 Changed |= replaceLog10(M);
249 Changed |= replaceBarrier(M);
250 Changed |= replaceMemFence(M);
251 Changed |= replaceRelational(M);
252 Changed |= replaceIsInfAndIsNan(M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100253 Changed |= replaceIsFinite(M);
David Neto22f144c2017-06-12 14:26:21 -0400254 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000255 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000256 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000257 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000258 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000259 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000260 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000261 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400262 Changed |= replaceSignbit(M);
263 Changed |= replaceMadandMad24andMul24(M);
264 Changed |= replaceVloadHalf(M);
265 Changed |= replaceVloadHalf2(M);
266 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700267 Changed |= replaceClspvVloadaHalf2(M);
268 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400269 Changed |= replaceVstoreHalf(M);
270 Changed |= replaceVstoreHalf2(M);
271 Changed |= replaceVstoreHalf4(M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500272 // Replace the half image builtins before handling other image builtins.
273 Changed |= replaceHalfReadImage(M);
274 Changed |= replaceHalfWriteImage(M);
Kévin Petit06517a12019-12-09 19:40:31 +0000275 Changed |= replaceSampledReadImageWithIntCoords(M);
David Neto22f144c2017-06-12 14:26:21 -0400276 Changed |= replaceAtomics(M);
277 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400278 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700279 Changed |= replaceVload(M);
280 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400281
282 return Changed;
283}
284
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400285bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
286 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000287
Kévin Petite8edce32019-04-10 14:23:32 +0100288 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000289
290 for (auto Name : Names) {
291 // If we find a function with the matching name.
292 if (auto F = M.getFunction(Name)) {
293 SmallVector<Instruction *, 4> ToRemoves;
294
295 // Walk the users of the function.
296 for (auto &U : F->uses()) {
297 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000298
Kévin Petite8edce32019-04-10 14:23:32 +0100299 auto NewValue = Replacer(CI);
300
301 if (NewValue != nullptr) {
302 CI->replaceAllUsesWith(NewValue);
303 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000304
305 // Lastly, remember to remove the user.
306 ToRemoves.push_back(CI);
307 }
308 }
309
310 Changed = !ToRemoves.empty();
311
312 // And cleanup the calls we don't use anymore.
313 for (auto V : ToRemoves) {
314 V->eraseFromParent();
315 }
316
317 // And remove the function we don't need either too.
318 F->eraseFromParent();
319 }
320 }
321
322 return Changed;
323}
324
Kévin Petite8edce32019-04-10 14:23:32 +0100325bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100326
Kévin Petite8edce32019-04-10 14:23:32 +0100327 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400328 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
329 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
330 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
331 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100332 };
333
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400334 return replaceCallsWithValue(M, Names,
335 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100336}
337
338bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
339
340 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400341 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
342 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
343 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
344 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
345 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
346 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
347 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
348 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
349 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
350 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
351 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100352 };
353
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400354 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100355 auto XValue = CI->getOperand(0);
356 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100357
Kévin Petite8edce32019-04-10 14:23:32 +0100358 IRBuilder<> Builder(CI);
359 auto XmY = Builder.CreateSub(XValue, YValue);
360 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100361
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400362 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100363 auto F = CI->getCalledFunction();
364 auto finfo = FunctionInfo::getFromMangledName(F->getName());
365 if (finfo.isArgSigned(0)) {
366 Cmp = Builder.CreateICmpSGT(YValue, XValue);
367 } else {
368 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100369 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100370
Kévin Petite8edce32019-04-10 14:23:32 +0100371 return Builder.CreateSelect(Cmp, YmX, XmY);
372 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100373}
374
Kévin Petit8c1be282019-04-02 19:34:25 +0100375bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100376
Kévin Petite8edce32019-04-10 14:23:32 +0100377 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400378 "_Z8copysignff",
379 "_Z8copysignDv2_fS_",
380 "_Z8copysignDv3_fS_",
381 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100382 };
383
Kévin Petite8edce32019-04-10 14:23:32 +0100384 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
385 auto XValue = CI->getOperand(0);
386 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100387
Kévin Petite8edce32019-04-10 14:23:32 +0100388 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100389
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400390 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100391 if (Ty->isVectorTy()) {
392 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100393 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100394
Kévin Petite8edce32019-04-10 14:23:32 +0100395 // Return X with the sign of Y
396
397 // Sign bit masks
398 auto SignBit = IntTy->getScalarSizeInBits() - 1;
399 auto SignBitMask = 1 << SignBit;
400 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
401 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
402
403 IRBuilder<> Builder(CI);
404
405 // Extract sign of Y
406 auto YInt = Builder.CreateBitCast(YValue, IntTy);
407 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
408
409 // Clear sign bit in X
410 auto XInt = Builder.CreateBitCast(XValue, IntTy);
411 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
412
413 // Insert sign bit of Y into X
414 auto NewXInt = Builder.CreateOr(XInt, YSign);
415
416 // And cast back to floating-point
417 return Builder.CreateBitCast(NewXInt, Ty);
418 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100419}
420
David Neto22f144c2017-06-12 14:26:21 -0400421bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400422
Kévin Petite8edce32019-04-10 14:23:32 +0100423 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400424 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
425 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
426 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
427 };
428
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400429 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100430 // Recip has one arg.
431 auto Arg = CI->getOperand(0);
432 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
433 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
434 });
David Neto22f144c2017-06-12 14:26:21 -0400435}
436
437bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400438
Kévin Petite8edce32019-04-10 14:23:32 +0100439 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400440 "_Z11half_divideff", "_Z13native_divideff",
441 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
442 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
443 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
444 };
445
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400446 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100447 auto Op0 = CI->getOperand(0);
448 auto Op1 = CI->getOperand(1);
449 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
450 });
David Neto22f144c2017-06-12 14:26:21 -0400451}
452
Kévin Petit1329a002019-06-15 05:54:05 +0100453bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
454
455 std::vector<const char *> Names = {
456 "_Z3dotff",
457 "_Z3dotDv2_fS_",
458 "_Z3dotDv3_fS_",
459 "_Z3dotDv4_fS_",
460 };
461
462 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
463 auto Op0 = CI->getOperand(0);
464 auto Op1 = CI->getOperand(1);
465
466 Value *V;
467 if (Op0->getType()->isVectorTy()) {
468 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
469 CI->getType(), {Op0, Op1});
470 } else {
471 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
472 }
473
474 return V;
475 });
476}
477
David Neto22f144c2017-06-12 14:26:21 -0400478bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
479 bool Changed = false;
480
481 const std::map<const char *, const char *> Map = {
482 {"_Z5exp10f", "_Z3expf"},
483 {"_Z10half_exp10f", "_Z8half_expf"},
484 {"_Z12native_exp10f", "_Z10native_expf"},
485 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
486 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
487 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
488 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
489 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
490 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
491 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
492 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
493 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
494
495 for (auto Pair : Map) {
496 // If we find a function with the matching name.
497 if (auto F = M.getFunction(Pair.first)) {
498 SmallVector<Instruction *, 4> ToRemoves;
499
500 // Walk the users of the function.
501 for (auto &U : F->uses()) {
502 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
503 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
504
505 auto Arg = CI->getOperand(0);
506
507 // Constant of the natural log of 10 (ln(10)).
508 const double Ln10 =
509 2.302585092994045684017991454684364207601101488628772976033;
510
511 auto Mul = BinaryOperator::Create(
512 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
513 CI);
514
515 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
516
517 CI->replaceAllUsesWith(NewCI);
518
519 // Lastly, remember to remove the user.
520 ToRemoves.push_back(CI);
521 }
522 }
523
524 Changed = !ToRemoves.empty();
525
526 // And cleanup the calls we don't use anymore.
527 for (auto V : ToRemoves) {
528 V->eraseFromParent();
529 }
530
531 // And remove the function we don't need either too.
532 F->eraseFromParent();
533 }
534 }
535
536 return Changed;
537}
538
Kévin Petit0644a9c2019-06-20 21:08:46 +0100539bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
540
541 std::vector<const char *> Names = {
542 "_Z4fmodff",
543 "_Z4fmodDv2_fS_",
544 "_Z4fmodDv3_fS_",
545 "_Z4fmodDv4_fS_",
546 };
547
548 // OpenCL fmod(x,y) is x - y * trunc(x/y)
549 // The sign for a non-zero result is taken from x.
550 // (Try an example.)
551 // So translate to FRem
552 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
553 auto Op0 = CI->getOperand(0);
554 auto Op1 = CI->getOperand(1);
555 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
556 });
557}
558
David Neto22f144c2017-06-12 14:26:21 -0400559bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
560 bool Changed = false;
561
562 const std::map<const char *, const char *> Map = {
563 {"_Z5log10f", "_Z3logf"},
564 {"_Z10half_log10f", "_Z8half_logf"},
565 {"_Z12native_log10f", "_Z10native_logf"},
566 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
567 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
568 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
569 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
570 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
571 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
572 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
573 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
574 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
575
576 for (auto Pair : Map) {
577 // If we find a function with the matching name.
578 if (auto F = M.getFunction(Pair.first)) {
579 SmallVector<Instruction *, 4> ToRemoves;
580
581 // Walk the users of the function.
582 for (auto &U : F->uses()) {
583 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
584 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
585
586 auto Arg = CI->getOperand(0);
587
588 // Constant of the reciprocal of the natural log of 10 (ln(10)).
589 const double Ln10 =
590 0.434294481903251827651128918916605082294397005803666566114;
591
592 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
593
594 auto Mul = BinaryOperator::Create(
595 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
596 "", CI);
597
598 CI->replaceAllUsesWith(Mul);
599
600 // Lastly, remember to remove the user.
601 ToRemoves.push_back(CI);
602 }
603 }
604
605 Changed = !ToRemoves.empty();
606
607 // And cleanup the calls we don't use anymore.
608 for (auto V : ToRemoves) {
609 V->eraseFromParent();
610 }
611
612 // And remove the function we don't need either too.
613 F->eraseFromParent();
614 }
615 }
616
617 return Changed;
618}
619
620bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400621
622 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
623
alan-bakerb60b1fc2019-12-13 19:09:38 -0500624 const std::vector<const char *> Names = {"_Z7barrierj",
625 // OpenCL 2.0 alias for barrier.
626 "_Z18work_group_barrierj"};
David Neto22f144c2017-06-12 14:26:21 -0400627
Kévin Petitc4643922019-06-17 19:32:05 +0100628 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
629 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400630
Kévin Petitc4643922019-06-17 19:32:05 +0100631 // We need to map the OpenCL constants to the SPIR-V equivalents.
632 const auto LocalMemFence =
633 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
634 const auto GlobalMemFence =
635 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
636 const auto ConstantSequentiallyConsistent = ConstantInt::get(
637 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
638 const auto ConstantScopeDevice =
639 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
640 const auto ConstantScopeWorkgroup =
641 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400642
Kévin Petitc4643922019-06-17 19:32:05 +0100643 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
644 const auto LocalMemFenceMask =
645 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
646 const auto WorkgroupShiftAmount =
647 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
648 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
649 Instruction::Shl, LocalMemFenceMask,
650 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400651
Kévin Petitc4643922019-06-17 19:32:05 +0100652 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
653 const auto GlobalMemFenceMask =
654 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
655 const auto UniformShiftAmount =
656 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
657 const auto MemorySemanticsUniform = BinaryOperator::Create(
658 Instruction::Shl, GlobalMemFenceMask,
659 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400660
Kévin Petitc4643922019-06-17 19:32:05 +0100661 // And combine the above together, also adding in
662 // MemorySemanticsSequentiallyConsistentMask.
663 auto MemorySemantics =
664 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
665 ConstantSequentiallyConsistent, "", CI);
666 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
667 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400668
Kévin Petitc4643922019-06-17 19:32:05 +0100669 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
670 // Device Scope, otherwise Workgroup Scope.
671 const auto Cmp =
672 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
673 GlobalMemFence, "", CI);
674 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
675 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400676
Kévin Petitc4643922019-06-17 19:32:05 +0100677 // Lastly, the Execution Scope is always Workgroup Scope.
678 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400679
Kévin Petitc4643922019-06-17 19:32:05 +0100680 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
681 {Attribute::NoDuplicate}, CI->getType(),
682 {ExecutionScope, MemoryScope, MemorySemantics});
683 });
David Neto22f144c2017-06-12 14:26:21 -0400684}
685
686bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
687 bool Changed = false;
688
689 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
690
Kévin Petitc4643922019-06-17 19:32:05 +0100691 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100692 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100693 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400694 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100695 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100696 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100697 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100698 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400699
700 for (auto Pair : Map) {
701 // If we find a function with the matching name.
702 if (auto F = M.getFunction(Pair.first)) {
703 SmallVector<Instruction *, 4> ToRemoves;
704
705 // Walk the users of the function.
706 for (auto &U : F->uses()) {
707 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400708
709 auto Arg = CI->getOperand(0);
710
711 // We need to map the OpenCL constants to the SPIR-V equivalents.
712 const auto LocalMemFence =
713 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
714 const auto GlobalMemFence =
715 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
716 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100717 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400718 const auto ConstantScopeDevice =
719 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
720
721 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
722 const auto LocalMemFenceMask = BinaryOperator::Create(
723 Instruction::And, LocalMemFence, Arg, "", CI);
724 const auto WorkgroupShiftAmount =
725 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
726 clz(CLK_LOCAL_MEM_FENCE);
727 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
728 Instruction::Shl, LocalMemFenceMask,
729 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
730
731 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
732 const auto GlobalMemFenceMask = BinaryOperator::Create(
733 Instruction::And, GlobalMemFence, Arg, "", CI);
734 const auto UniformShiftAmount =
735 clz(spv::MemorySemanticsUniformMemoryMask) -
736 clz(CLK_GLOBAL_MEM_FENCE);
737 const auto MemorySemanticsUniform = BinaryOperator::Create(
738 Instruction::Shl, GlobalMemFenceMask,
739 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
740
741 // And combine the above together, also adding in
742 // MemorySemanticsSequentiallyConsistentMask.
743 auto MemorySemantics =
744 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
745 ConstantMemorySemantics, "", CI);
746 MemorySemantics = BinaryOperator::Create(
747 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
748
749 // Memory Scope is always device.
750 const auto MemoryScope = ConstantScopeDevice;
751
Kévin Petitc4643922019-06-17 19:32:05 +0100752 const auto SPIRVOp = std::get<0>(Pair.second);
753 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
754 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400755
756 CI->replaceAllUsesWith(NewCI);
757
758 // Lastly, remember to remove the user.
759 ToRemoves.push_back(CI);
760 }
761 }
762
763 Changed = !ToRemoves.empty();
764
765 // And cleanup the calls we don't use anymore.
766 for (auto V : ToRemoves) {
767 V->eraseFromParent();
768 }
769
770 // And remove the function we don't need either too.
771 F->eraseFromParent();
772 }
773 }
774
775 return Changed;
776}
777
778bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
779 bool Changed = false;
780
781 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
782 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
783 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
784 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
785 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
786 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
787 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
788 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
789 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
790 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
791 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
792 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
793 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
794 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
795 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
796 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
797 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
798 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
799 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
800 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
801 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
802 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
803 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
804 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
805 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
806 };
807
808 for (auto Pair : Map) {
809 // If we find a function with the matching name.
810 if (auto F = M.getFunction(Pair.first)) {
811 SmallVector<Instruction *, 4> ToRemoves;
812
813 // Walk the users of the function.
814 for (auto &U : F->uses()) {
815 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
816 // The predicate to use in the CmpInst.
817 auto Predicate = Pair.second.first;
818
819 // The value to return for true.
820 auto TrueValue =
821 ConstantInt::getSigned(CI->getType(), Pair.second.second);
822
823 // The value to return for false.
824 auto FalseValue = Constant::getNullValue(CI->getType());
825
826 auto Arg1 = CI->getOperand(0);
827 auto Arg2 = CI->getOperand(1);
828
829 const auto Cmp =
830 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
831
832 const auto Select =
833 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
834
835 CI->replaceAllUsesWith(Select);
836
837 // Lastly, remember to remove the user.
838 ToRemoves.push_back(CI);
839 }
840 }
841
842 Changed = !ToRemoves.empty();
843
844 // And cleanup the calls we don't use anymore.
845 for (auto V : ToRemoves) {
846 V->eraseFromParent();
847 }
848
849 // And remove the function we don't need either too.
850 F->eraseFromParent();
851 }
852 }
853
854 return Changed;
855}
856
857bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
858 bool Changed = false;
859
Kévin Petitff03aee2019-06-12 19:39:03 +0100860 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
861 {"_Z5isinff", {spv::OpIsInf, 1}},
862 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
863 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
864 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
865 {"_Z5isnanf", {spv::OpIsNan, 1}},
866 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
867 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
868 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400869 };
870
871 for (auto Pair : Map) {
872 // If we find a function with the matching name.
873 if (auto F = M.getFunction(Pair.first)) {
874 SmallVector<Instruction *, 4> ToRemoves;
875
876 // Walk the users of the function.
877 for (auto &U : F->uses()) {
878 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
879 const auto CITy = CI->getType();
880
Kévin Petitff03aee2019-06-12 19:39:03 +0100881 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400882
883 // The value to return for true.
884 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
885
886 // The value to return for false.
887 auto FalseValue = Constant::getNullValue(CITy);
888
889 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
890 M.getContext(),
891 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
892
Kévin Petitff03aee2019-06-12 19:39:03 +0100893 auto NewCI =
894 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
895 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400896
897 const auto Select =
898 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
899
900 CI->replaceAllUsesWith(Select);
901
902 // Lastly, remember to remove the user.
903 ToRemoves.push_back(CI);
904 }
905 }
906
907 Changed = !ToRemoves.empty();
908
909 // And cleanup the calls we don't use anymore.
910 for (auto V : ToRemoves) {
911 V->eraseFromParent();
912 }
913
914 // And remove the function we don't need either too.
915 F->eraseFromParent();
916 }
917 }
918
919 return Changed;
920}
921
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100922bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Module &M) {
923 std::vector<const char *> Names = {
924 "_Z8isfiniteh", "_Z8isfiniteDv2_h", "_Z8isfiniteDv3_h",
925 "_Z8isfiniteDv4_h", "_Z8isfinitef", "_Z8isfiniteDv2_f",
926 "_Z8isfiniteDv3_f", "_Z8isfiniteDv4_f", "_Z8isfinited",
927 "_Z8isfiniteDv2_d", "_Z8isfiniteDv3_d", "_Z8isfiniteDv4_d",
928 };
929
930 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
931 auto &C = M.getContext();
932 auto Val = CI->getOperand(0);
933 auto ValTy = Val->getType();
934 auto RetTy = CI->getType();
935
936 // Get a suitable integer type to represent the number
937 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
938
939 // Create Mask
940 auto ScalarSize = ValTy->getScalarSizeInBits();
941 Value *InfMask;
942 switch (ScalarSize) {
943 case 16:
944 InfMask = ConstantInt::get(IntTy, 0x7C00U);
945 break;
946 case 32:
947 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
948 break;
949 case 64:
950 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
951 break;
952 default:
953 llvm_unreachable("Unsupported floating-point type");
954 }
955
956 IRBuilder<> Builder(CI);
957
958 // Bitcast to int
959 auto ValInt = Builder.CreateBitCast(Val, IntTy);
960
961 // Mask and compare
962 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
963 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
964
965 auto RetFalse = ConstantInt::get(RetTy, 0);
966 Value *RetTrue;
967 if (ValTy->isVectorTy()) {
968 RetTrue = ConstantInt::getSigned(RetTy, -1);
969 } else {
970 RetTrue = ConstantInt::get(RetTy, 1);
971 }
972 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
973 });
974}
975
David Neto22f144c2017-06-12 14:26:21 -0400976bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
977 bool Changed = false;
978
Kévin Petitff03aee2019-06-12 19:39:03 +0100979 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000980 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100981 {"_Z3allc", spv::OpNop},
982 {"_Z3allDv2_c", spv::OpAll},
983 {"_Z3allDv3_c", spv::OpAll},
984 {"_Z3allDv4_c", spv::OpAll},
985 {"_Z3alls", spv::OpNop},
986 {"_Z3allDv2_s", spv::OpAll},
987 {"_Z3allDv3_s", spv::OpAll},
988 {"_Z3allDv4_s", spv::OpAll},
989 {"_Z3alli", spv::OpNop},
990 {"_Z3allDv2_i", spv::OpAll},
991 {"_Z3allDv3_i", spv::OpAll},
992 {"_Z3allDv4_i", spv::OpAll},
993 {"_Z3alll", spv::OpNop},
994 {"_Z3allDv2_l", spv::OpAll},
995 {"_Z3allDv3_l", spv::OpAll},
996 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000997
998 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100999 {"_Z3anyc", spv::OpNop},
1000 {"_Z3anyDv2_c", spv::OpAny},
1001 {"_Z3anyDv3_c", spv::OpAny},
1002 {"_Z3anyDv4_c", spv::OpAny},
1003 {"_Z3anys", spv::OpNop},
1004 {"_Z3anyDv2_s", spv::OpAny},
1005 {"_Z3anyDv3_s", spv::OpAny},
1006 {"_Z3anyDv4_s", spv::OpAny},
1007 {"_Z3anyi", spv::OpNop},
1008 {"_Z3anyDv2_i", spv::OpAny},
1009 {"_Z3anyDv3_i", spv::OpAny},
1010 {"_Z3anyDv4_i", spv::OpAny},
1011 {"_Z3anyl", spv::OpNop},
1012 {"_Z3anyDv2_l", spv::OpAny},
1013 {"_Z3anyDv3_l", spv::OpAny},
1014 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -04001015 };
1016
1017 for (auto Pair : Map) {
1018 // If we find a function with the matching name.
1019 if (auto F = M.getFunction(Pair.first)) {
1020 SmallVector<Instruction *, 4> ToRemoves;
1021
1022 // Walk the users of the function.
1023 for (auto &U : F->uses()) {
1024 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04001025
1026 auto Arg = CI->getOperand(0);
1027
1028 Value *V;
1029
Kévin Petitfd27cca2018-10-31 13:00:17 +00001030 // If the argument is a 32-bit int, just use a shift
1031 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1032 V = BinaryOperator::Create(Instruction::LShr, Arg,
1033 ConstantInt::get(Arg->getType(), 31), "",
1034 CI);
1035 } else {
David Neto22f144c2017-06-12 14:26:21 -04001036 // The value for zero to compare against.
1037 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1038
David Neto22f144c2017-06-12 14:26:21 -04001039 // The value to return for true.
1040 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1041
1042 // The value to return for false.
1043 const auto FalseValue = Constant::getNullValue(CI->getType());
1044
Kévin Petitfd27cca2018-10-31 13:00:17 +00001045 const auto Cmp = CmpInst::Create(
1046 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1047
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001048 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001049
1050 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +01001051 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001052
Kévin Petitff03aee2019-06-12 19:39:03 +01001053 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001054
Kévin Petitff03aee2019-06-12 19:39:03 +01001055 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001056
Kévin Petitff03aee2019-06-12 19:39:03 +01001057 const auto NewCI = clspv::InsertSPIRVOp(
1058 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001059 SelectSource = NewCI;
1060
1061 } else {
1062 SelectSource = Cmp;
1063 }
1064
1065 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001066 }
1067
1068 CI->replaceAllUsesWith(V);
1069
1070 // Lastly, remember to remove the user.
1071 ToRemoves.push_back(CI);
1072 }
1073 }
1074
1075 Changed = !ToRemoves.empty();
1076
1077 // And cleanup the calls we don't use anymore.
1078 for (auto V : ToRemoves) {
1079 V->eraseFromParent();
1080 }
1081
1082 // And remove the function we don't need either too.
1083 F->eraseFromParent();
1084 }
1085 }
1086
1087 return Changed;
1088}
1089
Kévin Petitbf0036c2019-03-06 13:57:10 +00001090bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1091 bool Changed = false;
1092
1093 for (auto const &SymVal : M.getValueSymbolTable()) {
1094 // Skip symbols whose name doesn't match
1095 if (!SymVal.getKey().startswith("_Z8upsample")) {
1096 continue;
1097 }
1098 // Is there a function going by that name?
1099 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1100
1101 SmallVector<Instruction *, 4> ToRemoves;
1102
1103 // Walk the users of the function.
1104 for (auto &U : F->uses()) {
1105 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1106
1107 // Get arguments
1108 auto HiValue = CI->getOperand(0);
1109 auto LoValue = CI->getOperand(1);
1110
1111 // Don't touch overloads that aren't in OpenCL C
1112 auto HiType = HiValue->getType();
1113 auto LoType = LoValue->getType();
1114
1115 if (HiType != LoType) {
1116 continue;
1117 }
1118
1119 if (!HiType->isIntOrIntVectorTy()) {
1120 continue;
1121 }
1122
1123 if (HiType->getScalarSizeInBits() * 2 !=
1124 CI->getType()->getScalarSizeInBits()) {
1125 continue;
1126 }
1127
1128 if ((HiType->getScalarSizeInBits() != 8) &&
1129 (HiType->getScalarSizeInBits() != 16) &&
1130 (HiType->getScalarSizeInBits() != 32)) {
1131 continue;
1132 }
1133
1134 if (HiType->isVectorTy()) {
1135 if ((HiType->getVectorNumElements() != 2) &&
1136 (HiType->getVectorNumElements() != 3) &&
1137 (HiType->getVectorNumElements() != 4) &&
1138 (HiType->getVectorNumElements() != 8) &&
1139 (HiType->getVectorNumElements() != 16)) {
1140 continue;
1141 }
1142 }
1143
1144 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001145 auto HiCast =
1146 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1147 auto LoCast =
1148 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001149
1150 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001151 auto ShiftAmount =
1152 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001153 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1154 ShiftAmount, "", CI);
1155
1156 // OR both results
1157 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1158 "", CI);
1159
1160 // Replace call with the expression
1161 CI->replaceAllUsesWith(V);
1162
1163 // Lastly, remember to remove the user.
1164 ToRemoves.push_back(CI);
1165 }
1166 }
1167
1168 Changed = !ToRemoves.empty();
1169
1170 // And cleanup the calls we don't use anymore.
1171 for (auto V : ToRemoves) {
1172 V->eraseFromParent();
1173 }
1174
1175 // And remove the function we don't need either too.
1176 F->eraseFromParent();
1177 }
1178 }
1179
1180 return Changed;
1181}
1182
Kévin Petitd44eef52019-03-08 13:22:14 +00001183bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1184 bool Changed = false;
1185
1186 for (auto const &SymVal : M.getValueSymbolTable()) {
1187 // Skip symbols whose name doesn't match
1188 if (!SymVal.getKey().startswith("_Z6rotate")) {
1189 continue;
1190 }
1191 // Is there a function going by that name?
1192 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1193
1194 SmallVector<Instruction *, 4> ToRemoves;
1195
1196 // Walk the users of the function.
1197 for (auto &U : F->uses()) {
1198 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1199
1200 // Get arguments
1201 auto SrcValue = CI->getOperand(0);
1202 auto RotAmount = CI->getOperand(1);
1203
1204 // Don't touch overloads that aren't in OpenCL C
1205 auto SrcType = SrcValue->getType();
1206 auto RotType = RotAmount->getType();
1207
1208 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1209 continue;
1210 }
1211
1212 if (!SrcType->isIntOrIntVectorTy()) {
1213 continue;
1214 }
1215
1216 if ((SrcType->getScalarSizeInBits() != 8) &&
1217 (SrcType->getScalarSizeInBits() != 16) &&
1218 (SrcType->getScalarSizeInBits() != 32) &&
1219 (SrcType->getScalarSizeInBits() != 64)) {
1220 continue;
1221 }
1222
1223 if (SrcType->isVectorTy()) {
1224 if ((SrcType->getVectorNumElements() != 2) &&
1225 (SrcType->getVectorNumElements() != 3) &&
1226 (SrcType->getVectorNumElements() != 4) &&
1227 (SrcType->getVectorNumElements() != 8) &&
1228 (SrcType->getVectorNumElements() != 16)) {
1229 continue;
1230 }
1231 }
1232
1233 // The approach used is to shift the top bits down, the bottom bits up
1234 // and OR the two shifted values.
1235
1236 // The rotation amount is to be treated modulo the element size.
1237 // Since SPIR-V shift ops don't support this, let's apply the
1238 // modulo ahead of shifting. The element size is always a power of
1239 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001240 auto ModMask =
1241 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001242 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1243 ModMask, "", CI);
1244
1245 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001246 auto ScalarSize =
1247 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001248 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1249 RotAmount, "", CI);
1250
1251 // Now shift the bottom bits up and the top bits down
1252 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1253 RotAmount, "", CI);
1254 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1255 DownAmount, "", CI);
1256
1257 // Finally OR the two shifted values
1258 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1259 HiRotated, "", CI);
1260
1261 // Replace call with the expression
1262 CI->replaceAllUsesWith(V);
1263
1264 // Lastly, remember to remove the user.
1265 ToRemoves.push_back(CI);
1266 }
1267 }
1268
1269 Changed = !ToRemoves.empty();
1270
1271 // And cleanup the calls we don't use anymore.
1272 for (auto V : ToRemoves) {
1273 V->eraseFromParent();
1274 }
1275
1276 // And remove the function we don't need either too.
1277 F->eraseFromParent();
1278 }
1279 }
1280
1281 return Changed;
1282}
1283
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001284bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1285 bool Changed = false;
1286
1287 for (auto const &SymVal : M.getValueSymbolTable()) {
1288
1289 // Skip symbols whose name obviously doesn't match
1290 if (!SymVal.getKey().contains("convert_")) {
1291 continue;
1292 }
1293
1294 // Is there a function going by that name?
1295 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1296
1297 // Get info from the mangled name
1298 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001299 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001300
1301 // All functions of interest are handled by our mangled name parser
1302 if (!parsed) {
1303 continue;
1304 }
1305
1306 // Move on if this isn't a call to convert_
1307 if (!finfo.name.startswith("convert_")) {
1308 continue;
1309 }
1310
1311 // Extract the destination type from the function name
1312 StringRef DstTypeName = finfo.name;
1313 DstTypeName.consume_front("convert_");
1314
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001315 auto DstSignedNess =
1316 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1317 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1318 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1319 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1320 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1321 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1322 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1323 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1324 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1325 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001326
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001327 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001328 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001329
1330 SmallVector<Instruction *, 4> ToRemoves;
1331
1332 // Walk the users of the function.
1333 for (auto &U : F->uses()) {
1334 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1335
1336 // Get arguments
1337 auto SrcValue = CI->getOperand(0);
1338
1339 // Don't touch overloads that aren't in OpenCL C
1340 auto SrcType = SrcValue->getType();
1341 auto DstType = CI->getType();
1342
1343 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1344 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1345 continue;
1346 }
1347
1348 if (SrcType->isVectorTy()) {
1349
1350 if (SrcType->getVectorNumElements() !=
1351 DstType->getVectorNumElements()) {
1352 continue;
1353 }
1354
1355 if ((SrcType->getVectorNumElements() != 2) &&
1356 (SrcType->getVectorNumElements() != 3) &&
1357 (SrcType->getVectorNumElements() != 4) &&
1358 (SrcType->getVectorNumElements() != 8) &&
1359 (SrcType->getVectorNumElements() != 16)) {
1360 continue;
1361 }
1362 }
1363
1364 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1365 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1366
1367 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1368 bool DstIsInt = DstType->isIntOrIntVectorTy();
1369
1370 Value *V;
alan-baker4092cc52020-01-15 16:42:57 -05001371 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1372 // Unnecessary cast operation.
1373 V = SrcValue;
1374 } else if (SrcIsFloat && DstIsFloat) {
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001375 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1376 } else if (SrcIsFloat && DstIsInt) {
1377 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001378 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1379 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001380 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001381 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1382 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001383 }
1384 } else if (SrcIsInt && DstIsFloat) {
1385 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001386 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1387 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001388 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001389 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1390 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001391 }
1392 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001393 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1394 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001395 } else {
1396 // Not something we're supposed to handle, just move on
1397 continue;
1398 }
1399
1400 // Replace call with the expression
1401 CI->replaceAllUsesWith(V);
1402
1403 // Lastly, remember to remove the user.
1404 ToRemoves.push_back(CI);
1405 }
1406 }
1407
1408 Changed = !ToRemoves.empty();
1409
1410 // And cleanup the calls we don't use anymore.
1411 for (auto V : ToRemoves) {
1412 V->eraseFromParent();
1413 }
1414
1415 // And remove the function we don't need either too.
1416 F->eraseFromParent();
1417 }
1418 }
1419
1420 return Changed;
1421}
1422
Kévin Petit8a560882019-03-21 15:24:34 +00001423bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1424 bool Changed = false;
1425
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001426 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001427
Kévin Petit617a76d2019-04-04 13:54:16 +01001428 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001429 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1430 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1431
1432 // Skip symbols whose name doesn't match
1433 if (!isMad && !isMul) {
1434 continue;
1435 }
1436
1437 // Is there a function going by that name?
1438 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001439 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001440 }
1441 }
1442
Kévin Petit617a76d2019-04-04 13:54:16 +01001443 for (auto F : FnWorklist) {
1444 SmallVector<Instruction *, 4> ToRemoves;
1445
1446 bool isMad = F->getName().startswith("_Z6mad_hi");
1447 // Walk the users of the function.
1448 for (auto &U : F->uses()) {
1449 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1450
1451 // Get arguments
1452 auto AValue = CI->getOperand(0);
1453 auto BValue = CI->getOperand(1);
1454 auto CValue = CI->getOperand(2);
1455
1456 // Don't touch overloads that aren't in OpenCL C
1457 auto AType = AValue->getType();
1458 auto BType = BValue->getType();
1459 auto CType = CValue->getType();
1460
1461 if ((AType != BType) || (CI->getType() != AType) ||
1462 (isMad && (AType != CType))) {
1463 continue;
1464 }
1465
1466 if (!AType->isIntOrIntVectorTy()) {
1467 continue;
1468 }
1469
1470 if ((AType->getScalarSizeInBits() != 8) &&
1471 (AType->getScalarSizeInBits() != 16) &&
1472 (AType->getScalarSizeInBits() != 32) &&
1473 (AType->getScalarSizeInBits() != 64)) {
1474 continue;
1475 }
1476
1477 if (AType->isVectorTy()) {
1478 if ((AType->getVectorNumElements() != 2) &&
1479 (AType->getVectorNumElements() != 3) &&
1480 (AType->getVectorNumElements() != 4) &&
1481 (AType->getVectorNumElements() != 8) &&
1482 (AType->getVectorNumElements() != 16)) {
1483 continue;
1484 }
1485 }
1486
1487 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001488 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001489
1490 // Select the appropriate signed/unsigned SPIR-V op
1491 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001492 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001493 opcode = spv::OpSMulExtended;
1494 } else {
1495 opcode = spv::OpUMulExtended;
1496 }
1497
1498 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001499 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001500 auto ExMulRetType = StructType::create(TwoValueType);
1501
1502 // Call the SPIR-V op
1503 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1504 ExMulRetType, {AValue, BValue});
1505
1506 // Get the high part of the result
1507 unsigned Idxs[] = {1};
1508 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1509
1510 // If we're handling a mad_hi, add the third argument to the result
1511 if (isMad) {
1512 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1513 }
1514
1515 // Replace call with the expression
1516 CI->replaceAllUsesWith(V);
1517
1518 // Lastly, remember to remove the user.
1519 ToRemoves.push_back(CI);
1520 }
1521 }
1522
1523 Changed = !ToRemoves.empty();
1524
1525 // And cleanup the calls we don't use anymore.
1526 for (auto V : ToRemoves) {
1527 V->eraseFromParent();
1528 }
1529
1530 // And remove the function we don't need either too.
1531 F->eraseFromParent();
1532 }
1533
Kévin Petit8a560882019-03-21 15:24:34 +00001534 return Changed;
1535}
1536
Kévin Petitf5b78a22018-10-25 14:32:17 +00001537bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1538 bool Changed = false;
1539
1540 for (auto const &SymVal : M.getValueSymbolTable()) {
1541 // Skip symbols whose name doesn't match
1542 if (!SymVal.getKey().startswith("_Z6select")) {
1543 continue;
1544 }
1545 // Is there a function going by that name?
1546 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1547
1548 SmallVector<Instruction *, 4> ToRemoves;
1549
1550 // Walk the users of the function.
1551 for (auto &U : F->uses()) {
1552 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1553
1554 // Get arguments
1555 auto FalseValue = CI->getOperand(0);
1556 auto TrueValue = CI->getOperand(1);
1557 auto PredicateValue = CI->getOperand(2);
1558
1559 // Don't touch overloads that aren't in OpenCL C
1560 auto FalseType = FalseValue->getType();
1561 auto TrueType = TrueValue->getType();
1562 auto PredicateType = PredicateValue->getType();
1563
1564 if (FalseType != TrueType) {
1565 continue;
1566 }
1567
1568 if (!PredicateType->isIntOrIntVectorTy()) {
1569 continue;
1570 }
1571
1572 if (!FalseType->isIntOrIntVectorTy() &&
1573 !FalseType->getScalarType()->isFloatingPointTy()) {
1574 continue;
1575 }
1576
1577 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1578 continue;
1579 }
1580
1581 if (FalseType->getScalarSizeInBits() !=
1582 PredicateType->getScalarSizeInBits()) {
1583 continue;
1584 }
1585
1586 if (FalseType->isVectorTy()) {
1587 if (FalseType->getVectorNumElements() !=
1588 PredicateType->getVectorNumElements()) {
1589 continue;
1590 }
1591
1592 if ((FalseType->getVectorNumElements() != 2) &&
1593 (FalseType->getVectorNumElements() != 3) &&
1594 (FalseType->getVectorNumElements() != 4) &&
1595 (FalseType->getVectorNumElements() != 8) &&
1596 (FalseType->getVectorNumElements() != 16)) {
1597 continue;
1598 }
1599 }
1600
1601 // Create constant
1602 const auto ZeroValue = Constant::getNullValue(PredicateType);
1603
1604 // Scalar and vector are to be treated differently
1605 CmpInst::Predicate Pred;
1606 if (PredicateType->isVectorTy()) {
1607 Pred = CmpInst::ICMP_SLT;
1608 } else {
1609 Pred = CmpInst::ICMP_NE;
1610 }
1611
1612 // Create comparison instruction
1613 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1614 ZeroValue, "", CI);
1615
1616 // Create select
1617 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1618
1619 // Replace call with the selection
1620 CI->replaceAllUsesWith(V);
1621
1622 // Lastly, remember to remove the user.
1623 ToRemoves.push_back(CI);
1624 }
1625 }
1626
1627 Changed = !ToRemoves.empty();
1628
1629 // And cleanup the calls we don't use anymore.
1630 for (auto V : ToRemoves) {
1631 V->eraseFromParent();
1632 }
1633
1634 // And remove the function we don't need either too.
1635 F->eraseFromParent();
1636 }
1637 }
1638
1639 return Changed;
1640}
1641
Kévin Petite7d0cce2018-10-31 12:38:56 +00001642bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1643 bool Changed = false;
1644
1645 for (auto const &SymVal : M.getValueSymbolTable()) {
1646 // Skip symbols whose name doesn't match
1647 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1648 continue;
1649 }
1650 // Is there a function going by that name?
1651 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1652
1653 SmallVector<Instruction *, 4> ToRemoves;
1654
1655 // Walk the users of the function.
1656 for (auto &U : F->uses()) {
1657 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1658
1659 if (CI->getNumOperands() != 4) {
1660 continue;
1661 }
1662
1663 // Get arguments
1664 auto FalseValue = CI->getOperand(0);
1665 auto TrueValue = CI->getOperand(1);
1666 auto PredicateValue = CI->getOperand(2);
1667
1668 // Don't touch overloads that aren't in OpenCL C
1669 auto FalseType = FalseValue->getType();
1670 auto TrueType = TrueValue->getType();
1671 auto PredicateType = PredicateValue->getType();
1672
1673 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1674 continue;
1675 }
1676
1677 if (TrueType->isVectorTy()) {
1678 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1679 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001680 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001681 }
1682 if ((TrueType->getVectorNumElements() != 2) &&
1683 (TrueType->getVectorNumElements() != 3) &&
1684 (TrueType->getVectorNumElements() != 4) &&
1685 (TrueType->getVectorNumElements() != 8) &&
1686 (TrueType->getVectorNumElements() != 16)) {
1687 continue;
1688 }
1689 }
1690
1691 // Remember the type of the operands
1692 auto OpType = TrueType;
1693
1694 // The actual bit selection will always be done on an integer type,
1695 // declare it here
1696 Type *BitType;
1697
1698 // If the operands are float, then bitcast them to int
1699 if (OpType->getScalarType()->isFloatingPointTy()) {
1700
1701 // First create the new type
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001702 BitType = getIntOrIntVectorTyForCast(M.getContext(), OpType);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001703
1704 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001705 PredicateValue =
1706 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1707 FalseValue =
1708 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1709 TrueValue =
1710 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001711
1712 } else {
1713 // The operands have an integer type, use it directly
1714 BitType = OpType;
1715 }
1716
1717 // All the operands are now always integers
1718 // implement as (c & b) | (~c & a)
1719
1720 // Create our negated predicate value
1721 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001722 auto NotPredicateValue = BinaryOperator::Create(
1723 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001724
1725 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001726 auto BitsFalse = BinaryOperator::Create(
1727 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1728 auto BitsTrue = BinaryOperator::Create(
1729 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001730
1731 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1732 BitsTrue, "", CI);
1733
1734 // If we were dealing with a floating point type, we must bitcast
1735 // the result back to that
1736 if (OpType->getScalarType()->isFloatingPointTy()) {
1737 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1738 }
1739
1740 // Replace call with our new code
1741 CI->replaceAllUsesWith(V);
1742
1743 // Lastly, remember to remove the user.
1744 ToRemoves.push_back(CI);
1745 }
1746 }
1747
1748 Changed = !ToRemoves.empty();
1749
1750 // And cleanup the calls we don't use anymore.
1751 for (auto V : ToRemoves) {
1752 V->eraseFromParent();
1753 }
1754
1755 // And remove the function we don't need either too.
1756 F->eraseFromParent();
1757 }
1758 }
1759
1760 return Changed;
1761}
1762
Kévin Petit6b0a9532018-10-30 20:00:39 +00001763bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1764 bool Changed = false;
1765
1766 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001767 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1768 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1769 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1770 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1771 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1772 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001773 };
1774
1775 for (auto Pair : Map) {
1776 // If we find a function with the matching name.
1777 if (auto F = M.getFunction(Pair.first)) {
1778 SmallVector<Instruction *, 4> ToRemoves;
1779
1780 // Walk the users of the function.
1781 for (auto &U : F->uses()) {
1782 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1783
1784 auto ReplacementFn = Pair.second;
1785
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001786 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001787 Value *VectorArg;
1788
1789 // First figure out which function we're dealing with
1790 if (F->getName().startswith("_Z10smoothstep")) {
1791 ArgsToSplat.push_back(CI->getOperand(1));
1792 VectorArg = CI->getOperand(2);
1793 } else {
1794 VectorArg = CI->getOperand(1);
1795 }
1796
1797 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001798 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001799 auto VecType = VectorArg->getType();
1800
1801 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001802 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001803 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001804 auto index =
1805 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1806 NewVectorArg =
1807 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001808 }
1809 SplatArgs.push_back(NewVectorArg);
1810 }
1811
1812 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001813 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1814 const auto NewFType =
1815 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001816
1817 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1818
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001819 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001820 for (auto arg : SplatArgs) {
1821 NewArgs.push_back(arg);
1822 }
1823 NewArgs.push_back(VectorArg);
1824
1825 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1826
1827 CI->replaceAllUsesWith(NewCI);
1828
1829 // Lastly, remember to remove the user.
1830 ToRemoves.push_back(CI);
1831 }
1832 }
1833
1834 Changed = !ToRemoves.empty();
1835
1836 // And cleanup the calls we don't use anymore.
1837 for (auto V : ToRemoves) {
1838 V->eraseFromParent();
1839 }
1840
1841 // And remove the function we don't need either too.
1842 F->eraseFromParent();
1843 }
1844 }
1845
1846 return Changed;
1847}
1848
David Neto22f144c2017-06-12 14:26:21 -04001849bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1850 bool Changed = false;
1851
1852 const std::map<const char *, Instruction::BinaryOps> Map = {
1853 {"_Z7signbitf", Instruction::LShr},
1854 {"_Z7signbitDv2_f", Instruction::AShr},
1855 {"_Z7signbitDv3_f", Instruction::AShr},
1856 {"_Z7signbitDv4_f", Instruction::AShr},
1857 };
1858
1859 for (auto Pair : Map) {
1860 // If we find a function with the matching name.
1861 if (auto F = M.getFunction(Pair.first)) {
1862 SmallVector<Instruction *, 4> ToRemoves;
1863
1864 // Walk the users of the function.
1865 for (auto &U : F->uses()) {
1866 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1867 auto Arg = CI->getOperand(0);
1868
1869 auto Bitcast =
1870 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1871
1872 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1873 ConstantInt::get(CI->getType(), 31),
1874 "", CI);
1875
1876 CI->replaceAllUsesWith(Shr);
1877
1878 // Lastly, remember to remove the user.
1879 ToRemoves.push_back(CI);
1880 }
1881 }
1882
1883 Changed = !ToRemoves.empty();
1884
1885 // And cleanup the calls we don't use anymore.
1886 for (auto V : ToRemoves) {
1887 V->eraseFromParent();
1888 }
1889
1890 // And remove the function we don't need either too.
1891 F->eraseFromParent();
1892 }
1893 }
1894
1895 return Changed;
1896}
1897
1898bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1899 bool Changed = false;
1900
1901 const std::map<const char *,
1902 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1903 Map = {
1904 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1905 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1906 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1907 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
alan-bakerc21a65e2020-01-15 14:19:39 -05001908 {"_Z3madDhDhDh", {Instruction::FMul, Instruction::FAdd}},
1909 {"_Z3madDv2_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
1910 {"_Z3madDv3_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
1911 {"_Z3madDv4_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
David Neto22f144c2017-06-12 14:26:21 -04001912 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1913 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1914 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1915 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1916 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1917 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1918 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1919 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1920 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1921 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1922 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1923 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1924 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1925 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1926 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1927 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1928 };
1929
1930 for (auto Pair : Map) {
1931 // If we find a function with the matching name.
1932 if (auto F = M.getFunction(Pair.first)) {
1933 SmallVector<Instruction *, 4> ToRemoves;
1934
1935 // Walk the users of the function.
1936 for (auto &U : F->uses()) {
1937 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1938 // The multiply instruction to use.
1939 auto MulInst = Pair.second.first;
1940
1941 // The add instruction to use.
1942 auto AddInst = Pair.second.second;
1943
1944 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1945
1946 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1947 CI->getArgOperand(1), "", CI);
1948
1949 if (Instruction::BinaryOpsEnd != AddInst) {
1950 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1951 CI);
1952 }
1953
1954 CI->replaceAllUsesWith(I);
1955
1956 // Lastly, remember to remove the user.
1957 ToRemoves.push_back(CI);
1958 }
1959 }
1960
1961 Changed = !ToRemoves.empty();
1962
1963 // And cleanup the calls we don't use anymore.
1964 for (auto V : ToRemoves) {
1965 V->eraseFromParent();
1966 }
1967
1968 // And remove the function we don't need either too.
1969 F->eraseFromParent();
1970 }
1971 }
1972
1973 return Changed;
1974}
1975
Derek Chowcfd368b2017-10-19 20:58:45 -07001976bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1977 bool Changed = false;
1978
alan-bakerf795f392019-06-11 18:24:34 -04001979 for (auto const &SymVal : M.getValueSymbolTable()) {
1980 if (!SymVal.getKey().contains("vstore"))
1981 continue;
1982 if (SymVal.getKey().contains("vstore_"))
1983 continue;
1984 if (SymVal.getKey().contains("vstorea"))
1985 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001986
alan-bakerf795f392019-06-11 18:24:34 -04001987 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001988 SmallVector<Instruction *, 4> ToRemoves;
1989
alan-bakerf795f392019-06-11 18:24:34 -04001990 auto fname = F->getName();
1991 if (!fname.consume_front("_Z"))
1992 continue;
1993 size_t name_len;
1994 if (fname.consumeInteger(10, name_len))
1995 continue;
alan-baker21574d32020-01-29 16:00:31 -05001996 std::string name = fname.take_front(name_len).str();
alan-bakerf795f392019-06-11 18:24:34 -04001997
1998 bool ok = StringSwitch<bool>(name)
1999 .Case("vstore2", true)
2000 .Case("vstore3", true)
2001 .Case("vstore4", true)
2002 .Case("vstore8", true)
2003 .Case("vstore16", true)
2004 .Default(false);
2005 if (!ok)
2006 continue;
2007
Derek Chowcfd368b2017-10-19 20:58:45 -07002008 for (auto &U : F->uses()) {
2009 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002010 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07002011
alan-bakerf795f392019-06-11 18:24:34 -04002012 auto data_type = data->getType();
2013 if (!data_type->isVectorTy())
2014 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002015
alan-bakerf795f392019-06-11 18:24:34 -04002016 auto elems = data_type->getVectorNumElements();
2017 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2018 elems != 16)
2019 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002020
alan-bakerf795f392019-06-11 18:24:34 -04002021 auto offset = CI->getOperand(1);
2022 auto ptr = CI->getOperand(2);
2023 auto ptr_type = ptr->getType();
2024 auto pointee_type = ptr_type->getPointerElementType();
2025 if (pointee_type != data_type->getVectorElementType())
2026 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002027
alan-bakerf795f392019-06-11 18:24:34 -04002028 // Avoid pointer casts. Instead generate the correct number of stores
2029 // and rely on drivers to coalesce appropriately.
2030 IRBuilder<> builder(CI);
2031 auto elems_const = builder.getInt32(elems);
2032 auto adjust = builder.CreateMul(offset, elems_const);
2033 for (auto i = 0; i < elems; ++i) {
2034 auto idx = builder.getInt32(i);
2035 auto add = builder.CreateAdd(adjust, idx);
2036 auto gep = builder.CreateGEP(ptr, add);
2037 auto extract = builder.CreateExtractElement(data, i);
2038 auto store = builder.CreateStore(extract, gep);
2039 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002040
Derek Chowcfd368b2017-10-19 20:58:45 -07002041 ToRemoves.push_back(CI);
2042 }
2043 }
2044
2045 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002046 for (auto V : ToRemoves) {
2047 V->eraseFromParent();
2048 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002049 F->eraseFromParent();
2050 }
2051 }
2052
2053 return Changed;
2054}
2055
2056bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2057 bool Changed = false;
2058
alan-bakerf795f392019-06-11 18:24:34 -04002059 for (auto const &SymVal : M.getValueSymbolTable()) {
2060 if (!SymVal.getKey().contains("vload"))
2061 continue;
2062 if (SymVal.getKey().contains("vload_"))
2063 continue;
2064 if (SymVal.getKey().contains("vloada"))
2065 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002066
alan-bakerf795f392019-06-11 18:24:34 -04002067 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002068 SmallVector<Instruction *, 4> ToRemoves;
2069
alan-bakerf795f392019-06-11 18:24:34 -04002070 auto fname = F->getName();
2071 if (!fname.consume_front("_Z"))
2072 continue;
2073 size_t name_len;
2074 if (fname.consumeInteger(10, name_len))
2075 continue;
alan-baker21574d32020-01-29 16:00:31 -05002076 std::string name = fname.take_front(name_len).str();
alan-bakerf795f392019-06-11 18:24:34 -04002077
2078 bool ok = StringSwitch<bool>(name)
2079 .Case("vload2", true)
2080 .Case("vload3", true)
2081 .Case("vload4", true)
2082 .Case("vload8", true)
2083 .Case("vload16", true)
2084 .Default(false);
2085 if (!ok)
2086 continue;
2087
Derek Chowcfd368b2017-10-19 20:58:45 -07002088 for (auto &U : F->uses()) {
2089 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002090 auto ret_type = F->getReturnType();
2091 if (!ret_type->isVectorTy())
2092 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002093
alan-bakerf795f392019-06-11 18:24:34 -04002094 auto elems = ret_type->getVectorNumElements();
2095 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2096 elems != 16)
2097 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002098
alan-bakerf795f392019-06-11 18:24:34 -04002099 auto offset = CI->getOperand(0);
2100 auto ptr = CI->getOperand(1);
2101 auto ptr_type = ptr->getType();
2102 auto pointee_type = ptr_type->getPointerElementType();
2103 if (pointee_type != ret_type->getVectorElementType())
2104 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002105
alan-bakerf795f392019-06-11 18:24:34 -04002106 // Avoid pointer casts. Instead generate the correct number of loads
2107 // and rely on drivers to coalesce appropriately.
2108 IRBuilder<> builder(CI);
2109 auto elems_const = builder.getInt32(elems);
2110 Value *insert = UndefValue::get(ret_type);
2111 auto adjust = builder.CreateMul(offset, elems_const);
2112 for (auto i = 0; i < elems; ++i) {
2113 auto idx = builder.getInt32(i);
2114 auto add = builder.CreateAdd(adjust, idx);
2115 auto gep = builder.CreateGEP(ptr, add);
2116 auto load = builder.CreateLoad(gep);
2117 insert = builder.CreateInsertElement(insert, load, i);
2118 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002119
alan-bakerf795f392019-06-11 18:24:34 -04002120 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002121 ToRemoves.push_back(CI);
2122 }
2123 }
2124
2125 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002126 for (auto V : ToRemoves) {
2127 V->eraseFromParent();
2128 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002129 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002130 }
2131 }
2132
2133 return Changed;
2134}
2135
David Neto22f144c2017-06-12 14:26:21 -04002136bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2137 bool Changed = false;
2138
2139 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2140 "_Z10vload_halfjPU3AS2KDh"};
2141
2142 for (auto Name : Map) {
2143 // If we find a function with the matching name.
2144 if (auto F = M.getFunction(Name)) {
2145 SmallVector<Instruction *, 4> ToRemoves;
2146
2147 // Walk the users of the function.
2148 for (auto &U : F->uses()) {
2149 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2150 // The index argument from vload_half.
2151 auto Arg0 = CI->getOperand(0);
2152
2153 // The pointer argument from vload_half.
2154 auto Arg1 = CI->getOperand(1);
2155
David Neto22f144c2017-06-12 14:26:21 -04002156 auto IntTy = Type::getInt32Ty(M.getContext());
2157 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002158 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2159
David Neto22f144c2017-06-12 14:26:21 -04002160 // Our intrinsic to unpack a float2 from an int.
2161 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2162
2163 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2164
David Neto482550a2018-03-24 05:21:07 -07002165 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002166 auto ShortTy = Type::getInt16Ty(M.getContext());
2167 auto ShortPointerTy = PointerType::get(
2168 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002169
David Netoac825b82017-05-30 12:49:01 -04002170 // Cast the half* pointer to short*.
2171 auto Cast =
2172 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002173
David Netoac825b82017-05-30 12:49:01 -04002174 // Index into the correct address of the casted pointer.
2175 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2176
2177 // Load from the short* we casted to.
2178 auto Load = new LoadInst(Index, "", CI);
2179
2180 // ZExt the short -> int.
2181 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2182
2183 // Get our float2.
2184 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2185
2186 // Extract out the bottom element which is our float result.
2187 auto Extract = ExtractElementInst::Create(
2188 Call, ConstantInt::get(IntTy, 0), "", CI);
2189
2190 CI->replaceAllUsesWith(Extract);
2191 } else {
2192 // Assume the pointer argument points to storage aligned to 32bits
2193 // or more.
2194 // TODO(dneto): Do more analysis to make sure this is true?
2195 //
2196 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2197 // with:
2198 //
2199 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2200 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2201 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2202 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2203 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2204 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2205 // x float> %converted, %index_is_odd32
2206
2207 auto IntPointerTy = PointerType::get(
2208 IntTy, Arg1->getType()->getPointerAddressSpace());
2209
David Neto973e6a82017-05-30 13:48:18 -04002210 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002211 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002212 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002213 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2214
2215 auto One = ConstantInt::get(IntTy, 1);
2216 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2217 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2218
2219 // Index into the correct address of the casted pointer.
2220 auto Ptr =
2221 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2222
2223 // Load from the int* we casted to.
2224 auto Load = new LoadInst(Ptr, "", CI);
2225
2226 // Get our float2.
2227 auto Call = CallInst::Create(NewF, Load, "", CI);
2228
2229 // Extract out the float result, where the element number is
2230 // determined by whether the original index was even or odd.
2231 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2232
2233 CI->replaceAllUsesWith(Extract);
2234 }
David Neto22f144c2017-06-12 14:26:21 -04002235
2236 // Lastly, remember to remove the user.
2237 ToRemoves.push_back(CI);
2238 }
2239 }
2240
2241 Changed = !ToRemoves.empty();
2242
2243 // And cleanup the calls we don't use anymore.
2244 for (auto V : ToRemoves) {
2245 V->eraseFromParent();
2246 }
2247
2248 // And remove the function we don't need either too.
2249 F->eraseFromParent();
2250 }
2251 }
2252
2253 return Changed;
2254}
2255
2256bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002257
Kévin Petite8edce32019-04-10 14:23:32 +01002258 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002259 "_Z11vload_half2jPU3AS1KDh",
2260 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2261 "_Z11vload_half2jPU3AS2KDh",
2262 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2263 };
David Neto22f144c2017-06-12 14:26:21 -04002264
Kévin Petite8edce32019-04-10 14:23:32 +01002265 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2266 // The index argument from vload_half.
2267 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002268
Kévin Petite8edce32019-04-10 14:23:32 +01002269 // The pointer argument from vload_half.
2270 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 auto IntTy = Type::getInt32Ty(M.getContext());
2273 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002274 auto NewPointerTy =
2275 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002276 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002277
Kévin Petite8edce32019-04-10 14:23:32 +01002278 // Cast the half* pointer to int*.
2279 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002280
Kévin Petite8edce32019-04-10 14:23:32 +01002281 // Index into the correct address of the casted pointer.
2282 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002283
Kévin Petite8edce32019-04-10 14:23:32 +01002284 // Load from the int* we casted to.
2285 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002286
Kévin Petite8edce32019-04-10 14:23:32 +01002287 // Our intrinsic to unpack a float2 from an int.
2288 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002289
Kévin Petite8edce32019-04-10 14:23:32 +01002290 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002291
Kévin Petite8edce32019-04-10 14:23:32 +01002292 // Get our float2.
2293 return CallInst::Create(NewF, Load, "", CI);
2294 });
David Neto22f144c2017-06-12 14:26:21 -04002295}
2296
2297bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002298
Kévin Petite8edce32019-04-10 14:23:32 +01002299 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002300 "_Z11vload_half4jPU3AS1KDh",
2301 "_Z12vloada_half4jPU3AS1KDh",
2302 "_Z11vload_half4jPU3AS2KDh",
2303 "_Z12vloada_half4jPU3AS2KDh",
2304 };
David Neto22f144c2017-06-12 14:26:21 -04002305
Kévin Petite8edce32019-04-10 14:23:32 +01002306 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2307 // The index argument from vload_half.
2308 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002309
Kévin Petite8edce32019-04-10 14:23:32 +01002310 // The pointer argument from vload_half.
2311 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002312
Kévin Petite8edce32019-04-10 14:23:32 +01002313 auto IntTy = Type::getInt32Ty(M.getContext());
2314 auto Int2Ty = VectorType::get(IntTy, 2);
2315 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002316 auto NewPointerTy =
2317 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002318 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 // Cast the half* pointer to int2*.
2321 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 // Index into the correct address of the casted pointer.
2324 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 // Load from the int2* we casted to.
2327 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002328
Kévin Petite8edce32019-04-10 14:23:32 +01002329 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002330 auto X =
2331 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2332 auto Y =
2333 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002334
Kévin Petite8edce32019-04-10 14:23:32 +01002335 // Our intrinsic to unpack a float2 from an int.
2336 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002337
Kévin Petite8edce32019-04-10 14:23:32 +01002338 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002339
Kévin Petite8edce32019-04-10 14:23:32 +01002340 // Get the lower (x & y) components of our final float4.
2341 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002342
Kévin Petite8edce32019-04-10 14:23:32 +01002343 // Get the higher (z & w) components of our final float4.
2344 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002345
Kévin Petite8edce32019-04-10 14:23:32 +01002346 Constant *ShuffleMask[4] = {
2347 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2348 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002349
Kévin Petite8edce32019-04-10 14:23:32 +01002350 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002351 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2352 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002353 });
David Neto22f144c2017-06-12 14:26:21 -04002354}
2355
David Neto6ad93232018-06-07 15:42:58 -07002356bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002357
2358 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2359 //
2360 // %u = load i32 %ptr
2361 // %fxy = call <2 x float> Unpack2xHalf(u)
2362 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002363 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002364 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2365 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2366 "_Z20__clspv_vloada_half2jPKj", // private
2367 };
2368
Kévin Petite8edce32019-04-10 14:23:32 +01002369 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2370 auto Index = CI->getOperand(0);
2371 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002372
Kévin Petite8edce32019-04-10 14:23:32 +01002373 auto IntTy = Type::getInt32Ty(M.getContext());
2374 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2375 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002376
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002377 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002378 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002379
Kévin Petite8edce32019-04-10 14:23:32 +01002380 // Our intrinsic to unpack a float2 from an int.
2381 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002382
Kévin Petite8edce32019-04-10 14:23:32 +01002383 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002384
Kévin Petite8edce32019-04-10 14:23:32 +01002385 // Get our final float2.
2386 return CallInst::Create(NewF, Load, "", CI);
2387 });
David Neto6ad93232018-06-07 15:42:58 -07002388}
2389
2390bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002391
2392 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2393 //
2394 // %u2 = load <2 x i32> %ptr
2395 // %u2xy = extractelement %u2, 0
2396 // %u2zw = extractelement %u2, 1
2397 // %fxy = call <2 x float> Unpack2xHalf(uint)
2398 // %fzw = call <2 x float> Unpack2xHalf(uint)
2399 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002400 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002401 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2402 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2403 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2404 };
2405
Kévin Petite8edce32019-04-10 14:23:32 +01002406 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2407 auto Index = CI->getOperand(0);
2408 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002409
Kévin Petite8edce32019-04-10 14:23:32 +01002410 auto IntTy = Type::getInt32Ty(M.getContext());
2411 auto Int2Ty = VectorType::get(IntTy, 2);
2412 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2413 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002414
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002415 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002416 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002417
Kévin Petite8edce32019-04-10 14:23:32 +01002418 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002419 auto X =
2420 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2421 auto Y =
2422 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002423
Kévin Petite8edce32019-04-10 14:23:32 +01002424 // Our intrinsic to unpack a float2 from an int.
2425 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002426
Kévin Petite8edce32019-04-10 14:23:32 +01002427 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002428
Kévin Petite8edce32019-04-10 14:23:32 +01002429 // Get the lower (x & y) components of our final float4.
2430 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002431
Kévin Petite8edce32019-04-10 14:23:32 +01002432 // Get the higher (z & w) components of our final float4.
2433 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002434
Kévin Petite8edce32019-04-10 14:23:32 +01002435 Constant *ShuffleMask[4] = {
2436 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2437 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002438
Kévin Petite8edce32019-04-10 14:23:32 +01002439 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002440 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2441 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002442 });
David Neto6ad93232018-06-07 15:42:58 -07002443}
2444
David Neto22f144c2017-06-12 14:26:21 -04002445bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002446
Kévin Petite8edce32019-04-10 14:23:32 +01002447 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2448 "_Z15vstore_half_rtefjPU3AS1Dh",
2449 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002450
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002451 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002452 // The value to store.
2453 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002454
Kévin Petite8edce32019-04-10 14:23:32 +01002455 // The index argument from vstore_half.
2456 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002457
Kévin Petite8edce32019-04-10 14:23:32 +01002458 // The pointer argument from vstore_half.
2459 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002460
Kévin Petite8edce32019-04-10 14:23:32 +01002461 auto IntTy = Type::getInt32Ty(M.getContext());
2462 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2463 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2464 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002465
Kévin Petite8edce32019-04-10 14:23:32 +01002466 // Our intrinsic to pack a float2 to an int.
2467 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002468
Kévin Petite8edce32019-04-10 14:23:32 +01002469 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002470
Kévin Petite8edce32019-04-10 14:23:32 +01002471 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002472 auto TempVec = InsertElementInst::Create(
2473 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002474
Kévin Petite8edce32019-04-10 14:23:32 +01002475 // Pack the float2 -> half2 (in an int).
2476 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002477
Kévin Petite8edce32019-04-10 14:23:32 +01002478 Value *Ret;
2479 if (clspv::Option::F16BitStorage()) {
2480 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002481 auto ShortPointerTy =
2482 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002483
Kévin Petite8edce32019-04-10 14:23:32 +01002484 // Truncate our i32 to an i16.
2485 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002486
Kévin Petite8edce32019-04-10 14:23:32 +01002487 // Cast the half* pointer to short*.
2488 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002489
Kévin Petite8edce32019-04-10 14:23:32 +01002490 // Index into the correct address of the casted pointer.
2491 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002492
Kévin Petite8edce32019-04-10 14:23:32 +01002493 // Store to the int* we casted to.
2494 Ret = new StoreInst(Trunc, Index, CI);
2495 } else {
2496 // We can only write to 32-bit aligned words.
2497 //
2498 // Assuming base is aligned to 32-bits, replace the equivalent of
2499 // vstore_half(value, index, base)
2500 // with:
2501 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2502 // uint32_t write_to_upper_half = index & 1u;
2503 // uint32_t shift = write_to_upper_half << 4;
2504 //
2505 // // Pack the float value as a half number in bottom 16 bits
2506 // // of an i32.
2507 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2508 //
2509 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2510 // ^ ((packed & 0xffff) << shift)
2511 // // We only need relaxed consistency, but OpenCL 1.2 only has
2512 // // sequentially consistent atomics.
2513 // // TODO(dneto): Use relaxed consistency.
2514 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002515 auto IntPointerTy =
2516 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002517
Kévin Petite8edce32019-04-10 14:23:32 +01002518 auto Four = ConstantInt::get(IntTy, 4);
2519 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002520
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002521 auto IndexIsOdd =
2522 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002523 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002524 auto IndexIntoI32 =
2525 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2526 auto BaseI32Ptr =
2527 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2528 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2529 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002530 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2531 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002532 auto MaskBitsToWrite =
2533 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2534 auto MaskedCurrent = BinaryOperator::CreateAnd(
2535 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002536
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002537 auto XLowerBits =
2538 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2539 auto NewBitsToWrite =
2540 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2541 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2542 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002543
Kévin Petite8edce32019-04-10 14:23:32 +01002544 // Generate the call to atomi_xor.
2545 SmallVector<Type *, 5> ParamTypes;
2546 // The pointer type.
2547 ParamTypes.push_back(IntPointerTy);
2548 // The Types for memory scope, semantics, and value.
2549 ParamTypes.push_back(IntTy);
2550 ParamTypes.push_back(IntTy);
2551 ParamTypes.push_back(IntTy);
2552 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2553 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002554
Kévin Petite8edce32019-04-10 14:23:32 +01002555 const auto ConstantScopeDevice =
2556 ConstantInt::get(IntTy, spv::ScopeDevice);
2557 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2558 // (SPIR-V Workgroup).
2559 const auto AddrSpaceSemanticsBits =
2560 IntPointerTy->getPointerAddressSpace() == 1
2561 ? spv::MemorySemanticsUniformMemoryMask
2562 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002563
Kévin Petite8edce32019-04-10 14:23:32 +01002564 // We're using relaxed consistency here.
2565 const auto ConstantMemorySemantics =
2566 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2567 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002568
Kévin Petite8edce32019-04-10 14:23:32 +01002569 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2570 ConstantMemorySemantics, ValueToXor};
2571 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2572 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002573 }
David Neto22f144c2017-06-12 14:26:21 -04002574
Kévin Petite8edce32019-04-10 14:23:32 +01002575 return Ret;
2576 });
David Neto22f144c2017-06-12 14:26:21 -04002577}
2578
2579bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002580
Kévin Petite8edce32019-04-10 14:23:32 +01002581 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002582 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2583 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2584 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2585 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2586 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2587 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2588 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2589 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2590 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2591 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2592 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2593 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2594 };
David Neto22f144c2017-06-12 14:26:21 -04002595
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002596 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002597 // The value to store.
2598 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002599
Kévin Petite8edce32019-04-10 14:23:32 +01002600 // The index argument from vstore_half.
2601 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002602
Kévin Petite8edce32019-04-10 14:23:32 +01002603 // The pointer argument from vstore_half.
2604 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002605
Kévin Petite8edce32019-04-10 14:23:32 +01002606 auto IntTy = Type::getInt32Ty(M.getContext());
2607 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002608 auto NewPointerTy =
2609 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002610 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002611
Kévin Petite8edce32019-04-10 14:23:32 +01002612 // Our intrinsic to pack a float2 to an int.
2613 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002614
Kévin Petite8edce32019-04-10 14:23:32 +01002615 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002616
Kévin Petite8edce32019-04-10 14:23:32 +01002617 // Turn the packed x & y into the final packing.
2618 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002619
Kévin Petite8edce32019-04-10 14:23:32 +01002620 // Cast the half* pointer to int*.
2621 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002622
Kévin Petite8edce32019-04-10 14:23:32 +01002623 // Index into the correct address of the casted pointer.
2624 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002625
Kévin Petite8edce32019-04-10 14:23:32 +01002626 // Store to the int* we casted to.
2627 return new StoreInst(X, Index, CI);
2628 });
David Neto22f144c2017-06-12 14:26:21 -04002629}
2630
2631bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002632
Kévin Petite8edce32019-04-10 14:23:32 +01002633 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002634 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2635 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2636 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2637 "_Z13vstorea_half4Dv4_fjPDh", // private
2638 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2639 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2640 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2641 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2642 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2643 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2644 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2645 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2646 };
David Neto22f144c2017-06-12 14:26:21 -04002647
Kévin Petite8edce32019-04-10 14:23:32 +01002648 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2649 // The value to store.
2650 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002651
Kévin Petite8edce32019-04-10 14:23:32 +01002652 // The index argument from vstore_half.
2653 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002654
Kévin Petite8edce32019-04-10 14:23:32 +01002655 // The pointer argument from vstore_half.
2656 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002657
Kévin Petite8edce32019-04-10 14:23:32 +01002658 auto IntTy = Type::getInt32Ty(M.getContext());
2659 auto Int2Ty = VectorType::get(IntTy, 2);
2660 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002661 auto NewPointerTy =
2662 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002663 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002664
Kévin Petite8edce32019-04-10 14:23:32 +01002665 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2666 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002667
Kévin Petite8edce32019-04-10 14:23:32 +01002668 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002669 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2670 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002671
Kévin Petite8edce32019-04-10 14:23:32 +01002672 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2673 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002674
Kévin Petite8edce32019-04-10 14:23:32 +01002675 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002676 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2677 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002678
Kévin Petite8edce32019-04-10 14:23:32 +01002679 // Our intrinsic to pack a float2 to an int.
2680 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002681
Kévin Petite8edce32019-04-10 14:23:32 +01002682 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002683
Kévin Petite8edce32019-04-10 14:23:32 +01002684 // Turn the packed x & y into the final component of our int2.
2685 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002686
Kévin Petite8edce32019-04-10 14:23:32 +01002687 // Turn the packed z & w into the final component of our int2.
2688 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002689
Kévin Petite8edce32019-04-10 14:23:32 +01002690 auto Combine = InsertElementInst::Create(
2691 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002692 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2693 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002694
Kévin Petite8edce32019-04-10 14:23:32 +01002695 // Cast the half* pointer to int2*.
2696 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002697
Kévin Petite8edce32019-04-10 14:23:32 +01002698 // Index into the correct address of the casted pointer.
2699 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002700
Kévin Petite8edce32019-04-10 14:23:32 +01002701 // Store to the int2* we casted to.
2702 return new StoreInst(Combine, Index, CI);
2703 });
David Neto22f144c2017-06-12 14:26:21 -04002704}
2705
alan-bakerf7e17cb2020-01-02 07:29:59 -05002706bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Module &M) {
2707 bool Changed = false;
2708 const std::map<const char *, const char *> Map = {
2709 // 1D
2710 {"_Z11read_imageh14ocl_image1d_roi", "_Z11read_imagef14ocl_image1d_roi"},
2711 {"_Z11read_imageh14ocl_image1d_ro11ocl_sampleri",
2712 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2713 {"_Z11read_imageh14ocl_image1d_ro11ocl_samplerf",
2714 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
alan-baker7150a1d2020-02-25 08:31:06 -05002715 // 1D array
2716 {"_Z11read_imageh20ocl_image1d_array_roDv2_i",
2717 "_Z11read_imagef20ocl_image1d_array_roDv2_i"},
2718 {"_Z11read_imageh20ocl_image1d_array_ro11ocl_samplerDv2_i",
2719 "_Z11read_imagef20ocl_image1d_array_ro11ocl_samplerDv2_i"},
2720 {"_Z11read_imageh20ocl_image1d_array_ro11ocl_samplerDv2_f",
2721 "_Z11read_imagef20ocl_image1d_array_ro11ocl_samplerDv2_f"},
alan-bakerf7e17cb2020-01-02 07:29:59 -05002722 // 2D
2723 {"_Z11read_imageh14ocl_image2d_roDv2_i",
2724 "_Z11read_imagef14ocl_image2d_roDv2_i"},
2725 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_i",
2726 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2727 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_f",
2728 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
alan-baker7150a1d2020-02-25 08:31:06 -05002729 // 2D array
2730 {"_Z11read_imageh20ocl_image2d_array_roDv4_i",
2731 "_Z11read_imagef20ocl_image2d_array_roDv4_i"},
2732 {"_Z11read_imageh20ocl_image2d_array_ro11ocl_samplerDv4_i",
2733 "_Z11read_imagef20ocl_image2d_array_ro11ocl_samplerDv4_i"},
2734 {"_Z11read_imageh20ocl_image2d_array_ro11ocl_samplerDv4_f",
2735 "_Z11read_imagef20ocl_image2d_array_ro11ocl_samplerDv4_f"},
alan-bakerf7e17cb2020-01-02 07:29:59 -05002736 // 3D
2737 {"_Z11read_imageh14ocl_image3d_roDv4_i",
2738 "_Z11read_imagef14ocl_image3d_roDv4_i"},
2739 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_i",
2740 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2741 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_f",
2742 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
2743
2744 for (auto Pair : Map) {
2745 // If we find a function with the matching name.
2746 if (auto F = M.getFunction(Pair.first)) {
2747 SmallVector<Instruction *, 4> ToRemoves;
2748
2749 // Walk the users of the function.
2750 for (auto &U : F->uses()) {
2751 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2752 SmallVector<Type *, 3> types;
2753 SmallVector<Value *, 3> args;
2754 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2755 types.push_back(CI->getArgOperand(i)->getType());
2756 args.push_back(CI->getArgOperand(i));
2757 }
2758
2759 auto NewFType = FunctionType::get(
2760 VectorType::get(Type::getFloatTy(M.getContext()),
2761 CI->getType()->getVectorNumElements()),
2762 types, false);
2763
2764 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2765
2766 auto NewCI = CallInst::Create(NewF, args, "", CI);
2767
2768 // Convert to the half type.
2769 auto Cast = CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2770
2771 CI->replaceAllUsesWith(Cast);
2772
2773 // Lastly, remember to remove the user.
2774 ToRemoves.push_back(CI);
2775 }
2776 }
2777
2778 Changed = !ToRemoves.empty();
2779
2780 // And cleanup the calls we don't use anymore.
2781 for (auto V : ToRemoves) {
2782 V->eraseFromParent();
2783 }
2784
2785 // And remove the function we don't need either too.
2786 F->eraseFromParent();
2787 }
2788 }
2789
2790 return Changed;
2791}
2792
2793bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Module &M) {
2794 bool Changed = false;
2795 const std::map<const char *, const char *> Map = {
2796 // 1D
2797 {"_Z12write_imageh14ocl_image1d_woiDv4_Dh",
2798 "_Z12write_imagef14ocl_image1d_woiDv4_f"},
alan-baker7150a1d2020-02-25 08:31:06 -05002799 // 1D array
2800 {"_Z12write_imageh20ocl_image1d_array_woDv2_iDv4_Dh",
2801 "_Z12write_imagef20ocl_image1d_array_woDv2_iDv4_f"},
alan-bakerf7e17cb2020-01-02 07:29:59 -05002802 // 2D
2803 {"_Z12write_imageh14ocl_image2d_woDv2_iDv4_Dh",
2804 "_Z12write_imagef14ocl_image2d_woDv2_iDv4_f"},
alan-baker7150a1d2020-02-25 08:31:06 -05002805 // 2D array
2806 {"_Z12write_imageh20ocl_image2d_array_woDv4_iDv4_Dh",
2807 "_Z12write_imagef20ocl_image2d_array_woDv4_iDv4_f"},
alan-bakerf7e17cb2020-01-02 07:29:59 -05002808 // 3D
2809 {"_Z12write_imageh14ocl_image3d_woDv4_iDv4_Dh",
2810 "_Z12write_imagef14ocl_image3d_woDv4_iDv4_f"}};
2811
2812 for (auto Pair : Map) {
2813 // If we find a function with the matching name.
2814 if (auto F = M.getFunction(Pair.first)) {
2815 SmallVector<Instruction *, 4> ToRemoves;
2816
2817 // Walk the users of the function.
2818 for (auto &U : F->uses()) {
2819 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2820 SmallVector<Type *, 3> types(3);
2821 SmallVector<Value *, 3> args(3);
2822
2823 // Image
2824 types[0] = CI->getArgOperand(0)->getType();
2825 args[0] = CI->getArgOperand(0);
2826
2827 // Coord
2828 types[1] = CI->getArgOperand(1)->getType();
2829 args[1] = CI->getArgOperand(1);
2830
2831 // Data
2832 types[2] = VectorType::get(
2833 Type::getFloatTy(M.getContext()),
2834 CI->getArgOperand(2)->getType()->getVectorNumElements());
2835
2836 auto NewFType =
2837 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
2838
2839 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2840
2841 // Convert data to the float type.
2842 auto Cast =
2843 CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2844 args[2] = Cast;
2845
2846 auto NewCI = CallInst::Create(NewF, args, "", CI);
2847
2848 // Lastly, remember to remove the user.
2849 ToRemoves.push_back(CI);
2850 }
2851 }
2852
2853 Changed = !ToRemoves.empty();
2854
2855 // And cleanup the calls we don't use anymore.
2856 for (auto V : ToRemoves) {
2857 V->eraseFromParent();
2858 }
2859
2860 // And remove the function we don't need either too.
2861 F->eraseFromParent();
2862 }
2863 }
2864
2865 return Changed;
2866}
2867
Kévin Petit06517a12019-12-09 19:40:31 +00002868bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002869 bool Changed = false;
2870
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002871 const std::map<const char *, const char *> Map = {
alan-bakerf906d2b2019-12-10 11:26:23 -05002872 // 1D
2873 {"_Z11read_imagei14ocl_image1d_ro11ocl_sampleri",
2874 "_Z11read_imagei14ocl_image1d_ro11ocl_samplerf"},
2875 {"_Z12read_imageui14ocl_image1d_ro11ocl_sampleri",
2876 "_Z12read_imageui14ocl_image1d_ro11ocl_samplerf"},
2877 {"_Z11read_imagef14ocl_image1d_ro11ocl_sampleri",
2878 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
alan-baker7150a1d2020-02-25 08:31:06 -05002879 // 1D array
2880 {"_Z11read_imagei20ocl_image1d_array_ro11ocl_samplerDv2_i",
2881 "_Z11read_imagei20ocl_image1d_array_ro11ocl_samplerDv2_f"},
2882 {"_Z12read_imageui20ocl_image1d_array_ro11ocl_samplerDv2_i",
2883 "_Z12read_imageui20ocl_image1d_array_ro11ocl_samplerDv2_f"},
2884 {"_Z11read_imagef20ocl_image1d_array_ro11ocl_samplerDv2_i",
2885 "_Z11read_imagef20ocl_image1d_array_ro11ocl_samplerDv2_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002886 // 2D
2887 {"_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i",
2888 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f"},
2889 {"_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i",
2890 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_f"},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002891 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2892 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
alan-baker7150a1d2020-02-25 08:31:06 -05002893 // 2D array
2894 {"_Z11read_imagei20ocl_image2d_array_ro11ocl_samplerDv4_i",
2895 "_Z11read_imagei20ocl_image2d_array_ro11ocl_samplerDv4_f"},
2896 {"_Z12read_imageui20ocl_image2d_array_ro11ocl_samplerDv4_i",
2897 "_Z12read_imageui20ocl_image2d_array_ro11ocl_samplerDv4_f"},
2898 {"_Z11read_imagef20ocl_image2d_array_ro11ocl_samplerDv4_i",
2899 "_Z11read_imagef20ocl_image2d_array_ro11ocl_samplerDv4_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002900 // 3D
2901 {"_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i",
2902 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_f"},
2903 {"_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i",
2904 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_f"},
2905 {"_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i",
2906 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002907
2908 for (auto Pair : Map) {
2909 // If we find a function with the matching name.
2910 if (auto F = M.getFunction(Pair.first)) {
2911 SmallVector<Instruction *, 4> ToRemoves;
2912
2913 // Walk the users of the function.
2914 for (auto &U : F->uses()) {
2915 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2916 // The image.
2917 auto Arg0 = CI->getOperand(0);
2918
2919 // The sampler.
2920 auto Arg1 = CI->getOperand(1);
2921
2922 // The coordinate (integer type that we can't handle).
2923 auto Arg2 = CI->getOperand(2);
2924
alan-bakerf906d2b2019-12-10 11:26:23 -05002925 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
alan-baker7150a1d2020-02-25 08:31:06 -05002926 uint32_t components =
2927 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
alan-bakerf906d2b2019-12-10 11:26:23 -05002928 Type *float_ty = nullptr;
2929 if (components == 1) {
2930 float_ty = Type::getFloatTy(M.getContext());
2931 } else {
2932 float_ty = VectorType::get(Type::getFloatTy(M.getContext()),
2933 Arg2->getType()->getVectorNumElements());
2934 }
David Neto22f144c2017-06-12 14:26:21 -04002935
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002936 auto NewFType = FunctionType::get(
alan-bakerf906d2b2019-12-10 11:26:23 -05002937 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002938 false);
David Neto22f144c2017-06-12 14:26:21 -04002939
2940 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2941
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002942 auto Cast =
alan-bakerf906d2b2019-12-10 11:26:23 -05002943 CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002944
2945 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2946
2947 CI->replaceAllUsesWith(NewCI);
2948
2949 // Lastly, remember to remove the user.
2950 ToRemoves.push_back(CI);
2951 }
2952 }
2953
2954 Changed = !ToRemoves.empty();
2955
2956 // And cleanup the calls we don't use anymore.
2957 for (auto V : ToRemoves) {
2958 V->eraseFromParent();
2959 }
2960
2961 // And remove the function we don't need either too.
2962 F->eraseFromParent();
2963 }
2964 }
2965
2966 return Changed;
2967}
2968
2969bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2970 bool Changed = false;
2971
Kévin Petit9b340262019-06-19 18:31:11 +01002972 const std::map<const char *, spv::Op> Map = {
2973 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
2974 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
2975 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
2976 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
2977 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
2978 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
2979 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
2980 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
2981 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2982 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2983 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2984 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
2985 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
2986 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
2987 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
2988 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
2989 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
2990 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
2991 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
2992 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
2993 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2994 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2995 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2996 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04002997
2998 for (auto Pair : Map) {
2999 // If we find a function with the matching name.
3000 if (auto F = M.getFunction(Pair.first)) {
3001 SmallVector<Instruction *, 4> ToRemoves;
3002
3003 // Walk the users of the function.
3004 for (auto &U : F->uses()) {
3005 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04003006
3007 auto IntTy = Type::getInt32Ty(M.getContext());
3008
David Neto22f144c2017-06-12 14:26:21 -04003009 // We need to map the OpenCL constants to the SPIR-V equivalents.
3010 const auto ConstantScopeDevice =
3011 ConstantInt::get(IntTy, spv::ScopeDevice);
3012 const auto ConstantMemorySemantics = ConstantInt::get(
3013 IntTy, spv::MemorySemanticsUniformMemoryMask |
3014 spv::MemorySemanticsSequentiallyConsistentMask);
3015
3016 SmallVector<Value *, 5> Params;
3017
3018 // The pointer.
3019 Params.push_back(CI->getArgOperand(0));
3020
3021 // The memory scope.
3022 Params.push_back(ConstantScopeDevice);
3023
3024 // The memory semantics.
3025 Params.push_back(ConstantMemorySemantics);
3026
3027 if (2 < CI->getNumArgOperands()) {
3028 // The unequal memory semantics.
3029 Params.push_back(ConstantMemorySemantics);
3030
3031 // The value.
3032 Params.push_back(CI->getArgOperand(2));
3033
3034 // The comparator.
3035 Params.push_back(CI->getArgOperand(1));
3036 } else if (1 < CI->getNumArgOperands()) {
3037 // The value.
3038 Params.push_back(CI->getArgOperand(1));
3039 }
3040
Kévin Petit9b340262019-06-19 18:31:11 +01003041 auto NewCI =
3042 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04003043
3044 CI->replaceAllUsesWith(NewCI);
3045
3046 // Lastly, remember to remove the user.
3047 ToRemoves.push_back(CI);
3048 }
3049 }
3050
3051 Changed = !ToRemoves.empty();
3052
3053 // And cleanup the calls we don't use anymore.
3054 for (auto V : ToRemoves) {
3055 V->eraseFromParent();
3056 }
3057
3058 // And remove the function we don't need either too.
3059 F->eraseFromParent();
3060 }
3061 }
3062
Neil Henning39672102017-09-29 14:33:13 +01003063 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003064 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003065 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003066 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003067 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003068 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003069 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003070 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003071 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003072 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003073 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003074 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003075 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003076 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003077 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003078 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003079 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003080 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003081 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003082 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003083 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003084 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003085 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003086 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003087 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003088 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003089 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003090 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003091 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003092 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003093 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003094 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003095 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003096 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003097 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003098 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003099 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003100 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003101 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003102 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003103 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003104 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003105 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003106 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003107 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003108 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003109 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003110 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003111 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003112 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003113 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003114 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003115 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003116 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003117 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003118 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003119 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003120 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003121 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003122 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003123 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003124 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003125 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3126 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3127 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003128
3129 for (auto Pair : Map2) {
3130 // If we find a function with the matching name.
3131 if (auto F = M.getFunction(Pair.first)) {
3132 SmallVector<Instruction *, 4> ToRemoves;
3133
3134 // Walk the users of the function.
3135 for (auto &U : F->uses()) {
3136 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3137 auto AtomicOp = new AtomicRMWInst(
3138 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3139 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3140
3141 CI->replaceAllUsesWith(AtomicOp);
3142
3143 // Lastly, remember to remove the user.
3144 ToRemoves.push_back(CI);
3145 }
3146 }
3147
3148 Changed = !ToRemoves.empty();
3149
3150 // And cleanup the calls we don't use anymore.
3151 for (auto V : ToRemoves) {
3152 V->eraseFromParent();
3153 }
3154
3155 // And remove the function we don't need either too.
3156 F->eraseFromParent();
3157 }
3158 }
3159
David Neto22f144c2017-06-12 14:26:21 -04003160 return Changed;
3161}
3162
3163bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04003164
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003165 std::vector<const char *> Names = {
3166 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01003167 };
3168
3169 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003170 auto IntTy = Type::getInt32Ty(M.getContext());
3171 auto FloatTy = Type::getFloatTy(M.getContext());
3172
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003173 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3174 ConstantInt::get(IntTy, 1),
3175 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003176
3177 Constant *UpShuffleMask[4] = {
3178 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3179 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3180
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003181 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3182 UndefValue::get(FloatTy),
3183 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003184
Kévin Petite8edce32019-04-10 14:23:32 +01003185 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003186 auto Arg0 =
3187 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3188 ConstantVector::get(DownShuffleMask), "", CI);
3189 auto Arg1 =
3190 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3191 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003192 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003193
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003194 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04003195
Kévin Petite8edce32019-04-10 14:23:32 +01003196 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003197
Kévin Petite8edce32019-04-10 14:23:32 +01003198 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003199
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003200 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3201 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003202 });
David Neto22f144c2017-06-12 14:26:21 -04003203}
David Neto62653202017-10-16 19:05:18 -04003204
3205bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3206 bool Changed = false;
3207
3208 // OpenCL's float result = fract(float x, float* ptr)
3209 //
3210 // In the LLVM domain:
3211 //
3212 // %floor_result = call spir_func float @floor(float %x)
3213 // store float %floor_result, float * %ptr
3214 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3215 // %result = call spir_func float
3216 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3217 //
3218 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3219 // and clspv.fract occur in the SPIR-V generator pass:
3220 //
3221 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3222 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3223 // ...
3224 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3225 // OpStore %ptr %floor_result
3226 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3227 // %fract_result = OpExtInst %float
3228 // %glsl_ext Fmin %fract_intermediate %just_under_1
3229
David Neto62653202017-10-16 19:05:18 -04003230 using std::string;
3231
3232 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3233 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003234 using QuadType =
3235 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003236 auto make_quad = [](const char *a, const char *b, const char *c,
3237 const char *d) {
3238 return std::tuple<const char *, const char *, const char *, const char *>(
3239 a, b, c, d);
3240 };
3241 const std::vector<QuadType> Functions = {
3242 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003243 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3244 "clspv.fract.v2f"),
3245 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3246 "clspv.fract.v3f"),
3247 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3248 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003249 };
3250
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003251 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003252 const StringRef fract_name(std::get<0>(quad));
3253
3254 // If we find a function with the matching name.
3255 if (auto F = M.getFunction(fract_name)) {
3256 if (F->use_begin() == F->use_end())
3257 continue;
3258
3259 // We have some uses.
3260 Changed = true;
3261
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003262 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003263
3264 const StringRef floor_name(std::get<1>(quad));
3265 const StringRef fmin_name(std::get<2>(quad));
3266 const StringRef clspv_fract_name(std::get<3>(quad));
3267
3268 // This is either float or a float vector. All the float-like
3269 // types are this type.
3270 auto result_ty = F->getReturnType();
3271
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003272 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003273 if (!fmin_fn) {
3274 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003275 FunctionType *fn_ty =
3276 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003277 fmin_fn =
3278 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003279 fmin_fn->addFnAttr(Attribute::ReadNone);
3280 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3281 }
3282
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003283 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003284 if (!floor_fn) {
3285 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003286 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003287 floor_fn = cast<Function>(
3288 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003289 floor_fn->addFnAttr(Attribute::ReadNone);
3290 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3291 }
3292
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003293 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003294 if (!clspv_fract_fn) {
3295 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003296 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003297 clspv_fract_fn = cast<Function>(
3298 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003299 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3300 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3301 }
3302
3303 // Number of significant significand bits, whether represented or not.
3304 unsigned num_significand_bits;
3305 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003306 case Type::HalfTyID:
3307 num_significand_bits = 11;
3308 break;
3309 case Type::FloatTyID:
3310 num_significand_bits = 24;
3311 break;
3312 case Type::DoubleTyID:
3313 num_significand_bits = 53;
3314 break;
3315 default:
3316 assert(false && "Unhandled float type when processing fract builtin");
3317 break;
David Neto62653202017-10-16 19:05:18 -04003318 }
3319 // Beware that the disassembler displays this value as
3320 // OpConstant %float 1
3321 // which is not quite right.
3322 const double kJustUnderOneScalar =
3323 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3324
3325 Constant *just_under_one =
3326 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3327 if (result_ty->isVectorTy()) {
3328 just_under_one = ConstantVector::getSplat(
alan-baker7261e062020-03-15 14:35:48 -04003329 {result_ty->getVectorNumElements(), false}, just_under_one);
David Neto62653202017-10-16 19:05:18 -04003330 }
3331
3332 IRBuilder<> Builder(Context);
3333
3334 SmallVector<Instruction *, 4> ToRemoves;
3335
3336 // Walk the users of the function.
3337 for (auto &U : F->uses()) {
3338 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3339
3340 Builder.SetInsertPoint(CI);
3341 auto arg = CI->getArgOperand(0);
3342 auto ptr = CI->getArgOperand(1);
3343
3344 // Compute floor result and store it.
3345 auto floor = Builder.CreateCall(floor_fn, {arg});
3346 Builder.CreateStore(floor, ptr);
3347
3348 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003349 auto fract_result =
3350 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003351
3352 CI->replaceAllUsesWith(fract_result);
3353
3354 // Lastly, remember to remove the user.
3355 ToRemoves.push_back(CI);
3356 }
3357 }
3358
3359 // And cleanup the calls we don't use anymore.
3360 for (auto V : ToRemoves) {
3361 V->eraseFromParent();
3362 }
3363
3364 // And remove the function we don't need either too.
3365 F->eraseFromParent();
3366 }
3367 }
3368
3369 return Changed;
3370}