// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040032#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070033
Diego Novilloa4c44fa2019-04-11 10:56:15 -040034#include "Passes.h"
35#include "SPIRVOp.h"
36
David Neto22f144c2017-06-12 14:26:21 -040037using namespace llvm;
38
39#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
40
41namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000042
/// Type information for a single argument decoded from a mangled builtin
/// name.
struct ArgTypeInfo {
  /// Integer signedness of the argument. The mangled-name parser in this file
  /// uses None for 'f' (float) arguments.
  enum class SignedNess { None, Unsigned, Signed };

  // Default to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value around (it is copied before being assigned in the
  // parser's substitution handling).
  SignedNess signedness = SignedNess::None;
};
47
48struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000049 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000050 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000051
Kévin Petit91bc72e2019-04-08 15:17:46 +010052 bool isArgSigned(size_t arg) const {
53 assert(argTypeInfos.size() > arg);
54 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000055 }
56
Kévin Petit91bc72e2019-04-08 15:17:46 +010057 static FunctionInfo getFromMangledName(StringRef name) {
58 FunctionInfo fi;
59 if (!getFromMangledNameCheck(name, &fi)) {
60 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000061 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010062 return fi;
63 }
Kévin Petit8a560882019-03-21 15:24:34 +000064
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
66 if (!name.consume_front("_Z")) {
67 return false;
68 }
69 size_t nameLen;
70 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000071 return false;
72 }
73
Kévin Petit91bc72e2019-04-08 15:17:46 +010074 finfo->name = name.take_front(nameLen);
75 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 while (name.size() != 0) {
80
81 ArgTypeInfo ti;
82
83 // Try parsing a vector prefix
84 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040085 int numElems;
86 if (name.consumeInteger(10, numElems)) {
87 return false;
88 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040090 if (!name.consume_front("_")) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093 }
94
95 // Parse the base type
96 char typeCode = name.front();
97 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040098 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010099 case 'c': // char
100 case 'a': // signed char
101 case 's': // short
102 case 'i': // int
103 case 'l': // long
104 ti.signedness = ArgTypeInfo::SignedNess::Signed;
105 break;
106 case 'h': // unsigned char
107 case 't': // unsigned short
108 case 'j': // unsigned int
109 case 'm': // unsigned long
110 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
111 break;
112 case 'f':
113 ti.signedness = ArgTypeInfo::SignedNess::None;
114 break;
115 case 'S':
116 ti = prev_ti;
117 if (!name.consume_front("_")) {
118 return false;
119 }
120 break;
121 default:
122 return false;
123 }
124
125 finfo->argTypeInfos.push_back(ti);
126
127 prev_ti = ti;
128 }
129
130 return true;
131 };
Kévin Petit8a560882019-03-21 15:24:34 +0000132};
133
// Despite its name, this does not count leading zeros: it returns the bit
// index of the most significant set bit of |v| (i.e. floor(log2(v))), and 0
// when |v| is 0. Callers in this file subtract two results to obtain the
// distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highest = 0;

  // Every shift that still leaves a non-zero value means the top set bit sits
  // one position higher.
  for (uint32_t rest = v >> 1; rest != 0; rest >>= 1) {
    ++highest;
  }

  return highest;
}
153
154Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
155 if (1 == elements) {
156 return Type::getInt1Ty(C);
157 } else {
158 return VectorType::get(Type::getInt1Ty(C), elements);
159 }
160}
161
162struct ReplaceOpenCLBuiltinPass final : public ModulePass {
163 static char ID;
164 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
165
166 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000167 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100168 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100169 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400170 bool replaceRecip(Module &M);
171 bool replaceDivide(Module &M);
172 bool replaceExp10(Module &M);
173 bool replaceLog10(Module &M);
174 bool replaceBarrier(Module &M);
175 bool replaceMemFence(Module &M);
176 bool replaceRelational(Module &M);
177 bool replaceIsInfAndIsNan(Module &M);
178 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000179 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000180 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000181 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000182 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000183 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000184 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000185 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceSignbit(Module &M);
187 bool replaceMadandMad24andMul24(Module &M);
188 bool replaceVloadHalf(Module &M);
189 bool replaceVloadHalf2(Module &M);
190 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700191 bool replaceClspvVloadaHalf2(Module &M);
192 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400193 bool replaceVstoreHalf(Module &M);
194 bool replaceVstoreHalf2(Module &M);
195 bool replaceVstoreHalf4(Module &M);
196 bool replaceReadImageF(Module &M);
197 bool replaceAtomics(Module &M);
198 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400199 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700200 bool replaceVload(Module &M);
201 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400202};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100203} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400204
205char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400206INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
207 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400208
209namespace clspv {
210ModulePass *createReplaceOpenCLBuiltinPass() {
211 return new ReplaceOpenCLBuiltinPass();
212}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400213} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400214
215bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
216 bool Changed = false;
217
Kévin Petit2444e9b2018-11-09 14:14:37 +0000218 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100219 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100220 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400221 Changed |= replaceRecip(M);
222 Changed |= replaceDivide(M);
223 Changed |= replaceExp10(M);
224 Changed |= replaceLog10(M);
225 Changed |= replaceBarrier(M);
226 Changed |= replaceMemFence(M);
227 Changed |= replaceRelational(M);
228 Changed |= replaceIsInfAndIsNan(M);
229 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000230 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000231 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000232 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000233 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000234 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000235 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000236 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400237 Changed |= replaceSignbit(M);
238 Changed |= replaceMadandMad24andMul24(M);
239 Changed |= replaceVloadHalf(M);
240 Changed |= replaceVloadHalf2(M);
241 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700242 Changed |= replaceClspvVloadaHalf2(M);
243 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400244 Changed |= replaceVstoreHalf(M);
245 Changed |= replaceVstoreHalf2(M);
246 Changed |= replaceVstoreHalf4(M);
247 Changed |= replaceReadImageF(M);
248 Changed |= replaceAtomics(M);
249 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400250 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700251 Changed |= replaceVload(M);
252 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400253
254 return Changed;
255}
256
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400257bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
258 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000259
Kévin Petite8edce32019-04-10 14:23:32 +0100260 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000261
262 for (auto Name : Names) {
263 // If we find a function with the matching name.
264 if (auto F = M.getFunction(Name)) {
265 SmallVector<Instruction *, 4> ToRemoves;
266
267 // Walk the users of the function.
268 for (auto &U : F->uses()) {
269 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000270
Kévin Petite8edce32019-04-10 14:23:32 +0100271 auto NewValue = Replacer(CI);
272
273 if (NewValue != nullptr) {
274 CI->replaceAllUsesWith(NewValue);
275 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000276
277 // Lastly, remember to remove the user.
278 ToRemoves.push_back(CI);
279 }
280 }
281
282 Changed = !ToRemoves.empty();
283
284 // And cleanup the calls we don't use anymore.
285 for (auto V : ToRemoves) {
286 V->eraseFromParent();
287 }
288
289 // And remove the function we don't need either too.
290 F->eraseFromParent();
291 }
292 }
293
294 return Changed;
295}
296
Kévin Petite8edce32019-04-10 14:23:32 +0100297bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100298
Kévin Petite8edce32019-04-10 14:23:32 +0100299 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400300 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
301 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
302 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
303 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100304 };
305
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400306 return replaceCallsWithValue(M, Names,
307 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100308}
309
310bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
311
312 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400313 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
314 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
315 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
316 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
317 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
318 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
319 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
320 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
321 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
322 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
323 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100324 };
325
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400326 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100327 auto XValue = CI->getOperand(0);
328 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100329
Kévin Petite8edce32019-04-10 14:23:32 +0100330 IRBuilder<> Builder(CI);
331 auto XmY = Builder.CreateSub(XValue, YValue);
332 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100333
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400334 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100335 auto F = CI->getCalledFunction();
336 auto finfo = FunctionInfo::getFromMangledName(F->getName());
337 if (finfo.isArgSigned(0)) {
338 Cmp = Builder.CreateICmpSGT(YValue, XValue);
339 } else {
340 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100341 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100342
Kévin Petite8edce32019-04-10 14:23:32 +0100343 return Builder.CreateSelect(Cmp, YmX, XmY);
344 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100345}
346
Kévin Petit8c1be282019-04-02 19:34:25 +0100347bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100348
Kévin Petite8edce32019-04-10 14:23:32 +0100349 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400350 "_Z8copysignff",
351 "_Z8copysignDv2_fS_",
352 "_Z8copysignDv3_fS_",
353 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100354 };
355
Kévin Petite8edce32019-04-10 14:23:32 +0100356 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
357 auto XValue = CI->getOperand(0);
358 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100359
Kévin Petite8edce32019-04-10 14:23:32 +0100360 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100361
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400362 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100363 if (Ty->isVectorTy()) {
364 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100365 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100366
Kévin Petite8edce32019-04-10 14:23:32 +0100367 // Return X with the sign of Y
368
369 // Sign bit masks
370 auto SignBit = IntTy->getScalarSizeInBits() - 1;
371 auto SignBitMask = 1 << SignBit;
372 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
373 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
374
375 IRBuilder<> Builder(CI);
376
377 // Extract sign of Y
378 auto YInt = Builder.CreateBitCast(YValue, IntTy);
379 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
380
381 // Clear sign bit in X
382 auto XInt = Builder.CreateBitCast(XValue, IntTy);
383 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
384
385 // Insert sign bit of Y into X
386 auto NewXInt = Builder.CreateOr(XInt, YSign);
387
388 // And cast back to floating-point
389 return Builder.CreateBitCast(NewXInt, Ty);
390 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100391}
392
David Neto22f144c2017-06-12 14:26:21 -0400393bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400394
Kévin Petite8edce32019-04-10 14:23:32 +0100395 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400396 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
397 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
398 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
399 };
400
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400401 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100402 // Recip has one arg.
403 auto Arg = CI->getOperand(0);
404 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
405 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
406 });
David Neto22f144c2017-06-12 14:26:21 -0400407}
408
409bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400410
Kévin Petite8edce32019-04-10 14:23:32 +0100411 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400412 "_Z11half_divideff", "_Z13native_divideff",
413 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
414 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
415 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
416 };
417
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400418 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100419 auto Op0 = CI->getOperand(0);
420 auto Op1 = CI->getOperand(1);
421 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
422 });
David Neto22f144c2017-06-12 14:26:21 -0400423}
424
425bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
426 bool Changed = false;
427
428 const std::map<const char *, const char *> Map = {
429 {"_Z5exp10f", "_Z3expf"},
430 {"_Z10half_exp10f", "_Z8half_expf"},
431 {"_Z12native_exp10f", "_Z10native_expf"},
432 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
433 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
434 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
435 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
436 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
437 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
438 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
439 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
440 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
441
442 for (auto Pair : Map) {
443 // If we find a function with the matching name.
444 if (auto F = M.getFunction(Pair.first)) {
445 SmallVector<Instruction *, 4> ToRemoves;
446
447 // Walk the users of the function.
448 for (auto &U : F->uses()) {
449 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
450 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
451
452 auto Arg = CI->getOperand(0);
453
454 // Constant of the natural log of 10 (ln(10)).
455 const double Ln10 =
456 2.302585092994045684017991454684364207601101488628772976033;
457
458 auto Mul = BinaryOperator::Create(
459 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
460 CI);
461
462 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
463
464 CI->replaceAllUsesWith(NewCI);
465
466 // Lastly, remember to remove the user.
467 ToRemoves.push_back(CI);
468 }
469 }
470
471 Changed = !ToRemoves.empty();
472
473 // And cleanup the calls we don't use anymore.
474 for (auto V : ToRemoves) {
475 V->eraseFromParent();
476 }
477
478 // And remove the function we don't need either too.
479 F->eraseFromParent();
480 }
481 }
482
483 return Changed;
484}
485
486bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
487 bool Changed = false;
488
489 const std::map<const char *, const char *> Map = {
490 {"_Z5log10f", "_Z3logf"},
491 {"_Z10half_log10f", "_Z8half_logf"},
492 {"_Z12native_log10f", "_Z10native_logf"},
493 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
494 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
495 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
496 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
497 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
498 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
499 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
500 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
501 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
502
503 for (auto Pair : Map) {
504 // If we find a function with the matching name.
505 if (auto F = M.getFunction(Pair.first)) {
506 SmallVector<Instruction *, 4> ToRemoves;
507
508 // Walk the users of the function.
509 for (auto &U : F->uses()) {
510 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
511 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
512
513 auto Arg = CI->getOperand(0);
514
515 // Constant of the reciprocal of the natural log of 10 (ln(10)).
516 const double Ln10 =
517 0.434294481903251827651128918916605082294397005803666566114;
518
519 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
520
521 auto Mul = BinaryOperator::Create(
522 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
523 "", CI);
524
525 CI->replaceAllUsesWith(Mul);
526
527 // Lastly, remember to remove the user.
528 ToRemoves.push_back(CI);
529 }
530 }
531
532 Changed = !ToRemoves.empty();
533
534 // And cleanup the calls we don't use anymore.
535 for (auto V : ToRemoves) {
536 V->eraseFromParent();
537 }
538
539 // And remove the function we don't need either too.
540 F->eraseFromParent();
541 }
542 }
543
544 return Changed;
545}
546
547bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
548 bool Changed = false;
549
550 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
551
552 const std::map<const char *, const char *> Map = {
553 {"_Z7barrierj", "__spirv_control_barrier"}};
554
555 for (auto Pair : Map) {
556 // If we find a function with the matching name.
557 if (auto F = M.getFunction(Pair.first)) {
558 SmallVector<Instruction *, 4> ToRemoves;
559
560 // Walk the users of the function.
561 for (auto &U : F->uses()) {
562 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
563 auto FType = F->getFunctionType();
564 SmallVector<Type *, 3> Params;
565 for (unsigned i = 0; i < 3; i++) {
566 Params.push_back(FType->getParamType(0));
567 }
568 auto NewFType =
569 FunctionType::get(FType->getReturnType(), Params, false);
570 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
571
572 auto Arg = CI->getOperand(0);
573
574 // We need to map the OpenCL constants to the SPIR-V equivalents.
575 const auto LocalMemFence =
576 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
577 const auto GlobalMemFence =
578 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
579 const auto ConstantSequentiallyConsistent = ConstantInt::get(
580 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
581 const auto ConstantScopeDevice =
582 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
583 const auto ConstantScopeWorkgroup =
584 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
585
586 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
587 const auto LocalMemFenceMask = BinaryOperator::Create(
588 Instruction::And, LocalMemFence, Arg, "", CI);
589 const auto WorkgroupShiftAmount =
590 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
591 clz(CLK_LOCAL_MEM_FENCE);
592 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
593 Instruction::Shl, LocalMemFenceMask,
594 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
595
596 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
597 const auto GlobalMemFenceMask = BinaryOperator::Create(
598 Instruction::And, GlobalMemFence, Arg, "", CI);
599 const auto UniformShiftAmount =
600 clz(spv::MemorySemanticsUniformMemoryMask) -
601 clz(CLK_GLOBAL_MEM_FENCE);
602 const auto MemorySemanticsUniform = BinaryOperator::Create(
603 Instruction::Shl, GlobalMemFenceMask,
604 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
605
606 // And combine the above together, also adding in
607 // MemorySemanticsSequentiallyConsistentMask.
608 auto MemorySemantics =
609 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
610 ConstantSequentiallyConsistent, "", CI);
611 MemorySemantics = BinaryOperator::Create(
612 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
613
614 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
615 // Device Scope, otherwise Workgroup Scope.
616 const auto Cmp =
617 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
618 GlobalMemFenceMask, GlobalMemFence, "", CI);
619 const auto MemoryScope = SelectInst::Create(
620 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
621
622 // Lastly, the Execution Scope is always Workgroup Scope.
623 const auto ExecutionScope = ConstantScopeWorkgroup;
624
625 auto NewCI = CallInst::Create(
626 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
627
628 CI->replaceAllUsesWith(NewCI);
629
630 // Lastly, remember to remove the user.
631 ToRemoves.push_back(CI);
632 }
633 }
634
635 Changed = !ToRemoves.empty();
636
637 // And cleanup the calls we don't use anymore.
638 for (auto V : ToRemoves) {
639 V->eraseFromParent();
640 }
641
642 // And remove the function we don't need either too.
643 F->eraseFromParent();
644 }
645 }
646
647 return Changed;
648}
649
650bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
651 bool Changed = false;
652
653 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
654
Neil Henning39672102017-09-29 14:33:13 +0100655 using Tuple = std::tuple<const char *, unsigned>;
656 const std::map<const char *, Tuple> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400657 {"_Z9mem_fencej", Tuple("__spirv_memory_barrier",
658 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100659 {"_Z14read_mem_fencej",
660 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
661 {"_Z15write_mem_fencej",
662 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400663
664 for (auto Pair : Map) {
665 // If we find a function with the matching name.
666 if (auto F = M.getFunction(Pair.first)) {
667 SmallVector<Instruction *, 4> ToRemoves;
668
669 // Walk the users of the function.
670 for (auto &U : F->uses()) {
671 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
672 auto FType = F->getFunctionType();
673 SmallVector<Type *, 2> Params;
674 for (unsigned i = 0; i < 2; i++) {
675 Params.push_back(FType->getParamType(0));
676 }
677 auto NewFType =
678 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100679 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400680
681 auto Arg = CI->getOperand(0);
682
683 // We need to map the OpenCL constants to the SPIR-V equivalents.
684 const auto LocalMemFence =
685 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
686 const auto GlobalMemFence =
687 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
688 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100689 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400690 const auto ConstantScopeDevice =
691 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
692
693 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
694 const auto LocalMemFenceMask = BinaryOperator::Create(
695 Instruction::And, LocalMemFence, Arg, "", CI);
696 const auto WorkgroupShiftAmount =
697 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
698 clz(CLK_LOCAL_MEM_FENCE);
699 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
700 Instruction::Shl, LocalMemFenceMask,
701 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
702
703 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
704 const auto GlobalMemFenceMask = BinaryOperator::Create(
705 Instruction::And, GlobalMemFence, Arg, "", CI);
706 const auto UniformShiftAmount =
707 clz(spv::MemorySemanticsUniformMemoryMask) -
708 clz(CLK_GLOBAL_MEM_FENCE);
709 const auto MemorySemanticsUniform = BinaryOperator::Create(
710 Instruction::Shl, GlobalMemFenceMask,
711 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
712
713 // And combine the above together, also adding in
714 // MemorySemanticsSequentiallyConsistentMask.
715 auto MemorySemantics =
716 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
717 ConstantMemorySemantics, "", CI);
718 MemorySemantics = BinaryOperator::Create(
719 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
720
721 // Memory Scope is always device.
722 const auto MemoryScope = ConstantScopeDevice;
723
724 auto NewCI =
725 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
726
727 CI->replaceAllUsesWith(NewCI);
728
729 // Lastly, remember to remove the user.
730 ToRemoves.push_back(CI);
731 }
732 }
733
734 Changed = !ToRemoves.empty();
735
736 // And cleanup the calls we don't use anymore.
737 for (auto V : ToRemoves) {
738 V->eraseFromParent();
739 }
740
741 // And remove the function we don't need either too.
742 F->eraseFromParent();
743 }
744 }
745
746 return Changed;
747}
748
749bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
750 bool Changed = false;
751
752 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
753 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
754 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
755 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
756 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
757 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
758 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
759 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
760 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
761 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
762 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
763 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
764 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
765 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
766 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
767 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
768 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
769 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
770 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
771 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
772 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
773 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
774 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
775 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
776 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
777 };
778
779 for (auto Pair : Map) {
780 // If we find a function with the matching name.
781 if (auto F = M.getFunction(Pair.first)) {
782 SmallVector<Instruction *, 4> ToRemoves;
783
784 // Walk the users of the function.
785 for (auto &U : F->uses()) {
786 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
787 // The predicate to use in the CmpInst.
788 auto Predicate = Pair.second.first;
789
790 // The value to return for true.
791 auto TrueValue =
792 ConstantInt::getSigned(CI->getType(), Pair.second.second);
793
794 // The value to return for false.
795 auto FalseValue = Constant::getNullValue(CI->getType());
796
797 auto Arg1 = CI->getOperand(0);
798 auto Arg2 = CI->getOperand(1);
799
800 const auto Cmp =
801 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
802
803 const auto Select =
804 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
805
806 CI->replaceAllUsesWith(Select);
807
808 // Lastly, remember to remove the user.
809 ToRemoves.push_back(CI);
810 }
811 }
812
813 Changed = !ToRemoves.empty();
814
815 // And cleanup the calls we don't use anymore.
816 for (auto V : ToRemoves) {
817 V->eraseFromParent();
818 }
819
820 // And remove the function we don't need either too.
821 F->eraseFromParent();
822 }
823 }
824
825 return Changed;
826}
827
828bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
829 bool Changed = false;
830
831 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
832 {"_Z5isinff", {"__spirv_isinff", 1}},
833 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
834 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
835 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
836 {"_Z5isnanf", {"__spirv_isnanf", 1}},
837 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
838 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
839 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
840 };
841
842 for (auto Pair : Map) {
843 // If we find a function with the matching name.
844 if (auto F = M.getFunction(Pair.first)) {
845 SmallVector<Instruction *, 4> ToRemoves;
846
847 // Walk the users of the function.
848 for (auto &U : F->uses()) {
849 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
850 const auto CITy = CI->getType();
851
852 // The fake SPIR-V intrinsic to generate.
853 auto SPIRVIntrinsic = Pair.second.first;
854
855 // The value to return for true.
856 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
857
858 // The value to return for false.
859 auto FalseValue = Constant::getNullValue(CITy);
860
861 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
862 M.getContext(),
863 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
864
865 auto NewFType =
866 FunctionType::get(CorrespondingBoolTy,
867 F->getFunctionType()->getParamType(0), false);
868
869 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
870
871 auto Arg = CI->getOperand(0);
872
873 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
874
875 const auto Select =
876 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
877
878 CI->replaceAllUsesWith(Select);
879
880 // Lastly, remember to remove the user.
881 ToRemoves.push_back(CI);
882 }
883 }
884
885 Changed = !ToRemoves.empty();
886
887 // And cleanup the calls we don't use anymore.
888 for (auto V : ToRemoves) {
889 V->eraseFromParent();
890 }
891
892 // And remove the function we don't need either too.
893 F->eraseFromParent();
894 }
895 }
896
897 return Changed;
898}
899
900bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
901 bool Changed = false;
902
903 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000904 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500905 {"_Z3allc", ""},
906 {"_Z3allDv2_c", "__spirv_allDv2_c"},
907 {"_Z3allDv3_c", "__spirv_allDv3_c"},
908 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000909 {"_Z3alls", ""},
910 {"_Z3allDv2_s", "__spirv_allDv2_s"},
911 {"_Z3allDv3_s", "__spirv_allDv3_s"},
912 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400913 {"_Z3alli", ""},
914 {"_Z3allDv2_i", "__spirv_allDv2_i"},
915 {"_Z3allDv3_i", "__spirv_allDv3_i"},
916 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000917 {"_Z3alll", ""},
918 {"_Z3allDv2_l", "__spirv_allDv2_l"},
919 {"_Z3allDv3_l", "__spirv_allDv3_l"},
920 {"_Z3allDv4_l", "__spirv_allDv4_l"},
921
922 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500923 {"_Z3anyc", ""},
924 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
925 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
926 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000927 {"_Z3anys", ""},
928 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
929 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
930 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400931 {"_Z3anyi", ""},
932 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
933 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
934 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000935 {"_Z3anyl", ""},
936 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
937 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
938 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400939 };
940
941 for (auto Pair : Map) {
942 // If we find a function with the matching name.
943 if (auto F = M.getFunction(Pair.first)) {
944 SmallVector<Instruction *, 4> ToRemoves;
945
946 // Walk the users of the function.
947 for (auto &U : F->uses()) {
948 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
949 // The fake SPIR-V intrinsic to generate.
950 auto SPIRVIntrinsic = Pair.second;
951
952 auto Arg = CI->getOperand(0);
953
954 Value *V;
955
Kévin Petitfd27cca2018-10-31 13:00:17 +0000956 // If the argument is a 32-bit int, just use a shift
957 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
958 V = BinaryOperator::Create(Instruction::LShr, Arg,
959 ConstantInt::get(Arg->getType(), 31), "",
960 CI);
961 } else {
David Neto22f144c2017-06-12 14:26:21 -0400962 // The value for zero to compare against.
963 const auto ZeroValue = Constant::getNullValue(Arg->getType());
964
David Neto22f144c2017-06-12 14:26:21 -0400965 // The value to return for true.
966 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
967
968 // The value to return for false.
969 const auto FalseValue = Constant::getNullValue(CI->getType());
970
Kévin Petitfd27cca2018-10-31 13:00:17 +0000971 const auto Cmp = CmpInst::Create(
972 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
973
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400974 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000975
976 // If we have a function to call, call it!
977 if (0 < strlen(SPIRVIntrinsic)) {
978
979 const auto NewFType = FunctionType::get(
980 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
981
982 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
983
984 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
985
986 SelectSource = NewCI;
987
988 } else {
989 SelectSource = Cmp;
990 }
991
992 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400993 }
994
995 CI->replaceAllUsesWith(V);
996
997 // Lastly, remember to remove the user.
998 ToRemoves.push_back(CI);
999 }
1000 }
1001
1002 Changed = !ToRemoves.empty();
1003
1004 // And cleanup the calls we don't use anymore.
1005 for (auto V : ToRemoves) {
1006 V->eraseFromParent();
1007 }
1008
1009 // And remove the function we don't need either too.
1010 F->eraseFromParent();
1011 }
1012 }
1013
1014 return Changed;
1015}
1016
Kévin Petitbf0036c2019-03-06 13:57:10 +00001017bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1018 bool Changed = false;
1019
1020 for (auto const &SymVal : M.getValueSymbolTable()) {
1021 // Skip symbols whose name doesn't match
1022 if (!SymVal.getKey().startswith("_Z8upsample")) {
1023 continue;
1024 }
1025 // Is there a function going by that name?
1026 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1027
1028 SmallVector<Instruction *, 4> ToRemoves;
1029
1030 // Walk the users of the function.
1031 for (auto &U : F->uses()) {
1032 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1033
1034 // Get arguments
1035 auto HiValue = CI->getOperand(0);
1036 auto LoValue = CI->getOperand(1);
1037
1038 // Don't touch overloads that aren't in OpenCL C
1039 auto HiType = HiValue->getType();
1040 auto LoType = LoValue->getType();
1041
1042 if (HiType != LoType) {
1043 continue;
1044 }
1045
1046 if (!HiType->isIntOrIntVectorTy()) {
1047 continue;
1048 }
1049
1050 if (HiType->getScalarSizeInBits() * 2 !=
1051 CI->getType()->getScalarSizeInBits()) {
1052 continue;
1053 }
1054
1055 if ((HiType->getScalarSizeInBits() != 8) &&
1056 (HiType->getScalarSizeInBits() != 16) &&
1057 (HiType->getScalarSizeInBits() != 32)) {
1058 continue;
1059 }
1060
1061 if (HiType->isVectorTy()) {
1062 if ((HiType->getVectorNumElements() != 2) &&
1063 (HiType->getVectorNumElements() != 3) &&
1064 (HiType->getVectorNumElements() != 4) &&
1065 (HiType->getVectorNumElements() != 8) &&
1066 (HiType->getVectorNumElements() != 16)) {
1067 continue;
1068 }
1069 }
1070
1071 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001072 auto HiCast =
1073 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1074 auto LoCast =
1075 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001076
1077 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001078 auto ShiftAmount =
1079 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001080 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1081 ShiftAmount, "", CI);
1082
1083 // OR both results
1084 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1085 "", CI);
1086
1087 // Replace call with the expression
1088 CI->replaceAllUsesWith(V);
1089
1090 // Lastly, remember to remove the user.
1091 ToRemoves.push_back(CI);
1092 }
1093 }
1094
1095 Changed = !ToRemoves.empty();
1096
1097 // And cleanup the calls we don't use anymore.
1098 for (auto V : ToRemoves) {
1099 V->eraseFromParent();
1100 }
1101
1102 // And remove the function we don't need either too.
1103 F->eraseFromParent();
1104 }
1105 }
1106
1107 return Changed;
1108}
1109
Kévin Petitd44eef52019-03-08 13:22:14 +00001110bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1111 bool Changed = false;
1112
1113 for (auto const &SymVal : M.getValueSymbolTable()) {
1114 // Skip symbols whose name doesn't match
1115 if (!SymVal.getKey().startswith("_Z6rotate")) {
1116 continue;
1117 }
1118 // Is there a function going by that name?
1119 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1120
1121 SmallVector<Instruction *, 4> ToRemoves;
1122
1123 // Walk the users of the function.
1124 for (auto &U : F->uses()) {
1125 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1126
1127 // Get arguments
1128 auto SrcValue = CI->getOperand(0);
1129 auto RotAmount = CI->getOperand(1);
1130
1131 // Don't touch overloads that aren't in OpenCL C
1132 auto SrcType = SrcValue->getType();
1133 auto RotType = RotAmount->getType();
1134
1135 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1136 continue;
1137 }
1138
1139 if (!SrcType->isIntOrIntVectorTy()) {
1140 continue;
1141 }
1142
1143 if ((SrcType->getScalarSizeInBits() != 8) &&
1144 (SrcType->getScalarSizeInBits() != 16) &&
1145 (SrcType->getScalarSizeInBits() != 32) &&
1146 (SrcType->getScalarSizeInBits() != 64)) {
1147 continue;
1148 }
1149
1150 if (SrcType->isVectorTy()) {
1151 if ((SrcType->getVectorNumElements() != 2) &&
1152 (SrcType->getVectorNumElements() != 3) &&
1153 (SrcType->getVectorNumElements() != 4) &&
1154 (SrcType->getVectorNumElements() != 8) &&
1155 (SrcType->getVectorNumElements() != 16)) {
1156 continue;
1157 }
1158 }
1159
1160 // The approach used is to shift the top bits down, the bottom bits up
1161 // and OR the two shifted values.
1162
1163 // The rotation amount is to be treated modulo the element size.
1164 // Since SPIR-V shift ops don't support this, let's apply the
1165 // modulo ahead of shifting. The element size is always a power of
1166 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001167 auto ModMask =
1168 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001169 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1170 ModMask, "", CI);
1171
1172 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001173 auto ScalarSize =
1174 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001175 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1176 RotAmount, "", CI);
1177
1178 // Now shift the bottom bits up and the top bits down
1179 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1180 RotAmount, "", CI);
1181 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1182 DownAmount, "", CI);
1183
1184 // Finally OR the two shifted values
1185 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1186 HiRotated, "", CI);
1187
1188 // Replace call with the expression
1189 CI->replaceAllUsesWith(V);
1190
1191 // Lastly, remember to remove the user.
1192 ToRemoves.push_back(CI);
1193 }
1194 }
1195
1196 Changed = !ToRemoves.empty();
1197
1198 // And cleanup the calls we don't use anymore.
1199 for (auto V : ToRemoves) {
1200 V->eraseFromParent();
1201 }
1202
1203 // And remove the function we don't need either too.
1204 F->eraseFromParent();
1205 }
1206 }
1207
1208 return Changed;
1209}
1210
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001211bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1212 bool Changed = false;
1213
1214 for (auto const &SymVal : M.getValueSymbolTable()) {
1215
1216 // Skip symbols whose name obviously doesn't match
1217 if (!SymVal.getKey().contains("convert_")) {
1218 continue;
1219 }
1220
1221 // Is there a function going by that name?
1222 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1223
1224 // Get info from the mangled name
1225 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001226 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001227
1228 // All functions of interest are handled by our mangled name parser
1229 if (!parsed) {
1230 continue;
1231 }
1232
1233 // Move on if this isn't a call to convert_
1234 if (!finfo.name.startswith("convert_")) {
1235 continue;
1236 }
1237
1238 // Extract the destination type from the function name
1239 StringRef DstTypeName = finfo.name;
1240 DstTypeName.consume_front("convert_");
1241
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001242 auto DstSignedNess =
1243 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1244 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1245 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1246 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1247 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1248 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1249 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1250 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1251 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1252 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001253
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001254 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001255 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001256
1257 SmallVector<Instruction *, 4> ToRemoves;
1258
1259 // Walk the users of the function.
1260 for (auto &U : F->uses()) {
1261 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1262
1263 // Get arguments
1264 auto SrcValue = CI->getOperand(0);
1265
1266 // Don't touch overloads that aren't in OpenCL C
1267 auto SrcType = SrcValue->getType();
1268 auto DstType = CI->getType();
1269
1270 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1271 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1272 continue;
1273 }
1274
1275 if (SrcType->isVectorTy()) {
1276
1277 if (SrcType->getVectorNumElements() !=
1278 DstType->getVectorNumElements()) {
1279 continue;
1280 }
1281
1282 if ((SrcType->getVectorNumElements() != 2) &&
1283 (SrcType->getVectorNumElements() != 3) &&
1284 (SrcType->getVectorNumElements() != 4) &&
1285 (SrcType->getVectorNumElements() != 8) &&
1286 (SrcType->getVectorNumElements() != 16)) {
1287 continue;
1288 }
1289 }
1290
1291 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1292 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1293
1294 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1295 bool DstIsInt = DstType->isIntOrIntVectorTy();
1296
1297 Value *V;
1298 if (SrcIsFloat && DstIsFloat) {
1299 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1300 } else if (SrcIsFloat && DstIsInt) {
1301 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001302 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1303 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001304 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001305 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1306 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001307 }
1308 } else if (SrcIsInt && DstIsFloat) {
1309 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001310 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1311 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001312 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001313 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1314 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001315 }
1316 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001317 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1318 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001319 } else {
1320 // Not something we're supposed to handle, just move on
1321 continue;
1322 }
1323
1324 // Replace call with the expression
1325 CI->replaceAllUsesWith(V);
1326
1327 // Lastly, remember to remove the user.
1328 ToRemoves.push_back(CI);
1329 }
1330 }
1331
1332 Changed = !ToRemoves.empty();
1333
1334 // And cleanup the calls we don't use anymore.
1335 for (auto V : ToRemoves) {
1336 V->eraseFromParent();
1337 }
1338
1339 // And remove the function we don't need either too.
1340 F->eraseFromParent();
1341 }
1342 }
1343
1344 return Changed;
1345}
1346
Kévin Petit8a560882019-03-21 15:24:34 +00001347bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1348 bool Changed = false;
1349
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001350 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001351
Kévin Petit617a76d2019-04-04 13:54:16 +01001352 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001353 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1354 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1355
1356 // Skip symbols whose name doesn't match
1357 if (!isMad && !isMul) {
1358 continue;
1359 }
1360
1361 // Is there a function going by that name?
1362 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001363 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001364 }
1365 }
1366
Kévin Petit617a76d2019-04-04 13:54:16 +01001367 for (auto F : FnWorklist) {
1368 SmallVector<Instruction *, 4> ToRemoves;
1369
1370 bool isMad = F->getName().startswith("_Z6mad_hi");
1371 // Walk the users of the function.
1372 for (auto &U : F->uses()) {
1373 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1374
1375 // Get arguments
1376 auto AValue = CI->getOperand(0);
1377 auto BValue = CI->getOperand(1);
1378 auto CValue = CI->getOperand(2);
1379
1380 // Don't touch overloads that aren't in OpenCL C
1381 auto AType = AValue->getType();
1382 auto BType = BValue->getType();
1383 auto CType = CValue->getType();
1384
1385 if ((AType != BType) || (CI->getType() != AType) ||
1386 (isMad && (AType != CType))) {
1387 continue;
1388 }
1389
1390 if (!AType->isIntOrIntVectorTy()) {
1391 continue;
1392 }
1393
1394 if ((AType->getScalarSizeInBits() != 8) &&
1395 (AType->getScalarSizeInBits() != 16) &&
1396 (AType->getScalarSizeInBits() != 32) &&
1397 (AType->getScalarSizeInBits() != 64)) {
1398 continue;
1399 }
1400
1401 if (AType->isVectorTy()) {
1402 if ((AType->getVectorNumElements() != 2) &&
1403 (AType->getVectorNumElements() != 3) &&
1404 (AType->getVectorNumElements() != 4) &&
1405 (AType->getVectorNumElements() != 8) &&
1406 (AType->getVectorNumElements() != 16)) {
1407 continue;
1408 }
1409 }
1410
1411 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001412 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001413
1414 // Select the appropriate signed/unsigned SPIR-V op
1415 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001416 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001417 opcode = spv::OpSMulExtended;
1418 } else {
1419 opcode = spv::OpUMulExtended;
1420 }
1421
1422 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001423 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001424 auto ExMulRetType = StructType::create(TwoValueType);
1425
1426 // Call the SPIR-V op
1427 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1428 ExMulRetType, {AValue, BValue});
1429
1430 // Get the high part of the result
1431 unsigned Idxs[] = {1};
1432 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1433
1434 // If we're handling a mad_hi, add the third argument to the result
1435 if (isMad) {
1436 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1437 }
1438
1439 // Replace call with the expression
1440 CI->replaceAllUsesWith(V);
1441
1442 // Lastly, remember to remove the user.
1443 ToRemoves.push_back(CI);
1444 }
1445 }
1446
1447 Changed = !ToRemoves.empty();
1448
1449 // And cleanup the calls we don't use anymore.
1450 for (auto V : ToRemoves) {
1451 V->eraseFromParent();
1452 }
1453
1454 // And remove the function we don't need either too.
1455 F->eraseFromParent();
1456 }
1457
Kévin Petit8a560882019-03-21 15:24:34 +00001458 return Changed;
1459}
1460
Kévin Petitf5b78a22018-10-25 14:32:17 +00001461bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1462 bool Changed = false;
1463
1464 for (auto const &SymVal : M.getValueSymbolTable()) {
1465 // Skip symbols whose name doesn't match
1466 if (!SymVal.getKey().startswith("_Z6select")) {
1467 continue;
1468 }
1469 // Is there a function going by that name?
1470 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1471
1472 SmallVector<Instruction *, 4> ToRemoves;
1473
1474 // Walk the users of the function.
1475 for (auto &U : F->uses()) {
1476 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1477
1478 // Get arguments
1479 auto FalseValue = CI->getOperand(0);
1480 auto TrueValue = CI->getOperand(1);
1481 auto PredicateValue = CI->getOperand(2);
1482
1483 // Don't touch overloads that aren't in OpenCL C
1484 auto FalseType = FalseValue->getType();
1485 auto TrueType = TrueValue->getType();
1486 auto PredicateType = PredicateValue->getType();
1487
1488 if (FalseType != TrueType) {
1489 continue;
1490 }
1491
1492 if (!PredicateType->isIntOrIntVectorTy()) {
1493 continue;
1494 }
1495
1496 if (!FalseType->isIntOrIntVectorTy() &&
1497 !FalseType->getScalarType()->isFloatingPointTy()) {
1498 continue;
1499 }
1500
1501 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1502 continue;
1503 }
1504
1505 if (FalseType->getScalarSizeInBits() !=
1506 PredicateType->getScalarSizeInBits()) {
1507 continue;
1508 }
1509
1510 if (FalseType->isVectorTy()) {
1511 if (FalseType->getVectorNumElements() !=
1512 PredicateType->getVectorNumElements()) {
1513 continue;
1514 }
1515
1516 if ((FalseType->getVectorNumElements() != 2) &&
1517 (FalseType->getVectorNumElements() != 3) &&
1518 (FalseType->getVectorNumElements() != 4) &&
1519 (FalseType->getVectorNumElements() != 8) &&
1520 (FalseType->getVectorNumElements() != 16)) {
1521 continue;
1522 }
1523 }
1524
1525 // Create constant
1526 const auto ZeroValue = Constant::getNullValue(PredicateType);
1527
1528 // Scalar and vector are to be treated differently
1529 CmpInst::Predicate Pred;
1530 if (PredicateType->isVectorTy()) {
1531 Pred = CmpInst::ICMP_SLT;
1532 } else {
1533 Pred = CmpInst::ICMP_NE;
1534 }
1535
1536 // Create comparison instruction
1537 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1538 ZeroValue, "", CI);
1539
1540 // Create select
1541 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1542
1543 // Replace call with the selection
1544 CI->replaceAllUsesWith(V);
1545
1546 // Lastly, remember to remove the user.
1547 ToRemoves.push_back(CI);
1548 }
1549 }
1550
1551 Changed = !ToRemoves.empty();
1552
1553 // And cleanup the calls we don't use anymore.
1554 for (auto V : ToRemoves) {
1555 V->eraseFromParent();
1556 }
1557
1558 // And remove the function we don't need either too.
1559 F->eraseFromParent();
1560 }
1561 }
1562
1563 return Changed;
1564}
1565
Kévin Petite7d0cce2018-10-31 12:38:56 +00001566bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1567 bool Changed = false;
1568
1569 for (auto const &SymVal : M.getValueSymbolTable()) {
1570 // Skip symbols whose name doesn't match
1571 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1572 continue;
1573 }
1574 // Is there a function going by that name?
1575 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1576
1577 SmallVector<Instruction *, 4> ToRemoves;
1578
1579 // Walk the users of the function.
1580 for (auto &U : F->uses()) {
1581 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1582
1583 if (CI->getNumOperands() != 4) {
1584 continue;
1585 }
1586
1587 // Get arguments
1588 auto FalseValue = CI->getOperand(0);
1589 auto TrueValue = CI->getOperand(1);
1590 auto PredicateValue = CI->getOperand(2);
1591
1592 // Don't touch overloads that aren't in OpenCL C
1593 auto FalseType = FalseValue->getType();
1594 auto TrueType = TrueValue->getType();
1595 auto PredicateType = PredicateValue->getType();
1596
1597 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1598 continue;
1599 }
1600
1601 if (TrueType->isVectorTy()) {
1602 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1603 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001604 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001605 }
1606 if ((TrueType->getVectorNumElements() != 2) &&
1607 (TrueType->getVectorNumElements() != 3) &&
1608 (TrueType->getVectorNumElements() != 4) &&
1609 (TrueType->getVectorNumElements() != 8) &&
1610 (TrueType->getVectorNumElements() != 16)) {
1611 continue;
1612 }
1613 }
1614
1615 // Remember the type of the operands
1616 auto OpType = TrueType;
1617
1618 // The actual bit selection will always be done on an integer type,
1619 // declare it here
1620 Type *BitType;
1621
1622 // If the operands are float, then bitcast them to int
1623 if (OpType->getScalarType()->isFloatingPointTy()) {
1624
1625 // First create the new type
1626 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1627 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1628 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001629 BitType =
1630 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001631 }
1632
1633 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001634 PredicateValue =
1635 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1636 FalseValue =
1637 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1638 TrueValue =
1639 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001640
1641 } else {
1642 // The operands have an integer type, use it directly
1643 BitType = OpType;
1644 }
1645
1646 // All the operands are now always integers
1647 // implement as (c & b) | (~c & a)
1648
1649 // Create our negated predicate value
1650 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001651 auto NotPredicateValue = BinaryOperator::Create(
1652 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001653
1654 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001655 auto BitsFalse = BinaryOperator::Create(
1656 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1657 auto BitsTrue = BinaryOperator::Create(
1658 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001659
1660 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1661 BitsTrue, "", CI);
1662
1663 // If we were dealing with a floating point type, we must bitcast
1664 // the result back to that
1665 if (OpType->getScalarType()->isFloatingPointTy()) {
1666 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1667 }
1668
1669 // Replace call with our new code
1670 CI->replaceAllUsesWith(V);
1671
1672 // Lastly, remember to remove the user.
1673 ToRemoves.push_back(CI);
1674 }
1675 }
1676
1677 Changed = !ToRemoves.empty();
1678
1679 // And cleanup the calls we don't use anymore.
1680 for (auto V : ToRemoves) {
1681 V->eraseFromParent();
1682 }
1683
1684 // And remove the function we don't need either too.
1685 F->eraseFromParent();
1686 }
1687 }
1688
1689 return Changed;
1690}
1691
Kévin Petit6b0a9532018-10-30 20:00:39 +00001692bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1693 bool Changed = false;
1694
1695 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001696 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1697 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1698 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1699 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1700 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1701 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001702 };
1703
1704 for (auto Pair : Map) {
1705 // If we find a function with the matching name.
1706 if (auto F = M.getFunction(Pair.first)) {
1707 SmallVector<Instruction *, 4> ToRemoves;
1708
1709 // Walk the users of the function.
1710 for (auto &U : F->uses()) {
1711 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1712
1713 auto ReplacementFn = Pair.second;
1714
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001715 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001716 Value *VectorArg;
1717
1718 // First figure out which function we're dealing with
1719 if (F->getName().startswith("_Z10smoothstep")) {
1720 ArgsToSplat.push_back(CI->getOperand(1));
1721 VectorArg = CI->getOperand(2);
1722 } else {
1723 VectorArg = CI->getOperand(1);
1724 }
1725
1726 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001727 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001728 auto VecType = VectorArg->getType();
1729
1730 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001731 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001732 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001733 auto index =
1734 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1735 NewVectorArg =
1736 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001737 }
1738 SplatArgs.push_back(NewVectorArg);
1739 }
1740
1741 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001742 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1743 const auto NewFType =
1744 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001745
1746 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1747
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001748 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001749 for (auto arg : SplatArgs) {
1750 NewArgs.push_back(arg);
1751 }
1752 NewArgs.push_back(VectorArg);
1753
1754 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1755
1756 CI->replaceAllUsesWith(NewCI);
1757
1758 // Lastly, remember to remove the user.
1759 ToRemoves.push_back(CI);
1760 }
1761 }
1762
1763 Changed = !ToRemoves.empty();
1764
1765 // And cleanup the calls we don't use anymore.
1766 for (auto V : ToRemoves) {
1767 V->eraseFromParent();
1768 }
1769
1770 // And remove the function we don't need either too.
1771 F->eraseFromParent();
1772 }
1773 }
1774
1775 return Changed;
1776}
1777
David Neto22f144c2017-06-12 14:26:21 -04001778bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1779 bool Changed = false;
1780
1781 const std::map<const char *, Instruction::BinaryOps> Map = {
1782 {"_Z7signbitf", Instruction::LShr},
1783 {"_Z7signbitDv2_f", Instruction::AShr},
1784 {"_Z7signbitDv3_f", Instruction::AShr},
1785 {"_Z7signbitDv4_f", Instruction::AShr},
1786 };
1787
1788 for (auto Pair : Map) {
1789 // If we find a function with the matching name.
1790 if (auto F = M.getFunction(Pair.first)) {
1791 SmallVector<Instruction *, 4> ToRemoves;
1792
1793 // Walk the users of the function.
1794 for (auto &U : F->uses()) {
1795 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1796 auto Arg = CI->getOperand(0);
1797
1798 auto Bitcast =
1799 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1800
1801 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1802 ConstantInt::get(CI->getType(), 31),
1803 "", CI);
1804
1805 CI->replaceAllUsesWith(Shr);
1806
1807 // Lastly, remember to remove the user.
1808 ToRemoves.push_back(CI);
1809 }
1810 }
1811
1812 Changed = !ToRemoves.empty();
1813
1814 // And cleanup the calls we don't use anymore.
1815 for (auto V : ToRemoves) {
1816 V->eraseFromParent();
1817 }
1818
1819 // And remove the function we don't need either too.
1820 F->eraseFromParent();
1821 }
1822 }
1823
1824 return Changed;
1825}
1826
1827bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1828 bool Changed = false;
1829
1830 const std::map<const char *,
1831 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1832 Map = {
1833 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1834 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1835 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1836 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1837 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1838 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1839 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1840 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1841 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1842 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1843 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1844 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1845 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1846 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1847 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1848 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1849 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1850 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1851 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1852 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1853 };
1854
1855 for (auto Pair : Map) {
1856 // If we find a function with the matching name.
1857 if (auto F = M.getFunction(Pair.first)) {
1858 SmallVector<Instruction *, 4> ToRemoves;
1859
1860 // Walk the users of the function.
1861 for (auto &U : F->uses()) {
1862 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1863 // The multiply instruction to use.
1864 auto MulInst = Pair.second.first;
1865
1866 // The add instruction to use.
1867 auto AddInst = Pair.second.second;
1868
1869 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1870
1871 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1872 CI->getArgOperand(1), "", CI);
1873
1874 if (Instruction::BinaryOpsEnd != AddInst) {
1875 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1876 CI);
1877 }
1878
1879 CI->replaceAllUsesWith(I);
1880
1881 // Lastly, remember to remove the user.
1882 ToRemoves.push_back(CI);
1883 }
1884 }
1885
1886 Changed = !ToRemoves.empty();
1887
1888 // And cleanup the calls we don't use anymore.
1889 for (auto V : ToRemoves) {
1890 V->eraseFromParent();
1891 }
1892
1893 // And remove the function we don't need either too.
1894 F->eraseFromParent();
1895 }
1896 }
1897
1898 return Changed;
1899}
1900
Derek Chowcfd368b2017-10-19 20:58:45 -07001901bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1902 bool Changed = false;
1903
1904 struct VectorStoreOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001905 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001906 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001907 Type *(*get_scalar_type_function)(LLVMContext &);
1908 } vector_store_ops[] = {// TODO(derekjchow): Expand this list.
1909 {"_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001910
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001911 for (const auto &Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001912 auto Name = Op.name;
1913 auto N = Op.n;
1914 auto TypeFn = Op.get_scalar_type_function;
1915 if (auto F = M.getFunction(Name)) {
1916 SmallVector<Instruction *, 4> ToRemoves;
1917
1918 // Walk the users of the function.
1919 for (auto &U : F->uses()) {
1920 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1921 // The value argument from vstoren.
1922 auto Arg0 = CI->getOperand(0);
1923
1924 // The index argument from vstoren.
1925 auto Arg1 = CI->getOperand(1);
1926
1927 // The pointer argument from vstoren.
1928 auto Arg2 = CI->getOperand(2);
1929
1930 // Get types.
1931 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1932 auto ScalarNPointerTy = PointerType::get(
1933 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1934
1935 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001936 auto Cast =
1937 CastInst::CreatePointerCast(Arg2, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001938 // Index to correct address
1939 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1940 // Store
1941 auto Store = new StoreInst(Arg0, Index, CI);
1942
1943 CI->replaceAllUsesWith(Store);
1944 ToRemoves.push_back(CI);
1945 }
1946 }
1947
1948 Changed = !ToRemoves.empty();
1949
1950 // And cleanup the calls we don't use anymore.
1951 for (auto V : ToRemoves) {
1952 V->eraseFromParent();
1953 }
1954
1955 // And remove the function we don't need either too.
1956 F->eraseFromParent();
1957 }
1958 }
1959
1960 return Changed;
1961}
1962
1963bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1964 bool Changed = false;
1965
1966 struct VectorLoadOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001967 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001968 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001969 Type *(*get_scalar_type_function)(LLVMContext &);
1970 } vector_load_ops[] = {// TODO(derekjchow): Expand this list.
1971 {"_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001972
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001973 for (const auto &Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001974 auto Name = Op.name;
1975 auto N = Op.n;
1976 auto TypeFn = Op.get_scalar_type_function;
1977 // If we find a function with the matching name.
1978 if (auto F = M.getFunction(Name)) {
1979 SmallVector<Instruction *, 4> ToRemoves;
1980
1981 // Walk the users of the function.
1982 for (auto &U : F->uses()) {
1983 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1984 // The index argument from vloadn.
1985 auto Arg0 = CI->getOperand(0);
1986
1987 // The pointer argument from vloadn.
1988 auto Arg1 = CI->getOperand(1);
1989
1990 // Get types.
1991 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1992 auto ScalarNPointerTy = PointerType::get(
1993 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1994
1995 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001996 auto Cast =
1997 CastInst::CreatePointerCast(Arg1, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001998 // Index to correct address
1999 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2000 // Load
2001 auto Load = new LoadInst(Index, "", CI);
2002
2003 CI->replaceAllUsesWith(Load);
2004 ToRemoves.push_back(CI);
2005 }
2006 }
2007
2008 Changed = !ToRemoves.empty();
2009
2010 // And cleanup the calls we don't use anymore.
2011 for (auto V : ToRemoves) {
2012 V->eraseFromParent();
2013 }
2014
2015 // And remove the function we don't need either too.
2016 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002017 }
2018 }
2019
2020 return Changed;
2021}
2022
David Neto22f144c2017-06-12 14:26:21 -04002023bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2024 bool Changed = false;
2025
2026 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2027 "_Z10vload_halfjPU3AS2KDh"};
2028
2029 for (auto Name : Map) {
2030 // If we find a function with the matching name.
2031 if (auto F = M.getFunction(Name)) {
2032 SmallVector<Instruction *, 4> ToRemoves;
2033
2034 // Walk the users of the function.
2035 for (auto &U : F->uses()) {
2036 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2037 // The index argument from vload_half.
2038 auto Arg0 = CI->getOperand(0);
2039
2040 // The pointer argument from vload_half.
2041 auto Arg1 = CI->getOperand(1);
2042
David Neto22f144c2017-06-12 14:26:21 -04002043 auto IntTy = Type::getInt32Ty(M.getContext());
2044 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002045 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2046
David Neto22f144c2017-06-12 14:26:21 -04002047 // Our intrinsic to unpack a float2 from an int.
2048 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2049
2050 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2051
David Neto482550a2018-03-24 05:21:07 -07002052 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002053 auto ShortTy = Type::getInt16Ty(M.getContext());
2054 auto ShortPointerTy = PointerType::get(
2055 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002056
David Netoac825b82017-05-30 12:49:01 -04002057 // Cast the half* pointer to short*.
2058 auto Cast =
2059 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002060
David Netoac825b82017-05-30 12:49:01 -04002061 // Index into the correct address of the casted pointer.
2062 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2063
2064 // Load from the short* we casted to.
2065 auto Load = new LoadInst(Index, "", CI);
2066
2067 // ZExt the short -> int.
2068 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2069
2070 // Get our float2.
2071 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2072
2073 // Extract out the bottom element which is our float result.
2074 auto Extract = ExtractElementInst::Create(
2075 Call, ConstantInt::get(IntTy, 0), "", CI);
2076
2077 CI->replaceAllUsesWith(Extract);
2078 } else {
2079 // Assume the pointer argument points to storage aligned to 32bits
2080 // or more.
2081 // TODO(dneto): Do more analysis to make sure this is true?
2082 //
2083 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2084 // with:
2085 //
2086 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2087 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2088 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2089 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2090 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2091 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2092 // x float> %converted, %index_is_odd32
2093
2094 auto IntPointerTy = PointerType::get(
2095 IntTy, Arg1->getType()->getPointerAddressSpace());
2096
David Neto973e6a82017-05-30 13:48:18 -04002097 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002098 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002099 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002100 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2101
2102 auto One = ConstantInt::get(IntTy, 1);
2103 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2104 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2105
2106 // Index into the correct address of the casted pointer.
2107 auto Ptr =
2108 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2109
2110 // Load from the int* we casted to.
2111 auto Load = new LoadInst(Ptr, "", CI);
2112
2113 // Get our float2.
2114 auto Call = CallInst::Create(NewF, Load, "", CI);
2115
2116 // Extract out the float result, where the element number is
2117 // determined by whether the original index was even or odd.
2118 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2119
2120 CI->replaceAllUsesWith(Extract);
2121 }
David Neto22f144c2017-06-12 14:26:21 -04002122
2123 // Lastly, remember to remove the user.
2124 ToRemoves.push_back(CI);
2125 }
2126 }
2127
2128 Changed = !ToRemoves.empty();
2129
2130 // And cleanup the calls we don't use anymore.
2131 for (auto V : ToRemoves) {
2132 V->eraseFromParent();
2133 }
2134
2135 // And remove the function we don't need either too.
2136 F->eraseFromParent();
2137 }
2138 }
2139
2140 return Changed;
2141}
2142
2143bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002144
Kévin Petite8edce32019-04-10 14:23:32 +01002145 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002146 "_Z11vload_half2jPU3AS1KDh",
2147 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2148 "_Z11vload_half2jPU3AS2KDh",
2149 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2150 };
David Neto22f144c2017-06-12 14:26:21 -04002151
Kévin Petite8edce32019-04-10 14:23:32 +01002152 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2153 // The index argument from vload_half.
2154 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002155
Kévin Petite8edce32019-04-10 14:23:32 +01002156 // The pointer argument from vload_half.
2157 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002158
Kévin Petite8edce32019-04-10 14:23:32 +01002159 auto IntTy = Type::getInt32Ty(M.getContext());
2160 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002161 auto NewPointerTy =
2162 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002163 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002164
Kévin Petite8edce32019-04-10 14:23:32 +01002165 // Cast the half* pointer to int*.
2166 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002167
Kévin Petite8edce32019-04-10 14:23:32 +01002168 // Index into the correct address of the casted pointer.
2169 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002170
Kévin Petite8edce32019-04-10 14:23:32 +01002171 // Load from the int* we casted to.
2172 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002173
Kévin Petite8edce32019-04-10 14:23:32 +01002174 // Our intrinsic to unpack a float2 from an int.
2175 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002176
Kévin Petite8edce32019-04-10 14:23:32 +01002177 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002178
Kévin Petite8edce32019-04-10 14:23:32 +01002179 // Get our float2.
2180 return CallInst::Create(NewF, Load, "", CI);
2181 });
David Neto22f144c2017-06-12 14:26:21 -04002182}
2183
2184bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002185
Kévin Petite8edce32019-04-10 14:23:32 +01002186 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002187 "_Z11vload_half4jPU3AS1KDh",
2188 "_Z12vloada_half4jPU3AS1KDh",
2189 "_Z11vload_half4jPU3AS2KDh",
2190 "_Z12vloada_half4jPU3AS2KDh",
2191 };
David Neto22f144c2017-06-12 14:26:21 -04002192
Kévin Petite8edce32019-04-10 14:23:32 +01002193 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2194 // The index argument from vload_half.
2195 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002196
Kévin Petite8edce32019-04-10 14:23:32 +01002197 // The pointer argument from vload_half.
2198 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002199
Kévin Petite8edce32019-04-10 14:23:32 +01002200 auto IntTy = Type::getInt32Ty(M.getContext());
2201 auto Int2Ty = VectorType::get(IntTy, 2);
2202 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002203 auto NewPointerTy =
2204 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002205 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002206
Kévin Petite8edce32019-04-10 14:23:32 +01002207 // Cast the half* pointer to int2*.
2208 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002209
Kévin Petite8edce32019-04-10 14:23:32 +01002210 // Index into the correct address of the casted pointer.
2211 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002212
Kévin Petite8edce32019-04-10 14:23:32 +01002213 // Load from the int2* we casted to.
2214 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002215
Kévin Petite8edce32019-04-10 14:23:32 +01002216 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002217 auto X =
2218 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2219 auto Y =
2220 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002221
Kévin Petite8edce32019-04-10 14:23:32 +01002222 // Our intrinsic to unpack a float2 from an int.
2223 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002224
Kévin Petite8edce32019-04-10 14:23:32 +01002225 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002226
Kévin Petite8edce32019-04-10 14:23:32 +01002227 // Get the lower (x & y) components of our final float4.
2228 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002229
Kévin Petite8edce32019-04-10 14:23:32 +01002230 // Get the higher (z & w) components of our final float4.
2231 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002232
Kévin Petite8edce32019-04-10 14:23:32 +01002233 Constant *ShuffleMask[4] = {
2234 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2235 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002236
Kévin Petite8edce32019-04-10 14:23:32 +01002237 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002238 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2239 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002240 });
David Neto22f144c2017-06-12 14:26:21 -04002241}
2242
David Neto6ad93232018-06-07 15:42:58 -07002243bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002244
2245 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2246 //
2247 // %u = load i32 %ptr
2248 // %fxy = call <2 x float> Unpack2xHalf(u)
2249 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002250 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002251 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2252 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2253 "_Z20__clspv_vloada_half2jPKj", // private
2254 };
2255
Kévin Petite8edce32019-04-10 14:23:32 +01002256 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2257 auto Index = CI->getOperand(0);
2258 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002259
Kévin Petite8edce32019-04-10 14:23:32 +01002260 auto IntTy = Type::getInt32Ty(M.getContext());
2261 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2262 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002263
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002264 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002265 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002266
Kévin Petite8edce32019-04-10 14:23:32 +01002267 // Our intrinsic to unpack a float2 from an int.
2268 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002269
Kévin Petite8edce32019-04-10 14:23:32 +01002270 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 // Get our final float2.
2273 return CallInst::Create(NewF, Load, "", CI);
2274 });
David Neto6ad93232018-06-07 15:42:58 -07002275}
2276
2277bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002278
2279 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2280 //
2281 // %u2 = load <2 x i32> %ptr
2282 // %u2xy = extractelement %u2, 0
2283 // %u2zw = extractelement %u2, 1
2284 // %fxy = call <2 x float> Unpack2xHalf(uint)
2285 // %fzw = call <2 x float> Unpack2xHalf(uint)
2286 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002287 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002288 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2289 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2290 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2291 };
2292
Kévin Petite8edce32019-04-10 14:23:32 +01002293 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2294 auto Index = CI->getOperand(0);
2295 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002296
Kévin Petite8edce32019-04-10 14:23:32 +01002297 auto IntTy = Type::getInt32Ty(M.getContext());
2298 auto Int2Ty = VectorType::get(IntTy, 2);
2299 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2300 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002301
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002302 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002304
Kévin Petite8edce32019-04-10 14:23:32 +01002305 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002306 auto X =
2307 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2308 auto Y =
2309 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002310
Kévin Petite8edce32019-04-10 14:23:32 +01002311 // Our intrinsic to unpack a float2 from an int.
2312 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002313
Kévin Petite8edce32019-04-10 14:23:32 +01002314 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002315
Kévin Petite8edce32019-04-10 14:23:32 +01002316 // Get the lower (x & y) components of our final float4.
2317 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002318
Kévin Petite8edce32019-04-10 14:23:32 +01002319 // Get the higher (z & w) components of our final float4.
2320 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002321
Kévin Petite8edce32019-04-10 14:23:32 +01002322 Constant *ShuffleMask[4] = {
2323 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2324 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002327 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2328 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002329 });
David Neto6ad93232018-06-07 15:42:58 -07002330}
2331
David Neto22f144c2017-06-12 14:26:21 -04002332bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002333
Kévin Petite8edce32019-04-10 14:23:32 +01002334 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2335 "_Z15vstore_half_rtefjPU3AS1Dh",
2336 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002337
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002338 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002339 // The value to store.
2340 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002341
Kévin Petite8edce32019-04-10 14:23:32 +01002342 // The index argument from vstore_half.
2343 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002344
Kévin Petite8edce32019-04-10 14:23:32 +01002345 // The pointer argument from vstore_half.
2346 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002347
Kévin Petite8edce32019-04-10 14:23:32 +01002348 auto IntTy = Type::getInt32Ty(M.getContext());
2349 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2350 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2351 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002352
Kévin Petite8edce32019-04-10 14:23:32 +01002353 // Our intrinsic to pack a float2 to an int.
2354 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002355
Kévin Petite8edce32019-04-10 14:23:32 +01002356 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002357
Kévin Petite8edce32019-04-10 14:23:32 +01002358 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002359 auto TempVec = InsertElementInst::Create(
2360 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002361
Kévin Petite8edce32019-04-10 14:23:32 +01002362 // Pack the float2 -> half2 (in an int).
2363 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002364
Kévin Petite8edce32019-04-10 14:23:32 +01002365 Value *Ret;
2366 if (clspv::Option::F16BitStorage()) {
2367 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002368 auto ShortPointerTy =
2369 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002370
Kévin Petite8edce32019-04-10 14:23:32 +01002371 // Truncate our i32 to an i16.
2372 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002373
Kévin Petite8edce32019-04-10 14:23:32 +01002374 // Cast the half* pointer to short*.
2375 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002376
Kévin Petite8edce32019-04-10 14:23:32 +01002377 // Index into the correct address of the casted pointer.
2378 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002379
Kévin Petite8edce32019-04-10 14:23:32 +01002380 // Store to the int* we casted to.
2381 Ret = new StoreInst(Trunc, Index, CI);
2382 } else {
2383 // We can only write to 32-bit aligned words.
2384 //
2385 // Assuming base is aligned to 32-bits, replace the equivalent of
2386 // vstore_half(value, index, base)
2387 // with:
2388 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2389 // uint32_t write_to_upper_half = index & 1u;
2390 // uint32_t shift = write_to_upper_half << 4;
2391 //
2392 // // Pack the float value as a half number in bottom 16 bits
2393 // // of an i32.
2394 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2395 //
2396 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2397 // ^ ((packed & 0xffff) << shift)
2398 // // We only need relaxed consistency, but OpenCL 1.2 only has
2399 // // sequentially consistent atomics.
2400 // // TODO(dneto): Use relaxed consistency.
2401 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002402 auto IntPointerTy =
2403 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002404
Kévin Petite8edce32019-04-10 14:23:32 +01002405 auto Four = ConstantInt::get(IntTy, 4);
2406 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002407
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002408 auto IndexIsOdd =
2409 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002410 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002411 auto IndexIntoI32 =
2412 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2413 auto BaseI32Ptr =
2414 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2415 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2416 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002417 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2418 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002419 auto MaskBitsToWrite =
2420 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2421 auto MaskedCurrent = BinaryOperator::CreateAnd(
2422 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002423
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002424 auto XLowerBits =
2425 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2426 auto NewBitsToWrite =
2427 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2428 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2429 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002430
Kévin Petite8edce32019-04-10 14:23:32 +01002431 // Generate the call to atomi_xor.
2432 SmallVector<Type *, 5> ParamTypes;
2433 // The pointer type.
2434 ParamTypes.push_back(IntPointerTy);
2435 // The Types for memory scope, semantics, and value.
2436 ParamTypes.push_back(IntTy);
2437 ParamTypes.push_back(IntTy);
2438 ParamTypes.push_back(IntTy);
2439 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2440 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002441
Kévin Petite8edce32019-04-10 14:23:32 +01002442 const auto ConstantScopeDevice =
2443 ConstantInt::get(IntTy, spv::ScopeDevice);
2444 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2445 // (SPIR-V Workgroup).
2446 const auto AddrSpaceSemanticsBits =
2447 IntPointerTy->getPointerAddressSpace() == 1
2448 ? spv::MemorySemanticsUniformMemoryMask
2449 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002450
Kévin Petite8edce32019-04-10 14:23:32 +01002451 // We're using relaxed consistency here.
2452 const auto ConstantMemorySemantics =
2453 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2454 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002455
Kévin Petite8edce32019-04-10 14:23:32 +01002456 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2457 ConstantMemorySemantics, ValueToXor};
2458 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2459 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002460 }
David Neto22f144c2017-06-12 14:26:21 -04002461
Kévin Petite8edce32019-04-10 14:23:32 +01002462 return Ret;
2463 });
David Neto22f144c2017-06-12 14:26:21 -04002464}
2465
2466bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002467
Kévin Petite8edce32019-04-10 14:23:32 +01002468 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002469 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2470 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2471 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2472 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2473 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2474 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2475 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2476 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2477 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2478 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2479 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2480 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2481 };
David Neto22f144c2017-06-12 14:26:21 -04002482
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002483 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002484 // The value to store.
2485 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002486
Kévin Petite8edce32019-04-10 14:23:32 +01002487 // The index argument from vstore_half.
2488 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002489
Kévin Petite8edce32019-04-10 14:23:32 +01002490 // The pointer argument from vstore_half.
2491 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002492
Kévin Petite8edce32019-04-10 14:23:32 +01002493 auto IntTy = Type::getInt32Ty(M.getContext());
2494 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002495 auto NewPointerTy =
2496 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002497 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002498
Kévin Petite8edce32019-04-10 14:23:32 +01002499 // Our intrinsic to pack a float2 to an int.
2500 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002501
Kévin Petite8edce32019-04-10 14:23:32 +01002502 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002503
Kévin Petite8edce32019-04-10 14:23:32 +01002504 // Turn the packed x & y into the final packing.
2505 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002506
Kévin Petite8edce32019-04-10 14:23:32 +01002507 // Cast the half* pointer to int*.
2508 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002509
Kévin Petite8edce32019-04-10 14:23:32 +01002510 // Index into the correct address of the casted pointer.
2511 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002512
Kévin Petite8edce32019-04-10 14:23:32 +01002513 // Store to the int* we casted to.
2514 return new StoreInst(X, Index, CI);
2515 });
David Neto22f144c2017-06-12 14:26:21 -04002516}
2517
2518bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002519
Kévin Petite8edce32019-04-10 14:23:32 +01002520 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002521 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2522 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2523 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2524 "_Z13vstorea_half4Dv4_fjPDh", // private
2525 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2526 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2527 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2528 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2529 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2530 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2531 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2532 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2533 };
David Neto22f144c2017-06-12 14:26:21 -04002534
Kévin Petite8edce32019-04-10 14:23:32 +01002535 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2536 // The value to store.
2537 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002538
Kévin Petite8edce32019-04-10 14:23:32 +01002539 // The index argument from vstore_half.
2540 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002541
Kévin Petite8edce32019-04-10 14:23:32 +01002542 // The pointer argument from vstore_half.
2543 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002544
Kévin Petite8edce32019-04-10 14:23:32 +01002545 auto IntTy = Type::getInt32Ty(M.getContext());
2546 auto Int2Ty = VectorType::get(IntTy, 2);
2547 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002548 auto NewPointerTy =
2549 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002550 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002551
Kévin Petite8edce32019-04-10 14:23:32 +01002552 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2553 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002554
Kévin Petite8edce32019-04-10 14:23:32 +01002555 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002556 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2557 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002558
Kévin Petite8edce32019-04-10 14:23:32 +01002559 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2560 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002561
Kévin Petite8edce32019-04-10 14:23:32 +01002562 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002563 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2564 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002565
Kévin Petite8edce32019-04-10 14:23:32 +01002566 // Our intrinsic to pack a float2 to an int.
2567 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002568
Kévin Petite8edce32019-04-10 14:23:32 +01002569 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002570
Kévin Petite8edce32019-04-10 14:23:32 +01002571 // Turn the packed x & y into the final component of our int2.
2572 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002573
Kévin Petite8edce32019-04-10 14:23:32 +01002574 // Turn the packed z & w into the final component of our int2.
2575 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002576
Kévin Petite8edce32019-04-10 14:23:32 +01002577 auto Combine = InsertElementInst::Create(
2578 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002579 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2580 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002581
Kévin Petite8edce32019-04-10 14:23:32 +01002582 // Cast the half* pointer to int2*.
2583 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002584
Kévin Petite8edce32019-04-10 14:23:32 +01002585 // Index into the correct address of the casted pointer.
2586 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002587
Kévin Petite8edce32019-04-10 14:23:32 +01002588 // Store to the int2* we casted to.
2589 return new StoreInst(Combine, Index, CI);
2590 });
David Neto22f144c2017-06-12 14:26:21 -04002591}
2592
2593bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2594 bool Changed = false;
2595
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002596 const std::map<const char *, const char *> Map = {
2597 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2598 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2599 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2600 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002601
2602 for (auto Pair : Map) {
2603 // If we find a function with the matching name.
2604 if (auto F = M.getFunction(Pair.first)) {
2605 SmallVector<Instruction *, 4> ToRemoves;
2606
2607 // Walk the users of the function.
2608 for (auto &U : F->uses()) {
2609 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2610 // The image.
2611 auto Arg0 = CI->getOperand(0);
2612
2613 // The sampler.
2614 auto Arg1 = CI->getOperand(1);
2615
2616 // The coordinate (integer type that we can't handle).
2617 auto Arg2 = CI->getOperand(2);
2618
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002619 auto FloatVecTy =
2620 VectorType::get(Type::getFloatTy(M.getContext()),
2621 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002622
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002623 auto NewFType = FunctionType::get(
2624 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2625 false);
David Neto22f144c2017-06-12 14:26:21 -04002626
2627 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2628
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002629 auto Cast =
2630 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002631
2632 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2633
2634 CI->replaceAllUsesWith(NewCI);
2635
2636 // Lastly, remember to remove the user.
2637 ToRemoves.push_back(CI);
2638 }
2639 }
2640
2641 Changed = !ToRemoves.empty();
2642
2643 // And cleanup the calls we don't use anymore.
2644 for (auto V : ToRemoves) {
2645 V->eraseFromParent();
2646 }
2647
2648 // And remove the function we don't need either too.
2649 F->eraseFromParent();
2650 }
2651 }
2652
2653 return Changed;
2654}
2655
2656bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2657 bool Changed = false;
2658
2659 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002660 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002661 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002662 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002663 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002664 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002665 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002666 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002667 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002668 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002669 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002670 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002671 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002672 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002673 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002674 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002675 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002676 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002677 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002678 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002679 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002680 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002681 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2682 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2683 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002684
2685 for (auto Pair : Map) {
2686 // If we find a function with the matching name.
2687 if (auto F = M.getFunction(Pair.first)) {
2688 SmallVector<Instruction *, 4> ToRemoves;
2689
2690 // Walk the users of the function.
2691 for (auto &U : F->uses()) {
2692 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2693 auto FType = F->getFunctionType();
2694 SmallVector<Type *, 5> ParamTypes;
2695
2696 // The pointer type.
2697 ParamTypes.push_back(FType->getParamType(0));
2698
2699 auto IntTy = Type::getInt32Ty(M.getContext());
2700
2701 // The memory scope type.
2702 ParamTypes.push_back(IntTy);
2703
2704 // The memory semantics type.
2705 ParamTypes.push_back(IntTy);
2706
2707 if (2 < CI->getNumArgOperands()) {
2708 // The unequal memory semantics type.
2709 ParamTypes.push_back(IntTy);
2710
2711 // The value type.
2712 ParamTypes.push_back(FType->getParamType(2));
2713
2714 // The comparator type.
2715 ParamTypes.push_back(FType->getParamType(1));
2716 } else if (1 < CI->getNumArgOperands()) {
2717 // The value type.
2718 ParamTypes.push_back(FType->getParamType(1));
2719 }
2720
2721 auto NewFType =
2722 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2723 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2724
2725 // We need to map the OpenCL constants to the SPIR-V equivalents.
2726 const auto ConstantScopeDevice =
2727 ConstantInt::get(IntTy, spv::ScopeDevice);
2728 const auto ConstantMemorySemantics = ConstantInt::get(
2729 IntTy, spv::MemorySemanticsUniformMemoryMask |
2730 spv::MemorySemanticsSequentiallyConsistentMask);
2731
2732 SmallVector<Value *, 5> Params;
2733
2734 // The pointer.
2735 Params.push_back(CI->getArgOperand(0));
2736
2737 // The memory scope.
2738 Params.push_back(ConstantScopeDevice);
2739
2740 // The memory semantics.
2741 Params.push_back(ConstantMemorySemantics);
2742
2743 if (2 < CI->getNumArgOperands()) {
2744 // The unequal memory semantics.
2745 Params.push_back(ConstantMemorySemantics);
2746
2747 // The value.
2748 Params.push_back(CI->getArgOperand(2));
2749
2750 // The comparator.
2751 Params.push_back(CI->getArgOperand(1));
2752 } else if (1 < CI->getNumArgOperands()) {
2753 // The value.
2754 Params.push_back(CI->getArgOperand(1));
2755 }
2756
2757 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2758
2759 CI->replaceAllUsesWith(NewCI);
2760
2761 // Lastly, remember to remove the user.
2762 ToRemoves.push_back(CI);
2763 }
2764 }
2765
2766 Changed = !ToRemoves.empty();
2767
2768 // And cleanup the calls we don't use anymore.
2769 for (auto V : ToRemoves) {
2770 V->eraseFromParent();
2771 }
2772
2773 // And remove the function we don't need either too.
2774 F->eraseFromParent();
2775 }
2776 }
2777
Neil Henning39672102017-09-29 14:33:13 +01002778 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002779 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002780 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002781 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002782 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002783 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002784 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002785 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002786 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002787 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002788 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002789 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002790 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002791 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002792 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002793 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002794 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002795 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002796 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002797 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002798 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002799 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002800 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002801 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002802 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002803 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002804 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002805 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002806 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002807 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002808 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002809 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002810 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002811 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002812 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002813 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002814 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002815 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002816 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002817 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002818 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002819 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002820 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002821 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002822 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002823 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002824 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002825 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002826 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002827 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002828 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002829 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002830 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002831 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002832 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002833 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002834 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002835 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002836 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002837 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002838 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002839 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002840 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2841 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2842 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002843
2844 for (auto Pair : Map2) {
2845 // If we find a function with the matching name.
2846 if (auto F = M.getFunction(Pair.first)) {
2847 SmallVector<Instruction *, 4> ToRemoves;
2848
2849 // Walk the users of the function.
2850 for (auto &U : F->uses()) {
2851 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2852 auto AtomicOp = new AtomicRMWInst(
2853 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2854 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2855
2856 CI->replaceAllUsesWith(AtomicOp);
2857
2858 // Lastly, remember to remove the user.
2859 ToRemoves.push_back(CI);
2860 }
2861 }
2862
2863 Changed = !ToRemoves.empty();
2864
2865 // And cleanup the calls we don't use anymore.
2866 for (auto V : ToRemoves) {
2867 V->eraseFromParent();
2868 }
2869
2870 // And remove the function we don't need either too.
2871 F->eraseFromParent();
2872 }
2873 }
2874
David Neto22f144c2017-06-12 14:26:21 -04002875 return Changed;
2876}
2877
2878bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002879
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002880 std::vector<const char *> Names = {
2881 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002882 };
2883
2884 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002885 auto IntTy = Type::getInt32Ty(M.getContext());
2886 auto FloatTy = Type::getFloatTy(M.getContext());
2887
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002888 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2889 ConstantInt::get(IntTy, 1),
2890 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002891
2892 Constant *UpShuffleMask[4] = {
2893 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2894 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2895
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002896 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2897 UndefValue::get(FloatTy),
2898 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002899
Kévin Petite8edce32019-04-10 14:23:32 +01002900 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002901 auto Arg0 =
2902 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2903 ConstantVector::get(DownShuffleMask), "", CI);
2904 auto Arg1 =
2905 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2906 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002907 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002908
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002909 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002910
Kévin Petite8edce32019-04-10 14:23:32 +01002911 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002912
Kévin Petite8edce32019-04-10 14:23:32 +01002913 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002914
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002915 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2916 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002917 });
David Neto22f144c2017-06-12 14:26:21 -04002918}
David Neto62653202017-10-16 19:05:18 -04002919
2920bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2921 bool Changed = false;
2922
2923 // OpenCL's float result = fract(float x, float* ptr)
2924 //
2925 // In the LLVM domain:
2926 //
2927 // %floor_result = call spir_func float @floor(float %x)
2928 // store float %floor_result, float * %ptr
2929 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2930 // %result = call spir_func float
2931 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2932 //
2933 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2934 // and clspv.fract occur in the SPIR-V generator pass:
2935 //
2936 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2937 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2938 // ...
2939 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2940 // OpStore %ptr %floor_result
2941 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2942 // %fract_result = OpExtInst %float
2943 // %glsl_ext Fmin %fract_intermediate %just_under_1
2944
David Neto62653202017-10-16 19:05:18 -04002945 using std::string;
2946
2947 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2948 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002949 using QuadType =
2950 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04002951 auto make_quad = [](const char *a, const char *b, const char *c,
2952 const char *d) {
2953 return std::tuple<const char *, const char *, const char *, const char *>(
2954 a, b, c, d);
2955 };
2956 const std::vector<QuadType> Functions = {
2957 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002958 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
2959 "clspv.fract.v2f"),
2960 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
2961 "clspv.fract.v3f"),
2962 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
2963 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04002964 };
2965
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002966 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04002967 const StringRef fract_name(std::get<0>(quad));
2968
2969 // If we find a function with the matching name.
2970 if (auto F = M.getFunction(fract_name)) {
2971 if (F->use_begin() == F->use_end())
2972 continue;
2973
2974 // We have some uses.
2975 Changed = true;
2976
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002977 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04002978
2979 const StringRef floor_name(std::get<1>(quad));
2980 const StringRef fmin_name(std::get<2>(quad));
2981 const StringRef clspv_fract_name(std::get<3>(quad));
2982
2983 // This is either float or a float vector. All the float-like
2984 // types are this type.
2985 auto result_ty = F->getReturnType();
2986
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002987 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04002988 if (!fmin_fn) {
2989 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002990 FunctionType *fn_ty =
2991 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04002992 fmin_fn =
2993 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04002994 fmin_fn->addFnAttr(Attribute::ReadNone);
2995 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2996 }
2997
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002998 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04002999 if (!floor_fn) {
3000 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003001 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003002 floor_fn = cast<Function>(
3003 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003004 floor_fn->addFnAttr(Attribute::ReadNone);
3005 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3006 }
3007
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003008 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003009 if (!clspv_fract_fn) {
3010 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003011 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003012 clspv_fract_fn = cast<Function>(
3013 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003014 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3015 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3016 }
3017
3018 // Number of significant significand bits, whether represented or not.
3019 unsigned num_significand_bits;
3020 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003021 case Type::HalfTyID:
3022 num_significand_bits = 11;
3023 break;
3024 case Type::FloatTyID:
3025 num_significand_bits = 24;
3026 break;
3027 case Type::DoubleTyID:
3028 num_significand_bits = 53;
3029 break;
3030 default:
3031 assert(false && "Unhandled float type when processing fract builtin");
3032 break;
David Neto62653202017-10-16 19:05:18 -04003033 }
3034 // Beware that the disassembler displays this value as
3035 // OpConstant %float 1
3036 // which is not quite right.
3037 const double kJustUnderOneScalar =
3038 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3039
3040 Constant *just_under_one =
3041 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3042 if (result_ty->isVectorTy()) {
3043 just_under_one = ConstantVector::getSplat(
3044 result_ty->getVectorNumElements(), just_under_one);
3045 }
3046
3047 IRBuilder<> Builder(Context);
3048
3049 SmallVector<Instruction *, 4> ToRemoves;
3050
3051 // Walk the users of the function.
3052 for (auto &U : F->uses()) {
3053 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3054
3055 Builder.SetInsertPoint(CI);
3056 auto arg = CI->getArgOperand(0);
3057 auto ptr = CI->getArgOperand(1);
3058
3059 // Compute floor result and store it.
3060 auto floor = Builder.CreateCall(floor_fn, {arg});
3061 Builder.CreateStore(floor, ptr);
3062
3063 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003064 auto fract_result =
3065 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003066
3067 CI->replaceAllUsesWith(fract_result);
3068
3069 // Lastly, remember to remove the user.
3070 ToRemoves.push_back(CI);
3071 }
3072 }
3073
3074 // And cleanup the calls we don't use anymore.
3075 for (auto V : ToRemoves) {
3076 V->eraseFromParent();
3077 }
3078
3079 // And remove the function we don't need either too.
3080 F->eraseFromParent();
3081 }
3082 }
3083
3084 return Changed;
3085}