blob: b9559f1d3129d0a150a4c6732000da96ed10c3e0 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040032#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070033
Diego Novilloa4c44fa2019-04-11 10:56:15 -040034#include "Passes.h"
35#include "SPIRVOp.h"
36
David Neto22f144c2017-06-12 14:26:21 -040037using namespace llvm;
38
39#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
40
41namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000042
// Type facts recovered from the mangled name for a single builtin argument.
struct ArgTypeInfo {
  // Signed / Unsigned are recorded for integer type codes; None is recorded
  // for the float type code ('f').
  enum class SignedNess { None, Unsigned, Signed };

  // Defaults to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value.
  SignedNess signedness = SignedNess::None;
};
47
48struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000049 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000050 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000051
Kévin Petit91bc72e2019-04-08 15:17:46 +010052 bool isArgSigned(size_t arg) const {
53 assert(argTypeInfos.size() > arg);
54 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000055 }
56
Kévin Petit91bc72e2019-04-08 15:17:46 +010057 static FunctionInfo getFromMangledName(StringRef name) {
58 FunctionInfo fi;
59 if (!getFromMangledNameCheck(name, &fi)) {
60 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000061 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010062 return fi;
63 }
Kévin Petit8a560882019-03-21 15:24:34 +000064
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
66 if (!name.consume_front("_Z")) {
67 return false;
68 }
69 size_t nameLen;
70 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000071 return false;
72 }
73
Kévin Petit91bc72e2019-04-08 15:17:46 +010074 finfo->name = name.take_front(nameLen);
75 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 while (name.size() != 0) {
80
81 ArgTypeInfo ti;
82
83 // Try parsing a vector prefix
84 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040085 int numElems;
86 if (name.consumeInteger(10, numElems)) {
87 return false;
88 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040090 if (!name.consume_front("_")) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093 }
94
95 // Parse the base type
96 char typeCode = name.front();
97 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040098 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010099 case 'c': // char
100 case 'a': // signed char
101 case 's': // short
102 case 'i': // int
103 case 'l': // long
104 ti.signedness = ArgTypeInfo::SignedNess::Signed;
105 break;
106 case 'h': // unsigned char
107 case 't': // unsigned short
108 case 'j': // unsigned int
109 case 'm': // unsigned long
110 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
111 break;
112 case 'f':
113 ti.signedness = ArgTypeInfo::SignedNess::None;
114 break;
115 case 'S':
116 ti = prev_ti;
117 if (!name.consume_front("_")) {
118 return false;
119 }
120 break;
121 default:
122 return false;
123 }
124
125 finfo->argTypeInfos.push_back(ti);
126
127 prev_ti = ti;
128 }
129
130 return true;
131 };
Kévin Petit8a560882019-03-21 15:24:34 +0000132};
133
// NOTE: despite its name, this helper does NOT count leading zeros.
//
// It returns the zero-based index of the most significant set bit of `v`,
// i.e. floor(log2(v)), and 0 when `v` is 0. Callers in this file subtract two
// results to obtain the shift distance between two single-bit masks, which is
// why the "index of the set bit" meaning is the one that must be preserved.
uint32_t clz(uint32_t v) {
  uint32_t r = 0;
  // Each halving that leaves a non-zero value means the top bit sits one
  // position higher.
  while ((v >>= 1) != 0) {
    ++r;
  }
  return r;
}
153
154Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
155 if (1 == elements) {
156 return Type::getInt1Ty(C);
157 } else {
158 return VectorType::get(Type::getInt1Ty(C), elements);
159 }
160}
161
162struct ReplaceOpenCLBuiltinPass final : public ModulePass {
163 static char ID;
164 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
165
166 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000167 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100168 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100169 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400170 bool replaceRecip(Module &M);
171 bool replaceDivide(Module &M);
172 bool replaceExp10(Module &M);
173 bool replaceLog10(Module &M);
174 bool replaceBarrier(Module &M);
175 bool replaceMemFence(Module &M);
176 bool replaceRelational(Module &M);
177 bool replaceIsInfAndIsNan(Module &M);
178 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000179 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000180 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000181 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000182 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000183 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000184 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000185 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceSignbit(Module &M);
187 bool replaceMadandMad24andMul24(Module &M);
188 bool replaceVloadHalf(Module &M);
189 bool replaceVloadHalf2(Module &M);
190 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700191 bool replaceClspvVloadaHalf2(Module &M);
192 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400193 bool replaceVstoreHalf(Module &M);
194 bool replaceVstoreHalf2(Module &M);
195 bool replaceVstoreHalf4(Module &M);
196 bool replaceReadImageF(Module &M);
197 bool replaceAtomics(Module &M);
198 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400199 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700200 bool replaceVload(Module &M);
201 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400202};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100203} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400204
205char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400206INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
207 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400208
209namespace clspv {
210ModulePass *createReplaceOpenCLBuiltinPass() {
211 return new ReplaceOpenCLBuiltinPass();
212}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400213} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400214
215bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
216 bool Changed = false;
217
Kévin Petit2444e9b2018-11-09 14:14:37 +0000218 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100219 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100220 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400221 Changed |= replaceRecip(M);
222 Changed |= replaceDivide(M);
223 Changed |= replaceExp10(M);
224 Changed |= replaceLog10(M);
225 Changed |= replaceBarrier(M);
226 Changed |= replaceMemFence(M);
227 Changed |= replaceRelational(M);
228 Changed |= replaceIsInfAndIsNan(M);
229 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000230 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000231 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000232 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000233 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000234 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000235 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000236 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400237 Changed |= replaceSignbit(M);
238 Changed |= replaceMadandMad24andMul24(M);
239 Changed |= replaceVloadHalf(M);
240 Changed |= replaceVloadHalf2(M);
241 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700242 Changed |= replaceClspvVloadaHalf2(M);
243 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400244 Changed |= replaceVstoreHalf(M);
245 Changed |= replaceVstoreHalf2(M);
246 Changed |= replaceVstoreHalf4(M);
247 Changed |= replaceReadImageF(M);
248 Changed |= replaceAtomics(M);
249 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400250 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700251 Changed |= replaceVload(M);
252 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400253
254 return Changed;
255}
256
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400257bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
258 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000259
Kévin Petite8edce32019-04-10 14:23:32 +0100260 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000261
262 for (auto Name : Names) {
263 // If we find a function with the matching name.
264 if (auto F = M.getFunction(Name)) {
265 SmallVector<Instruction *, 4> ToRemoves;
266
267 // Walk the users of the function.
268 for (auto &U : F->uses()) {
269 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000270
Kévin Petite8edce32019-04-10 14:23:32 +0100271 auto NewValue = Replacer(CI);
272
273 if (NewValue != nullptr) {
274 CI->replaceAllUsesWith(NewValue);
275 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000276
277 // Lastly, remember to remove the user.
278 ToRemoves.push_back(CI);
279 }
280 }
281
282 Changed = !ToRemoves.empty();
283
284 // And cleanup the calls we don't use anymore.
285 for (auto V : ToRemoves) {
286 V->eraseFromParent();
287 }
288
289 // And remove the function we don't need either too.
290 F->eraseFromParent();
291 }
292 }
293
294 return Changed;
295}
296
Kévin Petite8edce32019-04-10 14:23:32 +0100297bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100298
Kévin Petite8edce32019-04-10 14:23:32 +0100299 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400300 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
301 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
302 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
303 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100304 };
305
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400306 return replaceCallsWithValue(M, Names,
307 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100308}
309
310bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
311
312 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400313 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
314 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
315 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
316 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
317 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
318 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
319 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
320 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
321 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
322 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
323 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100324 };
325
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400326 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100327 auto XValue = CI->getOperand(0);
328 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100329
Kévin Petite8edce32019-04-10 14:23:32 +0100330 IRBuilder<> Builder(CI);
331 auto XmY = Builder.CreateSub(XValue, YValue);
332 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100333
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400334 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100335 auto F = CI->getCalledFunction();
336 auto finfo = FunctionInfo::getFromMangledName(F->getName());
337 if (finfo.isArgSigned(0)) {
338 Cmp = Builder.CreateICmpSGT(YValue, XValue);
339 } else {
340 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100341 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100342
Kévin Petite8edce32019-04-10 14:23:32 +0100343 return Builder.CreateSelect(Cmp, YmX, XmY);
344 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100345}
346
Kévin Petit8c1be282019-04-02 19:34:25 +0100347bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100348
Kévin Petite8edce32019-04-10 14:23:32 +0100349 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400350 "_Z8copysignff",
351 "_Z8copysignDv2_fS_",
352 "_Z8copysignDv3_fS_",
353 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100354 };
355
Kévin Petite8edce32019-04-10 14:23:32 +0100356 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
357 auto XValue = CI->getOperand(0);
358 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100359
Kévin Petite8edce32019-04-10 14:23:32 +0100360 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100361
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400362 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100363 if (Ty->isVectorTy()) {
364 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100365 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100366
Kévin Petite8edce32019-04-10 14:23:32 +0100367 // Return X with the sign of Y
368
369 // Sign bit masks
370 auto SignBit = IntTy->getScalarSizeInBits() - 1;
371 auto SignBitMask = 1 << SignBit;
372 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
373 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
374
375 IRBuilder<> Builder(CI);
376
377 // Extract sign of Y
378 auto YInt = Builder.CreateBitCast(YValue, IntTy);
379 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
380
381 // Clear sign bit in X
382 auto XInt = Builder.CreateBitCast(XValue, IntTy);
383 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
384
385 // Insert sign bit of Y into X
386 auto NewXInt = Builder.CreateOr(XInt, YSign);
387
388 // And cast back to floating-point
389 return Builder.CreateBitCast(NewXInt, Ty);
390 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100391}
392
David Neto22f144c2017-06-12 14:26:21 -0400393bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400394
Kévin Petite8edce32019-04-10 14:23:32 +0100395 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400396 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
397 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
398 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
399 };
400
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400401 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100402 // Recip has one arg.
403 auto Arg = CI->getOperand(0);
404 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
405 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
406 });
David Neto22f144c2017-06-12 14:26:21 -0400407}
408
409bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400410
Kévin Petite8edce32019-04-10 14:23:32 +0100411 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400412 "_Z11half_divideff", "_Z13native_divideff",
413 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
414 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
415 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
416 };
417
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400418 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100419 auto Op0 = CI->getOperand(0);
420 auto Op1 = CI->getOperand(1);
421 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
422 });
David Neto22f144c2017-06-12 14:26:21 -0400423}
424
425bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
426 bool Changed = false;
427
428 const std::map<const char *, const char *> Map = {
429 {"_Z5exp10f", "_Z3expf"},
430 {"_Z10half_exp10f", "_Z8half_expf"},
431 {"_Z12native_exp10f", "_Z10native_expf"},
432 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
433 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
434 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
435 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
436 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
437 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
438 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
439 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
440 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
441
442 for (auto Pair : Map) {
443 // If we find a function with the matching name.
444 if (auto F = M.getFunction(Pair.first)) {
445 SmallVector<Instruction *, 4> ToRemoves;
446
447 // Walk the users of the function.
448 for (auto &U : F->uses()) {
449 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
450 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
451
452 auto Arg = CI->getOperand(0);
453
454 // Constant of the natural log of 10 (ln(10)).
455 const double Ln10 =
456 2.302585092994045684017991454684364207601101488628772976033;
457
458 auto Mul = BinaryOperator::Create(
459 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
460 CI);
461
462 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
463
464 CI->replaceAllUsesWith(NewCI);
465
466 // Lastly, remember to remove the user.
467 ToRemoves.push_back(CI);
468 }
469 }
470
471 Changed = !ToRemoves.empty();
472
473 // And cleanup the calls we don't use anymore.
474 for (auto V : ToRemoves) {
475 V->eraseFromParent();
476 }
477
478 // And remove the function we don't need either too.
479 F->eraseFromParent();
480 }
481 }
482
483 return Changed;
484}
485
486bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
487 bool Changed = false;
488
489 const std::map<const char *, const char *> Map = {
490 {"_Z5log10f", "_Z3logf"},
491 {"_Z10half_log10f", "_Z8half_logf"},
492 {"_Z12native_log10f", "_Z10native_logf"},
493 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
494 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
495 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
496 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
497 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
498 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
499 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
500 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
501 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
502
503 for (auto Pair : Map) {
504 // If we find a function with the matching name.
505 if (auto F = M.getFunction(Pair.first)) {
506 SmallVector<Instruction *, 4> ToRemoves;
507
508 // Walk the users of the function.
509 for (auto &U : F->uses()) {
510 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
511 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
512
513 auto Arg = CI->getOperand(0);
514
515 // Constant of the reciprocal of the natural log of 10 (ln(10)).
516 const double Ln10 =
517 0.434294481903251827651128918916605082294397005803666566114;
518
519 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
520
521 auto Mul = BinaryOperator::Create(
522 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
523 "", CI);
524
525 CI->replaceAllUsesWith(Mul);
526
527 // Lastly, remember to remove the user.
528 ToRemoves.push_back(CI);
529 }
530 }
531
532 Changed = !ToRemoves.empty();
533
534 // And cleanup the calls we don't use anymore.
535 for (auto V : ToRemoves) {
536 V->eraseFromParent();
537 }
538
539 // And remove the function we don't need either too.
540 F->eraseFromParent();
541 }
542 }
543
544 return Changed;
545}
546
547bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
548 bool Changed = false;
549
550 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
551
552 const std::map<const char *, const char *> Map = {
553 {"_Z7barrierj", "__spirv_control_barrier"}};
554
555 for (auto Pair : Map) {
556 // If we find a function with the matching name.
557 if (auto F = M.getFunction(Pair.first)) {
558 SmallVector<Instruction *, 4> ToRemoves;
559
560 // Walk the users of the function.
561 for (auto &U : F->uses()) {
562 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
563 auto FType = F->getFunctionType();
564 SmallVector<Type *, 3> Params;
565 for (unsigned i = 0; i < 3; i++) {
566 Params.push_back(FType->getParamType(0));
567 }
568 auto NewFType =
569 FunctionType::get(FType->getReturnType(), Params, false);
570 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
alan-bakerb37f9732019-06-05 01:28:00 -0400571 cast<Function>(NewF.getCallee())->setCannotDuplicate();
David Neto22f144c2017-06-12 14:26:21 -0400572
573 auto Arg = CI->getOperand(0);
574
575 // We need to map the OpenCL constants to the SPIR-V equivalents.
576 const auto LocalMemFence =
577 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
578 const auto GlobalMemFence =
579 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
580 const auto ConstantSequentiallyConsistent = ConstantInt::get(
581 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
582 const auto ConstantScopeDevice =
583 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
584 const auto ConstantScopeWorkgroup =
585 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
586
587 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
588 const auto LocalMemFenceMask = BinaryOperator::Create(
589 Instruction::And, LocalMemFence, Arg, "", CI);
590 const auto WorkgroupShiftAmount =
591 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
592 clz(CLK_LOCAL_MEM_FENCE);
593 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
594 Instruction::Shl, LocalMemFenceMask,
595 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
596
597 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
598 const auto GlobalMemFenceMask = BinaryOperator::Create(
599 Instruction::And, GlobalMemFence, Arg, "", CI);
600 const auto UniformShiftAmount =
601 clz(spv::MemorySemanticsUniformMemoryMask) -
602 clz(CLK_GLOBAL_MEM_FENCE);
603 const auto MemorySemanticsUniform = BinaryOperator::Create(
604 Instruction::Shl, GlobalMemFenceMask,
605 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
606
607 // And combine the above together, also adding in
608 // MemorySemanticsSequentiallyConsistentMask.
609 auto MemorySemantics =
610 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
611 ConstantSequentiallyConsistent, "", CI);
612 MemorySemantics = BinaryOperator::Create(
613 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
614
615 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
616 // Device Scope, otherwise Workgroup Scope.
617 const auto Cmp =
618 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
619 GlobalMemFenceMask, GlobalMemFence, "", CI);
620 const auto MemoryScope = SelectInst::Create(
621 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
622
623 // Lastly, the Execution Scope is always Workgroup Scope.
624 const auto ExecutionScope = ConstantScopeWorkgroup;
625
626 auto NewCI = CallInst::Create(
627 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
628
629 CI->replaceAllUsesWith(NewCI);
630
631 // Lastly, remember to remove the user.
632 ToRemoves.push_back(CI);
633 }
634 }
635
636 Changed = !ToRemoves.empty();
637
638 // And cleanup the calls we don't use anymore.
639 for (auto V : ToRemoves) {
640 V->eraseFromParent();
641 }
642
643 // And remove the function we don't need either too.
644 F->eraseFromParent();
645 }
646 }
647
648 return Changed;
649}
650
651bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
652 bool Changed = false;
653
654 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
655
Neil Henning39672102017-09-29 14:33:13 +0100656 using Tuple = std::tuple<const char *, unsigned>;
657 const std::map<const char *, Tuple> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400658 {"_Z9mem_fencej", Tuple("__spirv_memory_barrier",
659 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100660 {"_Z14read_mem_fencej",
661 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
662 {"_Z15write_mem_fencej",
663 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400664
665 for (auto Pair : Map) {
666 // If we find a function with the matching name.
667 if (auto F = M.getFunction(Pair.first)) {
668 SmallVector<Instruction *, 4> ToRemoves;
669
670 // Walk the users of the function.
671 for (auto &U : F->uses()) {
672 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
673 auto FType = F->getFunctionType();
674 SmallVector<Type *, 2> Params;
675 for (unsigned i = 0; i < 2; i++) {
676 Params.push_back(FType->getParamType(0));
677 }
678 auto NewFType =
679 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100680 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400681
682 auto Arg = CI->getOperand(0);
683
684 // We need to map the OpenCL constants to the SPIR-V equivalents.
685 const auto LocalMemFence =
686 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
687 const auto GlobalMemFence =
688 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
689 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100690 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400691 const auto ConstantScopeDevice =
692 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
693
694 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
695 const auto LocalMemFenceMask = BinaryOperator::Create(
696 Instruction::And, LocalMemFence, Arg, "", CI);
697 const auto WorkgroupShiftAmount =
698 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
699 clz(CLK_LOCAL_MEM_FENCE);
700 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
701 Instruction::Shl, LocalMemFenceMask,
702 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
703
704 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
705 const auto GlobalMemFenceMask = BinaryOperator::Create(
706 Instruction::And, GlobalMemFence, Arg, "", CI);
707 const auto UniformShiftAmount =
708 clz(spv::MemorySemanticsUniformMemoryMask) -
709 clz(CLK_GLOBAL_MEM_FENCE);
710 const auto MemorySemanticsUniform = BinaryOperator::Create(
711 Instruction::Shl, GlobalMemFenceMask,
712 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
713
714 // And combine the above together, also adding in
715 // MemorySemanticsSequentiallyConsistentMask.
716 auto MemorySemantics =
717 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
718 ConstantMemorySemantics, "", CI);
719 MemorySemantics = BinaryOperator::Create(
720 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
721
722 // Memory Scope is always device.
723 const auto MemoryScope = ConstantScopeDevice;
724
725 auto NewCI =
726 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
727
728 CI->replaceAllUsesWith(NewCI);
729
730 // Lastly, remember to remove the user.
731 ToRemoves.push_back(CI);
732 }
733 }
734
735 Changed = !ToRemoves.empty();
736
737 // And cleanup the calls we don't use anymore.
738 for (auto V : ToRemoves) {
739 V->eraseFromParent();
740 }
741
742 // And remove the function we don't need either too.
743 F->eraseFromParent();
744 }
745 }
746
747 return Changed;
748}
749
750bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
751 bool Changed = false;
752
753 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
754 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
755 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
756 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
757 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
758 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
759 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
760 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
761 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
762 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
763 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
764 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
765 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
766 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
767 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
768 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
769 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
770 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
771 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
772 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
773 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
774 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
775 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
776 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
777 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
778 };
779
780 for (auto Pair : Map) {
781 // If we find a function with the matching name.
782 if (auto F = M.getFunction(Pair.first)) {
783 SmallVector<Instruction *, 4> ToRemoves;
784
785 // Walk the users of the function.
786 for (auto &U : F->uses()) {
787 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
788 // The predicate to use in the CmpInst.
789 auto Predicate = Pair.second.first;
790
791 // The value to return for true.
792 auto TrueValue =
793 ConstantInt::getSigned(CI->getType(), Pair.second.second);
794
795 // The value to return for false.
796 auto FalseValue = Constant::getNullValue(CI->getType());
797
798 auto Arg1 = CI->getOperand(0);
799 auto Arg2 = CI->getOperand(1);
800
801 const auto Cmp =
802 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
803
804 const auto Select =
805 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
806
807 CI->replaceAllUsesWith(Select);
808
809 // Lastly, remember to remove the user.
810 ToRemoves.push_back(CI);
811 }
812 }
813
814 Changed = !ToRemoves.empty();
815
816 // And cleanup the calls we don't use anymore.
817 for (auto V : ToRemoves) {
818 V->eraseFromParent();
819 }
820
821 // And remove the function we don't need either too.
822 F->eraseFromParent();
823 }
824 }
825
826 return Changed;
827}
828
829bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
830 bool Changed = false;
831
832 const std::map<const char *, std::pair<const char *, int32_t>> Map = {
833 {"_Z5isinff", {"__spirv_isinff", 1}},
834 {"_Z5isinfDv2_f", {"__spirv_isinfDv2_f", -1}},
835 {"_Z5isinfDv3_f", {"__spirv_isinfDv3_f", -1}},
836 {"_Z5isinfDv4_f", {"__spirv_isinfDv4_f", -1}},
837 {"_Z5isnanf", {"__spirv_isnanf", 1}},
838 {"_Z5isnanDv2_f", {"__spirv_isnanDv2_f", -1}},
839 {"_Z5isnanDv3_f", {"__spirv_isnanDv3_f", -1}},
840 {"_Z5isnanDv4_f", {"__spirv_isnanDv4_f", -1}},
841 };
842
843 for (auto Pair : Map) {
844 // If we find a function with the matching name.
845 if (auto F = M.getFunction(Pair.first)) {
846 SmallVector<Instruction *, 4> ToRemoves;
847
848 // Walk the users of the function.
849 for (auto &U : F->uses()) {
850 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
851 const auto CITy = CI->getType();
852
853 // The fake SPIR-V intrinsic to generate.
854 auto SPIRVIntrinsic = Pair.second.first;
855
856 // The value to return for true.
857 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
858
859 // The value to return for false.
860 auto FalseValue = Constant::getNullValue(CITy);
861
862 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
863 M.getContext(),
864 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
865
866 auto NewFType =
867 FunctionType::get(CorrespondingBoolTy,
868 F->getFunctionType()->getParamType(0), false);
869
870 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
871
872 auto Arg = CI->getOperand(0);
873
874 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
875
876 const auto Select =
877 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
878
879 CI->replaceAllUsesWith(Select);
880
881 // Lastly, remember to remove the user.
882 ToRemoves.push_back(CI);
883 }
884 }
885
886 Changed = !ToRemoves.empty();
887
888 // And cleanup the calls we don't use anymore.
889 for (auto V : ToRemoves) {
890 V->eraseFromParent();
891 }
892
893 // And remove the function we don't need either too.
894 F->eraseFromParent();
895 }
896 }
897
898 return Changed;
899}
900
901bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
902 bool Changed = false;
903
904 const std::map<const char *, const char *> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000905 // all
alan-bakerb39c8262019-03-08 14:03:37 -0500906 {"_Z3allc", ""},
907 {"_Z3allDv2_c", "__spirv_allDv2_c"},
908 {"_Z3allDv3_c", "__spirv_allDv3_c"},
909 {"_Z3allDv4_c", "__spirv_allDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000910 {"_Z3alls", ""},
911 {"_Z3allDv2_s", "__spirv_allDv2_s"},
912 {"_Z3allDv3_s", "__spirv_allDv3_s"},
913 {"_Z3allDv4_s", "__spirv_allDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400914 {"_Z3alli", ""},
915 {"_Z3allDv2_i", "__spirv_allDv2_i"},
916 {"_Z3allDv3_i", "__spirv_allDv3_i"},
917 {"_Z3allDv4_i", "__spirv_allDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000918 {"_Z3alll", ""},
919 {"_Z3allDv2_l", "__spirv_allDv2_l"},
920 {"_Z3allDv3_l", "__spirv_allDv3_l"},
921 {"_Z3allDv4_l", "__spirv_allDv4_l"},
922
923 // any
alan-bakerb39c8262019-03-08 14:03:37 -0500924 {"_Z3anyc", ""},
925 {"_Z3anyDv2_c", "__spirv_anyDv2_c"},
926 {"_Z3anyDv3_c", "__spirv_anyDv3_c"},
927 {"_Z3anyDv4_c", "__spirv_anyDv4_c"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000928 {"_Z3anys", ""},
929 {"_Z3anyDv2_s", "__spirv_anyDv2_s"},
930 {"_Z3anyDv3_s", "__spirv_anyDv3_s"},
931 {"_Z3anyDv4_s", "__spirv_anyDv4_s"},
David Neto22f144c2017-06-12 14:26:21 -0400932 {"_Z3anyi", ""},
933 {"_Z3anyDv2_i", "__spirv_anyDv2_i"},
934 {"_Z3anyDv3_i", "__spirv_anyDv3_i"},
935 {"_Z3anyDv4_i", "__spirv_anyDv4_i"},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000936 {"_Z3anyl", ""},
937 {"_Z3anyDv2_l", "__spirv_anyDv2_l"},
938 {"_Z3anyDv3_l", "__spirv_anyDv3_l"},
939 {"_Z3anyDv4_l", "__spirv_anyDv4_l"},
David Neto22f144c2017-06-12 14:26:21 -0400940 };
941
942 for (auto Pair : Map) {
943 // If we find a function with the matching name.
944 if (auto F = M.getFunction(Pair.first)) {
945 SmallVector<Instruction *, 4> ToRemoves;
946
947 // Walk the users of the function.
948 for (auto &U : F->uses()) {
949 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
950 // The fake SPIR-V intrinsic to generate.
951 auto SPIRVIntrinsic = Pair.second;
952
953 auto Arg = CI->getOperand(0);
954
955 Value *V;
956
Kévin Petitfd27cca2018-10-31 13:00:17 +0000957 // If the argument is a 32-bit int, just use a shift
958 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
959 V = BinaryOperator::Create(Instruction::LShr, Arg,
960 ConstantInt::get(Arg->getType(), 31), "",
961 CI);
962 } else {
David Neto22f144c2017-06-12 14:26:21 -0400963 // The value for zero to compare against.
964 const auto ZeroValue = Constant::getNullValue(Arg->getType());
965
David Neto22f144c2017-06-12 14:26:21 -0400966 // The value to return for true.
967 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
968
969 // The value to return for false.
970 const auto FalseValue = Constant::getNullValue(CI->getType());
971
Kévin Petitfd27cca2018-10-31 13:00:17 +0000972 const auto Cmp = CmpInst::Create(
973 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
974
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400975 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000976
977 // If we have a function to call, call it!
978 if (0 < strlen(SPIRVIntrinsic)) {
979
980 const auto NewFType = FunctionType::get(
981 Type::getInt1Ty(M.getContext()), Cmp->getType(), false);
982
983 const auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
984
985 const auto NewCI = CallInst::Create(NewF, Cmp, "", CI);
986
987 SelectSource = NewCI;
988
989 } else {
990 SelectSource = Cmp;
991 }
992
993 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400994 }
995
996 CI->replaceAllUsesWith(V);
997
998 // Lastly, remember to remove the user.
999 ToRemoves.push_back(CI);
1000 }
1001 }
1002
1003 Changed = !ToRemoves.empty();
1004
1005 // And cleanup the calls we don't use anymore.
1006 for (auto V : ToRemoves) {
1007 V->eraseFromParent();
1008 }
1009
1010 // And remove the function we don't need either too.
1011 F->eraseFromParent();
1012 }
1013 }
1014
1015 return Changed;
1016}
1017
Kévin Petitbf0036c2019-03-06 13:57:10 +00001018bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1019 bool Changed = false;
1020
1021 for (auto const &SymVal : M.getValueSymbolTable()) {
1022 // Skip symbols whose name doesn't match
1023 if (!SymVal.getKey().startswith("_Z8upsample")) {
1024 continue;
1025 }
1026 // Is there a function going by that name?
1027 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1028
1029 SmallVector<Instruction *, 4> ToRemoves;
1030
1031 // Walk the users of the function.
1032 for (auto &U : F->uses()) {
1033 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1034
1035 // Get arguments
1036 auto HiValue = CI->getOperand(0);
1037 auto LoValue = CI->getOperand(1);
1038
1039 // Don't touch overloads that aren't in OpenCL C
1040 auto HiType = HiValue->getType();
1041 auto LoType = LoValue->getType();
1042
1043 if (HiType != LoType) {
1044 continue;
1045 }
1046
1047 if (!HiType->isIntOrIntVectorTy()) {
1048 continue;
1049 }
1050
1051 if (HiType->getScalarSizeInBits() * 2 !=
1052 CI->getType()->getScalarSizeInBits()) {
1053 continue;
1054 }
1055
1056 if ((HiType->getScalarSizeInBits() != 8) &&
1057 (HiType->getScalarSizeInBits() != 16) &&
1058 (HiType->getScalarSizeInBits() != 32)) {
1059 continue;
1060 }
1061
1062 if (HiType->isVectorTy()) {
1063 if ((HiType->getVectorNumElements() != 2) &&
1064 (HiType->getVectorNumElements() != 3) &&
1065 (HiType->getVectorNumElements() != 4) &&
1066 (HiType->getVectorNumElements() != 8) &&
1067 (HiType->getVectorNumElements() != 16)) {
1068 continue;
1069 }
1070 }
1071
1072 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001073 auto HiCast =
1074 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1075 auto LoCast =
1076 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001077
1078 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001079 auto ShiftAmount =
1080 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001081 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1082 ShiftAmount, "", CI);
1083
1084 // OR both results
1085 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1086 "", CI);
1087
1088 // Replace call with the expression
1089 CI->replaceAllUsesWith(V);
1090
1091 // Lastly, remember to remove the user.
1092 ToRemoves.push_back(CI);
1093 }
1094 }
1095
1096 Changed = !ToRemoves.empty();
1097
1098 // And cleanup the calls we don't use anymore.
1099 for (auto V : ToRemoves) {
1100 V->eraseFromParent();
1101 }
1102
1103 // And remove the function we don't need either too.
1104 F->eraseFromParent();
1105 }
1106 }
1107
1108 return Changed;
1109}
1110
Kévin Petitd44eef52019-03-08 13:22:14 +00001111bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1112 bool Changed = false;
1113
1114 for (auto const &SymVal : M.getValueSymbolTable()) {
1115 // Skip symbols whose name doesn't match
1116 if (!SymVal.getKey().startswith("_Z6rotate")) {
1117 continue;
1118 }
1119 // Is there a function going by that name?
1120 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1121
1122 SmallVector<Instruction *, 4> ToRemoves;
1123
1124 // Walk the users of the function.
1125 for (auto &U : F->uses()) {
1126 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1127
1128 // Get arguments
1129 auto SrcValue = CI->getOperand(0);
1130 auto RotAmount = CI->getOperand(1);
1131
1132 // Don't touch overloads that aren't in OpenCL C
1133 auto SrcType = SrcValue->getType();
1134 auto RotType = RotAmount->getType();
1135
1136 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1137 continue;
1138 }
1139
1140 if (!SrcType->isIntOrIntVectorTy()) {
1141 continue;
1142 }
1143
1144 if ((SrcType->getScalarSizeInBits() != 8) &&
1145 (SrcType->getScalarSizeInBits() != 16) &&
1146 (SrcType->getScalarSizeInBits() != 32) &&
1147 (SrcType->getScalarSizeInBits() != 64)) {
1148 continue;
1149 }
1150
1151 if (SrcType->isVectorTy()) {
1152 if ((SrcType->getVectorNumElements() != 2) &&
1153 (SrcType->getVectorNumElements() != 3) &&
1154 (SrcType->getVectorNumElements() != 4) &&
1155 (SrcType->getVectorNumElements() != 8) &&
1156 (SrcType->getVectorNumElements() != 16)) {
1157 continue;
1158 }
1159 }
1160
1161 // The approach used is to shift the top bits down, the bottom bits up
1162 // and OR the two shifted values.
1163
1164 // The rotation amount is to be treated modulo the element size.
1165 // Since SPIR-V shift ops don't support this, let's apply the
1166 // modulo ahead of shifting. The element size is always a power of
1167 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001168 auto ModMask =
1169 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001170 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1171 ModMask, "", CI);
1172
1173 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001174 auto ScalarSize =
1175 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001176 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1177 RotAmount, "", CI);
1178
1179 // Now shift the bottom bits up and the top bits down
1180 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1181 RotAmount, "", CI);
1182 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1183 DownAmount, "", CI);
1184
1185 // Finally OR the two shifted values
1186 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1187 HiRotated, "", CI);
1188
1189 // Replace call with the expression
1190 CI->replaceAllUsesWith(V);
1191
1192 // Lastly, remember to remove the user.
1193 ToRemoves.push_back(CI);
1194 }
1195 }
1196
1197 Changed = !ToRemoves.empty();
1198
1199 // And cleanup the calls we don't use anymore.
1200 for (auto V : ToRemoves) {
1201 V->eraseFromParent();
1202 }
1203
1204 // And remove the function we don't need either too.
1205 F->eraseFromParent();
1206 }
1207 }
1208
1209 return Changed;
1210}
1211
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001212bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1213 bool Changed = false;
1214
1215 for (auto const &SymVal : M.getValueSymbolTable()) {
1216
1217 // Skip symbols whose name obviously doesn't match
1218 if (!SymVal.getKey().contains("convert_")) {
1219 continue;
1220 }
1221
1222 // Is there a function going by that name?
1223 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1224
1225 // Get info from the mangled name
1226 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001227 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001228
1229 // All functions of interest are handled by our mangled name parser
1230 if (!parsed) {
1231 continue;
1232 }
1233
1234 // Move on if this isn't a call to convert_
1235 if (!finfo.name.startswith("convert_")) {
1236 continue;
1237 }
1238
1239 // Extract the destination type from the function name
1240 StringRef DstTypeName = finfo.name;
1241 DstTypeName.consume_front("convert_");
1242
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001243 auto DstSignedNess =
1244 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1245 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1246 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1247 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1248 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1249 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1250 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1251 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1252 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1253 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001254
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001255 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001256 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001257
1258 SmallVector<Instruction *, 4> ToRemoves;
1259
1260 // Walk the users of the function.
1261 for (auto &U : F->uses()) {
1262 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1263
1264 // Get arguments
1265 auto SrcValue = CI->getOperand(0);
1266
1267 // Don't touch overloads that aren't in OpenCL C
1268 auto SrcType = SrcValue->getType();
1269 auto DstType = CI->getType();
1270
1271 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1272 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1273 continue;
1274 }
1275
1276 if (SrcType->isVectorTy()) {
1277
1278 if (SrcType->getVectorNumElements() !=
1279 DstType->getVectorNumElements()) {
1280 continue;
1281 }
1282
1283 if ((SrcType->getVectorNumElements() != 2) &&
1284 (SrcType->getVectorNumElements() != 3) &&
1285 (SrcType->getVectorNumElements() != 4) &&
1286 (SrcType->getVectorNumElements() != 8) &&
1287 (SrcType->getVectorNumElements() != 16)) {
1288 continue;
1289 }
1290 }
1291
1292 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1293 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1294
1295 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1296 bool DstIsInt = DstType->isIntOrIntVectorTy();
1297
1298 Value *V;
1299 if (SrcIsFloat && DstIsFloat) {
1300 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1301 } else if (SrcIsFloat && DstIsInt) {
1302 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001303 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1304 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001305 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001306 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1307 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001308 }
1309 } else if (SrcIsInt && DstIsFloat) {
1310 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001311 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1312 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001313 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001314 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1315 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001316 }
1317 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001318 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1319 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001320 } else {
1321 // Not something we're supposed to handle, just move on
1322 continue;
1323 }
1324
1325 // Replace call with the expression
1326 CI->replaceAllUsesWith(V);
1327
1328 // Lastly, remember to remove the user.
1329 ToRemoves.push_back(CI);
1330 }
1331 }
1332
1333 Changed = !ToRemoves.empty();
1334
1335 // And cleanup the calls we don't use anymore.
1336 for (auto V : ToRemoves) {
1337 V->eraseFromParent();
1338 }
1339
1340 // And remove the function we don't need either too.
1341 F->eraseFromParent();
1342 }
1343 }
1344
1345 return Changed;
1346}
1347
Kévin Petit8a560882019-03-21 15:24:34 +00001348bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1349 bool Changed = false;
1350
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001351 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001352
Kévin Petit617a76d2019-04-04 13:54:16 +01001353 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001354 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1355 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1356
1357 // Skip symbols whose name doesn't match
1358 if (!isMad && !isMul) {
1359 continue;
1360 }
1361
1362 // Is there a function going by that name?
1363 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001364 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001365 }
1366 }
1367
Kévin Petit617a76d2019-04-04 13:54:16 +01001368 for (auto F : FnWorklist) {
1369 SmallVector<Instruction *, 4> ToRemoves;
1370
1371 bool isMad = F->getName().startswith("_Z6mad_hi");
1372 // Walk the users of the function.
1373 for (auto &U : F->uses()) {
1374 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1375
1376 // Get arguments
1377 auto AValue = CI->getOperand(0);
1378 auto BValue = CI->getOperand(1);
1379 auto CValue = CI->getOperand(2);
1380
1381 // Don't touch overloads that aren't in OpenCL C
1382 auto AType = AValue->getType();
1383 auto BType = BValue->getType();
1384 auto CType = CValue->getType();
1385
1386 if ((AType != BType) || (CI->getType() != AType) ||
1387 (isMad && (AType != CType))) {
1388 continue;
1389 }
1390
1391 if (!AType->isIntOrIntVectorTy()) {
1392 continue;
1393 }
1394
1395 if ((AType->getScalarSizeInBits() != 8) &&
1396 (AType->getScalarSizeInBits() != 16) &&
1397 (AType->getScalarSizeInBits() != 32) &&
1398 (AType->getScalarSizeInBits() != 64)) {
1399 continue;
1400 }
1401
1402 if (AType->isVectorTy()) {
1403 if ((AType->getVectorNumElements() != 2) &&
1404 (AType->getVectorNumElements() != 3) &&
1405 (AType->getVectorNumElements() != 4) &&
1406 (AType->getVectorNumElements() != 8) &&
1407 (AType->getVectorNumElements() != 16)) {
1408 continue;
1409 }
1410 }
1411
1412 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001413 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001414
1415 // Select the appropriate signed/unsigned SPIR-V op
1416 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001417 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001418 opcode = spv::OpSMulExtended;
1419 } else {
1420 opcode = spv::OpUMulExtended;
1421 }
1422
1423 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001424 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001425 auto ExMulRetType = StructType::create(TwoValueType);
1426
1427 // Call the SPIR-V op
1428 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1429 ExMulRetType, {AValue, BValue});
1430
1431 // Get the high part of the result
1432 unsigned Idxs[] = {1};
1433 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1434
1435 // If we're handling a mad_hi, add the third argument to the result
1436 if (isMad) {
1437 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1438 }
1439
1440 // Replace call with the expression
1441 CI->replaceAllUsesWith(V);
1442
1443 // Lastly, remember to remove the user.
1444 ToRemoves.push_back(CI);
1445 }
1446 }
1447
1448 Changed = !ToRemoves.empty();
1449
1450 // And cleanup the calls we don't use anymore.
1451 for (auto V : ToRemoves) {
1452 V->eraseFromParent();
1453 }
1454
1455 // And remove the function we don't need either too.
1456 F->eraseFromParent();
1457 }
1458
Kévin Petit8a560882019-03-21 15:24:34 +00001459 return Changed;
1460}
1461
Kévin Petitf5b78a22018-10-25 14:32:17 +00001462bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1463 bool Changed = false;
1464
1465 for (auto const &SymVal : M.getValueSymbolTable()) {
1466 // Skip symbols whose name doesn't match
1467 if (!SymVal.getKey().startswith("_Z6select")) {
1468 continue;
1469 }
1470 // Is there a function going by that name?
1471 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1472
1473 SmallVector<Instruction *, 4> ToRemoves;
1474
1475 // Walk the users of the function.
1476 for (auto &U : F->uses()) {
1477 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1478
1479 // Get arguments
1480 auto FalseValue = CI->getOperand(0);
1481 auto TrueValue = CI->getOperand(1);
1482 auto PredicateValue = CI->getOperand(2);
1483
1484 // Don't touch overloads that aren't in OpenCL C
1485 auto FalseType = FalseValue->getType();
1486 auto TrueType = TrueValue->getType();
1487 auto PredicateType = PredicateValue->getType();
1488
1489 if (FalseType != TrueType) {
1490 continue;
1491 }
1492
1493 if (!PredicateType->isIntOrIntVectorTy()) {
1494 continue;
1495 }
1496
1497 if (!FalseType->isIntOrIntVectorTy() &&
1498 !FalseType->getScalarType()->isFloatingPointTy()) {
1499 continue;
1500 }
1501
1502 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1503 continue;
1504 }
1505
1506 if (FalseType->getScalarSizeInBits() !=
1507 PredicateType->getScalarSizeInBits()) {
1508 continue;
1509 }
1510
1511 if (FalseType->isVectorTy()) {
1512 if (FalseType->getVectorNumElements() !=
1513 PredicateType->getVectorNumElements()) {
1514 continue;
1515 }
1516
1517 if ((FalseType->getVectorNumElements() != 2) &&
1518 (FalseType->getVectorNumElements() != 3) &&
1519 (FalseType->getVectorNumElements() != 4) &&
1520 (FalseType->getVectorNumElements() != 8) &&
1521 (FalseType->getVectorNumElements() != 16)) {
1522 continue;
1523 }
1524 }
1525
1526 // Create constant
1527 const auto ZeroValue = Constant::getNullValue(PredicateType);
1528
1529 // Scalar and vector are to be treated differently
1530 CmpInst::Predicate Pred;
1531 if (PredicateType->isVectorTy()) {
1532 Pred = CmpInst::ICMP_SLT;
1533 } else {
1534 Pred = CmpInst::ICMP_NE;
1535 }
1536
1537 // Create comparison instruction
1538 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1539 ZeroValue, "", CI);
1540
1541 // Create select
1542 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1543
1544 // Replace call with the selection
1545 CI->replaceAllUsesWith(V);
1546
1547 // Lastly, remember to remove the user.
1548 ToRemoves.push_back(CI);
1549 }
1550 }
1551
1552 Changed = !ToRemoves.empty();
1553
1554 // And cleanup the calls we don't use anymore.
1555 for (auto V : ToRemoves) {
1556 V->eraseFromParent();
1557 }
1558
1559 // And remove the function we don't need either too.
1560 F->eraseFromParent();
1561 }
1562 }
1563
1564 return Changed;
1565}
1566
Kévin Petite7d0cce2018-10-31 12:38:56 +00001567bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1568 bool Changed = false;
1569
1570 for (auto const &SymVal : M.getValueSymbolTable()) {
1571 // Skip symbols whose name doesn't match
1572 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1573 continue;
1574 }
1575 // Is there a function going by that name?
1576 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1577
1578 SmallVector<Instruction *, 4> ToRemoves;
1579
1580 // Walk the users of the function.
1581 for (auto &U : F->uses()) {
1582 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1583
1584 if (CI->getNumOperands() != 4) {
1585 continue;
1586 }
1587
1588 // Get arguments
1589 auto FalseValue = CI->getOperand(0);
1590 auto TrueValue = CI->getOperand(1);
1591 auto PredicateValue = CI->getOperand(2);
1592
1593 // Don't touch overloads that aren't in OpenCL C
1594 auto FalseType = FalseValue->getType();
1595 auto TrueType = TrueValue->getType();
1596 auto PredicateType = PredicateValue->getType();
1597
1598 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1599 continue;
1600 }
1601
1602 if (TrueType->isVectorTy()) {
1603 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1604 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001605 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001606 }
1607 if ((TrueType->getVectorNumElements() != 2) &&
1608 (TrueType->getVectorNumElements() != 3) &&
1609 (TrueType->getVectorNumElements() != 4) &&
1610 (TrueType->getVectorNumElements() != 8) &&
1611 (TrueType->getVectorNumElements() != 16)) {
1612 continue;
1613 }
1614 }
1615
1616 // Remember the type of the operands
1617 auto OpType = TrueType;
1618
1619 // The actual bit selection will always be done on an integer type,
1620 // declare it here
1621 Type *BitType;
1622
1623 // If the operands are float, then bitcast them to int
1624 if (OpType->getScalarType()->isFloatingPointTy()) {
1625
1626 // First create the new type
1627 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1628 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1629 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001630 BitType =
1631 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001632 }
1633
1634 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001635 PredicateValue =
1636 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1637 FalseValue =
1638 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1639 TrueValue =
1640 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001641
1642 } else {
1643 // The operands have an integer type, use it directly
1644 BitType = OpType;
1645 }
1646
1647 // All the operands are now always integers
1648 // implement as (c & b) | (~c & a)
1649
1650 // Create our negated predicate value
1651 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001652 auto NotPredicateValue = BinaryOperator::Create(
1653 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001654
1655 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001656 auto BitsFalse = BinaryOperator::Create(
1657 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1658 auto BitsTrue = BinaryOperator::Create(
1659 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001660
1661 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1662 BitsTrue, "", CI);
1663
1664 // If we were dealing with a floating point type, we must bitcast
1665 // the result back to that
1666 if (OpType->getScalarType()->isFloatingPointTy()) {
1667 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1668 }
1669
1670 // Replace call with our new code
1671 CI->replaceAllUsesWith(V);
1672
1673 // Lastly, remember to remove the user.
1674 ToRemoves.push_back(CI);
1675 }
1676 }
1677
1678 Changed = !ToRemoves.empty();
1679
1680 // And cleanup the calls we don't use anymore.
1681 for (auto V : ToRemoves) {
1682 V->eraseFromParent();
1683 }
1684
1685 // And remove the function we don't need either too.
1686 F->eraseFromParent();
1687 }
1688 }
1689
1690 return Changed;
1691}
1692
Kévin Petit6b0a9532018-10-30 20:00:39 +00001693bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1694 bool Changed = false;
1695
1696 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001697 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1698 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1699 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1700 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1701 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1702 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001703 };
1704
1705 for (auto Pair : Map) {
1706 // If we find a function with the matching name.
1707 if (auto F = M.getFunction(Pair.first)) {
1708 SmallVector<Instruction *, 4> ToRemoves;
1709
1710 // Walk the users of the function.
1711 for (auto &U : F->uses()) {
1712 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1713
1714 auto ReplacementFn = Pair.second;
1715
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001716 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001717 Value *VectorArg;
1718
1719 // First figure out which function we're dealing with
1720 if (F->getName().startswith("_Z10smoothstep")) {
1721 ArgsToSplat.push_back(CI->getOperand(1));
1722 VectorArg = CI->getOperand(2);
1723 } else {
1724 VectorArg = CI->getOperand(1);
1725 }
1726
1727 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001728 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001729 auto VecType = VectorArg->getType();
1730
1731 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001732 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001733 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001734 auto index =
1735 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1736 NewVectorArg =
1737 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001738 }
1739 SplatArgs.push_back(NewVectorArg);
1740 }
1741
1742 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001743 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1744 const auto NewFType =
1745 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001746
1747 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1748
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001749 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001750 for (auto arg : SplatArgs) {
1751 NewArgs.push_back(arg);
1752 }
1753 NewArgs.push_back(VectorArg);
1754
1755 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1756
1757 CI->replaceAllUsesWith(NewCI);
1758
1759 // Lastly, remember to remove the user.
1760 ToRemoves.push_back(CI);
1761 }
1762 }
1763
1764 Changed = !ToRemoves.empty();
1765
1766 // And cleanup the calls we don't use anymore.
1767 for (auto V : ToRemoves) {
1768 V->eraseFromParent();
1769 }
1770
1771 // And remove the function we don't need either too.
1772 F->eraseFromParent();
1773 }
1774 }
1775
1776 return Changed;
1777}
1778
David Neto22f144c2017-06-12 14:26:21 -04001779bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1780 bool Changed = false;
1781
1782 const std::map<const char *, Instruction::BinaryOps> Map = {
1783 {"_Z7signbitf", Instruction::LShr},
1784 {"_Z7signbitDv2_f", Instruction::AShr},
1785 {"_Z7signbitDv3_f", Instruction::AShr},
1786 {"_Z7signbitDv4_f", Instruction::AShr},
1787 };
1788
1789 for (auto Pair : Map) {
1790 // If we find a function with the matching name.
1791 if (auto F = M.getFunction(Pair.first)) {
1792 SmallVector<Instruction *, 4> ToRemoves;
1793
1794 // Walk the users of the function.
1795 for (auto &U : F->uses()) {
1796 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1797 auto Arg = CI->getOperand(0);
1798
1799 auto Bitcast =
1800 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1801
1802 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1803 ConstantInt::get(CI->getType(), 31),
1804 "", CI);
1805
1806 CI->replaceAllUsesWith(Shr);
1807
1808 // Lastly, remember to remove the user.
1809 ToRemoves.push_back(CI);
1810 }
1811 }
1812
1813 Changed = !ToRemoves.empty();
1814
1815 // And cleanup the calls we don't use anymore.
1816 for (auto V : ToRemoves) {
1817 V->eraseFromParent();
1818 }
1819
1820 // And remove the function we don't need either too.
1821 F->eraseFromParent();
1822 }
1823 }
1824
1825 return Changed;
1826}
1827
1828bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1829 bool Changed = false;
1830
1831 const std::map<const char *,
1832 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1833 Map = {
1834 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1835 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1836 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1837 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1838 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1839 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1840 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1841 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1842 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1843 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1844 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1845 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1846 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1847 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1848 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1849 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1850 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1851 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1852 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1853 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1854 };
1855
1856 for (auto Pair : Map) {
1857 // If we find a function with the matching name.
1858 if (auto F = M.getFunction(Pair.first)) {
1859 SmallVector<Instruction *, 4> ToRemoves;
1860
1861 // Walk the users of the function.
1862 for (auto &U : F->uses()) {
1863 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1864 // The multiply instruction to use.
1865 auto MulInst = Pair.second.first;
1866
1867 // The add instruction to use.
1868 auto AddInst = Pair.second.second;
1869
1870 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1871
1872 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1873 CI->getArgOperand(1), "", CI);
1874
1875 if (Instruction::BinaryOpsEnd != AddInst) {
1876 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1877 CI);
1878 }
1879
1880 CI->replaceAllUsesWith(I);
1881
1882 // Lastly, remember to remove the user.
1883 ToRemoves.push_back(CI);
1884 }
1885 }
1886
1887 Changed = !ToRemoves.empty();
1888
1889 // And cleanup the calls we don't use anymore.
1890 for (auto V : ToRemoves) {
1891 V->eraseFromParent();
1892 }
1893
1894 // And remove the function we don't need either too.
1895 F->eraseFromParent();
1896 }
1897 }
1898
1899 return Changed;
1900}
1901
Derek Chowcfd368b2017-10-19 20:58:45 -07001902bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1903 bool Changed = false;
1904
1905 struct VectorStoreOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001906 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001907 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001908 Type *(*get_scalar_type_function)(LLVMContext &);
1909 } vector_store_ops[] = {// TODO(derekjchow): Expand this list.
1910 {"_Z7vstore4Dv4_fjPU3AS1f", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001911
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001912 for (const auto &Op : vector_store_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001913 auto Name = Op.name;
1914 auto N = Op.n;
1915 auto TypeFn = Op.get_scalar_type_function;
1916 if (auto F = M.getFunction(Name)) {
1917 SmallVector<Instruction *, 4> ToRemoves;
1918
1919 // Walk the users of the function.
1920 for (auto &U : F->uses()) {
1921 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1922 // The value argument from vstoren.
1923 auto Arg0 = CI->getOperand(0);
1924
1925 // The index argument from vstoren.
1926 auto Arg1 = CI->getOperand(1);
1927
1928 // The pointer argument from vstoren.
1929 auto Arg2 = CI->getOperand(2);
1930
1931 // Get types.
1932 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1933 auto ScalarNPointerTy = PointerType::get(
1934 ScalarNTy, Arg2->getType()->getPointerAddressSpace());
1935
1936 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001937 auto Cast =
1938 CastInst::CreatePointerCast(Arg2, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001939 // Index to correct address
1940 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg1, "", CI);
1941 // Store
1942 auto Store = new StoreInst(Arg0, Index, CI);
1943
1944 CI->replaceAllUsesWith(Store);
1945 ToRemoves.push_back(CI);
1946 }
1947 }
1948
1949 Changed = !ToRemoves.empty();
1950
1951 // And cleanup the calls we don't use anymore.
1952 for (auto V : ToRemoves) {
1953 V->eraseFromParent();
1954 }
1955
1956 // And remove the function we don't need either too.
1957 F->eraseFromParent();
1958 }
1959 }
1960
1961 return Changed;
1962}
1963
1964bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1965 bool Changed = false;
1966
1967 struct VectorLoadOps {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001968 const char *name;
Derek Chowcfd368b2017-10-19 20:58:45 -07001969 int n;
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001970 Type *(*get_scalar_type_function)(LLVMContext &);
1971 } vector_load_ops[] = {// TODO(derekjchow): Expand this list.
1972 {"_Z6vload4jPU3AS1Kf", 4, Type::getFloatTy}};
Derek Chowcfd368b2017-10-19 20:58:45 -07001973
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001974 for (const auto &Op : vector_load_ops) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001975 auto Name = Op.name;
1976 auto N = Op.n;
1977 auto TypeFn = Op.get_scalar_type_function;
1978 // If we find a function with the matching name.
1979 if (auto F = M.getFunction(Name)) {
1980 SmallVector<Instruction *, 4> ToRemoves;
1981
1982 // Walk the users of the function.
1983 for (auto &U : F->uses()) {
1984 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1985 // The index argument from vloadn.
1986 auto Arg0 = CI->getOperand(0);
1987
1988 // The pointer argument from vloadn.
1989 auto Arg1 = CI->getOperand(1);
1990
1991 // Get types.
1992 auto ScalarNTy = VectorType::get(TypeFn(M.getContext()), N);
1993 auto ScalarNPointerTy = PointerType::get(
1994 ScalarNTy, Arg1->getType()->getPointerAddressSpace());
1995
1996 // Cast to scalarn
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001997 auto Cast =
1998 CastInst::CreatePointerCast(Arg1, ScalarNPointerTy, "", CI);
Derek Chowcfd368b2017-10-19 20:58:45 -07001999 // Index to correct address
2000 auto Index = GetElementPtrInst::Create(ScalarNTy, Cast, Arg0, "", CI);
2001 // Load
2002 auto Load = new LoadInst(Index, "", CI);
2003
2004 CI->replaceAllUsesWith(Load);
2005 ToRemoves.push_back(CI);
2006 }
2007 }
2008
2009 Changed = !ToRemoves.empty();
2010
2011 // And cleanup the calls we don't use anymore.
2012 for (auto V : ToRemoves) {
2013 V->eraseFromParent();
2014 }
2015
2016 // And remove the function we don't need either too.
2017 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002018 }
2019 }
2020
2021 return Changed;
2022}
2023
David Neto22f144c2017-06-12 14:26:21 -04002024bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2025 bool Changed = false;
2026
2027 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2028 "_Z10vload_halfjPU3AS2KDh"};
2029
2030 for (auto Name : Map) {
2031 // If we find a function with the matching name.
2032 if (auto F = M.getFunction(Name)) {
2033 SmallVector<Instruction *, 4> ToRemoves;
2034
2035 // Walk the users of the function.
2036 for (auto &U : F->uses()) {
2037 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2038 // The index argument from vload_half.
2039 auto Arg0 = CI->getOperand(0);
2040
2041 // The pointer argument from vload_half.
2042 auto Arg1 = CI->getOperand(1);
2043
David Neto22f144c2017-06-12 14:26:21 -04002044 auto IntTy = Type::getInt32Ty(M.getContext());
2045 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002046 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2047
David Neto22f144c2017-06-12 14:26:21 -04002048 // Our intrinsic to unpack a float2 from an int.
2049 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2050
2051 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2052
David Neto482550a2018-03-24 05:21:07 -07002053 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002054 auto ShortTy = Type::getInt16Ty(M.getContext());
2055 auto ShortPointerTy = PointerType::get(
2056 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002057
David Netoac825b82017-05-30 12:49:01 -04002058 // Cast the half* pointer to short*.
2059 auto Cast =
2060 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002061
David Netoac825b82017-05-30 12:49:01 -04002062 // Index into the correct address of the casted pointer.
2063 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2064
2065 // Load from the short* we casted to.
2066 auto Load = new LoadInst(Index, "", CI);
2067
2068 // ZExt the short -> int.
2069 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2070
2071 // Get our float2.
2072 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2073
2074 // Extract out the bottom element which is our float result.
2075 auto Extract = ExtractElementInst::Create(
2076 Call, ConstantInt::get(IntTy, 0), "", CI);
2077
2078 CI->replaceAllUsesWith(Extract);
2079 } else {
2080 // Assume the pointer argument points to storage aligned to 32bits
2081 // or more.
2082 // TODO(dneto): Do more analysis to make sure this is true?
2083 //
2084 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2085 // with:
2086 //
2087 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2088 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2089 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2090 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2091 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2092 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2093 // x float> %converted, %index_is_odd32
2094
2095 auto IntPointerTy = PointerType::get(
2096 IntTy, Arg1->getType()->getPointerAddressSpace());
2097
David Neto973e6a82017-05-30 13:48:18 -04002098 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002099 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002100 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002101 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2102
2103 auto One = ConstantInt::get(IntTy, 1);
2104 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2105 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2106
2107 // Index into the correct address of the casted pointer.
2108 auto Ptr =
2109 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2110
2111 // Load from the int* we casted to.
2112 auto Load = new LoadInst(Ptr, "", CI);
2113
2114 // Get our float2.
2115 auto Call = CallInst::Create(NewF, Load, "", CI);
2116
2117 // Extract out the float result, where the element number is
2118 // determined by whether the original index was even or odd.
2119 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2120
2121 CI->replaceAllUsesWith(Extract);
2122 }
David Neto22f144c2017-06-12 14:26:21 -04002123
2124 // Lastly, remember to remove the user.
2125 ToRemoves.push_back(CI);
2126 }
2127 }
2128
2129 Changed = !ToRemoves.empty();
2130
2131 // And cleanup the calls we don't use anymore.
2132 for (auto V : ToRemoves) {
2133 V->eraseFromParent();
2134 }
2135
2136 // And remove the function we don't need either too.
2137 F->eraseFromParent();
2138 }
2139 }
2140
2141 return Changed;
2142}
2143
2144bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002145
Kévin Petite8edce32019-04-10 14:23:32 +01002146 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002147 "_Z11vload_half2jPU3AS1KDh",
2148 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2149 "_Z11vload_half2jPU3AS2KDh",
2150 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2151 };
David Neto22f144c2017-06-12 14:26:21 -04002152
Kévin Petite8edce32019-04-10 14:23:32 +01002153 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2154 // The index argument from vload_half.
2155 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002156
Kévin Petite8edce32019-04-10 14:23:32 +01002157 // The pointer argument from vload_half.
2158 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002159
Kévin Petite8edce32019-04-10 14:23:32 +01002160 auto IntTy = Type::getInt32Ty(M.getContext());
2161 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002162 auto NewPointerTy =
2163 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002164 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002165
Kévin Petite8edce32019-04-10 14:23:32 +01002166 // Cast the half* pointer to int*.
2167 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002168
Kévin Petite8edce32019-04-10 14:23:32 +01002169 // Index into the correct address of the casted pointer.
2170 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002171
Kévin Petite8edce32019-04-10 14:23:32 +01002172 // Load from the int* we casted to.
2173 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002174
Kévin Petite8edce32019-04-10 14:23:32 +01002175 // Our intrinsic to unpack a float2 from an int.
2176 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002177
Kévin Petite8edce32019-04-10 14:23:32 +01002178 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002179
Kévin Petite8edce32019-04-10 14:23:32 +01002180 // Get our float2.
2181 return CallInst::Create(NewF, Load, "", CI);
2182 });
David Neto22f144c2017-06-12 14:26:21 -04002183}
2184
2185bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002186
Kévin Petite8edce32019-04-10 14:23:32 +01002187 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002188 "_Z11vload_half4jPU3AS1KDh",
2189 "_Z12vloada_half4jPU3AS1KDh",
2190 "_Z11vload_half4jPU3AS2KDh",
2191 "_Z12vloada_half4jPU3AS2KDh",
2192 };
David Neto22f144c2017-06-12 14:26:21 -04002193
Kévin Petite8edce32019-04-10 14:23:32 +01002194 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2195 // The index argument from vload_half.
2196 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002197
Kévin Petite8edce32019-04-10 14:23:32 +01002198 // The pointer argument from vload_half.
2199 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002200
Kévin Petite8edce32019-04-10 14:23:32 +01002201 auto IntTy = Type::getInt32Ty(M.getContext());
2202 auto Int2Ty = VectorType::get(IntTy, 2);
2203 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002204 auto NewPointerTy =
2205 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002206 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002207
Kévin Petite8edce32019-04-10 14:23:32 +01002208 // Cast the half* pointer to int2*.
2209 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002210
Kévin Petite8edce32019-04-10 14:23:32 +01002211 // Index into the correct address of the casted pointer.
2212 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002213
Kévin Petite8edce32019-04-10 14:23:32 +01002214 // Load from the int2* we casted to.
2215 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002216
Kévin Petite8edce32019-04-10 14:23:32 +01002217 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002218 auto X =
2219 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2220 auto Y =
2221 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002222
Kévin Petite8edce32019-04-10 14:23:32 +01002223 // Our intrinsic to unpack a float2 from an int.
2224 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002225
Kévin Petite8edce32019-04-10 14:23:32 +01002226 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002227
Kévin Petite8edce32019-04-10 14:23:32 +01002228 // Get the lower (x & y) components of our final float4.
2229 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002230
Kévin Petite8edce32019-04-10 14:23:32 +01002231 // Get the higher (z & w) components of our final float4.
2232 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002233
Kévin Petite8edce32019-04-10 14:23:32 +01002234 Constant *ShuffleMask[4] = {
2235 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2236 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002237
Kévin Petite8edce32019-04-10 14:23:32 +01002238 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002239 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2240 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002241 });
David Neto22f144c2017-06-12 14:26:21 -04002242}
2243
David Neto6ad93232018-06-07 15:42:58 -07002244bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002245
2246 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2247 //
2248 // %u = load i32 %ptr
2249 // %fxy = call <2 x float> Unpack2xHalf(u)
2250 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002251 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002252 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2253 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2254 "_Z20__clspv_vloada_half2jPKj", // private
2255 };
2256
Kévin Petite8edce32019-04-10 14:23:32 +01002257 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2258 auto Index = CI->getOperand(0);
2259 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002260
Kévin Petite8edce32019-04-10 14:23:32 +01002261 auto IntTy = Type::getInt32Ty(M.getContext());
2262 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2263 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002264
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002265 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002266 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002267
Kévin Petite8edce32019-04-10 14:23:32 +01002268 // Our intrinsic to unpack a float2 from an int.
2269 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002270
Kévin Petite8edce32019-04-10 14:23:32 +01002271 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002272
Kévin Petite8edce32019-04-10 14:23:32 +01002273 // Get our final float2.
2274 return CallInst::Create(NewF, Load, "", CI);
2275 });
David Neto6ad93232018-06-07 15:42:58 -07002276}
2277
2278bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002279
2280 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2281 //
2282 // %u2 = load <2 x i32> %ptr
2283 // %u2xy = extractelement %u2, 0
2284 // %u2zw = extractelement %u2, 1
2285 // %fxy = call <2 x float> Unpack2xHalf(uint)
2286 // %fzw = call <2 x float> Unpack2xHalf(uint)
2287 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002288 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002289 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2290 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2291 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2292 };
2293
Kévin Petite8edce32019-04-10 14:23:32 +01002294 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2295 auto Index = CI->getOperand(0);
2296 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002297
Kévin Petite8edce32019-04-10 14:23:32 +01002298 auto IntTy = Type::getInt32Ty(M.getContext());
2299 auto Int2Ty = VectorType::get(IntTy, 2);
2300 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2301 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002302
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002303 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002304 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002305
Kévin Petite8edce32019-04-10 14:23:32 +01002306 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002307 auto X =
2308 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2309 auto Y =
2310 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002311
Kévin Petite8edce32019-04-10 14:23:32 +01002312 // Our intrinsic to unpack a float2 from an int.
2313 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002314
Kévin Petite8edce32019-04-10 14:23:32 +01002315 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002316
Kévin Petite8edce32019-04-10 14:23:32 +01002317 // Get the lower (x & y) components of our final float4.
2318 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002319
Kévin Petite8edce32019-04-10 14:23:32 +01002320 // Get the higher (z & w) components of our final float4.
2321 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 Constant *ShuffleMask[4] = {
2324 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2325 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002326
Kévin Petite8edce32019-04-10 14:23:32 +01002327 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002328 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2329 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002330 });
David Neto6ad93232018-06-07 15:42:58 -07002331}
2332
David Neto22f144c2017-06-12 14:26:21 -04002333bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002334
Kévin Petite8edce32019-04-10 14:23:32 +01002335 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2336 "_Z15vstore_half_rtefjPU3AS1Dh",
2337 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002338
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002339 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002340 // The value to store.
2341 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002342
Kévin Petite8edce32019-04-10 14:23:32 +01002343 // The index argument from vstore_half.
2344 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002345
Kévin Petite8edce32019-04-10 14:23:32 +01002346 // The pointer argument from vstore_half.
2347 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002348
Kévin Petite8edce32019-04-10 14:23:32 +01002349 auto IntTy = Type::getInt32Ty(M.getContext());
2350 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2351 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2352 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002353
Kévin Petite8edce32019-04-10 14:23:32 +01002354 // Our intrinsic to pack a float2 to an int.
2355 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002356
Kévin Petite8edce32019-04-10 14:23:32 +01002357 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002358
Kévin Petite8edce32019-04-10 14:23:32 +01002359 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002360 auto TempVec = InsertElementInst::Create(
2361 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 // Pack the float2 -> half2 (in an int).
2364 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002365
Kévin Petite8edce32019-04-10 14:23:32 +01002366 Value *Ret;
2367 if (clspv::Option::F16BitStorage()) {
2368 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002369 auto ShortPointerTy =
2370 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002371
Kévin Petite8edce32019-04-10 14:23:32 +01002372 // Truncate our i32 to an i16.
2373 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 // Cast the half* pointer to short*.
2376 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002377
Kévin Petite8edce32019-04-10 14:23:32 +01002378 // Index into the correct address of the casted pointer.
2379 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002380
Kévin Petite8edce32019-04-10 14:23:32 +01002381 // Store to the int* we casted to.
2382 Ret = new StoreInst(Trunc, Index, CI);
2383 } else {
2384 // We can only write to 32-bit aligned words.
2385 //
2386 // Assuming base is aligned to 32-bits, replace the equivalent of
2387 // vstore_half(value, index, base)
2388 // with:
2389 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2390 // uint32_t write_to_upper_half = index & 1u;
2391 // uint32_t shift = write_to_upper_half << 4;
2392 //
2393 // // Pack the float value as a half number in bottom 16 bits
2394 // // of an i32.
2395 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2396 //
2397 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2398 // ^ ((packed & 0xffff) << shift)
2399 // // We only need relaxed consistency, but OpenCL 1.2 only has
2400 // // sequentially consistent atomics.
2401 // // TODO(dneto): Use relaxed consistency.
2402 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002403 auto IntPointerTy =
2404 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002405
Kévin Petite8edce32019-04-10 14:23:32 +01002406 auto Four = ConstantInt::get(IntTy, 4);
2407 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002408
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002409 auto IndexIsOdd =
2410 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002411 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002412 auto IndexIntoI32 =
2413 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2414 auto BaseI32Ptr =
2415 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2416 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2417 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002418 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2419 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002420 auto MaskBitsToWrite =
2421 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2422 auto MaskedCurrent = BinaryOperator::CreateAnd(
2423 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002424
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002425 auto XLowerBits =
2426 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2427 auto NewBitsToWrite =
2428 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2429 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2430 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002431
Kévin Petite8edce32019-04-10 14:23:32 +01002432 // Generate the call to atomi_xor.
2433 SmallVector<Type *, 5> ParamTypes;
2434 // The pointer type.
2435 ParamTypes.push_back(IntPointerTy);
2436 // The Types for memory scope, semantics, and value.
2437 ParamTypes.push_back(IntTy);
2438 ParamTypes.push_back(IntTy);
2439 ParamTypes.push_back(IntTy);
2440 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2441 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002442
Kévin Petite8edce32019-04-10 14:23:32 +01002443 const auto ConstantScopeDevice =
2444 ConstantInt::get(IntTy, spv::ScopeDevice);
2445 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2446 // (SPIR-V Workgroup).
2447 const auto AddrSpaceSemanticsBits =
2448 IntPointerTy->getPointerAddressSpace() == 1
2449 ? spv::MemorySemanticsUniformMemoryMask
2450 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002451
Kévin Petite8edce32019-04-10 14:23:32 +01002452 // We're using relaxed consistency here.
2453 const auto ConstantMemorySemantics =
2454 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2455 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002456
Kévin Petite8edce32019-04-10 14:23:32 +01002457 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2458 ConstantMemorySemantics, ValueToXor};
2459 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2460 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002461 }
David Neto22f144c2017-06-12 14:26:21 -04002462
Kévin Petite8edce32019-04-10 14:23:32 +01002463 return Ret;
2464 });
David Neto22f144c2017-06-12 14:26:21 -04002465}
2466
2467bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002468
Kévin Petite8edce32019-04-10 14:23:32 +01002469 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002470 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2471 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2472 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2473 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2474 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2475 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2476 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2477 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2478 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2479 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2480 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2481 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2482 };
David Neto22f144c2017-06-12 14:26:21 -04002483
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002484 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002485 // The value to store.
2486 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002487
Kévin Petite8edce32019-04-10 14:23:32 +01002488 // The index argument from vstore_half.
2489 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002490
Kévin Petite8edce32019-04-10 14:23:32 +01002491 // The pointer argument from vstore_half.
2492 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002493
Kévin Petite8edce32019-04-10 14:23:32 +01002494 auto IntTy = Type::getInt32Ty(M.getContext());
2495 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002496 auto NewPointerTy =
2497 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002498 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002499
Kévin Petite8edce32019-04-10 14:23:32 +01002500 // Our intrinsic to pack a float2 to an int.
2501 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002502
Kévin Petite8edce32019-04-10 14:23:32 +01002503 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002504
Kévin Petite8edce32019-04-10 14:23:32 +01002505 // Turn the packed x & y into the final packing.
2506 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002507
Kévin Petite8edce32019-04-10 14:23:32 +01002508 // Cast the half* pointer to int*.
2509 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002510
Kévin Petite8edce32019-04-10 14:23:32 +01002511 // Index into the correct address of the casted pointer.
2512 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002513
Kévin Petite8edce32019-04-10 14:23:32 +01002514 // Store to the int* we casted to.
2515 return new StoreInst(X, Index, CI);
2516 });
David Neto22f144c2017-06-12 14:26:21 -04002517}
2518
2519bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002520
Kévin Petite8edce32019-04-10 14:23:32 +01002521 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002522 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2523 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2524 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2525 "_Z13vstorea_half4Dv4_fjPDh", // private
2526 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2527 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2528 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2529 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2530 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2531 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2532 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2533 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2534 };
David Neto22f144c2017-06-12 14:26:21 -04002535
Kévin Petite8edce32019-04-10 14:23:32 +01002536 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2537 // The value to store.
2538 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // The index argument from vstore_half.
2541 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 // The pointer argument from vstore_half.
2544 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002545
Kévin Petite8edce32019-04-10 14:23:32 +01002546 auto IntTy = Type::getInt32Ty(M.getContext());
2547 auto Int2Ty = VectorType::get(IntTy, 2);
2548 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002549 auto NewPointerTy =
2550 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002551 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002552
Kévin Petite8edce32019-04-10 14:23:32 +01002553 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2554 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002555
Kévin Petite8edce32019-04-10 14:23:32 +01002556 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002557 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2558 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002559
Kévin Petite8edce32019-04-10 14:23:32 +01002560 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2561 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002562
Kévin Petite8edce32019-04-10 14:23:32 +01002563 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002564 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2565 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002566
Kévin Petite8edce32019-04-10 14:23:32 +01002567 // Our intrinsic to pack a float2 to an int.
2568 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002569
Kévin Petite8edce32019-04-10 14:23:32 +01002570 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002571
Kévin Petite8edce32019-04-10 14:23:32 +01002572 // Turn the packed x & y into the final component of our int2.
2573 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002574
Kévin Petite8edce32019-04-10 14:23:32 +01002575 // Turn the packed z & w into the final component of our int2.
2576 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002577
Kévin Petite8edce32019-04-10 14:23:32 +01002578 auto Combine = InsertElementInst::Create(
2579 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002580 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2581 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002582
Kévin Petite8edce32019-04-10 14:23:32 +01002583 // Cast the half* pointer to int2*.
2584 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002585
Kévin Petite8edce32019-04-10 14:23:32 +01002586 // Index into the correct address of the casted pointer.
2587 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002588
Kévin Petite8edce32019-04-10 14:23:32 +01002589 // Store to the int2* we casted to.
2590 return new StoreInst(Combine, Index, CI);
2591 });
David Neto22f144c2017-06-12 14:26:21 -04002592}
2593
2594bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2595 bool Changed = false;
2596
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002597 const std::map<const char *, const char *> Map = {
2598 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2599 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2600 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2601 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002602
2603 for (auto Pair : Map) {
2604 // If we find a function with the matching name.
2605 if (auto F = M.getFunction(Pair.first)) {
2606 SmallVector<Instruction *, 4> ToRemoves;
2607
2608 // Walk the users of the function.
2609 for (auto &U : F->uses()) {
2610 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2611 // The image.
2612 auto Arg0 = CI->getOperand(0);
2613
2614 // The sampler.
2615 auto Arg1 = CI->getOperand(1);
2616
2617 // The coordinate (integer type that we can't handle).
2618 auto Arg2 = CI->getOperand(2);
2619
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002620 auto FloatVecTy =
2621 VectorType::get(Type::getFloatTy(M.getContext()),
2622 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002623
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002624 auto NewFType = FunctionType::get(
2625 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2626 false);
David Neto22f144c2017-06-12 14:26:21 -04002627
2628 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2629
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002630 auto Cast =
2631 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002632
2633 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2634
2635 CI->replaceAllUsesWith(NewCI);
2636
2637 // Lastly, remember to remove the user.
2638 ToRemoves.push_back(CI);
2639 }
2640 }
2641
2642 Changed = !ToRemoves.empty();
2643
2644 // And cleanup the calls we don't use anymore.
2645 for (auto V : ToRemoves) {
2646 V->eraseFromParent();
2647 }
2648
2649 // And remove the function we don't need either too.
2650 F->eraseFromParent();
2651 }
2652 }
2653
2654 return Changed;
2655}
2656
2657bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2658 bool Changed = false;
2659
2660 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002661 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002662 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002663 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002664 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002665 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002666 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002667 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002668 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002669 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002670 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002671 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002672 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002673 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002674 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002675 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002676 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002677 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002678 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002679 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002680 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002681 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002682 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2683 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2684 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002685
2686 for (auto Pair : Map) {
2687 // If we find a function with the matching name.
2688 if (auto F = M.getFunction(Pair.first)) {
2689 SmallVector<Instruction *, 4> ToRemoves;
2690
2691 // Walk the users of the function.
2692 for (auto &U : F->uses()) {
2693 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2694 auto FType = F->getFunctionType();
2695 SmallVector<Type *, 5> ParamTypes;
2696
2697 // The pointer type.
2698 ParamTypes.push_back(FType->getParamType(0));
2699
2700 auto IntTy = Type::getInt32Ty(M.getContext());
2701
2702 // The memory scope type.
2703 ParamTypes.push_back(IntTy);
2704
2705 // The memory semantics type.
2706 ParamTypes.push_back(IntTy);
2707
2708 if (2 < CI->getNumArgOperands()) {
2709 // The unequal memory semantics type.
2710 ParamTypes.push_back(IntTy);
2711
2712 // The value type.
2713 ParamTypes.push_back(FType->getParamType(2));
2714
2715 // The comparator type.
2716 ParamTypes.push_back(FType->getParamType(1));
2717 } else if (1 < CI->getNumArgOperands()) {
2718 // The value type.
2719 ParamTypes.push_back(FType->getParamType(1));
2720 }
2721
2722 auto NewFType =
2723 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2724 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2725
2726 // We need to map the OpenCL constants to the SPIR-V equivalents.
2727 const auto ConstantScopeDevice =
2728 ConstantInt::get(IntTy, spv::ScopeDevice);
2729 const auto ConstantMemorySemantics = ConstantInt::get(
2730 IntTy, spv::MemorySemanticsUniformMemoryMask |
2731 spv::MemorySemanticsSequentiallyConsistentMask);
2732
2733 SmallVector<Value *, 5> Params;
2734
2735 // The pointer.
2736 Params.push_back(CI->getArgOperand(0));
2737
2738 // The memory scope.
2739 Params.push_back(ConstantScopeDevice);
2740
2741 // The memory semantics.
2742 Params.push_back(ConstantMemorySemantics);
2743
2744 if (2 < CI->getNumArgOperands()) {
2745 // The unequal memory semantics.
2746 Params.push_back(ConstantMemorySemantics);
2747
2748 // The value.
2749 Params.push_back(CI->getArgOperand(2));
2750
2751 // The comparator.
2752 Params.push_back(CI->getArgOperand(1));
2753 } else if (1 < CI->getNumArgOperands()) {
2754 // The value.
2755 Params.push_back(CI->getArgOperand(1));
2756 }
2757
2758 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2759
2760 CI->replaceAllUsesWith(NewCI);
2761
2762 // Lastly, remember to remove the user.
2763 ToRemoves.push_back(CI);
2764 }
2765 }
2766
2767 Changed = !ToRemoves.empty();
2768
2769 // And cleanup the calls we don't use anymore.
2770 for (auto V : ToRemoves) {
2771 V->eraseFromParent();
2772 }
2773
2774 // And remove the function we don't need either too.
2775 F->eraseFromParent();
2776 }
2777 }
2778
Neil Henning39672102017-09-29 14:33:13 +01002779 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002780 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002781 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002782 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002783 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002784 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002785 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002786 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002787 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002788 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002789 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002790 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002791 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002792 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002793 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002794 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002795 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002796 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002797 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002798 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002799 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002800 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002801 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002802 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002803 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002804 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002805 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002806 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002807 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002808 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002809 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002810 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002811 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002812 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002813 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002814 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002815 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002816 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002817 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002818 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002819 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002820 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002821 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002822 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002823 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002824 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002825 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002826 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002827 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002828 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002829 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002830 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002831 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002832 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002833 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002834 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002835 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002836 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002837 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002838 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002839 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002840 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002841 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2842 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2843 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002844
2845 for (auto Pair : Map2) {
2846 // If we find a function with the matching name.
2847 if (auto F = M.getFunction(Pair.first)) {
2848 SmallVector<Instruction *, 4> ToRemoves;
2849
2850 // Walk the users of the function.
2851 for (auto &U : F->uses()) {
2852 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2853 auto AtomicOp = new AtomicRMWInst(
2854 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2855 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2856
2857 CI->replaceAllUsesWith(AtomicOp);
2858
2859 // Lastly, remember to remove the user.
2860 ToRemoves.push_back(CI);
2861 }
2862 }
2863
2864 Changed = !ToRemoves.empty();
2865
2866 // And cleanup the calls we don't use anymore.
2867 for (auto V : ToRemoves) {
2868 V->eraseFromParent();
2869 }
2870
2871 // And remove the function we don't need either too.
2872 F->eraseFromParent();
2873 }
2874 }
2875
David Neto22f144c2017-06-12 14:26:21 -04002876 return Changed;
2877}
2878
2879bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002880
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002881 std::vector<const char *> Names = {
2882 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002883 };
2884
2885 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002886 auto IntTy = Type::getInt32Ty(M.getContext());
2887 auto FloatTy = Type::getFloatTy(M.getContext());
2888
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002889 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2890 ConstantInt::get(IntTy, 1),
2891 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002892
2893 Constant *UpShuffleMask[4] = {
2894 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2895 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2896
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002897 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2898 UndefValue::get(FloatTy),
2899 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002900
Kévin Petite8edce32019-04-10 14:23:32 +01002901 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002902 auto Arg0 =
2903 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2904 ConstantVector::get(DownShuffleMask), "", CI);
2905 auto Arg1 =
2906 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2907 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002908 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002909
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002910 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002911
Kévin Petite8edce32019-04-10 14:23:32 +01002912 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002913
Kévin Petite8edce32019-04-10 14:23:32 +01002914 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002915
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002916 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2917 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002918 });
David Neto22f144c2017-06-12 14:26:21 -04002919}
David Neto62653202017-10-16 19:05:18 -04002920
2921bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2922 bool Changed = false;
2923
2924 // OpenCL's float result = fract(float x, float* ptr)
2925 //
2926 // In the LLVM domain:
2927 //
2928 // %floor_result = call spir_func float @floor(float %x)
2929 // store float %floor_result, float * %ptr
2930 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2931 // %result = call spir_func float
2932 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2933 //
2934 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2935 // and clspv.fract occur in the SPIR-V generator pass:
2936 //
2937 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2938 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2939 // ...
2940 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2941 // OpStore %ptr %floor_result
2942 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2943 // %fract_result = OpExtInst %float
2944 // %glsl_ext Fmin %fract_intermediate %just_under_1
2945
David Neto62653202017-10-16 19:05:18 -04002946 using std::string;
2947
2948 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2949 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002950 using QuadType =
2951 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04002952 auto make_quad = [](const char *a, const char *b, const char *c,
2953 const char *d) {
2954 return std::tuple<const char *, const char *, const char *, const char *>(
2955 a, b, c, d);
2956 };
2957 const std::vector<QuadType> Functions = {
2958 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002959 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
2960 "clspv.fract.v2f"),
2961 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
2962 "clspv.fract.v3f"),
2963 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
2964 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04002965 };
2966
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002967 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04002968 const StringRef fract_name(std::get<0>(quad));
2969
2970 // If we find a function with the matching name.
2971 if (auto F = M.getFunction(fract_name)) {
2972 if (F->use_begin() == F->use_end())
2973 continue;
2974
2975 // We have some uses.
2976 Changed = true;
2977
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002978 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04002979
2980 const StringRef floor_name(std::get<1>(quad));
2981 const StringRef fmin_name(std::get<2>(quad));
2982 const StringRef clspv_fract_name(std::get<3>(quad));
2983
2984 // This is either float or a float vector. All the float-like
2985 // types are this type.
2986 auto result_ty = F->getReturnType();
2987
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002988 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04002989 if (!fmin_fn) {
2990 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002991 FunctionType *fn_ty =
2992 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04002993 fmin_fn =
2994 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04002995 fmin_fn->addFnAttr(Attribute::ReadNone);
2996 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2997 }
2998
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002999 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003000 if (!floor_fn) {
3001 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003002 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003003 floor_fn = cast<Function>(
3004 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003005 floor_fn->addFnAttr(Attribute::ReadNone);
3006 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3007 }
3008
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003009 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003010 if (!clspv_fract_fn) {
3011 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003012 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003013 clspv_fract_fn = cast<Function>(
3014 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003015 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3016 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3017 }
3018
3019 // Number of significant significand bits, whether represented or not.
3020 unsigned num_significand_bits;
3021 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003022 case Type::HalfTyID:
3023 num_significand_bits = 11;
3024 break;
3025 case Type::FloatTyID:
3026 num_significand_bits = 24;
3027 break;
3028 case Type::DoubleTyID:
3029 num_significand_bits = 53;
3030 break;
3031 default:
3032 assert(false && "Unhandled float type when processing fract builtin");
3033 break;
David Neto62653202017-10-16 19:05:18 -04003034 }
3035 // Beware that the disassembler displays this value as
3036 // OpConstant %float 1
3037 // which is not quite right.
3038 const double kJustUnderOneScalar =
3039 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3040
3041 Constant *just_under_one =
3042 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3043 if (result_ty->isVectorTy()) {
3044 just_under_one = ConstantVector::getSplat(
3045 result_ty->getVectorNumElements(), just_under_one);
3046 }
3047
3048 IRBuilder<> Builder(Context);
3049
3050 SmallVector<Instruction *, 4> ToRemoves;
3051
3052 // Walk the users of the function.
3053 for (auto &U : F->uses()) {
3054 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3055
3056 Builder.SetInsertPoint(CI);
3057 auto arg = CI->getArgOperand(0);
3058 auto ptr = CI->getArgOperand(1);
3059
3060 // Compute floor result and store it.
3061 auto floor = Builder.CreateCall(floor_fn, {arg});
3062 Builder.CreateStore(floor, ptr);
3063
3064 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003065 auto fract_result =
3066 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003067
3068 CI->replaceAllUsesWith(fract_result);
3069
3070 // Lastly, remember to remove the user.
3071 ToRemoves.push_back(CI);
3072 }
3073 }
3074
3075 // And cleanup the calls we don't use anymore.
3076 for (auto V : ToRemoves) {
3077 V->eraseFromParent();
3078 }
3079
3080 // And remove the function we don't need either too.
3081 F->eraseFromParent();
3082 }
3083 }
3084
3085 return Changed;
3086}