// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
#include <math.h>
#include <string>
#include <tuple>

#include "llvm/ADT/APInt.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/Instructions.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/ValueSymbolTable.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Utils/Cloning.h"

#include "spirv/1.0/spirv.hpp"

#include "clspv/Option.h"

#include "Passes.h"
#include "SPIRVOp.h"
36
David Neto22f144c2017-06-12 14:26:21 -040037using namespace llvm;
38
39#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
40
41namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000042
// What the mangled-name parser records about a single builtin argument.
// Only the integer signedness of the argument's element type is tracked.
struct ArgTypeInfo {
  // None is used for non-integer (floating-point) arguments.
  enum class SignedNess { None, Unsigned, Signed };

  // Defaults to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value that could later be copied or compared.
  SignedNess signedness = SignedNess::None;
};
47
48struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000049 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000050 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000051
Kévin Petit91bc72e2019-04-08 15:17:46 +010052 bool isArgSigned(size_t arg) const {
53 assert(argTypeInfos.size() > arg);
54 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000055 }
56
Kévin Petit91bc72e2019-04-08 15:17:46 +010057 static FunctionInfo getFromMangledName(StringRef name) {
58 FunctionInfo fi;
59 if (!getFromMangledNameCheck(name, &fi)) {
60 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000061 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010062 return fi;
63 }
Kévin Petit8a560882019-03-21 15:24:34 +000064
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
66 if (!name.consume_front("_Z")) {
67 return false;
68 }
69 size_t nameLen;
70 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000071 return false;
72 }
73
Kévin Petit91bc72e2019-04-08 15:17:46 +010074 finfo->name = name.take_front(nameLen);
75 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 while (name.size() != 0) {
80
81 ArgTypeInfo ti;
82
83 // Try parsing a vector prefix
84 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040085 int numElems;
86 if (name.consumeInteger(10, numElems)) {
87 return false;
88 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040090 if (!name.consume_front("_")) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093 }
94
95 // Parse the base type
96 char typeCode = name.front();
97 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040098 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010099 case 'c': // char
100 case 'a': // signed char
101 case 's': // short
102 case 'i': // int
103 case 'l': // long
104 ti.signedness = ArgTypeInfo::SignedNess::Signed;
105 break;
106 case 'h': // unsigned char
107 case 't': // unsigned short
108 case 'j': // unsigned int
109 case 'm': // unsigned long
110 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
111 break;
112 case 'f':
113 ti.signedness = ArgTypeInfo::SignedNess::None;
114 break;
115 case 'S':
116 ti = prev_ti;
117 if (!name.consume_front("_")) {
118 return false;
119 }
120 break;
121 default:
122 return false;
123 }
124
125 finfo->argTypeInfos.push_back(ti);
126
127 prev_ti = ti;
128 }
129
130 return true;
131 };
Kévin Petit8a560882019-03-21 15:24:34 +0000132};
133
// Despite its name, this does not count leading zeros: it returns the index
// of the most significant set bit of |v| (i.e. floor(log2(v))), and 0 when
// |v| is 0. Callers in this file subtract two results to obtain the shift
// distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highBit = 0;

  // Binary search for the top set bit: at each step, if anything is set at or
  // above bit |width|, discard the low |width| bits and account for them.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      highBit |= width;
    }
  }

  return highBit;
}
153
154Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
155 if (1 == elements) {
156 return Type::getInt1Ty(C);
157 } else {
158 return VectorType::get(Type::getInt1Ty(C), elements);
159 }
160}
161
162struct ReplaceOpenCLBuiltinPass final : public ModulePass {
163 static char ID;
164 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
165
166 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000167 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100168 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100169 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400170 bool replaceRecip(Module &M);
171 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100172 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400173 bool replaceExp10(Module &M);
174 bool replaceLog10(Module &M);
175 bool replaceBarrier(Module &M);
176 bool replaceMemFence(Module &M);
177 bool replaceRelational(Module &M);
178 bool replaceIsInfAndIsNan(Module &M);
179 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000180 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000181 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000182 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000183 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000184 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000185 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000186 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400187 bool replaceSignbit(Module &M);
188 bool replaceMadandMad24andMul24(Module &M);
189 bool replaceVloadHalf(Module &M);
190 bool replaceVloadHalf2(Module &M);
191 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700192 bool replaceClspvVloadaHalf2(Module &M);
193 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400194 bool replaceVstoreHalf(Module &M);
195 bool replaceVstoreHalf2(Module &M);
196 bool replaceVstoreHalf4(Module &M);
197 bool replaceReadImageF(Module &M);
198 bool replaceAtomics(Module &M);
199 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400200 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700201 bool replaceVload(Module &M);
202 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400203};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100204} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400205
206char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400207INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
208 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400209
210namespace clspv {
211ModulePass *createReplaceOpenCLBuiltinPass() {
212 return new ReplaceOpenCLBuiltinPass();
213}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400214} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400215
216bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
217 bool Changed = false;
218
Kévin Petit2444e9b2018-11-09 14:14:37 +0000219 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100220 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100221 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400222 Changed |= replaceRecip(M);
223 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100224 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400225 Changed |= replaceExp10(M);
226 Changed |= replaceLog10(M);
227 Changed |= replaceBarrier(M);
228 Changed |= replaceMemFence(M);
229 Changed |= replaceRelational(M);
230 Changed |= replaceIsInfAndIsNan(M);
231 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000232 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000233 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000234 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000235 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000236 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000237 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000238 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400239 Changed |= replaceSignbit(M);
240 Changed |= replaceMadandMad24andMul24(M);
241 Changed |= replaceVloadHalf(M);
242 Changed |= replaceVloadHalf2(M);
243 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700244 Changed |= replaceClspvVloadaHalf2(M);
245 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400246 Changed |= replaceVstoreHalf(M);
247 Changed |= replaceVstoreHalf2(M);
248 Changed |= replaceVstoreHalf4(M);
249 Changed |= replaceReadImageF(M);
250 Changed |= replaceAtomics(M);
251 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400252 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700253 Changed |= replaceVload(M);
254 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400255
256 return Changed;
257}
258
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400259bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
260 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000261
Kévin Petite8edce32019-04-10 14:23:32 +0100262 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000263
264 for (auto Name : Names) {
265 // If we find a function with the matching name.
266 if (auto F = M.getFunction(Name)) {
267 SmallVector<Instruction *, 4> ToRemoves;
268
269 // Walk the users of the function.
270 for (auto &U : F->uses()) {
271 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000272
Kévin Petite8edce32019-04-10 14:23:32 +0100273 auto NewValue = Replacer(CI);
274
275 if (NewValue != nullptr) {
276 CI->replaceAllUsesWith(NewValue);
277 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000278
279 // Lastly, remember to remove the user.
280 ToRemoves.push_back(CI);
281 }
282 }
283
284 Changed = !ToRemoves.empty();
285
286 // And cleanup the calls we don't use anymore.
287 for (auto V : ToRemoves) {
288 V->eraseFromParent();
289 }
290
291 // And remove the function we don't need either too.
292 F->eraseFromParent();
293 }
294 }
295
296 return Changed;
297}
298
Kévin Petite8edce32019-04-10 14:23:32 +0100299bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100300
Kévin Petite8edce32019-04-10 14:23:32 +0100301 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400302 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
303 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
304 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
305 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100306 };
307
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400308 return replaceCallsWithValue(M, Names,
309 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100310}
311
312bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
313
314 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400315 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
316 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
317 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
318 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
319 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
320 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
321 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
322 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
323 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
324 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
325 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100326 };
327
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400328 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100329 auto XValue = CI->getOperand(0);
330 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100331
Kévin Petite8edce32019-04-10 14:23:32 +0100332 IRBuilder<> Builder(CI);
333 auto XmY = Builder.CreateSub(XValue, YValue);
334 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100335
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400336 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100337 auto F = CI->getCalledFunction();
338 auto finfo = FunctionInfo::getFromMangledName(F->getName());
339 if (finfo.isArgSigned(0)) {
340 Cmp = Builder.CreateICmpSGT(YValue, XValue);
341 } else {
342 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100343 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100344
Kévin Petite8edce32019-04-10 14:23:32 +0100345 return Builder.CreateSelect(Cmp, YmX, XmY);
346 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100347}
348
Kévin Petit8c1be282019-04-02 19:34:25 +0100349bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100350
Kévin Petite8edce32019-04-10 14:23:32 +0100351 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400352 "_Z8copysignff",
353 "_Z8copysignDv2_fS_",
354 "_Z8copysignDv3_fS_",
355 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100356 };
357
Kévin Petite8edce32019-04-10 14:23:32 +0100358 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
359 auto XValue = CI->getOperand(0);
360 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100361
Kévin Petite8edce32019-04-10 14:23:32 +0100362 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100363
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400364 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100365 if (Ty->isVectorTy()) {
366 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100367 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100368
Kévin Petite8edce32019-04-10 14:23:32 +0100369 // Return X with the sign of Y
370
371 // Sign bit masks
372 auto SignBit = IntTy->getScalarSizeInBits() - 1;
373 auto SignBitMask = 1 << SignBit;
374 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
375 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
376
377 IRBuilder<> Builder(CI);
378
379 // Extract sign of Y
380 auto YInt = Builder.CreateBitCast(YValue, IntTy);
381 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
382
383 // Clear sign bit in X
384 auto XInt = Builder.CreateBitCast(XValue, IntTy);
385 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
386
387 // Insert sign bit of Y into X
388 auto NewXInt = Builder.CreateOr(XInt, YSign);
389
390 // And cast back to floating-point
391 return Builder.CreateBitCast(NewXInt, Ty);
392 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100393}
394
David Neto22f144c2017-06-12 14:26:21 -0400395bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400396
Kévin Petite8edce32019-04-10 14:23:32 +0100397 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400398 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
399 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
400 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
401 };
402
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400403 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100404 // Recip has one arg.
405 auto Arg = CI->getOperand(0);
406 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
407 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
408 });
David Neto22f144c2017-06-12 14:26:21 -0400409}
410
411bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400412
Kévin Petite8edce32019-04-10 14:23:32 +0100413 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400414 "_Z11half_divideff", "_Z13native_divideff",
415 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
416 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
417 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
418 };
419
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400420 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100421 auto Op0 = CI->getOperand(0);
422 auto Op1 = CI->getOperand(1);
423 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
424 });
David Neto22f144c2017-06-12 14:26:21 -0400425}
426
Kévin Petit1329a002019-06-15 05:54:05 +0100427bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
428
429 std::vector<const char *> Names = {
430 "_Z3dotff",
431 "_Z3dotDv2_fS_",
432 "_Z3dotDv3_fS_",
433 "_Z3dotDv4_fS_",
434 };
435
436 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
437 auto Op0 = CI->getOperand(0);
438 auto Op1 = CI->getOperand(1);
439
440 Value *V;
441 if (Op0->getType()->isVectorTy()) {
442 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
443 CI->getType(), {Op0, Op1});
444 } else {
445 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
446 }
447
448 return V;
449 });
450}
451
David Neto22f144c2017-06-12 14:26:21 -0400452bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
453 bool Changed = false;
454
455 const std::map<const char *, const char *> Map = {
456 {"_Z5exp10f", "_Z3expf"},
457 {"_Z10half_exp10f", "_Z8half_expf"},
458 {"_Z12native_exp10f", "_Z10native_expf"},
459 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
460 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
461 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
462 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
463 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
464 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
465 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
466 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
467 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
468
469 for (auto Pair : Map) {
470 // If we find a function with the matching name.
471 if (auto F = M.getFunction(Pair.first)) {
472 SmallVector<Instruction *, 4> ToRemoves;
473
474 // Walk the users of the function.
475 for (auto &U : F->uses()) {
476 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
477 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
478
479 auto Arg = CI->getOperand(0);
480
481 // Constant of the natural log of 10 (ln(10)).
482 const double Ln10 =
483 2.302585092994045684017991454684364207601101488628772976033;
484
485 auto Mul = BinaryOperator::Create(
486 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
487 CI);
488
489 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
490
491 CI->replaceAllUsesWith(NewCI);
492
493 // Lastly, remember to remove the user.
494 ToRemoves.push_back(CI);
495 }
496 }
497
498 Changed = !ToRemoves.empty();
499
500 // And cleanup the calls we don't use anymore.
501 for (auto V : ToRemoves) {
502 V->eraseFromParent();
503 }
504
505 // And remove the function we don't need either too.
506 F->eraseFromParent();
507 }
508 }
509
510 return Changed;
511}
512
513bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
514 bool Changed = false;
515
516 const std::map<const char *, const char *> Map = {
517 {"_Z5log10f", "_Z3logf"},
518 {"_Z10half_log10f", "_Z8half_logf"},
519 {"_Z12native_log10f", "_Z10native_logf"},
520 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
521 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
522 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
523 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
524 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
525 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
526 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
527 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
528 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
529
530 for (auto Pair : Map) {
531 // If we find a function with the matching name.
532 if (auto F = M.getFunction(Pair.first)) {
533 SmallVector<Instruction *, 4> ToRemoves;
534
535 // Walk the users of the function.
536 for (auto &U : F->uses()) {
537 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
538 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
539
540 auto Arg = CI->getOperand(0);
541
542 // Constant of the reciprocal of the natural log of 10 (ln(10)).
543 const double Ln10 =
544 0.434294481903251827651128918916605082294397005803666566114;
545
546 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
547
548 auto Mul = BinaryOperator::Create(
549 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
550 "", CI);
551
552 CI->replaceAllUsesWith(Mul);
553
554 // Lastly, remember to remove the user.
555 ToRemoves.push_back(CI);
556 }
557 }
558
559 Changed = !ToRemoves.empty();
560
561 // And cleanup the calls we don't use anymore.
562 for (auto V : ToRemoves) {
563 V->eraseFromParent();
564 }
565
566 // And remove the function we don't need either too.
567 F->eraseFromParent();
568 }
569 }
570
571 return Changed;
572}
573
574bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
575 bool Changed = false;
576
577 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
578
579 const std::map<const char *, const char *> Map = {
580 {"_Z7barrierj", "__spirv_control_barrier"}};
581
582 for (auto Pair : Map) {
583 // If we find a function with the matching name.
584 if (auto F = M.getFunction(Pair.first)) {
585 SmallVector<Instruction *, 4> ToRemoves;
586
587 // Walk the users of the function.
588 for (auto &U : F->uses()) {
589 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
590 auto FType = F->getFunctionType();
591 SmallVector<Type *, 3> Params;
592 for (unsigned i = 0; i < 3; i++) {
593 Params.push_back(FType->getParamType(0));
594 }
595 auto NewFType =
596 FunctionType::get(FType->getReturnType(), Params, false);
597 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
alan-bakerb37f9732019-06-05 01:28:00 -0400598 cast<Function>(NewF.getCallee())->setCannotDuplicate();
David Neto22f144c2017-06-12 14:26:21 -0400599
600 auto Arg = CI->getOperand(0);
601
602 // We need to map the OpenCL constants to the SPIR-V equivalents.
603 const auto LocalMemFence =
604 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
605 const auto GlobalMemFence =
606 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
607 const auto ConstantSequentiallyConsistent = ConstantInt::get(
608 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
609 const auto ConstantScopeDevice =
610 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
611 const auto ConstantScopeWorkgroup =
612 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
613
614 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
615 const auto LocalMemFenceMask = BinaryOperator::Create(
616 Instruction::And, LocalMemFence, Arg, "", CI);
617 const auto WorkgroupShiftAmount =
618 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
619 clz(CLK_LOCAL_MEM_FENCE);
620 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
621 Instruction::Shl, LocalMemFenceMask,
622 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
623
624 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
625 const auto GlobalMemFenceMask = BinaryOperator::Create(
626 Instruction::And, GlobalMemFence, Arg, "", CI);
627 const auto UniformShiftAmount =
628 clz(spv::MemorySemanticsUniformMemoryMask) -
629 clz(CLK_GLOBAL_MEM_FENCE);
630 const auto MemorySemanticsUniform = BinaryOperator::Create(
631 Instruction::Shl, GlobalMemFenceMask,
632 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
633
634 // And combine the above together, also adding in
635 // MemorySemanticsSequentiallyConsistentMask.
636 auto MemorySemantics =
637 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
638 ConstantSequentiallyConsistent, "", CI);
639 MemorySemantics = BinaryOperator::Create(
640 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
641
642 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
643 // Device Scope, otherwise Workgroup Scope.
644 const auto Cmp =
645 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
646 GlobalMemFenceMask, GlobalMemFence, "", CI);
647 const auto MemoryScope = SelectInst::Create(
648 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
649
650 // Lastly, the Execution Scope is always Workgroup Scope.
651 const auto ExecutionScope = ConstantScopeWorkgroup;
652
653 auto NewCI = CallInst::Create(
654 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
655
656 CI->replaceAllUsesWith(NewCI);
657
658 // Lastly, remember to remove the user.
659 ToRemoves.push_back(CI);
660 }
661 }
662
663 Changed = !ToRemoves.empty();
664
665 // And cleanup the calls we don't use anymore.
666 for (auto V : ToRemoves) {
667 V->eraseFromParent();
668 }
669
670 // And remove the function we don't need either too.
671 F->eraseFromParent();
672 }
673 }
674
675 return Changed;
676}
677
678bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
679 bool Changed = false;
680
681 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
682
Neil Henning39672102017-09-29 14:33:13 +0100683 using Tuple = std::tuple<const char *, unsigned>;
684 const std::map<const char *, Tuple> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400685 {"_Z9mem_fencej", Tuple("__spirv_memory_barrier",
686 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100687 {"_Z14read_mem_fencej",
688 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
689 {"_Z15write_mem_fencej",
690 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400691
692 for (auto Pair : Map) {
693 // If we find a function with the matching name.
694 if (auto F = M.getFunction(Pair.first)) {
695 SmallVector<Instruction *, 4> ToRemoves;
696
697 // Walk the users of the function.
698 for (auto &U : F->uses()) {
699 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
700 auto FType = F->getFunctionType();
701 SmallVector<Type *, 2> Params;
702 for (unsigned i = 0; i < 2; i++) {
703 Params.push_back(FType->getParamType(0));
704 }
705 auto NewFType =
706 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100707 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400708
709 auto Arg = CI->getOperand(0);
710
711 // We need to map the OpenCL constants to the SPIR-V equivalents.
712 const auto LocalMemFence =
713 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
714 const auto GlobalMemFence =
715 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
716 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100717 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400718 const auto ConstantScopeDevice =
719 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
720
721 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
722 const auto LocalMemFenceMask = BinaryOperator::Create(
723 Instruction::And, LocalMemFence, Arg, "", CI);
724 const auto WorkgroupShiftAmount =
725 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
726 clz(CLK_LOCAL_MEM_FENCE);
727 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
728 Instruction::Shl, LocalMemFenceMask,
729 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
730
731 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
732 const auto GlobalMemFenceMask = BinaryOperator::Create(
733 Instruction::And, GlobalMemFence, Arg, "", CI);
734 const auto UniformShiftAmount =
735 clz(spv::MemorySemanticsUniformMemoryMask) -
736 clz(CLK_GLOBAL_MEM_FENCE);
737 const auto MemorySemanticsUniform = BinaryOperator::Create(
738 Instruction::Shl, GlobalMemFenceMask,
739 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
740
741 // And combine the above together, also adding in
742 // MemorySemanticsSequentiallyConsistentMask.
743 auto MemorySemantics =
744 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
745 ConstantMemorySemantics, "", CI);
746 MemorySemantics = BinaryOperator::Create(
747 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
748
749 // Memory Scope is always device.
750 const auto MemoryScope = ConstantScopeDevice;
751
752 auto NewCI =
753 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
754
755 CI->replaceAllUsesWith(NewCI);
756
757 // Lastly, remember to remove the user.
758 ToRemoves.push_back(CI);
759 }
760 }
761
762 Changed = !ToRemoves.empty();
763
764 // And cleanup the calls we don't use anymore.
765 for (auto V : ToRemoves) {
766 V->eraseFromParent();
767 }
768
769 // And remove the function we don't need either too.
770 F->eraseFromParent();
771 }
772 }
773
774 return Changed;
775}
776
777bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
778 bool Changed = false;
779
780 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
781 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
782 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
783 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
784 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
785 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
786 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
787 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
788 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
789 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
790 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
791 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
792 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
793 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
794 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
795 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
796 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
797 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
798 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
799 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
800 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
801 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
802 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
803 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
804 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
805 };
806
807 for (auto Pair : Map) {
808 // If we find a function with the matching name.
809 if (auto F = M.getFunction(Pair.first)) {
810 SmallVector<Instruction *, 4> ToRemoves;
811
812 // Walk the users of the function.
813 for (auto &U : F->uses()) {
814 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
815 // The predicate to use in the CmpInst.
816 auto Predicate = Pair.second.first;
817
818 // The value to return for true.
819 auto TrueValue =
820 ConstantInt::getSigned(CI->getType(), Pair.second.second);
821
822 // The value to return for false.
823 auto FalseValue = Constant::getNullValue(CI->getType());
824
825 auto Arg1 = CI->getOperand(0);
826 auto Arg2 = CI->getOperand(1);
827
828 const auto Cmp =
829 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
830
831 const auto Select =
832 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
833
834 CI->replaceAllUsesWith(Select);
835
836 // Lastly, remember to remove the user.
837 ToRemoves.push_back(CI);
838 }
839 }
840
841 Changed = !ToRemoves.empty();
842
843 // And cleanup the calls we don't use anymore.
844 for (auto V : ToRemoves) {
845 V->eraseFromParent();
846 }
847
848 // And remove the function we don't need either too.
849 F->eraseFromParent();
850 }
851 }
852
853 return Changed;
854}
855
856bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
857 bool Changed = false;
858
Kévin Petitff03aee2019-06-12 19:39:03 +0100859 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
860 {"_Z5isinff", {spv::OpIsInf, 1}},
861 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
862 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
863 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
864 {"_Z5isnanf", {spv::OpIsNan, 1}},
865 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
866 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
867 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400868 };
869
870 for (auto Pair : Map) {
871 // If we find a function with the matching name.
872 if (auto F = M.getFunction(Pair.first)) {
873 SmallVector<Instruction *, 4> ToRemoves;
874
875 // Walk the users of the function.
876 for (auto &U : F->uses()) {
877 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
878 const auto CITy = CI->getType();
879
Kévin Petitff03aee2019-06-12 19:39:03 +0100880 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400881
882 // The value to return for true.
883 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
884
885 // The value to return for false.
886 auto FalseValue = Constant::getNullValue(CITy);
887
888 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
889 M.getContext(),
890 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
891
Kévin Petitff03aee2019-06-12 19:39:03 +0100892 auto NewCI =
893 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
894 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400895
896 const auto Select =
897 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
898
899 CI->replaceAllUsesWith(Select);
900
901 // Lastly, remember to remove the user.
902 ToRemoves.push_back(CI);
903 }
904 }
905
906 Changed = !ToRemoves.empty();
907
908 // And cleanup the calls we don't use anymore.
909 for (auto V : ToRemoves) {
910 V->eraseFromParent();
911 }
912
913 // And remove the function we don't need either too.
914 F->eraseFromParent();
915 }
916 }
917
918 return Changed;
919}
920
921bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
922 bool Changed = false;
923
Kévin Petitff03aee2019-06-12 19:39:03 +0100924 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000925 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100926 {"_Z3allc", spv::OpNop},
927 {"_Z3allDv2_c", spv::OpAll},
928 {"_Z3allDv3_c", spv::OpAll},
929 {"_Z3allDv4_c", spv::OpAll},
930 {"_Z3alls", spv::OpNop},
931 {"_Z3allDv2_s", spv::OpAll},
932 {"_Z3allDv3_s", spv::OpAll},
933 {"_Z3allDv4_s", spv::OpAll},
934 {"_Z3alli", spv::OpNop},
935 {"_Z3allDv2_i", spv::OpAll},
936 {"_Z3allDv3_i", spv::OpAll},
937 {"_Z3allDv4_i", spv::OpAll},
938 {"_Z3alll", spv::OpNop},
939 {"_Z3allDv2_l", spv::OpAll},
940 {"_Z3allDv3_l", spv::OpAll},
941 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000942
943 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100944 {"_Z3anyc", spv::OpNop},
945 {"_Z3anyDv2_c", spv::OpAny},
946 {"_Z3anyDv3_c", spv::OpAny},
947 {"_Z3anyDv4_c", spv::OpAny},
948 {"_Z3anys", spv::OpNop},
949 {"_Z3anyDv2_s", spv::OpAny},
950 {"_Z3anyDv3_s", spv::OpAny},
951 {"_Z3anyDv4_s", spv::OpAny},
952 {"_Z3anyi", spv::OpNop},
953 {"_Z3anyDv2_i", spv::OpAny},
954 {"_Z3anyDv3_i", spv::OpAny},
955 {"_Z3anyDv4_i", spv::OpAny},
956 {"_Z3anyl", spv::OpNop},
957 {"_Z3anyDv2_l", spv::OpAny},
958 {"_Z3anyDv3_l", spv::OpAny},
959 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -0400960 };
961
962 for (auto Pair : Map) {
963 // If we find a function with the matching name.
964 if (auto F = M.getFunction(Pair.first)) {
965 SmallVector<Instruction *, 4> ToRemoves;
966
967 // Walk the users of the function.
968 for (auto &U : F->uses()) {
969 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400970
971 auto Arg = CI->getOperand(0);
972
973 Value *V;
974
Kévin Petitfd27cca2018-10-31 13:00:17 +0000975 // If the argument is a 32-bit int, just use a shift
976 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
977 V = BinaryOperator::Create(Instruction::LShr, Arg,
978 ConstantInt::get(Arg->getType(), 31), "",
979 CI);
980 } else {
David Neto22f144c2017-06-12 14:26:21 -0400981 // The value for zero to compare against.
982 const auto ZeroValue = Constant::getNullValue(Arg->getType());
983
David Neto22f144c2017-06-12 14:26:21 -0400984 // The value to return for true.
985 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
986
987 // The value to return for false.
988 const auto FalseValue = Constant::getNullValue(CI->getType());
989
Kévin Petitfd27cca2018-10-31 13:00:17 +0000990 const auto Cmp = CmpInst::Create(
991 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
992
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400993 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000994
995 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +0100996 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000997
Kévin Petitff03aee2019-06-12 19:39:03 +0100998 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000999
Kévin Petitff03aee2019-06-12 19:39:03 +01001000 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001001
Kévin Petitff03aee2019-06-12 19:39:03 +01001002 const auto NewCI = clspv::InsertSPIRVOp(
1003 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001004 SelectSource = NewCI;
1005
1006 } else {
1007 SelectSource = Cmp;
1008 }
1009
1010 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001011 }
1012
1013 CI->replaceAllUsesWith(V);
1014
1015 // Lastly, remember to remove the user.
1016 ToRemoves.push_back(CI);
1017 }
1018 }
1019
1020 Changed = !ToRemoves.empty();
1021
1022 // And cleanup the calls we don't use anymore.
1023 for (auto V : ToRemoves) {
1024 V->eraseFromParent();
1025 }
1026
1027 // And remove the function we don't need either too.
1028 F->eraseFromParent();
1029 }
1030 }
1031
1032 return Changed;
1033}
1034
Kévin Petitbf0036c2019-03-06 13:57:10 +00001035bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1036 bool Changed = false;
1037
1038 for (auto const &SymVal : M.getValueSymbolTable()) {
1039 // Skip symbols whose name doesn't match
1040 if (!SymVal.getKey().startswith("_Z8upsample")) {
1041 continue;
1042 }
1043 // Is there a function going by that name?
1044 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1045
1046 SmallVector<Instruction *, 4> ToRemoves;
1047
1048 // Walk the users of the function.
1049 for (auto &U : F->uses()) {
1050 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1051
1052 // Get arguments
1053 auto HiValue = CI->getOperand(0);
1054 auto LoValue = CI->getOperand(1);
1055
1056 // Don't touch overloads that aren't in OpenCL C
1057 auto HiType = HiValue->getType();
1058 auto LoType = LoValue->getType();
1059
1060 if (HiType != LoType) {
1061 continue;
1062 }
1063
1064 if (!HiType->isIntOrIntVectorTy()) {
1065 continue;
1066 }
1067
1068 if (HiType->getScalarSizeInBits() * 2 !=
1069 CI->getType()->getScalarSizeInBits()) {
1070 continue;
1071 }
1072
1073 if ((HiType->getScalarSizeInBits() != 8) &&
1074 (HiType->getScalarSizeInBits() != 16) &&
1075 (HiType->getScalarSizeInBits() != 32)) {
1076 continue;
1077 }
1078
1079 if (HiType->isVectorTy()) {
1080 if ((HiType->getVectorNumElements() != 2) &&
1081 (HiType->getVectorNumElements() != 3) &&
1082 (HiType->getVectorNumElements() != 4) &&
1083 (HiType->getVectorNumElements() != 8) &&
1084 (HiType->getVectorNumElements() != 16)) {
1085 continue;
1086 }
1087 }
1088
1089 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001090 auto HiCast =
1091 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1092 auto LoCast =
1093 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001094
1095 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001096 auto ShiftAmount =
1097 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001098 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1099 ShiftAmount, "", CI);
1100
1101 // OR both results
1102 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1103 "", CI);
1104
1105 // Replace call with the expression
1106 CI->replaceAllUsesWith(V);
1107
1108 // Lastly, remember to remove the user.
1109 ToRemoves.push_back(CI);
1110 }
1111 }
1112
1113 Changed = !ToRemoves.empty();
1114
1115 // And cleanup the calls we don't use anymore.
1116 for (auto V : ToRemoves) {
1117 V->eraseFromParent();
1118 }
1119
1120 // And remove the function we don't need either too.
1121 F->eraseFromParent();
1122 }
1123 }
1124
1125 return Changed;
1126}
1127
Kévin Petitd44eef52019-03-08 13:22:14 +00001128bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1129 bool Changed = false;
1130
1131 for (auto const &SymVal : M.getValueSymbolTable()) {
1132 // Skip symbols whose name doesn't match
1133 if (!SymVal.getKey().startswith("_Z6rotate")) {
1134 continue;
1135 }
1136 // Is there a function going by that name?
1137 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1138
1139 SmallVector<Instruction *, 4> ToRemoves;
1140
1141 // Walk the users of the function.
1142 for (auto &U : F->uses()) {
1143 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1144
1145 // Get arguments
1146 auto SrcValue = CI->getOperand(0);
1147 auto RotAmount = CI->getOperand(1);
1148
1149 // Don't touch overloads that aren't in OpenCL C
1150 auto SrcType = SrcValue->getType();
1151 auto RotType = RotAmount->getType();
1152
1153 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1154 continue;
1155 }
1156
1157 if (!SrcType->isIntOrIntVectorTy()) {
1158 continue;
1159 }
1160
1161 if ((SrcType->getScalarSizeInBits() != 8) &&
1162 (SrcType->getScalarSizeInBits() != 16) &&
1163 (SrcType->getScalarSizeInBits() != 32) &&
1164 (SrcType->getScalarSizeInBits() != 64)) {
1165 continue;
1166 }
1167
1168 if (SrcType->isVectorTy()) {
1169 if ((SrcType->getVectorNumElements() != 2) &&
1170 (SrcType->getVectorNumElements() != 3) &&
1171 (SrcType->getVectorNumElements() != 4) &&
1172 (SrcType->getVectorNumElements() != 8) &&
1173 (SrcType->getVectorNumElements() != 16)) {
1174 continue;
1175 }
1176 }
1177
1178 // The approach used is to shift the top bits down, the bottom bits up
1179 // and OR the two shifted values.
1180
1181 // The rotation amount is to be treated modulo the element size.
1182 // Since SPIR-V shift ops don't support this, let's apply the
1183 // modulo ahead of shifting. The element size is always a power of
1184 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001185 auto ModMask =
1186 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001187 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1188 ModMask, "", CI);
1189
1190 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001191 auto ScalarSize =
1192 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001193 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1194 RotAmount, "", CI);
1195
1196 // Now shift the bottom bits up and the top bits down
1197 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1198 RotAmount, "", CI);
1199 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1200 DownAmount, "", CI);
1201
1202 // Finally OR the two shifted values
1203 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1204 HiRotated, "", CI);
1205
1206 // Replace call with the expression
1207 CI->replaceAllUsesWith(V);
1208
1209 // Lastly, remember to remove the user.
1210 ToRemoves.push_back(CI);
1211 }
1212 }
1213
1214 Changed = !ToRemoves.empty();
1215
1216 // And cleanup the calls we don't use anymore.
1217 for (auto V : ToRemoves) {
1218 V->eraseFromParent();
1219 }
1220
1221 // And remove the function we don't need either too.
1222 F->eraseFromParent();
1223 }
1224 }
1225
1226 return Changed;
1227}
1228
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001229bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1230 bool Changed = false;
1231
1232 for (auto const &SymVal : M.getValueSymbolTable()) {
1233
1234 // Skip symbols whose name obviously doesn't match
1235 if (!SymVal.getKey().contains("convert_")) {
1236 continue;
1237 }
1238
1239 // Is there a function going by that name?
1240 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1241
1242 // Get info from the mangled name
1243 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001244 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001245
1246 // All functions of interest are handled by our mangled name parser
1247 if (!parsed) {
1248 continue;
1249 }
1250
1251 // Move on if this isn't a call to convert_
1252 if (!finfo.name.startswith("convert_")) {
1253 continue;
1254 }
1255
1256 // Extract the destination type from the function name
1257 StringRef DstTypeName = finfo.name;
1258 DstTypeName.consume_front("convert_");
1259
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001260 auto DstSignedNess =
1261 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1262 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1263 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1264 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1265 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1266 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1267 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1268 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1269 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1270 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001271
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001272 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001273 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001274
1275 SmallVector<Instruction *, 4> ToRemoves;
1276
1277 // Walk the users of the function.
1278 for (auto &U : F->uses()) {
1279 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1280
1281 // Get arguments
1282 auto SrcValue = CI->getOperand(0);
1283
1284 // Don't touch overloads that aren't in OpenCL C
1285 auto SrcType = SrcValue->getType();
1286 auto DstType = CI->getType();
1287
1288 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1289 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1290 continue;
1291 }
1292
1293 if (SrcType->isVectorTy()) {
1294
1295 if (SrcType->getVectorNumElements() !=
1296 DstType->getVectorNumElements()) {
1297 continue;
1298 }
1299
1300 if ((SrcType->getVectorNumElements() != 2) &&
1301 (SrcType->getVectorNumElements() != 3) &&
1302 (SrcType->getVectorNumElements() != 4) &&
1303 (SrcType->getVectorNumElements() != 8) &&
1304 (SrcType->getVectorNumElements() != 16)) {
1305 continue;
1306 }
1307 }
1308
1309 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1310 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1311
1312 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1313 bool DstIsInt = DstType->isIntOrIntVectorTy();
1314
1315 Value *V;
1316 if (SrcIsFloat && DstIsFloat) {
1317 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1318 } else if (SrcIsFloat && DstIsInt) {
1319 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001320 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1321 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001322 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001323 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1324 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001325 }
1326 } else if (SrcIsInt && DstIsFloat) {
1327 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001328 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1329 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001330 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001331 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1332 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001333 }
1334 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001335 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1336 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001337 } else {
1338 // Not something we're supposed to handle, just move on
1339 continue;
1340 }
1341
1342 // Replace call with the expression
1343 CI->replaceAllUsesWith(V);
1344
1345 // Lastly, remember to remove the user.
1346 ToRemoves.push_back(CI);
1347 }
1348 }
1349
1350 Changed = !ToRemoves.empty();
1351
1352 // And cleanup the calls we don't use anymore.
1353 for (auto V : ToRemoves) {
1354 V->eraseFromParent();
1355 }
1356
1357 // And remove the function we don't need either too.
1358 F->eraseFromParent();
1359 }
1360 }
1361
1362 return Changed;
1363}
1364
Kévin Petit8a560882019-03-21 15:24:34 +00001365bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1366 bool Changed = false;
1367
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001368 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001369
Kévin Petit617a76d2019-04-04 13:54:16 +01001370 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001371 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1372 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1373
1374 // Skip symbols whose name doesn't match
1375 if (!isMad && !isMul) {
1376 continue;
1377 }
1378
1379 // Is there a function going by that name?
1380 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001381 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001382 }
1383 }
1384
Kévin Petit617a76d2019-04-04 13:54:16 +01001385 for (auto F : FnWorklist) {
1386 SmallVector<Instruction *, 4> ToRemoves;
1387
1388 bool isMad = F->getName().startswith("_Z6mad_hi");
1389 // Walk the users of the function.
1390 for (auto &U : F->uses()) {
1391 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1392
1393 // Get arguments
1394 auto AValue = CI->getOperand(0);
1395 auto BValue = CI->getOperand(1);
1396 auto CValue = CI->getOperand(2);
1397
1398 // Don't touch overloads that aren't in OpenCL C
1399 auto AType = AValue->getType();
1400 auto BType = BValue->getType();
1401 auto CType = CValue->getType();
1402
1403 if ((AType != BType) || (CI->getType() != AType) ||
1404 (isMad && (AType != CType))) {
1405 continue;
1406 }
1407
1408 if (!AType->isIntOrIntVectorTy()) {
1409 continue;
1410 }
1411
1412 if ((AType->getScalarSizeInBits() != 8) &&
1413 (AType->getScalarSizeInBits() != 16) &&
1414 (AType->getScalarSizeInBits() != 32) &&
1415 (AType->getScalarSizeInBits() != 64)) {
1416 continue;
1417 }
1418
1419 if (AType->isVectorTy()) {
1420 if ((AType->getVectorNumElements() != 2) &&
1421 (AType->getVectorNumElements() != 3) &&
1422 (AType->getVectorNumElements() != 4) &&
1423 (AType->getVectorNumElements() != 8) &&
1424 (AType->getVectorNumElements() != 16)) {
1425 continue;
1426 }
1427 }
1428
1429 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001430 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001431
1432 // Select the appropriate signed/unsigned SPIR-V op
1433 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001434 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001435 opcode = spv::OpSMulExtended;
1436 } else {
1437 opcode = spv::OpUMulExtended;
1438 }
1439
1440 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001441 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001442 auto ExMulRetType = StructType::create(TwoValueType);
1443
1444 // Call the SPIR-V op
1445 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1446 ExMulRetType, {AValue, BValue});
1447
1448 // Get the high part of the result
1449 unsigned Idxs[] = {1};
1450 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1451
1452 // If we're handling a mad_hi, add the third argument to the result
1453 if (isMad) {
1454 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1455 }
1456
1457 // Replace call with the expression
1458 CI->replaceAllUsesWith(V);
1459
1460 // Lastly, remember to remove the user.
1461 ToRemoves.push_back(CI);
1462 }
1463 }
1464
1465 Changed = !ToRemoves.empty();
1466
1467 // And cleanup the calls we don't use anymore.
1468 for (auto V : ToRemoves) {
1469 V->eraseFromParent();
1470 }
1471
1472 // And remove the function we don't need either too.
1473 F->eraseFromParent();
1474 }
1475
Kévin Petit8a560882019-03-21 15:24:34 +00001476 return Changed;
1477}
1478
Kévin Petitf5b78a22018-10-25 14:32:17 +00001479bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1480 bool Changed = false;
1481
1482 for (auto const &SymVal : M.getValueSymbolTable()) {
1483 // Skip symbols whose name doesn't match
1484 if (!SymVal.getKey().startswith("_Z6select")) {
1485 continue;
1486 }
1487 // Is there a function going by that name?
1488 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1489
1490 SmallVector<Instruction *, 4> ToRemoves;
1491
1492 // Walk the users of the function.
1493 for (auto &U : F->uses()) {
1494 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1495
1496 // Get arguments
1497 auto FalseValue = CI->getOperand(0);
1498 auto TrueValue = CI->getOperand(1);
1499 auto PredicateValue = CI->getOperand(2);
1500
1501 // Don't touch overloads that aren't in OpenCL C
1502 auto FalseType = FalseValue->getType();
1503 auto TrueType = TrueValue->getType();
1504 auto PredicateType = PredicateValue->getType();
1505
1506 if (FalseType != TrueType) {
1507 continue;
1508 }
1509
1510 if (!PredicateType->isIntOrIntVectorTy()) {
1511 continue;
1512 }
1513
1514 if (!FalseType->isIntOrIntVectorTy() &&
1515 !FalseType->getScalarType()->isFloatingPointTy()) {
1516 continue;
1517 }
1518
1519 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1520 continue;
1521 }
1522
1523 if (FalseType->getScalarSizeInBits() !=
1524 PredicateType->getScalarSizeInBits()) {
1525 continue;
1526 }
1527
1528 if (FalseType->isVectorTy()) {
1529 if (FalseType->getVectorNumElements() !=
1530 PredicateType->getVectorNumElements()) {
1531 continue;
1532 }
1533
1534 if ((FalseType->getVectorNumElements() != 2) &&
1535 (FalseType->getVectorNumElements() != 3) &&
1536 (FalseType->getVectorNumElements() != 4) &&
1537 (FalseType->getVectorNumElements() != 8) &&
1538 (FalseType->getVectorNumElements() != 16)) {
1539 continue;
1540 }
1541 }
1542
1543 // Create constant
1544 const auto ZeroValue = Constant::getNullValue(PredicateType);
1545
1546 // Scalar and vector are to be treated differently
1547 CmpInst::Predicate Pred;
1548 if (PredicateType->isVectorTy()) {
1549 Pred = CmpInst::ICMP_SLT;
1550 } else {
1551 Pred = CmpInst::ICMP_NE;
1552 }
1553
1554 // Create comparison instruction
1555 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1556 ZeroValue, "", CI);
1557
1558 // Create select
1559 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1560
1561 // Replace call with the selection
1562 CI->replaceAllUsesWith(V);
1563
1564 // Lastly, remember to remove the user.
1565 ToRemoves.push_back(CI);
1566 }
1567 }
1568
1569 Changed = !ToRemoves.empty();
1570
1571 // And cleanup the calls we don't use anymore.
1572 for (auto V : ToRemoves) {
1573 V->eraseFromParent();
1574 }
1575
1576 // And remove the function we don't need either too.
1577 F->eraseFromParent();
1578 }
1579 }
1580
1581 return Changed;
1582}
1583
Kévin Petite7d0cce2018-10-31 12:38:56 +00001584bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1585 bool Changed = false;
1586
1587 for (auto const &SymVal : M.getValueSymbolTable()) {
1588 // Skip symbols whose name doesn't match
1589 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1590 continue;
1591 }
1592 // Is there a function going by that name?
1593 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1594
1595 SmallVector<Instruction *, 4> ToRemoves;
1596
1597 // Walk the users of the function.
1598 for (auto &U : F->uses()) {
1599 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1600
1601 if (CI->getNumOperands() != 4) {
1602 continue;
1603 }
1604
1605 // Get arguments
1606 auto FalseValue = CI->getOperand(0);
1607 auto TrueValue = CI->getOperand(1);
1608 auto PredicateValue = CI->getOperand(2);
1609
1610 // Don't touch overloads that aren't in OpenCL C
1611 auto FalseType = FalseValue->getType();
1612 auto TrueType = TrueValue->getType();
1613 auto PredicateType = PredicateValue->getType();
1614
1615 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1616 continue;
1617 }
1618
1619 if (TrueType->isVectorTy()) {
1620 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1621 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001622 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001623 }
1624 if ((TrueType->getVectorNumElements() != 2) &&
1625 (TrueType->getVectorNumElements() != 3) &&
1626 (TrueType->getVectorNumElements() != 4) &&
1627 (TrueType->getVectorNumElements() != 8) &&
1628 (TrueType->getVectorNumElements() != 16)) {
1629 continue;
1630 }
1631 }
1632
1633 // Remember the type of the operands
1634 auto OpType = TrueType;
1635
1636 // The actual bit selection will always be done on an integer type,
1637 // declare it here
1638 Type *BitType;
1639
1640 // If the operands are float, then bitcast them to int
1641 if (OpType->getScalarType()->isFloatingPointTy()) {
1642
1643 // First create the new type
1644 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1645 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1646 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001647 BitType =
1648 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001649 }
1650
1651 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001652 PredicateValue =
1653 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1654 FalseValue =
1655 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1656 TrueValue =
1657 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001658
1659 } else {
1660 // The operands have an integer type, use it directly
1661 BitType = OpType;
1662 }
1663
1664 // All the operands are now always integers
1665 // implement as (c & b) | (~c & a)
1666
1667 // Create our negated predicate value
1668 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001669 auto NotPredicateValue = BinaryOperator::Create(
1670 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001671
1672 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001673 auto BitsFalse = BinaryOperator::Create(
1674 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1675 auto BitsTrue = BinaryOperator::Create(
1676 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001677
1678 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1679 BitsTrue, "", CI);
1680
1681 // If we were dealing with a floating point type, we must bitcast
1682 // the result back to that
1683 if (OpType->getScalarType()->isFloatingPointTy()) {
1684 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1685 }
1686
1687 // Replace call with our new code
1688 CI->replaceAllUsesWith(V);
1689
1690 // Lastly, remember to remove the user.
1691 ToRemoves.push_back(CI);
1692 }
1693 }
1694
1695 Changed = !ToRemoves.empty();
1696
1697 // And cleanup the calls we don't use anymore.
1698 for (auto V : ToRemoves) {
1699 V->eraseFromParent();
1700 }
1701
1702 // And remove the function we don't need either too.
1703 F->eraseFromParent();
1704 }
1705 }
1706
1707 return Changed;
1708}
1709
Kévin Petit6b0a9532018-10-30 20:00:39 +00001710bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1711 bool Changed = false;
1712
1713 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001714 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1715 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1716 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1717 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1718 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1719 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001720 };
1721
1722 for (auto Pair : Map) {
1723 // If we find a function with the matching name.
1724 if (auto F = M.getFunction(Pair.first)) {
1725 SmallVector<Instruction *, 4> ToRemoves;
1726
1727 // Walk the users of the function.
1728 for (auto &U : F->uses()) {
1729 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1730
1731 auto ReplacementFn = Pair.second;
1732
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001733 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001734 Value *VectorArg;
1735
1736 // First figure out which function we're dealing with
1737 if (F->getName().startswith("_Z10smoothstep")) {
1738 ArgsToSplat.push_back(CI->getOperand(1));
1739 VectorArg = CI->getOperand(2);
1740 } else {
1741 VectorArg = CI->getOperand(1);
1742 }
1743
1744 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001745 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001746 auto VecType = VectorArg->getType();
1747
1748 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001749 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001750 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001751 auto index =
1752 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1753 NewVectorArg =
1754 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001755 }
1756 SplatArgs.push_back(NewVectorArg);
1757 }
1758
1759 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001760 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1761 const auto NewFType =
1762 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001763
1764 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1765
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001766 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001767 for (auto arg : SplatArgs) {
1768 NewArgs.push_back(arg);
1769 }
1770 NewArgs.push_back(VectorArg);
1771
1772 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1773
1774 CI->replaceAllUsesWith(NewCI);
1775
1776 // Lastly, remember to remove the user.
1777 ToRemoves.push_back(CI);
1778 }
1779 }
1780
1781 Changed = !ToRemoves.empty();
1782
1783 // And cleanup the calls we don't use anymore.
1784 for (auto V : ToRemoves) {
1785 V->eraseFromParent();
1786 }
1787
1788 // And remove the function we don't need either too.
1789 F->eraseFromParent();
1790 }
1791 }
1792
1793 return Changed;
1794}
1795
David Neto22f144c2017-06-12 14:26:21 -04001796bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1797 bool Changed = false;
1798
1799 const std::map<const char *, Instruction::BinaryOps> Map = {
1800 {"_Z7signbitf", Instruction::LShr},
1801 {"_Z7signbitDv2_f", Instruction::AShr},
1802 {"_Z7signbitDv3_f", Instruction::AShr},
1803 {"_Z7signbitDv4_f", Instruction::AShr},
1804 };
1805
1806 for (auto Pair : Map) {
1807 // If we find a function with the matching name.
1808 if (auto F = M.getFunction(Pair.first)) {
1809 SmallVector<Instruction *, 4> ToRemoves;
1810
1811 // Walk the users of the function.
1812 for (auto &U : F->uses()) {
1813 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1814 auto Arg = CI->getOperand(0);
1815
1816 auto Bitcast =
1817 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1818
1819 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1820 ConstantInt::get(CI->getType(), 31),
1821 "", CI);
1822
1823 CI->replaceAllUsesWith(Shr);
1824
1825 // Lastly, remember to remove the user.
1826 ToRemoves.push_back(CI);
1827 }
1828 }
1829
1830 Changed = !ToRemoves.empty();
1831
1832 // And cleanup the calls we don't use anymore.
1833 for (auto V : ToRemoves) {
1834 V->eraseFromParent();
1835 }
1836
1837 // And remove the function we don't need either too.
1838 F->eraseFromParent();
1839 }
1840 }
1841
1842 return Changed;
1843}
1844
1845bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1846 bool Changed = false;
1847
1848 const std::map<const char *,
1849 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1850 Map = {
1851 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1852 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1853 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1854 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1855 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1856 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1857 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1858 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1859 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1860 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1861 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1862 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1863 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1864 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1865 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1866 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1867 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1868 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1869 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1870 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1871 };
1872
1873 for (auto Pair : Map) {
1874 // If we find a function with the matching name.
1875 if (auto F = M.getFunction(Pair.first)) {
1876 SmallVector<Instruction *, 4> ToRemoves;
1877
1878 // Walk the users of the function.
1879 for (auto &U : F->uses()) {
1880 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1881 // The multiply instruction to use.
1882 auto MulInst = Pair.second.first;
1883
1884 // The add instruction to use.
1885 auto AddInst = Pair.second.second;
1886
1887 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1888
1889 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1890 CI->getArgOperand(1), "", CI);
1891
1892 if (Instruction::BinaryOpsEnd != AddInst) {
1893 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1894 CI);
1895 }
1896
1897 CI->replaceAllUsesWith(I);
1898
1899 // Lastly, remember to remove the user.
1900 ToRemoves.push_back(CI);
1901 }
1902 }
1903
1904 Changed = !ToRemoves.empty();
1905
1906 // And cleanup the calls we don't use anymore.
1907 for (auto V : ToRemoves) {
1908 V->eraseFromParent();
1909 }
1910
1911 // And remove the function we don't need either too.
1912 F->eraseFromParent();
1913 }
1914 }
1915
1916 return Changed;
1917}
1918
Derek Chowcfd368b2017-10-19 20:58:45 -07001919bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1920 bool Changed = false;
1921
alan-bakerf795f392019-06-11 18:24:34 -04001922 for (auto const &SymVal : M.getValueSymbolTable()) {
1923 if (!SymVal.getKey().contains("vstore"))
1924 continue;
1925 if (SymVal.getKey().contains("vstore_"))
1926 continue;
1927 if (SymVal.getKey().contains("vstorea"))
1928 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001929
alan-bakerf795f392019-06-11 18:24:34 -04001930 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001931 SmallVector<Instruction *, 4> ToRemoves;
1932
alan-bakerf795f392019-06-11 18:24:34 -04001933 auto fname = F->getName();
1934 if (!fname.consume_front("_Z"))
1935 continue;
1936 size_t name_len;
1937 if (fname.consumeInteger(10, name_len))
1938 continue;
1939 std::string name = fname.take_front(name_len);
1940
1941 bool ok = StringSwitch<bool>(name)
1942 .Case("vstore2", true)
1943 .Case("vstore3", true)
1944 .Case("vstore4", true)
1945 .Case("vstore8", true)
1946 .Case("vstore16", true)
1947 .Default(false);
1948 if (!ok)
1949 continue;
1950
Derek Chowcfd368b2017-10-19 20:58:45 -07001951 for (auto &U : F->uses()) {
1952 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04001953 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001954
alan-bakerf795f392019-06-11 18:24:34 -04001955 auto data_type = data->getType();
1956 if (!data_type->isVectorTy())
1957 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001958
alan-bakerf795f392019-06-11 18:24:34 -04001959 auto elems = data_type->getVectorNumElements();
1960 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
1961 elems != 16)
1962 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001963
alan-bakerf795f392019-06-11 18:24:34 -04001964 auto offset = CI->getOperand(1);
1965 auto ptr = CI->getOperand(2);
1966 auto ptr_type = ptr->getType();
1967 auto pointee_type = ptr_type->getPointerElementType();
1968 if (pointee_type != data_type->getVectorElementType())
1969 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001970
alan-bakerf795f392019-06-11 18:24:34 -04001971 // Avoid pointer casts. Instead generate the correct number of stores
1972 // and rely on drivers to coalesce appropriately.
1973 IRBuilder<> builder(CI);
1974 auto elems_const = builder.getInt32(elems);
1975 auto adjust = builder.CreateMul(offset, elems_const);
1976 for (auto i = 0; i < elems; ++i) {
1977 auto idx = builder.getInt32(i);
1978 auto add = builder.CreateAdd(adjust, idx);
1979 auto gep = builder.CreateGEP(ptr, add);
1980 auto extract = builder.CreateExtractElement(data, i);
1981 auto store = builder.CreateStore(extract, gep);
1982 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001983
Derek Chowcfd368b2017-10-19 20:58:45 -07001984 ToRemoves.push_back(CI);
1985 }
1986 }
1987
1988 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07001989 for (auto V : ToRemoves) {
1990 V->eraseFromParent();
1991 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001992 F->eraseFromParent();
1993 }
1994 }
1995
1996 return Changed;
1997}
1998
1999bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2000 bool Changed = false;
2001
alan-bakerf795f392019-06-11 18:24:34 -04002002 for (auto const &SymVal : M.getValueSymbolTable()) {
2003 if (!SymVal.getKey().contains("vload"))
2004 continue;
2005 if (SymVal.getKey().contains("vload_"))
2006 continue;
2007 if (SymVal.getKey().contains("vloada"))
2008 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002009
alan-bakerf795f392019-06-11 18:24:34 -04002010 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002011 SmallVector<Instruction *, 4> ToRemoves;
2012
alan-bakerf795f392019-06-11 18:24:34 -04002013 auto fname = F->getName();
2014 if (!fname.consume_front("_Z"))
2015 continue;
2016 size_t name_len;
2017 if (fname.consumeInteger(10, name_len))
2018 continue;
2019 std::string name = fname.take_front(name_len);
2020
2021 bool ok = StringSwitch<bool>(name)
2022 .Case("vload2", true)
2023 .Case("vload3", true)
2024 .Case("vload4", true)
2025 .Case("vload8", true)
2026 .Case("vload16", true)
2027 .Default(false);
2028 if (!ok)
2029 continue;
2030
Derek Chowcfd368b2017-10-19 20:58:45 -07002031 for (auto &U : F->uses()) {
2032 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002033 auto ret_type = F->getReturnType();
2034 if (!ret_type->isVectorTy())
2035 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002036
alan-bakerf795f392019-06-11 18:24:34 -04002037 auto elems = ret_type->getVectorNumElements();
2038 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2039 elems != 16)
2040 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002041
alan-bakerf795f392019-06-11 18:24:34 -04002042 auto offset = CI->getOperand(0);
2043 auto ptr = CI->getOperand(1);
2044 auto ptr_type = ptr->getType();
2045 auto pointee_type = ptr_type->getPointerElementType();
2046 if (pointee_type != ret_type->getVectorElementType())
2047 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002048
alan-bakerf795f392019-06-11 18:24:34 -04002049 // Avoid pointer casts. Instead generate the correct number of loads
2050 // and rely on drivers to coalesce appropriately.
2051 IRBuilder<> builder(CI);
2052 auto elems_const = builder.getInt32(elems);
2053 Value *insert = UndefValue::get(ret_type);
2054 auto adjust = builder.CreateMul(offset, elems_const);
2055 for (auto i = 0; i < elems; ++i) {
2056 auto idx = builder.getInt32(i);
2057 auto add = builder.CreateAdd(adjust, idx);
2058 auto gep = builder.CreateGEP(ptr, add);
2059 auto load = builder.CreateLoad(gep);
2060 insert = builder.CreateInsertElement(insert, load, i);
2061 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002062
alan-bakerf795f392019-06-11 18:24:34 -04002063 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002064 ToRemoves.push_back(CI);
2065 }
2066 }
2067
2068 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002069 for (auto V : ToRemoves) {
2070 V->eraseFromParent();
2071 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002072 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002073 }
2074 }
2075
2076 return Changed;
2077}
2078
David Neto22f144c2017-06-12 14:26:21 -04002079bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2080 bool Changed = false;
2081
2082 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2083 "_Z10vload_halfjPU3AS2KDh"};
2084
2085 for (auto Name : Map) {
2086 // If we find a function with the matching name.
2087 if (auto F = M.getFunction(Name)) {
2088 SmallVector<Instruction *, 4> ToRemoves;
2089
2090 // Walk the users of the function.
2091 for (auto &U : F->uses()) {
2092 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2093 // The index argument from vload_half.
2094 auto Arg0 = CI->getOperand(0);
2095
2096 // The pointer argument from vload_half.
2097 auto Arg1 = CI->getOperand(1);
2098
David Neto22f144c2017-06-12 14:26:21 -04002099 auto IntTy = Type::getInt32Ty(M.getContext());
2100 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002101 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2102
David Neto22f144c2017-06-12 14:26:21 -04002103 // Our intrinsic to unpack a float2 from an int.
2104 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2105
2106 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2107
David Neto482550a2018-03-24 05:21:07 -07002108 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002109 auto ShortTy = Type::getInt16Ty(M.getContext());
2110 auto ShortPointerTy = PointerType::get(
2111 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002112
David Netoac825b82017-05-30 12:49:01 -04002113 // Cast the half* pointer to short*.
2114 auto Cast =
2115 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002116
David Netoac825b82017-05-30 12:49:01 -04002117 // Index into the correct address of the casted pointer.
2118 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2119
2120 // Load from the short* we casted to.
2121 auto Load = new LoadInst(Index, "", CI);
2122
2123 // ZExt the short -> int.
2124 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2125
2126 // Get our float2.
2127 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2128
2129 // Extract out the bottom element which is our float result.
2130 auto Extract = ExtractElementInst::Create(
2131 Call, ConstantInt::get(IntTy, 0), "", CI);
2132
2133 CI->replaceAllUsesWith(Extract);
2134 } else {
2135 // Assume the pointer argument points to storage aligned to 32bits
2136 // or more.
2137 // TODO(dneto): Do more analysis to make sure this is true?
2138 //
2139 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2140 // with:
2141 //
2142 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2143 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2144 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2145 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2146 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2147 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2148 // x float> %converted, %index_is_odd32
2149
2150 auto IntPointerTy = PointerType::get(
2151 IntTy, Arg1->getType()->getPointerAddressSpace());
2152
David Neto973e6a82017-05-30 13:48:18 -04002153 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002154 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002155 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002156 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2157
2158 auto One = ConstantInt::get(IntTy, 1);
2159 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2160 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2161
2162 // Index into the correct address of the casted pointer.
2163 auto Ptr =
2164 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2165
2166 // Load from the int* we casted to.
2167 auto Load = new LoadInst(Ptr, "", CI);
2168
2169 // Get our float2.
2170 auto Call = CallInst::Create(NewF, Load, "", CI);
2171
2172 // Extract out the float result, where the element number is
2173 // determined by whether the original index was even or odd.
2174 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2175
2176 CI->replaceAllUsesWith(Extract);
2177 }
David Neto22f144c2017-06-12 14:26:21 -04002178
2179 // Lastly, remember to remove the user.
2180 ToRemoves.push_back(CI);
2181 }
2182 }
2183
2184 Changed = !ToRemoves.empty();
2185
2186 // And cleanup the calls we don't use anymore.
2187 for (auto V : ToRemoves) {
2188 V->eraseFromParent();
2189 }
2190
2191 // And remove the function we don't need either too.
2192 F->eraseFromParent();
2193 }
2194 }
2195
2196 return Changed;
2197}
2198
2199bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002200
Kévin Petite8edce32019-04-10 14:23:32 +01002201 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002202 "_Z11vload_half2jPU3AS1KDh",
2203 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2204 "_Z11vload_half2jPU3AS2KDh",
2205 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2206 };
David Neto22f144c2017-06-12 14:26:21 -04002207
Kévin Petite8edce32019-04-10 14:23:32 +01002208 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2209 // The index argument from vload_half.
2210 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002211
Kévin Petite8edce32019-04-10 14:23:32 +01002212 // The pointer argument from vload_half.
2213 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002214
Kévin Petite8edce32019-04-10 14:23:32 +01002215 auto IntTy = Type::getInt32Ty(M.getContext());
2216 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002217 auto NewPointerTy =
2218 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002219 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002220
Kévin Petite8edce32019-04-10 14:23:32 +01002221 // Cast the half* pointer to int*.
2222 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002223
Kévin Petite8edce32019-04-10 14:23:32 +01002224 // Index into the correct address of the casted pointer.
2225 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002226
Kévin Petite8edce32019-04-10 14:23:32 +01002227 // Load from the int* we casted to.
2228 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002229
Kévin Petite8edce32019-04-10 14:23:32 +01002230 // Our intrinsic to unpack a float2 from an int.
2231 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002232
Kévin Petite8edce32019-04-10 14:23:32 +01002233 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002234
Kévin Petite8edce32019-04-10 14:23:32 +01002235 // Get our float2.
2236 return CallInst::Create(NewF, Load, "", CI);
2237 });
David Neto22f144c2017-06-12 14:26:21 -04002238}
2239
2240bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002241
Kévin Petite8edce32019-04-10 14:23:32 +01002242 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002243 "_Z11vload_half4jPU3AS1KDh",
2244 "_Z12vloada_half4jPU3AS1KDh",
2245 "_Z11vload_half4jPU3AS2KDh",
2246 "_Z12vloada_half4jPU3AS2KDh",
2247 };
David Neto22f144c2017-06-12 14:26:21 -04002248
Kévin Petite8edce32019-04-10 14:23:32 +01002249 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2250 // The index argument from vload_half.
2251 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002252
Kévin Petite8edce32019-04-10 14:23:32 +01002253 // The pointer argument from vload_half.
2254 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002255
Kévin Petite8edce32019-04-10 14:23:32 +01002256 auto IntTy = Type::getInt32Ty(M.getContext());
2257 auto Int2Ty = VectorType::get(IntTy, 2);
2258 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002259 auto NewPointerTy =
2260 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002261 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002262
Kévin Petite8edce32019-04-10 14:23:32 +01002263 // Cast the half* pointer to int2*.
2264 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002265
Kévin Petite8edce32019-04-10 14:23:32 +01002266 // Index into the correct address of the casted pointer.
2267 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002268
Kévin Petite8edce32019-04-10 14:23:32 +01002269 // Load from the int2* we casted to.
2270 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002271
Kévin Petite8edce32019-04-10 14:23:32 +01002272 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002273 auto X =
2274 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2275 auto Y =
2276 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002277
Kévin Petite8edce32019-04-10 14:23:32 +01002278 // Our intrinsic to unpack a float2 from an int.
2279 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002280
Kévin Petite8edce32019-04-10 14:23:32 +01002281 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002282
Kévin Petite8edce32019-04-10 14:23:32 +01002283 // Get the lower (x & y) components of our final float4.
2284 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002285
Kévin Petite8edce32019-04-10 14:23:32 +01002286 // Get the higher (z & w) components of our final float4.
2287 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 Constant *ShuffleMask[4] = {
2290 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2291 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002292
Kévin Petite8edce32019-04-10 14:23:32 +01002293 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002294 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2295 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002296 });
David Neto22f144c2017-06-12 14:26:21 -04002297}
2298
David Neto6ad93232018-06-07 15:42:58 -07002299bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002300
2301 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2302 //
2303 // %u = load i32 %ptr
2304 // %fxy = call <2 x float> Unpack2xHalf(u)
2305 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002306 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002307 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2308 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2309 "_Z20__clspv_vloada_half2jPKj", // private
2310 };
2311
Kévin Petite8edce32019-04-10 14:23:32 +01002312 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2313 auto Index = CI->getOperand(0);
2314 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002315
Kévin Petite8edce32019-04-10 14:23:32 +01002316 auto IntTy = Type::getInt32Ty(M.getContext());
2317 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2318 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002319
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002320 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002321 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002322
Kévin Petite8edce32019-04-10 14:23:32 +01002323 // Our intrinsic to unpack a float2 from an int.
2324 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 // Get our final float2.
2329 return CallInst::Create(NewF, Load, "", CI);
2330 });
David Neto6ad93232018-06-07 15:42:58 -07002331}
2332
2333bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002334
2335 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2336 //
2337 // %u2 = load <2 x i32> %ptr
2338 // %u2xy = extractelement %u2, 0
2339 // %u2zw = extractelement %u2, 1
2340 // %fxy = call <2 x float> Unpack2xHalf(uint)
2341 // %fzw = call <2 x float> Unpack2xHalf(uint)
2342 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002343 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002344 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2345 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2346 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2347 };
2348
Kévin Petite8edce32019-04-10 14:23:32 +01002349 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2350 auto Index = CI->getOperand(0);
2351 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002352
Kévin Petite8edce32019-04-10 14:23:32 +01002353 auto IntTy = Type::getInt32Ty(M.getContext());
2354 auto Int2Ty = VectorType::get(IntTy, 2);
2355 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2356 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002357
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002358 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002359 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002360
Kévin Petite8edce32019-04-10 14:23:32 +01002361 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002362 auto X =
2363 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2364 auto Y =
2365 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002366
Kévin Petite8edce32019-04-10 14:23:32 +01002367 // Our intrinsic to unpack a float2 from an int.
2368 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002369
Kévin Petite8edce32019-04-10 14:23:32 +01002370 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002371
Kévin Petite8edce32019-04-10 14:23:32 +01002372 // Get the lower (x & y) components of our final float4.
2373 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 // Get the higher (z & w) components of our final float4.
2376 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002377
Kévin Petite8edce32019-04-10 14:23:32 +01002378 Constant *ShuffleMask[4] = {
2379 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2380 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002381
Kévin Petite8edce32019-04-10 14:23:32 +01002382 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002383 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2384 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002385 });
David Neto6ad93232018-06-07 15:42:58 -07002386}
2387
David Neto22f144c2017-06-12 14:26:21 -04002388bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002389
Kévin Petite8edce32019-04-10 14:23:32 +01002390 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2391 "_Z15vstore_half_rtefjPU3AS1Dh",
2392 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002393
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002394 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002395 // The value to store.
2396 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002397
Kévin Petite8edce32019-04-10 14:23:32 +01002398 // The index argument from vstore_half.
2399 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002400
Kévin Petite8edce32019-04-10 14:23:32 +01002401 // The pointer argument from vstore_half.
2402 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002403
Kévin Petite8edce32019-04-10 14:23:32 +01002404 auto IntTy = Type::getInt32Ty(M.getContext());
2405 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2406 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2407 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002408
Kévin Petite8edce32019-04-10 14:23:32 +01002409 // Our intrinsic to pack a float2 to an int.
2410 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002411
Kévin Petite8edce32019-04-10 14:23:32 +01002412 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002413
Kévin Petite8edce32019-04-10 14:23:32 +01002414 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002415 auto TempVec = InsertElementInst::Create(
2416 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002417
Kévin Petite8edce32019-04-10 14:23:32 +01002418 // Pack the float2 -> half2 (in an int).
2419 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002420
Kévin Petite8edce32019-04-10 14:23:32 +01002421 Value *Ret;
2422 if (clspv::Option::F16BitStorage()) {
2423 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002424 auto ShortPointerTy =
2425 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002426
Kévin Petite8edce32019-04-10 14:23:32 +01002427 // Truncate our i32 to an i16.
2428 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002429
Kévin Petite8edce32019-04-10 14:23:32 +01002430 // Cast the half* pointer to short*.
2431 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002432
Kévin Petite8edce32019-04-10 14:23:32 +01002433 // Index into the correct address of the casted pointer.
2434 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002435
Kévin Petite8edce32019-04-10 14:23:32 +01002436 // Store to the int* we casted to.
2437 Ret = new StoreInst(Trunc, Index, CI);
2438 } else {
2439 // We can only write to 32-bit aligned words.
2440 //
2441 // Assuming base is aligned to 32-bits, replace the equivalent of
2442 // vstore_half(value, index, base)
2443 // with:
2444 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2445 // uint32_t write_to_upper_half = index & 1u;
2446 // uint32_t shift = write_to_upper_half << 4;
2447 //
2448 // // Pack the float value as a half number in bottom 16 bits
2449 // // of an i32.
2450 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2451 //
2452 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2453 // ^ ((packed & 0xffff) << shift)
2454 // // We only need relaxed consistency, but OpenCL 1.2 only has
2455 // // sequentially consistent atomics.
2456 // // TODO(dneto): Use relaxed consistency.
2457 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002458 auto IntPointerTy =
2459 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002460
Kévin Petite8edce32019-04-10 14:23:32 +01002461 auto Four = ConstantInt::get(IntTy, 4);
2462 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002463
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002464 auto IndexIsOdd =
2465 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002466 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002467 auto IndexIntoI32 =
2468 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2469 auto BaseI32Ptr =
2470 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2471 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2472 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002473 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2474 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002475 auto MaskBitsToWrite =
2476 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2477 auto MaskedCurrent = BinaryOperator::CreateAnd(
2478 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002479
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002480 auto XLowerBits =
2481 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2482 auto NewBitsToWrite =
2483 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2484 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2485 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002486
Kévin Petite8edce32019-04-10 14:23:32 +01002487 // Generate the call to atomic_xor.
2488 SmallVector<Type *, 5> ParamTypes;
2489 // The pointer type.
2490 ParamTypes.push_back(IntPointerTy);
2491 // The Types for memory scope, semantics, and value.
2492 ParamTypes.push_back(IntTy);
2493 ParamTypes.push_back(IntTy);
2494 ParamTypes.push_back(IntTy);
2495 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2496 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002497
Kévin Petite8edce32019-04-10 14:23:32 +01002498 const auto ConstantScopeDevice =
2499 ConstantInt::get(IntTy, spv::ScopeDevice);
2500 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2501 // (SPIR-V Workgroup).
2502 const auto AddrSpaceSemanticsBits =
2503 IntPointerTy->getPointerAddressSpace() == 1
2504 ? spv::MemorySemanticsUniformMemoryMask
2505 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002506
Kévin Petite8edce32019-04-10 14:23:32 +01002507 // We're using relaxed consistency here.
2508 const auto ConstantMemorySemantics =
2509 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2510 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002511
Kévin Petite8edce32019-04-10 14:23:32 +01002512 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2513 ConstantMemorySemantics, ValueToXor};
2514 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2515 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002516 }
David Neto22f144c2017-06-12 14:26:21 -04002517
Kévin Petite8edce32019-04-10 14:23:32 +01002518 return Ret;
2519 });
David Neto22f144c2017-06-12 14:26:21 -04002520}
2521
2522bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002523
Kévin Petite8edce32019-04-10 14:23:32 +01002524 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002525 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2526 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2527 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2528 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2529 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2530 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2531 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2532 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2533 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2534 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2535 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2536 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2537 };
David Neto22f144c2017-06-12 14:26:21 -04002538
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002539 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // The value to store.
2541 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 // The index argument from vstore_half.
2544 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002545
Kévin Petite8edce32019-04-10 14:23:32 +01002546 // The pointer argument from vstore_half.
2547 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002548
Kévin Petite8edce32019-04-10 14:23:32 +01002549 auto IntTy = Type::getInt32Ty(M.getContext());
2550 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002551 auto NewPointerTy =
2552 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002553 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002554
Kévin Petite8edce32019-04-10 14:23:32 +01002555 // Our intrinsic to pack a float2 to an int.
2556 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002557
Kévin Petite8edce32019-04-10 14:23:32 +01002558 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002559
Kévin Petite8edce32019-04-10 14:23:32 +01002560 // Turn the packed x & y into the final packing.
2561 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002562
Kévin Petite8edce32019-04-10 14:23:32 +01002563 // Cast the half* pointer to int*.
2564 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002565
Kévin Petite8edce32019-04-10 14:23:32 +01002566 // Index into the correct address of the casted pointer.
2567 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002568
Kévin Petite8edce32019-04-10 14:23:32 +01002569 // Store to the int* we casted to.
2570 return new StoreInst(X, Index, CI);
2571 });
David Neto22f144c2017-06-12 14:26:21 -04002572}
2573
2574bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002575
Kévin Petite8edce32019-04-10 14:23:32 +01002576 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002577 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2578 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2579 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2580 "_Z13vstorea_half4Dv4_fjPDh", // private
2581 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2582 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2583 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2584 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2585 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2586 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2587 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2588 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2589 };
David Neto22f144c2017-06-12 14:26:21 -04002590
Kévin Petite8edce32019-04-10 14:23:32 +01002591 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2592 // The value to store.
2593 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002594
Kévin Petite8edce32019-04-10 14:23:32 +01002595 // The index argument from vstore_half.
2596 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002597
Kévin Petite8edce32019-04-10 14:23:32 +01002598 // The pointer argument from vstore_half.
2599 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002600
Kévin Petite8edce32019-04-10 14:23:32 +01002601 auto IntTy = Type::getInt32Ty(M.getContext());
2602 auto Int2Ty = VectorType::get(IntTy, 2);
2603 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002604 auto NewPointerTy =
2605 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002606 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002607
Kévin Petite8edce32019-04-10 14:23:32 +01002608 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2609 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002610
Kévin Petite8edce32019-04-10 14:23:32 +01002611 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002612 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2613 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002614
Kévin Petite8edce32019-04-10 14:23:32 +01002615 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2616 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002617
Kévin Petite8edce32019-04-10 14:23:32 +01002618 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002619 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2620 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002621
Kévin Petite8edce32019-04-10 14:23:32 +01002622 // Our intrinsic to pack a float2 to an int.
2623 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002624
Kévin Petite8edce32019-04-10 14:23:32 +01002625 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002626
Kévin Petite8edce32019-04-10 14:23:32 +01002627 // Turn the packed x & y into the final component of our int2.
2628 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002629
Kévin Petite8edce32019-04-10 14:23:32 +01002630 // Turn the packed z & w into the final component of our int2.
2631 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002632
Kévin Petite8edce32019-04-10 14:23:32 +01002633 auto Combine = InsertElementInst::Create(
2634 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002635 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2636 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002637
Kévin Petite8edce32019-04-10 14:23:32 +01002638 // Cast the half* pointer to int2*.
2639 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002640
Kévin Petite8edce32019-04-10 14:23:32 +01002641 // Index into the correct address of the casted pointer.
2642 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002643
Kévin Petite8edce32019-04-10 14:23:32 +01002644 // Store to the int2* we casted to.
2645 return new StoreInst(Combine, Index, CI);
2646 });
David Neto22f144c2017-06-12 14:26:21 -04002647}
2648
2649bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2650 bool Changed = false;
2651
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002652 const std::map<const char *, const char *> Map = {
2653 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2654 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2655 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2656 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002657
2658 for (auto Pair : Map) {
2659 // If we find a function with the matching name.
2660 if (auto F = M.getFunction(Pair.first)) {
2661 SmallVector<Instruction *, 4> ToRemoves;
2662
2663 // Walk the users of the function.
2664 for (auto &U : F->uses()) {
2665 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2666 // The image.
2667 auto Arg0 = CI->getOperand(0);
2668
2669 // The sampler.
2670 auto Arg1 = CI->getOperand(1);
2671
2672 // The coordinate (integer type that we can't handle).
2673 auto Arg2 = CI->getOperand(2);
2674
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002675 auto FloatVecTy =
2676 VectorType::get(Type::getFloatTy(M.getContext()),
2677 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002678
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002679 auto NewFType = FunctionType::get(
2680 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2681 false);
David Neto22f144c2017-06-12 14:26:21 -04002682
2683 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2684
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002685 auto Cast =
2686 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002687
2688 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2689
2690 CI->replaceAllUsesWith(NewCI);
2691
2692 // Lastly, remember to remove the user.
2693 ToRemoves.push_back(CI);
2694 }
2695 }
2696
2697 Changed = !ToRemoves.empty();
2698
2699 // And cleanup the calls we don't use anymore.
2700 for (auto V : ToRemoves) {
2701 V->eraseFromParent();
2702 }
2703
2704 // And remove the function we don't need either too.
2705 F->eraseFromParent();
2706 }
2707 }
2708
2709 return Changed;
2710}
2711
2712bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2713 bool Changed = false;
2714
2715 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002716 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002717 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002718 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002719 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002720 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002721 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002722 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002723 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002724 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002725 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002726 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002727 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002728 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002729 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002730 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002731 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002732 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002733 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002734 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002735 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002736 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002737 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2738 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2739 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002740
2741 for (auto Pair : Map) {
2742 // If we find a function with the matching name.
2743 if (auto F = M.getFunction(Pair.first)) {
2744 SmallVector<Instruction *, 4> ToRemoves;
2745
2746 // Walk the users of the function.
2747 for (auto &U : F->uses()) {
2748 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2749 auto FType = F->getFunctionType();
2750 SmallVector<Type *, 5> ParamTypes;
2751
2752 // The pointer type.
2753 ParamTypes.push_back(FType->getParamType(0));
2754
2755 auto IntTy = Type::getInt32Ty(M.getContext());
2756
2757 // The memory scope type.
2758 ParamTypes.push_back(IntTy);
2759
2760 // The memory semantics type.
2761 ParamTypes.push_back(IntTy);
2762
2763 if (2 < CI->getNumArgOperands()) {
2764 // The unequal memory semantics type.
2765 ParamTypes.push_back(IntTy);
2766
2767 // The value type.
2768 ParamTypes.push_back(FType->getParamType(2));
2769
2770 // The comparator type.
2771 ParamTypes.push_back(FType->getParamType(1));
2772 } else if (1 < CI->getNumArgOperands()) {
2773 // The value type.
2774 ParamTypes.push_back(FType->getParamType(1));
2775 }
2776
2777 auto NewFType =
2778 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2779 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2780
2781 // We need to map the OpenCL constants to the SPIR-V equivalents.
2782 const auto ConstantScopeDevice =
2783 ConstantInt::get(IntTy, spv::ScopeDevice);
2784 const auto ConstantMemorySemantics = ConstantInt::get(
2785 IntTy, spv::MemorySemanticsUniformMemoryMask |
2786 spv::MemorySemanticsSequentiallyConsistentMask);
2787
2788 SmallVector<Value *, 5> Params;
2789
2790 // The pointer.
2791 Params.push_back(CI->getArgOperand(0));
2792
2793 // The memory scope.
2794 Params.push_back(ConstantScopeDevice);
2795
2796 // The memory semantics.
2797 Params.push_back(ConstantMemorySemantics);
2798
2799 if (2 < CI->getNumArgOperands()) {
2800 // The unequal memory semantics.
2801 Params.push_back(ConstantMemorySemantics);
2802
2803 // The value.
2804 Params.push_back(CI->getArgOperand(2));
2805
2806 // The comparator.
2807 Params.push_back(CI->getArgOperand(1));
2808 } else if (1 < CI->getNumArgOperands()) {
2809 // The value.
2810 Params.push_back(CI->getArgOperand(1));
2811 }
2812
2813 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2814
2815 CI->replaceAllUsesWith(NewCI);
2816
2817 // Lastly, remember to remove the user.
2818 ToRemoves.push_back(CI);
2819 }
2820 }
2821
2822 Changed = !ToRemoves.empty();
2823
2824 // And cleanup the calls we don't use anymore.
2825 for (auto V : ToRemoves) {
2826 V->eraseFromParent();
2827 }
2828
2829 // And remove the function we don't need either too.
2830 F->eraseFromParent();
2831 }
2832 }
2833
Neil Henning39672102017-09-29 14:33:13 +01002834 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002835 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002836 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002837 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002838 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002839 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002840 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002841 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002842 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002843 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002844 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002845 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002846 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002847 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002848 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002849 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002850 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002851 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002852 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002853 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002854 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002855 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002856 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002857 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002858 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002859 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002860 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002861 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002862 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002863 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002864 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002865 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002866 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002867 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002868 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002869 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002870 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002871 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002872 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002873 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002874 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002875 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002876 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002877 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002878 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002879 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002880 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002881 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002882 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002883 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002884 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002885 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002886 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002887 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002888 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002889 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002890 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002891 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002892 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002893 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002894 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002895 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002896 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2897 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2898 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002899
2900 for (auto Pair : Map2) {
2901 // If we find a function with the matching name.
2902 if (auto F = M.getFunction(Pair.first)) {
2903 SmallVector<Instruction *, 4> ToRemoves;
2904
2905 // Walk the users of the function.
2906 for (auto &U : F->uses()) {
2907 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2908 auto AtomicOp = new AtomicRMWInst(
2909 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2910 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2911
2912 CI->replaceAllUsesWith(AtomicOp);
2913
2914 // Lastly, remember to remove the user.
2915 ToRemoves.push_back(CI);
2916 }
2917 }
2918
2919 Changed = !ToRemoves.empty();
2920
2921 // And cleanup the calls we don't use anymore.
2922 for (auto V : ToRemoves) {
2923 V->eraseFromParent();
2924 }
2925
2926 // And remove the function we don't need either too.
2927 F->eraseFromParent();
2928 }
2929 }
2930
David Neto22f144c2017-06-12 14:26:21 -04002931 return Changed;
2932}
2933
2934bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002935
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002936 std::vector<const char *> Names = {
2937 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002938 };
2939
2940 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002941 auto IntTy = Type::getInt32Ty(M.getContext());
2942 auto FloatTy = Type::getFloatTy(M.getContext());
2943
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002944 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2945 ConstantInt::get(IntTy, 1),
2946 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002947
2948 Constant *UpShuffleMask[4] = {
2949 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2950 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2951
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002952 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2953 UndefValue::get(FloatTy),
2954 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002955
Kévin Petite8edce32019-04-10 14:23:32 +01002956 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002957 auto Arg0 =
2958 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2959 ConstantVector::get(DownShuffleMask), "", CI);
2960 auto Arg1 =
2961 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2962 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002963 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002964
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002965 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002966
Kévin Petite8edce32019-04-10 14:23:32 +01002967 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002968
Kévin Petite8edce32019-04-10 14:23:32 +01002969 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002970
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002971 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2972 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002973 });
David Neto22f144c2017-06-12 14:26:21 -04002974}
David Neto62653202017-10-16 19:05:18 -04002975
2976bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2977 bool Changed = false;
2978
2979 // OpenCL's float result = fract(float x, float* ptr)
2980 //
2981 // In the LLVM domain:
2982 //
2983 // %floor_result = call spir_func float @floor(float %x)
2984 // store float %floor_result, float * %ptr
2985 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2986 // %result = call spir_func float
2987 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2988 //
2989 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2990 // and clspv.fract occur in the SPIR-V generator pass:
2991 //
2992 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2993 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2994 // ...
2995 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2996 // OpStore %ptr %floor_result
2997 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2998 // %fract_result = OpExtInst %float
2999 // %glsl_ext Fmin %fract_intermediate %just_under_1
3000
David Neto62653202017-10-16 19:05:18 -04003001 using std::string;
3002
3003 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3004 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003005 using QuadType =
3006 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003007 auto make_quad = [](const char *a, const char *b, const char *c,
3008 const char *d) {
3009 return std::tuple<const char *, const char *, const char *, const char *>(
3010 a, b, c, d);
3011 };
3012 const std::vector<QuadType> Functions = {
3013 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003014 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3015 "clspv.fract.v2f"),
3016 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3017 "clspv.fract.v3f"),
3018 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3019 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003020 };
3021
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003022 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003023 const StringRef fract_name(std::get<0>(quad));
3024
3025 // If we find a function with the matching name.
3026 if (auto F = M.getFunction(fract_name)) {
3027 if (F->use_begin() == F->use_end())
3028 continue;
3029
3030 // We have some uses.
3031 Changed = true;
3032
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003033 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003034
3035 const StringRef floor_name(std::get<1>(quad));
3036 const StringRef fmin_name(std::get<2>(quad));
3037 const StringRef clspv_fract_name(std::get<3>(quad));
3038
3039 // This is either float or a float vector. All the float-like
3040 // types are this type.
3041 auto result_ty = F->getReturnType();
3042
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003043 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003044 if (!fmin_fn) {
3045 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003046 FunctionType *fn_ty =
3047 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003048 fmin_fn =
3049 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003050 fmin_fn->addFnAttr(Attribute::ReadNone);
3051 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3052 }
3053
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003054 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003055 if (!floor_fn) {
3056 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003057 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003058 floor_fn = cast<Function>(
3059 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003060 floor_fn->addFnAttr(Attribute::ReadNone);
3061 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3062 }
3063
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003064 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003065 if (!clspv_fract_fn) {
3066 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003067 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003068 clspv_fract_fn = cast<Function>(
3069 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003070 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3071 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3072 }
3073
3074 // Number of significant significand bits, whether represented or not.
3075 unsigned num_significand_bits;
3076 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003077 case Type::HalfTyID:
3078 num_significand_bits = 11;
3079 break;
3080 case Type::FloatTyID:
3081 num_significand_bits = 24;
3082 break;
3083 case Type::DoubleTyID:
3084 num_significand_bits = 53;
3085 break;
3086 default:
3087 assert(false && "Unhandled float type when processing fract builtin");
3088 break;
David Neto62653202017-10-16 19:05:18 -04003089 }
3090 // Beware that the disassembler displays this value as
3091 // OpConstant %float 1
3092 // which is not quite right.
3093 const double kJustUnderOneScalar =
3094 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3095
3096 Constant *just_under_one =
3097 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3098 if (result_ty->isVectorTy()) {
3099 just_under_one = ConstantVector::getSplat(
3100 result_ty->getVectorNumElements(), just_under_one);
3101 }
3102
3103 IRBuilder<> Builder(Context);
3104
3105 SmallVector<Instruction *, 4> ToRemoves;
3106
3107 // Walk the users of the function.
3108 for (auto &U : F->uses()) {
3109 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3110
3111 Builder.SetInsertPoint(CI);
3112 auto arg = CI->getArgOperand(0);
3113 auto ptr = CI->getArgOperand(1);
3114
3115 // Compute floor result and store it.
3116 auto floor = Builder.CreateCall(floor_fn, {arg});
3117 Builder.CreateStore(floor, ptr);
3118
3119 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003120 auto fract_result =
3121 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003122
3123 CI->replaceAllUsesWith(fract_result);
3124
3125 // Lastly, remember to remove the user.
3126 ToRemoves.push_back(CI);
3127 }
3128 }
3129
3130 // And clean up the calls we no longer use.
3131 for (auto V : ToRemoves) {
3132 V->eraseFromParent();
3133 }
3134
3135 // Finally, remove the now-replaced fract function as well.
3136 F->eraseFromParent();
3137 }
3138 }
3139
3140 return Changed;
3141}