blob: 2a057a549ef2352be2ffec1f1c1e2fb1c3c4fa85 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040032#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070033
Diego Novilloa4c44fa2019-04-11 10:56:15 -040034#include "Passes.h"
35#include "SPIRVOp.h"
36
David Neto22f144c2017-06-12 14:26:21 -040037using namespace llvm;
38
39#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
40
41namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000042
// What the pass records about one argument of a mangled builtin name: whether
// its (element) type is a signed integer, an unsigned integer, or neither.
struct ArgTypeInfo {
  enum class SignedNess { None, Unsigned, Signed };

  // Defaults to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value (the name parser copies these objects around
  // before every field has necessarily been assigned).
  SignedNess signedness = SignedNess::None;
};
47
48struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000049 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000050 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000051
Kévin Petit91bc72e2019-04-08 15:17:46 +010052 bool isArgSigned(size_t arg) const {
53 assert(argTypeInfos.size() > arg);
54 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000055 }
56
Kévin Petit91bc72e2019-04-08 15:17:46 +010057 static FunctionInfo getFromMangledName(StringRef name) {
58 FunctionInfo fi;
59 if (!getFromMangledNameCheck(name, &fi)) {
60 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000061 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010062 return fi;
63 }
Kévin Petit8a560882019-03-21 15:24:34 +000064
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
66 if (!name.consume_front("_Z")) {
67 return false;
68 }
69 size_t nameLen;
70 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000071 return false;
72 }
73
Kévin Petit91bc72e2019-04-08 15:17:46 +010074 finfo->name = name.take_front(nameLen);
75 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 while (name.size() != 0) {
80
81 ArgTypeInfo ti;
82
83 // Try parsing a vector prefix
84 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040085 int numElems;
86 if (name.consumeInteger(10, numElems)) {
87 return false;
88 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040090 if (!name.consume_front("_")) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093 }
94
95 // Parse the base type
96 char typeCode = name.front();
97 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040098 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010099 case 'c': // char
100 case 'a': // signed char
101 case 's': // short
102 case 'i': // int
103 case 'l': // long
104 ti.signedness = ArgTypeInfo::SignedNess::Signed;
105 break;
106 case 'h': // unsigned char
107 case 't': // unsigned short
108 case 'j': // unsigned int
109 case 'm': // unsigned long
110 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
111 break;
112 case 'f':
113 ti.signedness = ArgTypeInfo::SignedNess::None;
114 break;
115 case 'S':
116 ti = prev_ti;
117 if (!name.consume_front("_")) {
118 return false;
119 }
120 break;
121 default:
122 return false;
123 }
124
125 finfo->argTypeInfos.push_back(ti);
126
127 prev_ti = ti;
128 }
129
130 return true;
131 };
Kévin Petit8a560882019-03-21 15:24:34 +0000132};
133
// NOTE: despite the name, this does not count leading zeros. It returns the
// bit index of the most significant set bit of |v| (i.e. floor(log2(v))), and
// 0 when |v| is 0 or 1. Callers in this file subtract two results to obtain
// the shift distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Binary search for the top set bit: test the upper half of the remaining
  // window, and if anything is set there, move the window up by that width.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      highestBit |= width;
    }
  }

  return highestBit;
}
153
154Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
155 if (1 == elements) {
156 return Type::getInt1Ty(C);
157 } else {
158 return VectorType::get(Type::getInt1Ty(C), elements);
159 }
160}
161
162struct ReplaceOpenCLBuiltinPass final : public ModulePass {
163 static char ID;
164 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
165
166 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000167 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100168 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100169 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400170 bool replaceRecip(Module &M);
171 bool replaceDivide(Module &M);
172 bool replaceExp10(Module &M);
173 bool replaceLog10(Module &M);
174 bool replaceBarrier(Module &M);
175 bool replaceMemFence(Module &M);
176 bool replaceRelational(Module &M);
177 bool replaceIsInfAndIsNan(Module &M);
178 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000179 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000180 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000181 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000182 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000183 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000184 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000185 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceSignbit(Module &M);
187 bool replaceMadandMad24andMul24(Module &M);
188 bool replaceVloadHalf(Module &M);
189 bool replaceVloadHalf2(Module &M);
190 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700191 bool replaceClspvVloadaHalf2(Module &M);
192 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400193 bool replaceVstoreHalf(Module &M);
194 bool replaceVstoreHalf2(Module &M);
195 bool replaceVstoreHalf4(Module &M);
196 bool replaceReadImageF(Module &M);
197 bool replaceAtomics(Module &M);
198 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400199 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700200 bool replaceVload(Module &M);
201 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400202};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100203} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400204
205char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400206INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
207 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400208
209namespace clspv {
210ModulePass *createReplaceOpenCLBuiltinPass() {
211 return new ReplaceOpenCLBuiltinPass();
212}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400213} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400214
215bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
216 bool Changed = false;
217
Kévin Petit2444e9b2018-11-09 14:14:37 +0000218 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100219 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100220 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400221 Changed |= replaceRecip(M);
222 Changed |= replaceDivide(M);
223 Changed |= replaceExp10(M);
224 Changed |= replaceLog10(M);
225 Changed |= replaceBarrier(M);
226 Changed |= replaceMemFence(M);
227 Changed |= replaceRelational(M);
228 Changed |= replaceIsInfAndIsNan(M);
229 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000230 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000231 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000232 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000233 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000234 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000235 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000236 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400237 Changed |= replaceSignbit(M);
238 Changed |= replaceMadandMad24andMul24(M);
239 Changed |= replaceVloadHalf(M);
240 Changed |= replaceVloadHalf2(M);
241 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700242 Changed |= replaceClspvVloadaHalf2(M);
243 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400244 Changed |= replaceVstoreHalf(M);
245 Changed |= replaceVstoreHalf2(M);
246 Changed |= replaceVstoreHalf4(M);
247 Changed |= replaceReadImageF(M);
248 Changed |= replaceAtomics(M);
249 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400250 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700251 Changed |= replaceVload(M);
252 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400253
254 return Changed;
255}
256
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400257bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
258 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000259
Kévin Petite8edce32019-04-10 14:23:32 +0100260 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000261
262 for (auto Name : Names) {
263 // If we find a function with the matching name.
264 if (auto F = M.getFunction(Name)) {
265 SmallVector<Instruction *, 4> ToRemoves;
266
267 // Walk the users of the function.
268 for (auto &U : F->uses()) {
269 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000270
Kévin Petite8edce32019-04-10 14:23:32 +0100271 auto NewValue = Replacer(CI);
272
273 if (NewValue != nullptr) {
274 CI->replaceAllUsesWith(NewValue);
275 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000276
277 // Lastly, remember to remove the user.
278 ToRemoves.push_back(CI);
279 }
280 }
281
282 Changed = !ToRemoves.empty();
283
284 // And cleanup the calls we don't use anymore.
285 for (auto V : ToRemoves) {
286 V->eraseFromParent();
287 }
288
289 // And remove the function we don't need either too.
290 F->eraseFromParent();
291 }
292 }
293
294 return Changed;
295}
296
Kévin Petite8edce32019-04-10 14:23:32 +0100297bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100298
Kévin Petite8edce32019-04-10 14:23:32 +0100299 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400300 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
301 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
302 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
303 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100304 };
305
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400306 return replaceCallsWithValue(M, Names,
307 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100308}
309
310bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
311
312 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400313 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
314 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
315 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
316 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
317 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
318 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
319 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
320 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
321 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
322 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
323 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100324 };
325
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400326 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100327 auto XValue = CI->getOperand(0);
328 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100329
Kévin Petite8edce32019-04-10 14:23:32 +0100330 IRBuilder<> Builder(CI);
331 auto XmY = Builder.CreateSub(XValue, YValue);
332 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100333
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400334 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100335 auto F = CI->getCalledFunction();
336 auto finfo = FunctionInfo::getFromMangledName(F->getName());
337 if (finfo.isArgSigned(0)) {
338 Cmp = Builder.CreateICmpSGT(YValue, XValue);
339 } else {
340 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100341 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100342
Kévin Petite8edce32019-04-10 14:23:32 +0100343 return Builder.CreateSelect(Cmp, YmX, XmY);
344 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100345}
346
Kévin Petit8c1be282019-04-02 19:34:25 +0100347bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100348
Kévin Petite8edce32019-04-10 14:23:32 +0100349 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400350 "_Z8copysignff",
351 "_Z8copysignDv2_fS_",
352 "_Z8copysignDv3_fS_",
353 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100354 };
355
Kévin Petite8edce32019-04-10 14:23:32 +0100356 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
357 auto XValue = CI->getOperand(0);
358 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100359
Kévin Petite8edce32019-04-10 14:23:32 +0100360 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100361
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400362 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100363 if (Ty->isVectorTy()) {
364 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100365 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100366
Kévin Petite8edce32019-04-10 14:23:32 +0100367 // Return X with the sign of Y
368
369 // Sign bit masks
370 auto SignBit = IntTy->getScalarSizeInBits() - 1;
371 auto SignBitMask = 1 << SignBit;
372 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
373 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
374
375 IRBuilder<> Builder(CI);
376
377 // Extract sign of Y
378 auto YInt = Builder.CreateBitCast(YValue, IntTy);
379 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
380
381 // Clear sign bit in X
382 auto XInt = Builder.CreateBitCast(XValue, IntTy);
383 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
384
385 // Insert sign bit of Y into X
386 auto NewXInt = Builder.CreateOr(XInt, YSign);
387
388 // And cast back to floating-point
389 return Builder.CreateBitCast(NewXInt, Ty);
390 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100391}
392
David Neto22f144c2017-06-12 14:26:21 -0400393bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400394
Kévin Petite8edce32019-04-10 14:23:32 +0100395 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400396 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
397 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
398 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
399 };
400
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400401 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100402 // Recip has one arg.
403 auto Arg = CI->getOperand(0);
404 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
405 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
406 });
David Neto22f144c2017-06-12 14:26:21 -0400407}
408
409bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400410
Kévin Petite8edce32019-04-10 14:23:32 +0100411 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400412 "_Z11half_divideff", "_Z13native_divideff",
413 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
414 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
415 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
416 };
417
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400418 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100419 auto Op0 = CI->getOperand(0);
420 auto Op1 = CI->getOperand(1);
421 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
422 });
David Neto22f144c2017-06-12 14:26:21 -0400423}
424
425bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
426 bool Changed = false;
427
428 const std::map<const char *, const char *> Map = {
429 {"_Z5exp10f", "_Z3expf"},
430 {"_Z10half_exp10f", "_Z8half_expf"},
431 {"_Z12native_exp10f", "_Z10native_expf"},
432 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
433 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
434 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
435 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
436 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
437 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
438 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
439 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
440 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
441
442 for (auto Pair : Map) {
443 // If we find a function with the matching name.
444 if (auto F = M.getFunction(Pair.first)) {
445 SmallVector<Instruction *, 4> ToRemoves;
446
447 // Walk the users of the function.
448 for (auto &U : F->uses()) {
449 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
450 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
451
452 auto Arg = CI->getOperand(0);
453
454 // Constant of the natural log of 10 (ln(10)).
455 const double Ln10 =
456 2.302585092994045684017991454684364207601101488628772976033;
457
458 auto Mul = BinaryOperator::Create(
459 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
460 CI);
461
462 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
463
464 CI->replaceAllUsesWith(NewCI);
465
466 // Lastly, remember to remove the user.
467 ToRemoves.push_back(CI);
468 }
469 }
470
471 Changed = !ToRemoves.empty();
472
473 // And cleanup the calls we don't use anymore.
474 for (auto V : ToRemoves) {
475 V->eraseFromParent();
476 }
477
478 // And remove the function we don't need either too.
479 F->eraseFromParent();
480 }
481 }
482
483 return Changed;
484}
485
486bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
487 bool Changed = false;
488
489 const std::map<const char *, const char *> Map = {
490 {"_Z5log10f", "_Z3logf"},
491 {"_Z10half_log10f", "_Z8half_logf"},
492 {"_Z12native_log10f", "_Z10native_logf"},
493 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
494 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
495 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
496 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
497 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
498 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
499 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
500 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
501 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
502
503 for (auto Pair : Map) {
504 // If we find a function with the matching name.
505 if (auto F = M.getFunction(Pair.first)) {
506 SmallVector<Instruction *, 4> ToRemoves;
507
508 // Walk the users of the function.
509 for (auto &U : F->uses()) {
510 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
511 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
512
513 auto Arg = CI->getOperand(0);
514
515 // Constant of the reciprocal of the natural log of 10 (ln(10)).
516 const double Ln10 =
517 0.434294481903251827651128918916605082294397005803666566114;
518
519 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
520
521 auto Mul = BinaryOperator::Create(
522 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
523 "", CI);
524
525 CI->replaceAllUsesWith(Mul);
526
527 // Lastly, remember to remove the user.
528 ToRemoves.push_back(CI);
529 }
530 }
531
532 Changed = !ToRemoves.empty();
533
534 // And cleanup the calls we don't use anymore.
535 for (auto V : ToRemoves) {
536 V->eraseFromParent();
537 }
538
539 // And remove the function we don't need either too.
540 F->eraseFromParent();
541 }
542 }
543
544 return Changed;
545}
546
547bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
548 bool Changed = false;
549
550 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
551
552 const std::map<const char *, const char *> Map = {
553 {"_Z7barrierj", "__spirv_control_barrier"}};
554
555 for (auto Pair : Map) {
556 // If we find a function with the matching name.
557 if (auto F = M.getFunction(Pair.first)) {
558 SmallVector<Instruction *, 4> ToRemoves;
559
560 // Walk the users of the function.
561 for (auto &U : F->uses()) {
562 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
563 auto FType = F->getFunctionType();
564 SmallVector<Type *, 3> Params;
565 for (unsigned i = 0; i < 3; i++) {
566 Params.push_back(FType->getParamType(0));
567 }
568 auto NewFType =
569 FunctionType::get(FType->getReturnType(), Params, false);
570 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
alan-bakerb37f9732019-06-05 01:28:00 -0400571 cast<Function>(NewF.getCallee())->setCannotDuplicate();
David Neto22f144c2017-06-12 14:26:21 -0400572
573 auto Arg = CI->getOperand(0);
574
575 // We need to map the OpenCL constants to the SPIR-V equivalents.
576 const auto LocalMemFence =
577 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
578 const auto GlobalMemFence =
579 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
580 const auto ConstantSequentiallyConsistent = ConstantInt::get(
581 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
582 const auto ConstantScopeDevice =
583 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
584 const auto ConstantScopeWorkgroup =
585 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
586
587 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
588 const auto LocalMemFenceMask = BinaryOperator::Create(
589 Instruction::And, LocalMemFence, Arg, "", CI);
590 const auto WorkgroupShiftAmount =
591 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
592 clz(CLK_LOCAL_MEM_FENCE);
593 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
594 Instruction::Shl, LocalMemFenceMask,
595 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
596
597 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
598 const auto GlobalMemFenceMask = BinaryOperator::Create(
599 Instruction::And, GlobalMemFence, Arg, "", CI);
600 const auto UniformShiftAmount =
601 clz(spv::MemorySemanticsUniformMemoryMask) -
602 clz(CLK_GLOBAL_MEM_FENCE);
603 const auto MemorySemanticsUniform = BinaryOperator::Create(
604 Instruction::Shl, GlobalMemFenceMask,
605 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
606
607 // And combine the above together, also adding in
608 // MemorySemanticsSequentiallyConsistentMask.
609 auto MemorySemantics =
610 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
611 ConstantSequentiallyConsistent, "", CI);
612 MemorySemantics = BinaryOperator::Create(
613 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
614
615 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
616 // Device Scope, otherwise Workgroup Scope.
617 const auto Cmp =
618 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
619 GlobalMemFenceMask, GlobalMemFence, "", CI);
620 const auto MemoryScope = SelectInst::Create(
621 Cmp, ConstantScopeDevice, ConstantScopeWorkgroup, "", CI);
622
623 // Lastly, the Execution Scope is always Workgroup Scope.
624 const auto ExecutionScope = ConstantScopeWorkgroup;
625
626 auto NewCI = CallInst::Create(
627 NewF, {ExecutionScope, MemoryScope, MemorySemantics}, "", CI);
628
629 CI->replaceAllUsesWith(NewCI);
630
631 // Lastly, remember to remove the user.
632 ToRemoves.push_back(CI);
633 }
634 }
635
636 Changed = !ToRemoves.empty();
637
638 // And cleanup the calls we don't use anymore.
639 for (auto V : ToRemoves) {
640 V->eraseFromParent();
641 }
642
643 // And remove the function we don't need either too.
644 F->eraseFromParent();
645 }
646 }
647
648 return Changed;
649}
650
651bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
652 bool Changed = false;
653
654 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
655
Neil Henning39672102017-09-29 14:33:13 +0100656 using Tuple = std::tuple<const char *, unsigned>;
657 const std::map<const char *, Tuple> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400658 {"_Z9mem_fencej", Tuple("__spirv_memory_barrier",
659 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100660 {"_Z14read_mem_fencej",
661 Tuple("__spirv_memory_barrier", spv::MemorySemanticsAcquireMask)},
662 {"_Z15write_mem_fencej",
663 Tuple("__spirv_memory_barrier", spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400664
665 for (auto Pair : Map) {
666 // If we find a function with the matching name.
667 if (auto F = M.getFunction(Pair.first)) {
668 SmallVector<Instruction *, 4> ToRemoves;
669
670 // Walk the users of the function.
671 for (auto &U : F->uses()) {
672 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
673 auto FType = F->getFunctionType();
674 SmallVector<Type *, 2> Params;
675 for (unsigned i = 0; i < 2; i++) {
676 Params.push_back(FType->getParamType(0));
677 }
678 auto NewFType =
679 FunctionType::get(FType->getReturnType(), Params, false);
Neil Henning39672102017-09-29 14:33:13 +0100680 auto NewF = M.getOrInsertFunction(std::get<0>(Pair.second), NewFType);
David Neto22f144c2017-06-12 14:26:21 -0400681
682 auto Arg = CI->getOperand(0);
683
684 // We need to map the OpenCL constants to the SPIR-V equivalents.
685 const auto LocalMemFence =
686 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
687 const auto GlobalMemFence =
688 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
689 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100690 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400691 const auto ConstantScopeDevice =
692 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
693
694 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
695 const auto LocalMemFenceMask = BinaryOperator::Create(
696 Instruction::And, LocalMemFence, Arg, "", CI);
697 const auto WorkgroupShiftAmount =
698 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
699 clz(CLK_LOCAL_MEM_FENCE);
700 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
701 Instruction::Shl, LocalMemFenceMask,
702 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
703
704 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
705 const auto GlobalMemFenceMask = BinaryOperator::Create(
706 Instruction::And, GlobalMemFence, Arg, "", CI);
707 const auto UniformShiftAmount =
708 clz(spv::MemorySemanticsUniformMemoryMask) -
709 clz(CLK_GLOBAL_MEM_FENCE);
710 const auto MemorySemanticsUniform = BinaryOperator::Create(
711 Instruction::Shl, GlobalMemFenceMask,
712 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
713
714 // And combine the above together, also adding in
715 // MemorySemanticsSequentiallyConsistentMask.
716 auto MemorySemantics =
717 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
718 ConstantMemorySemantics, "", CI);
719 MemorySemantics = BinaryOperator::Create(
720 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
721
722 // Memory Scope is always device.
723 const auto MemoryScope = ConstantScopeDevice;
724
725 auto NewCI =
726 CallInst::Create(NewF, {MemoryScope, MemorySemantics}, "", CI);
727
728 CI->replaceAllUsesWith(NewCI);
729
730 // Lastly, remember to remove the user.
731 ToRemoves.push_back(CI);
732 }
733 }
734
735 Changed = !ToRemoves.empty();
736
737 // And cleanup the calls we don't use anymore.
738 for (auto V : ToRemoves) {
739 V->eraseFromParent();
740 }
741
742 // And remove the function we don't need either too.
743 F->eraseFromParent();
744 }
745 }
746
747 return Changed;
748}
749
750bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
751 bool Changed = false;
752
753 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
754 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
755 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
756 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
757 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
758 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
759 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
760 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
761 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
762 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
763 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
764 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
765 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
766 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
767 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
768 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
769 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
770 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
771 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
772 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
773 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
774 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
775 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
776 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
777 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
778 };
779
780 for (auto Pair : Map) {
781 // If we find a function with the matching name.
782 if (auto F = M.getFunction(Pair.first)) {
783 SmallVector<Instruction *, 4> ToRemoves;
784
785 // Walk the users of the function.
786 for (auto &U : F->uses()) {
787 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
788 // The predicate to use in the CmpInst.
789 auto Predicate = Pair.second.first;
790
791 // The value to return for true.
792 auto TrueValue =
793 ConstantInt::getSigned(CI->getType(), Pair.second.second);
794
795 // The value to return for false.
796 auto FalseValue = Constant::getNullValue(CI->getType());
797
798 auto Arg1 = CI->getOperand(0);
799 auto Arg2 = CI->getOperand(1);
800
801 const auto Cmp =
802 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
803
804 const auto Select =
805 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
806
807 CI->replaceAllUsesWith(Select);
808
809 // Lastly, remember to remove the user.
810 ToRemoves.push_back(CI);
811 }
812 }
813
814 Changed = !ToRemoves.empty();
815
816 // And cleanup the calls we don't use anymore.
817 for (auto V : ToRemoves) {
818 V->eraseFromParent();
819 }
820
821 // And remove the function we don't need either too.
822 F->eraseFromParent();
823 }
824 }
825
826 return Changed;
827}
828
829bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
830 bool Changed = false;
831
Kévin Petitff03aee2019-06-12 19:39:03 +0100832 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
833 {"_Z5isinff", {spv::OpIsInf, 1}},
834 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
835 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
836 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
837 {"_Z5isnanf", {spv::OpIsNan, 1}},
838 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
839 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
840 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400841 };
842
843 for (auto Pair : Map) {
844 // If we find a function with the matching name.
845 if (auto F = M.getFunction(Pair.first)) {
846 SmallVector<Instruction *, 4> ToRemoves;
847
848 // Walk the users of the function.
849 for (auto &U : F->uses()) {
850 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
851 const auto CITy = CI->getType();
852
Kévin Petitff03aee2019-06-12 19:39:03 +0100853 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400854
855 // The value to return for true.
856 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
857
858 // The value to return for false.
859 auto FalseValue = Constant::getNullValue(CITy);
860
861 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
862 M.getContext(),
863 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
864
Kévin Petitff03aee2019-06-12 19:39:03 +0100865 auto NewCI =
866 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
867 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400868
869 const auto Select =
870 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
871
872 CI->replaceAllUsesWith(Select);
873
874 // Lastly, remember to remove the user.
875 ToRemoves.push_back(CI);
876 }
877 }
878
879 Changed = !ToRemoves.empty();
880
881 // And cleanup the calls we don't use anymore.
882 for (auto V : ToRemoves) {
883 V->eraseFromParent();
884 }
885
886 // And remove the function we don't need either too.
887 F->eraseFromParent();
888 }
889 }
890
891 return Changed;
892}
893
894bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
895 bool Changed = false;
896
Kévin Petitff03aee2019-06-12 19:39:03 +0100897 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000898 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100899 {"_Z3allc", spv::OpNop},
900 {"_Z3allDv2_c", spv::OpAll},
901 {"_Z3allDv3_c", spv::OpAll},
902 {"_Z3allDv4_c", spv::OpAll},
903 {"_Z3alls", spv::OpNop},
904 {"_Z3allDv2_s", spv::OpAll},
905 {"_Z3allDv3_s", spv::OpAll},
906 {"_Z3allDv4_s", spv::OpAll},
907 {"_Z3alli", spv::OpNop},
908 {"_Z3allDv2_i", spv::OpAll},
909 {"_Z3allDv3_i", spv::OpAll},
910 {"_Z3allDv4_i", spv::OpAll},
911 {"_Z3alll", spv::OpNop},
912 {"_Z3allDv2_l", spv::OpAll},
913 {"_Z3allDv3_l", spv::OpAll},
914 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000915
916 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100917 {"_Z3anyc", spv::OpNop},
918 {"_Z3anyDv2_c", spv::OpAny},
919 {"_Z3anyDv3_c", spv::OpAny},
920 {"_Z3anyDv4_c", spv::OpAny},
921 {"_Z3anys", spv::OpNop},
922 {"_Z3anyDv2_s", spv::OpAny},
923 {"_Z3anyDv3_s", spv::OpAny},
924 {"_Z3anyDv4_s", spv::OpAny},
925 {"_Z3anyi", spv::OpNop},
926 {"_Z3anyDv2_i", spv::OpAny},
927 {"_Z3anyDv3_i", spv::OpAny},
928 {"_Z3anyDv4_i", spv::OpAny},
929 {"_Z3anyl", spv::OpNop},
930 {"_Z3anyDv2_l", spv::OpAny},
931 {"_Z3anyDv3_l", spv::OpAny},
932 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -0400933 };
934
935 for (auto Pair : Map) {
936 // If we find a function with the matching name.
937 if (auto F = M.getFunction(Pair.first)) {
938 SmallVector<Instruction *, 4> ToRemoves;
939
940 // Walk the users of the function.
941 for (auto &U : F->uses()) {
942 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400943
944 auto Arg = CI->getOperand(0);
945
946 Value *V;
947
Kévin Petitfd27cca2018-10-31 13:00:17 +0000948 // If the argument is a 32-bit int, just use a shift
949 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
950 V = BinaryOperator::Create(Instruction::LShr, Arg,
951 ConstantInt::get(Arg->getType(), 31), "",
952 CI);
953 } else {
David Neto22f144c2017-06-12 14:26:21 -0400954 // The value for zero to compare against.
955 const auto ZeroValue = Constant::getNullValue(Arg->getType());
956
David Neto22f144c2017-06-12 14:26:21 -0400957 // The value to return for true.
958 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
959
960 // The value to return for false.
961 const auto FalseValue = Constant::getNullValue(CI->getType());
962
Kévin Petitfd27cca2018-10-31 13:00:17 +0000963 const auto Cmp = CmpInst::Create(
964 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
965
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400966 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000967
968 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +0100969 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000970
Kévin Petitff03aee2019-06-12 19:39:03 +0100971 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000972
Kévin Petitff03aee2019-06-12 19:39:03 +0100973 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +0000974
Kévin Petitff03aee2019-06-12 19:39:03 +0100975 const auto NewCI = clspv::InsertSPIRVOp(
976 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +0000977 SelectSource = NewCI;
978
979 } else {
980 SelectSource = Cmp;
981 }
982
983 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400984 }
985
986 CI->replaceAllUsesWith(V);
987
988 // Lastly, remember to remove the user.
989 ToRemoves.push_back(CI);
990 }
991 }
992
993 Changed = !ToRemoves.empty();
994
995 // And cleanup the calls we don't use anymore.
996 for (auto V : ToRemoves) {
997 V->eraseFromParent();
998 }
999
1000 // And remove the function we don't need either too.
1001 F->eraseFromParent();
1002 }
1003 }
1004
1005 return Changed;
1006}
1007
Kévin Petitbf0036c2019-03-06 13:57:10 +00001008bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1009 bool Changed = false;
1010
1011 for (auto const &SymVal : M.getValueSymbolTable()) {
1012 // Skip symbols whose name doesn't match
1013 if (!SymVal.getKey().startswith("_Z8upsample")) {
1014 continue;
1015 }
1016 // Is there a function going by that name?
1017 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1018
1019 SmallVector<Instruction *, 4> ToRemoves;
1020
1021 // Walk the users of the function.
1022 for (auto &U : F->uses()) {
1023 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1024
1025 // Get arguments
1026 auto HiValue = CI->getOperand(0);
1027 auto LoValue = CI->getOperand(1);
1028
1029 // Don't touch overloads that aren't in OpenCL C
1030 auto HiType = HiValue->getType();
1031 auto LoType = LoValue->getType();
1032
1033 if (HiType != LoType) {
1034 continue;
1035 }
1036
1037 if (!HiType->isIntOrIntVectorTy()) {
1038 continue;
1039 }
1040
1041 if (HiType->getScalarSizeInBits() * 2 !=
1042 CI->getType()->getScalarSizeInBits()) {
1043 continue;
1044 }
1045
1046 if ((HiType->getScalarSizeInBits() != 8) &&
1047 (HiType->getScalarSizeInBits() != 16) &&
1048 (HiType->getScalarSizeInBits() != 32)) {
1049 continue;
1050 }
1051
1052 if (HiType->isVectorTy()) {
1053 if ((HiType->getVectorNumElements() != 2) &&
1054 (HiType->getVectorNumElements() != 3) &&
1055 (HiType->getVectorNumElements() != 4) &&
1056 (HiType->getVectorNumElements() != 8) &&
1057 (HiType->getVectorNumElements() != 16)) {
1058 continue;
1059 }
1060 }
1061
1062 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001063 auto HiCast =
1064 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1065 auto LoCast =
1066 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001067
1068 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001069 auto ShiftAmount =
1070 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001071 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1072 ShiftAmount, "", CI);
1073
1074 // OR both results
1075 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1076 "", CI);
1077
1078 // Replace call with the expression
1079 CI->replaceAllUsesWith(V);
1080
1081 // Lastly, remember to remove the user.
1082 ToRemoves.push_back(CI);
1083 }
1084 }
1085
1086 Changed = !ToRemoves.empty();
1087
1088 // And cleanup the calls we don't use anymore.
1089 for (auto V : ToRemoves) {
1090 V->eraseFromParent();
1091 }
1092
1093 // And remove the function we don't need either too.
1094 F->eraseFromParent();
1095 }
1096 }
1097
1098 return Changed;
1099}
1100
Kévin Petitd44eef52019-03-08 13:22:14 +00001101bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1102 bool Changed = false;
1103
1104 for (auto const &SymVal : M.getValueSymbolTable()) {
1105 // Skip symbols whose name doesn't match
1106 if (!SymVal.getKey().startswith("_Z6rotate")) {
1107 continue;
1108 }
1109 // Is there a function going by that name?
1110 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1111
1112 SmallVector<Instruction *, 4> ToRemoves;
1113
1114 // Walk the users of the function.
1115 for (auto &U : F->uses()) {
1116 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1117
1118 // Get arguments
1119 auto SrcValue = CI->getOperand(0);
1120 auto RotAmount = CI->getOperand(1);
1121
1122 // Don't touch overloads that aren't in OpenCL C
1123 auto SrcType = SrcValue->getType();
1124 auto RotType = RotAmount->getType();
1125
1126 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1127 continue;
1128 }
1129
1130 if (!SrcType->isIntOrIntVectorTy()) {
1131 continue;
1132 }
1133
1134 if ((SrcType->getScalarSizeInBits() != 8) &&
1135 (SrcType->getScalarSizeInBits() != 16) &&
1136 (SrcType->getScalarSizeInBits() != 32) &&
1137 (SrcType->getScalarSizeInBits() != 64)) {
1138 continue;
1139 }
1140
1141 if (SrcType->isVectorTy()) {
1142 if ((SrcType->getVectorNumElements() != 2) &&
1143 (SrcType->getVectorNumElements() != 3) &&
1144 (SrcType->getVectorNumElements() != 4) &&
1145 (SrcType->getVectorNumElements() != 8) &&
1146 (SrcType->getVectorNumElements() != 16)) {
1147 continue;
1148 }
1149 }
1150
1151 // The approach used is to shift the top bits down, the bottom bits up
1152 // and OR the two shifted values.
1153
1154 // The rotation amount is to be treated modulo the element size.
1155 // Since SPIR-V shift ops don't support this, let's apply the
1156 // modulo ahead of shifting. The element size is always a power of
1157 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001158 auto ModMask =
1159 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001160 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1161 ModMask, "", CI);
1162
1163 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001164 auto ScalarSize =
1165 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001166 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1167 RotAmount, "", CI);
1168
1169 // Now shift the bottom bits up and the top bits down
1170 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1171 RotAmount, "", CI);
1172 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1173 DownAmount, "", CI);
1174
1175 // Finally OR the two shifted values
1176 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1177 HiRotated, "", CI);
1178
1179 // Replace call with the expression
1180 CI->replaceAllUsesWith(V);
1181
1182 // Lastly, remember to remove the user.
1183 ToRemoves.push_back(CI);
1184 }
1185 }
1186
1187 Changed = !ToRemoves.empty();
1188
1189 // And cleanup the calls we don't use anymore.
1190 for (auto V : ToRemoves) {
1191 V->eraseFromParent();
1192 }
1193
1194 // And remove the function we don't need either too.
1195 F->eraseFromParent();
1196 }
1197 }
1198
1199 return Changed;
1200}
1201
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001202bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1203 bool Changed = false;
1204
1205 for (auto const &SymVal : M.getValueSymbolTable()) {
1206
1207 // Skip symbols whose name obviously doesn't match
1208 if (!SymVal.getKey().contains("convert_")) {
1209 continue;
1210 }
1211
1212 // Is there a function going by that name?
1213 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1214
1215 // Get info from the mangled name
1216 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001217 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001218
1219 // All functions of interest are handled by our mangled name parser
1220 if (!parsed) {
1221 continue;
1222 }
1223
1224 // Move on if this isn't a call to convert_
1225 if (!finfo.name.startswith("convert_")) {
1226 continue;
1227 }
1228
1229 // Extract the destination type from the function name
1230 StringRef DstTypeName = finfo.name;
1231 DstTypeName.consume_front("convert_");
1232
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001233 auto DstSignedNess =
1234 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1235 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1236 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1237 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1238 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1239 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1240 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1241 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1242 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1243 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001244
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001245 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001246 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001247
1248 SmallVector<Instruction *, 4> ToRemoves;
1249
1250 // Walk the users of the function.
1251 for (auto &U : F->uses()) {
1252 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1253
1254 // Get arguments
1255 auto SrcValue = CI->getOperand(0);
1256
1257 // Don't touch overloads that aren't in OpenCL C
1258 auto SrcType = SrcValue->getType();
1259 auto DstType = CI->getType();
1260
1261 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1262 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1263 continue;
1264 }
1265
1266 if (SrcType->isVectorTy()) {
1267
1268 if (SrcType->getVectorNumElements() !=
1269 DstType->getVectorNumElements()) {
1270 continue;
1271 }
1272
1273 if ((SrcType->getVectorNumElements() != 2) &&
1274 (SrcType->getVectorNumElements() != 3) &&
1275 (SrcType->getVectorNumElements() != 4) &&
1276 (SrcType->getVectorNumElements() != 8) &&
1277 (SrcType->getVectorNumElements() != 16)) {
1278 continue;
1279 }
1280 }
1281
1282 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1283 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1284
1285 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1286 bool DstIsInt = DstType->isIntOrIntVectorTy();
1287
1288 Value *V;
1289 if (SrcIsFloat && DstIsFloat) {
1290 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1291 } else if (SrcIsFloat && DstIsInt) {
1292 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001293 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1294 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001295 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001296 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1297 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001298 }
1299 } else if (SrcIsInt && DstIsFloat) {
1300 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001301 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1302 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001303 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001304 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1305 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001306 }
1307 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001308 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1309 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001310 } else {
1311 // Not something we're supposed to handle, just move on
1312 continue;
1313 }
1314
1315 // Replace call with the expression
1316 CI->replaceAllUsesWith(V);
1317
1318 // Lastly, remember to remove the user.
1319 ToRemoves.push_back(CI);
1320 }
1321 }
1322
1323 Changed = !ToRemoves.empty();
1324
1325 // And cleanup the calls we don't use anymore.
1326 for (auto V : ToRemoves) {
1327 V->eraseFromParent();
1328 }
1329
1330 // And remove the function we don't need either too.
1331 F->eraseFromParent();
1332 }
1333 }
1334
1335 return Changed;
1336}
1337
Kévin Petit8a560882019-03-21 15:24:34 +00001338bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1339 bool Changed = false;
1340
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001341 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001342
Kévin Petit617a76d2019-04-04 13:54:16 +01001343 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001344 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1345 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1346
1347 // Skip symbols whose name doesn't match
1348 if (!isMad && !isMul) {
1349 continue;
1350 }
1351
1352 // Is there a function going by that name?
1353 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001354 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001355 }
1356 }
1357
Kévin Petit617a76d2019-04-04 13:54:16 +01001358 for (auto F : FnWorklist) {
1359 SmallVector<Instruction *, 4> ToRemoves;
1360
1361 bool isMad = F->getName().startswith("_Z6mad_hi");
1362 // Walk the users of the function.
1363 for (auto &U : F->uses()) {
1364 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1365
1366 // Get arguments
1367 auto AValue = CI->getOperand(0);
1368 auto BValue = CI->getOperand(1);
1369 auto CValue = CI->getOperand(2);
1370
1371 // Don't touch overloads that aren't in OpenCL C
1372 auto AType = AValue->getType();
1373 auto BType = BValue->getType();
1374 auto CType = CValue->getType();
1375
1376 if ((AType != BType) || (CI->getType() != AType) ||
1377 (isMad && (AType != CType))) {
1378 continue;
1379 }
1380
1381 if (!AType->isIntOrIntVectorTy()) {
1382 continue;
1383 }
1384
1385 if ((AType->getScalarSizeInBits() != 8) &&
1386 (AType->getScalarSizeInBits() != 16) &&
1387 (AType->getScalarSizeInBits() != 32) &&
1388 (AType->getScalarSizeInBits() != 64)) {
1389 continue;
1390 }
1391
1392 if (AType->isVectorTy()) {
1393 if ((AType->getVectorNumElements() != 2) &&
1394 (AType->getVectorNumElements() != 3) &&
1395 (AType->getVectorNumElements() != 4) &&
1396 (AType->getVectorNumElements() != 8) &&
1397 (AType->getVectorNumElements() != 16)) {
1398 continue;
1399 }
1400 }
1401
1402 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001403 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001404
1405 // Select the appropriate signed/unsigned SPIR-V op
1406 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001407 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001408 opcode = spv::OpSMulExtended;
1409 } else {
1410 opcode = spv::OpUMulExtended;
1411 }
1412
1413 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001414 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001415 auto ExMulRetType = StructType::create(TwoValueType);
1416
1417 // Call the SPIR-V op
1418 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1419 ExMulRetType, {AValue, BValue});
1420
1421 // Get the high part of the result
1422 unsigned Idxs[] = {1};
1423 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1424
1425 // If we're handling a mad_hi, add the third argument to the result
1426 if (isMad) {
1427 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1428 }
1429
1430 // Replace call with the expression
1431 CI->replaceAllUsesWith(V);
1432
1433 // Lastly, remember to remove the user.
1434 ToRemoves.push_back(CI);
1435 }
1436 }
1437
1438 Changed = !ToRemoves.empty();
1439
1440 // And cleanup the calls we don't use anymore.
1441 for (auto V : ToRemoves) {
1442 V->eraseFromParent();
1443 }
1444
1445 // And remove the function we don't need either too.
1446 F->eraseFromParent();
1447 }
1448
Kévin Petit8a560882019-03-21 15:24:34 +00001449 return Changed;
1450}
1451
Kévin Petitf5b78a22018-10-25 14:32:17 +00001452bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1453 bool Changed = false;
1454
1455 for (auto const &SymVal : M.getValueSymbolTable()) {
1456 // Skip symbols whose name doesn't match
1457 if (!SymVal.getKey().startswith("_Z6select")) {
1458 continue;
1459 }
1460 // Is there a function going by that name?
1461 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1462
1463 SmallVector<Instruction *, 4> ToRemoves;
1464
1465 // Walk the users of the function.
1466 for (auto &U : F->uses()) {
1467 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1468
1469 // Get arguments
1470 auto FalseValue = CI->getOperand(0);
1471 auto TrueValue = CI->getOperand(1);
1472 auto PredicateValue = CI->getOperand(2);
1473
1474 // Don't touch overloads that aren't in OpenCL C
1475 auto FalseType = FalseValue->getType();
1476 auto TrueType = TrueValue->getType();
1477 auto PredicateType = PredicateValue->getType();
1478
1479 if (FalseType != TrueType) {
1480 continue;
1481 }
1482
1483 if (!PredicateType->isIntOrIntVectorTy()) {
1484 continue;
1485 }
1486
1487 if (!FalseType->isIntOrIntVectorTy() &&
1488 !FalseType->getScalarType()->isFloatingPointTy()) {
1489 continue;
1490 }
1491
1492 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1493 continue;
1494 }
1495
1496 if (FalseType->getScalarSizeInBits() !=
1497 PredicateType->getScalarSizeInBits()) {
1498 continue;
1499 }
1500
1501 if (FalseType->isVectorTy()) {
1502 if (FalseType->getVectorNumElements() !=
1503 PredicateType->getVectorNumElements()) {
1504 continue;
1505 }
1506
1507 if ((FalseType->getVectorNumElements() != 2) &&
1508 (FalseType->getVectorNumElements() != 3) &&
1509 (FalseType->getVectorNumElements() != 4) &&
1510 (FalseType->getVectorNumElements() != 8) &&
1511 (FalseType->getVectorNumElements() != 16)) {
1512 continue;
1513 }
1514 }
1515
1516 // Create constant
1517 const auto ZeroValue = Constant::getNullValue(PredicateType);
1518
1519 // Scalar and vector are to be treated differently
1520 CmpInst::Predicate Pred;
1521 if (PredicateType->isVectorTy()) {
1522 Pred = CmpInst::ICMP_SLT;
1523 } else {
1524 Pred = CmpInst::ICMP_NE;
1525 }
1526
1527 // Create comparison instruction
1528 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1529 ZeroValue, "", CI);
1530
1531 // Create select
1532 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1533
1534 // Replace call with the selection
1535 CI->replaceAllUsesWith(V);
1536
1537 // Lastly, remember to remove the user.
1538 ToRemoves.push_back(CI);
1539 }
1540 }
1541
1542 Changed = !ToRemoves.empty();
1543
1544 // And cleanup the calls we don't use anymore.
1545 for (auto V : ToRemoves) {
1546 V->eraseFromParent();
1547 }
1548
1549 // And remove the function we don't need either too.
1550 F->eraseFromParent();
1551 }
1552 }
1553
1554 return Changed;
1555}
1556
Kévin Petite7d0cce2018-10-31 12:38:56 +00001557bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1558 bool Changed = false;
1559
1560 for (auto const &SymVal : M.getValueSymbolTable()) {
1561 // Skip symbols whose name doesn't match
1562 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1563 continue;
1564 }
1565 // Is there a function going by that name?
1566 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1567
1568 SmallVector<Instruction *, 4> ToRemoves;
1569
1570 // Walk the users of the function.
1571 for (auto &U : F->uses()) {
1572 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1573
1574 if (CI->getNumOperands() != 4) {
1575 continue;
1576 }
1577
1578 // Get arguments
1579 auto FalseValue = CI->getOperand(0);
1580 auto TrueValue = CI->getOperand(1);
1581 auto PredicateValue = CI->getOperand(2);
1582
1583 // Don't touch overloads that aren't in OpenCL C
1584 auto FalseType = FalseValue->getType();
1585 auto TrueType = TrueValue->getType();
1586 auto PredicateType = PredicateValue->getType();
1587
1588 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1589 continue;
1590 }
1591
1592 if (TrueType->isVectorTy()) {
1593 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1594 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001595 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001596 }
1597 if ((TrueType->getVectorNumElements() != 2) &&
1598 (TrueType->getVectorNumElements() != 3) &&
1599 (TrueType->getVectorNumElements() != 4) &&
1600 (TrueType->getVectorNumElements() != 8) &&
1601 (TrueType->getVectorNumElements() != 16)) {
1602 continue;
1603 }
1604 }
1605
1606 // Remember the type of the operands
1607 auto OpType = TrueType;
1608
1609 // The actual bit selection will always be done on an integer type,
1610 // declare it here
1611 Type *BitType;
1612
1613 // If the operands are float, then bitcast them to int
1614 if (OpType->getScalarType()->isFloatingPointTy()) {
1615
1616 // First create the new type
1617 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1618 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1619 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001620 BitType =
1621 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001622 }
1623
1624 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001625 PredicateValue =
1626 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1627 FalseValue =
1628 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1629 TrueValue =
1630 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001631
1632 } else {
1633 // The operands have an integer type, use it directly
1634 BitType = OpType;
1635 }
1636
1637 // All the operands are now always integers
1638 // implement as (c & b) | (~c & a)
1639
1640 // Create our negated predicate value
1641 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001642 auto NotPredicateValue = BinaryOperator::Create(
1643 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001644
1645 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001646 auto BitsFalse = BinaryOperator::Create(
1647 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1648 auto BitsTrue = BinaryOperator::Create(
1649 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001650
1651 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1652 BitsTrue, "", CI);
1653
1654 // If we were dealing with a floating point type, we must bitcast
1655 // the result back to that
1656 if (OpType->getScalarType()->isFloatingPointTy()) {
1657 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1658 }
1659
1660 // Replace call with our new code
1661 CI->replaceAllUsesWith(V);
1662
1663 // Lastly, remember to remove the user.
1664 ToRemoves.push_back(CI);
1665 }
1666 }
1667
1668 Changed = !ToRemoves.empty();
1669
1670 // And cleanup the calls we don't use anymore.
1671 for (auto V : ToRemoves) {
1672 V->eraseFromParent();
1673 }
1674
1675 // And remove the function we don't need either too.
1676 F->eraseFromParent();
1677 }
1678 }
1679
1680 return Changed;
1681}
1682
Kévin Petit6b0a9532018-10-30 20:00:39 +00001683bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1684 bool Changed = false;
1685
1686 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001687 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1688 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1689 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1690 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1691 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1692 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001693 };
1694
1695 for (auto Pair : Map) {
1696 // If we find a function with the matching name.
1697 if (auto F = M.getFunction(Pair.first)) {
1698 SmallVector<Instruction *, 4> ToRemoves;
1699
1700 // Walk the users of the function.
1701 for (auto &U : F->uses()) {
1702 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1703
1704 auto ReplacementFn = Pair.second;
1705
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001706 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001707 Value *VectorArg;
1708
1709 // First figure out which function we're dealing with
1710 if (F->getName().startswith("_Z10smoothstep")) {
1711 ArgsToSplat.push_back(CI->getOperand(1));
1712 VectorArg = CI->getOperand(2);
1713 } else {
1714 VectorArg = CI->getOperand(1);
1715 }
1716
1717 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001718 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001719 auto VecType = VectorArg->getType();
1720
1721 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001722 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001723 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001724 auto index =
1725 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1726 NewVectorArg =
1727 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001728 }
1729 SplatArgs.push_back(NewVectorArg);
1730 }
1731
1732 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001733 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1734 const auto NewFType =
1735 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001736
1737 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1738
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001739 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001740 for (auto arg : SplatArgs) {
1741 NewArgs.push_back(arg);
1742 }
1743 NewArgs.push_back(VectorArg);
1744
1745 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1746
1747 CI->replaceAllUsesWith(NewCI);
1748
1749 // Lastly, remember to remove the user.
1750 ToRemoves.push_back(CI);
1751 }
1752 }
1753
1754 Changed = !ToRemoves.empty();
1755
1756 // And cleanup the calls we don't use anymore.
1757 for (auto V : ToRemoves) {
1758 V->eraseFromParent();
1759 }
1760
1761 // And remove the function we don't need either too.
1762 F->eraseFromParent();
1763 }
1764 }
1765
1766 return Changed;
1767}
1768
David Neto22f144c2017-06-12 14:26:21 -04001769bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1770 bool Changed = false;
1771
1772 const std::map<const char *, Instruction::BinaryOps> Map = {
1773 {"_Z7signbitf", Instruction::LShr},
1774 {"_Z7signbitDv2_f", Instruction::AShr},
1775 {"_Z7signbitDv3_f", Instruction::AShr},
1776 {"_Z7signbitDv4_f", Instruction::AShr},
1777 };
1778
1779 for (auto Pair : Map) {
1780 // If we find a function with the matching name.
1781 if (auto F = M.getFunction(Pair.first)) {
1782 SmallVector<Instruction *, 4> ToRemoves;
1783
1784 // Walk the users of the function.
1785 for (auto &U : F->uses()) {
1786 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1787 auto Arg = CI->getOperand(0);
1788
1789 auto Bitcast =
1790 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1791
1792 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1793 ConstantInt::get(CI->getType(), 31),
1794 "", CI);
1795
1796 CI->replaceAllUsesWith(Shr);
1797
1798 // Lastly, remember to remove the user.
1799 ToRemoves.push_back(CI);
1800 }
1801 }
1802
1803 Changed = !ToRemoves.empty();
1804
1805 // And cleanup the calls we don't use anymore.
1806 for (auto V : ToRemoves) {
1807 V->eraseFromParent();
1808 }
1809
1810 // And remove the function we don't need either too.
1811 F->eraseFromParent();
1812 }
1813 }
1814
1815 return Changed;
1816}
1817
1818bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1819 bool Changed = false;
1820
1821 const std::map<const char *,
1822 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1823 Map = {
1824 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1825 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1826 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1827 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1828 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1829 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1830 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1831 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1832 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1833 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1834 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1835 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1836 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1837 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1838 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1839 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1840 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1841 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1842 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1843 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1844 };
1845
1846 for (auto Pair : Map) {
1847 // If we find a function with the matching name.
1848 if (auto F = M.getFunction(Pair.first)) {
1849 SmallVector<Instruction *, 4> ToRemoves;
1850
1851 // Walk the users of the function.
1852 for (auto &U : F->uses()) {
1853 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1854 // The multiply instruction to use.
1855 auto MulInst = Pair.second.first;
1856
1857 // The add instruction to use.
1858 auto AddInst = Pair.second.second;
1859
1860 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1861
1862 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1863 CI->getArgOperand(1), "", CI);
1864
1865 if (Instruction::BinaryOpsEnd != AddInst) {
1866 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1867 CI);
1868 }
1869
1870 CI->replaceAllUsesWith(I);
1871
1872 // Lastly, remember to remove the user.
1873 ToRemoves.push_back(CI);
1874 }
1875 }
1876
1877 Changed = !ToRemoves.empty();
1878
1879 // And cleanup the calls we don't use anymore.
1880 for (auto V : ToRemoves) {
1881 V->eraseFromParent();
1882 }
1883
1884 // And remove the function we don't need either too.
1885 F->eraseFromParent();
1886 }
1887 }
1888
1889 return Changed;
1890}
1891
Derek Chowcfd368b2017-10-19 20:58:45 -07001892bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1893 bool Changed = false;
1894
alan-bakerf795f392019-06-11 18:24:34 -04001895 for (auto const &SymVal : M.getValueSymbolTable()) {
1896 if (!SymVal.getKey().contains("vstore"))
1897 continue;
1898 if (SymVal.getKey().contains("vstore_"))
1899 continue;
1900 if (SymVal.getKey().contains("vstorea"))
1901 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001902
alan-bakerf795f392019-06-11 18:24:34 -04001903 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001904 SmallVector<Instruction *, 4> ToRemoves;
1905
alan-bakerf795f392019-06-11 18:24:34 -04001906 auto fname = F->getName();
1907 if (!fname.consume_front("_Z"))
1908 continue;
1909 size_t name_len;
1910 if (fname.consumeInteger(10, name_len))
1911 continue;
1912 std::string name = fname.take_front(name_len);
1913
1914 bool ok = StringSwitch<bool>(name)
1915 .Case("vstore2", true)
1916 .Case("vstore3", true)
1917 .Case("vstore4", true)
1918 .Case("vstore8", true)
1919 .Case("vstore16", true)
1920 .Default(false);
1921 if (!ok)
1922 continue;
1923
Derek Chowcfd368b2017-10-19 20:58:45 -07001924 for (auto &U : F->uses()) {
1925 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04001926 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001927
alan-bakerf795f392019-06-11 18:24:34 -04001928 auto data_type = data->getType();
1929 if (!data_type->isVectorTy())
1930 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001931
alan-bakerf795f392019-06-11 18:24:34 -04001932 auto elems = data_type->getVectorNumElements();
1933 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
1934 elems != 16)
1935 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001936
alan-bakerf795f392019-06-11 18:24:34 -04001937 auto offset = CI->getOperand(1);
1938 auto ptr = CI->getOperand(2);
1939 auto ptr_type = ptr->getType();
1940 auto pointee_type = ptr_type->getPointerElementType();
1941 if (pointee_type != data_type->getVectorElementType())
1942 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001943
alan-bakerf795f392019-06-11 18:24:34 -04001944 // Avoid pointer casts. Instead generate the correct number of stores
1945 // and rely on drivers to coalesce appropriately.
1946 IRBuilder<> builder(CI);
1947 auto elems_const = builder.getInt32(elems);
1948 auto adjust = builder.CreateMul(offset, elems_const);
1949 for (auto i = 0; i < elems; ++i) {
1950 auto idx = builder.getInt32(i);
1951 auto add = builder.CreateAdd(adjust, idx);
1952 auto gep = builder.CreateGEP(ptr, add);
1953 auto extract = builder.CreateExtractElement(data, i);
1954 auto store = builder.CreateStore(extract, gep);
1955 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001956
Derek Chowcfd368b2017-10-19 20:58:45 -07001957 ToRemoves.push_back(CI);
1958 }
1959 }
1960
1961 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07001962 for (auto V : ToRemoves) {
1963 V->eraseFromParent();
1964 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001965 F->eraseFromParent();
1966 }
1967 }
1968
1969 return Changed;
1970}
1971
1972bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1973 bool Changed = false;
1974
alan-bakerf795f392019-06-11 18:24:34 -04001975 for (auto const &SymVal : M.getValueSymbolTable()) {
1976 if (!SymVal.getKey().contains("vload"))
1977 continue;
1978 if (SymVal.getKey().contains("vload_"))
1979 continue;
1980 if (SymVal.getKey().contains("vloada"))
1981 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001982
alan-bakerf795f392019-06-11 18:24:34 -04001983 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001984 SmallVector<Instruction *, 4> ToRemoves;
1985
alan-bakerf795f392019-06-11 18:24:34 -04001986 auto fname = F->getName();
1987 if (!fname.consume_front("_Z"))
1988 continue;
1989 size_t name_len;
1990 if (fname.consumeInteger(10, name_len))
1991 continue;
1992 std::string name = fname.take_front(name_len);
1993
1994 bool ok = StringSwitch<bool>(name)
1995 .Case("vload2", true)
1996 .Case("vload3", true)
1997 .Case("vload4", true)
1998 .Case("vload8", true)
1999 .Case("vload16", true)
2000 .Default(false);
2001 if (!ok)
2002 continue;
2003
Derek Chowcfd368b2017-10-19 20:58:45 -07002004 for (auto &U : F->uses()) {
2005 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002006 auto ret_type = F->getReturnType();
2007 if (!ret_type->isVectorTy())
2008 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002009
alan-bakerf795f392019-06-11 18:24:34 -04002010 auto elems = ret_type->getVectorNumElements();
2011 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2012 elems != 16)
2013 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002014
alan-bakerf795f392019-06-11 18:24:34 -04002015 auto offset = CI->getOperand(0);
2016 auto ptr = CI->getOperand(1);
2017 auto ptr_type = ptr->getType();
2018 auto pointee_type = ptr_type->getPointerElementType();
2019 if (pointee_type != ret_type->getVectorElementType())
2020 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002021
alan-bakerf795f392019-06-11 18:24:34 -04002022 // Avoid pointer casts. Instead generate the correct number of loads
2023 // and rely on drivers to coalesce appropriately.
2024 IRBuilder<> builder(CI);
2025 auto elems_const = builder.getInt32(elems);
2026 Value *insert = UndefValue::get(ret_type);
2027 auto adjust = builder.CreateMul(offset, elems_const);
2028 for (auto i = 0; i < elems; ++i) {
2029 auto idx = builder.getInt32(i);
2030 auto add = builder.CreateAdd(adjust, idx);
2031 auto gep = builder.CreateGEP(ptr, add);
2032 auto load = builder.CreateLoad(gep);
2033 insert = builder.CreateInsertElement(insert, load, i);
2034 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002035
alan-bakerf795f392019-06-11 18:24:34 -04002036 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002037 ToRemoves.push_back(CI);
2038 }
2039 }
2040
2041 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002042 for (auto V : ToRemoves) {
2043 V->eraseFromParent();
2044 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002045 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002046 }
2047 }
2048
2049 return Changed;
2050}
2051
David Neto22f144c2017-06-12 14:26:21 -04002052bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2053 bool Changed = false;
2054
2055 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2056 "_Z10vload_halfjPU3AS2KDh"};
2057
2058 for (auto Name : Map) {
2059 // If we find a function with the matching name.
2060 if (auto F = M.getFunction(Name)) {
2061 SmallVector<Instruction *, 4> ToRemoves;
2062
2063 // Walk the users of the function.
2064 for (auto &U : F->uses()) {
2065 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2066 // The index argument from vload_half.
2067 auto Arg0 = CI->getOperand(0);
2068
2069 // The pointer argument from vload_half.
2070 auto Arg1 = CI->getOperand(1);
2071
David Neto22f144c2017-06-12 14:26:21 -04002072 auto IntTy = Type::getInt32Ty(M.getContext());
2073 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002074 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2075
David Neto22f144c2017-06-12 14:26:21 -04002076 // Our intrinsic to unpack a float2 from an int.
2077 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2078
2079 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2080
David Neto482550a2018-03-24 05:21:07 -07002081 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002082 auto ShortTy = Type::getInt16Ty(M.getContext());
2083 auto ShortPointerTy = PointerType::get(
2084 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002085
David Netoac825b82017-05-30 12:49:01 -04002086 // Cast the half* pointer to short*.
2087 auto Cast =
2088 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002089
David Netoac825b82017-05-30 12:49:01 -04002090 // Index into the correct address of the casted pointer.
2091 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2092
2093 // Load from the short* we casted to.
2094 auto Load = new LoadInst(Index, "", CI);
2095
2096 // ZExt the short -> int.
2097 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2098
2099 // Get our float2.
2100 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2101
2102 // Extract out the bottom element which is our float result.
2103 auto Extract = ExtractElementInst::Create(
2104 Call, ConstantInt::get(IntTy, 0), "", CI);
2105
2106 CI->replaceAllUsesWith(Extract);
2107 } else {
2108 // Assume the pointer argument points to storage aligned to 32bits
2109 // or more.
2110 // TODO(dneto): Do more analysis to make sure this is true?
2111 //
2112 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2113 // with:
2114 //
2115 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2116 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2117 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2118 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2119 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2120 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2121 // x float> %converted, %index_is_odd32
2122
2123 auto IntPointerTy = PointerType::get(
2124 IntTy, Arg1->getType()->getPointerAddressSpace());
2125
David Neto973e6a82017-05-30 13:48:18 -04002126 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002127 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002128 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002129 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2130
2131 auto One = ConstantInt::get(IntTy, 1);
2132 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2133 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2134
2135 // Index into the correct address of the casted pointer.
2136 auto Ptr =
2137 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2138
2139 // Load from the int* we casted to.
2140 auto Load = new LoadInst(Ptr, "", CI);
2141
2142 // Get our float2.
2143 auto Call = CallInst::Create(NewF, Load, "", CI);
2144
2145 // Extract out the float result, where the element number is
2146 // determined by whether the original index was even or odd.
2147 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2148
2149 CI->replaceAllUsesWith(Extract);
2150 }
David Neto22f144c2017-06-12 14:26:21 -04002151
2152 // Lastly, remember to remove the user.
2153 ToRemoves.push_back(CI);
2154 }
2155 }
2156
2157 Changed = !ToRemoves.empty();
2158
2159 // And cleanup the calls we don't use anymore.
2160 for (auto V : ToRemoves) {
2161 V->eraseFromParent();
2162 }
2163
2164 // And remove the function we don't need either too.
2165 F->eraseFromParent();
2166 }
2167 }
2168
2169 return Changed;
2170}
2171
2172bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002173
Kévin Petite8edce32019-04-10 14:23:32 +01002174 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002175 "_Z11vload_half2jPU3AS1KDh",
2176 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2177 "_Z11vload_half2jPU3AS2KDh",
2178 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2179 };
David Neto22f144c2017-06-12 14:26:21 -04002180
Kévin Petite8edce32019-04-10 14:23:32 +01002181 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2182 // The index argument from vload_half.
2183 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002184
Kévin Petite8edce32019-04-10 14:23:32 +01002185 // The pointer argument from vload_half.
2186 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002187
Kévin Petite8edce32019-04-10 14:23:32 +01002188 auto IntTy = Type::getInt32Ty(M.getContext());
2189 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002190 auto NewPointerTy =
2191 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002192 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002193
Kévin Petite8edce32019-04-10 14:23:32 +01002194 // Cast the half* pointer to int*.
2195 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002196
Kévin Petite8edce32019-04-10 14:23:32 +01002197 // Index into the correct address of the casted pointer.
2198 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002199
Kévin Petite8edce32019-04-10 14:23:32 +01002200 // Load from the int* we casted to.
2201 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002202
Kévin Petite8edce32019-04-10 14:23:32 +01002203 // Our intrinsic to unpack a float2 from an int.
2204 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002205
Kévin Petite8edce32019-04-10 14:23:32 +01002206 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002207
Kévin Petite8edce32019-04-10 14:23:32 +01002208 // Get our float2.
2209 return CallInst::Create(NewF, Load, "", CI);
2210 });
David Neto22f144c2017-06-12 14:26:21 -04002211}
2212
2213bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002214
Kévin Petite8edce32019-04-10 14:23:32 +01002215 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002216 "_Z11vload_half4jPU3AS1KDh",
2217 "_Z12vloada_half4jPU3AS1KDh",
2218 "_Z11vload_half4jPU3AS2KDh",
2219 "_Z12vloada_half4jPU3AS2KDh",
2220 };
David Neto22f144c2017-06-12 14:26:21 -04002221
Kévin Petite8edce32019-04-10 14:23:32 +01002222 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2223 // The index argument from vload_half.
2224 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002225
Kévin Petite8edce32019-04-10 14:23:32 +01002226 // The pointer argument from vload_half.
2227 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002228
Kévin Petite8edce32019-04-10 14:23:32 +01002229 auto IntTy = Type::getInt32Ty(M.getContext());
2230 auto Int2Ty = VectorType::get(IntTy, 2);
2231 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002232 auto NewPointerTy =
2233 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002234 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002235
Kévin Petite8edce32019-04-10 14:23:32 +01002236 // Cast the half* pointer to int2*.
2237 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002238
Kévin Petite8edce32019-04-10 14:23:32 +01002239 // Index into the correct address of the casted pointer.
2240 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002241
Kévin Petite8edce32019-04-10 14:23:32 +01002242 // Load from the int2* we casted to.
2243 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002244
Kévin Petite8edce32019-04-10 14:23:32 +01002245 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002246 auto X =
2247 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2248 auto Y =
2249 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002250
Kévin Petite8edce32019-04-10 14:23:32 +01002251 // Our intrinsic to unpack a float2 from an int.
2252 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002253
Kévin Petite8edce32019-04-10 14:23:32 +01002254 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002255
Kévin Petite8edce32019-04-10 14:23:32 +01002256 // Get the lower (x & y) components of our final float4.
2257 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002258
Kévin Petite8edce32019-04-10 14:23:32 +01002259 // Get the higher (z & w) components of our final float4.
2260 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002261
Kévin Petite8edce32019-04-10 14:23:32 +01002262 Constant *ShuffleMask[4] = {
2263 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2264 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002265
Kévin Petite8edce32019-04-10 14:23:32 +01002266 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002267 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2268 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002269 });
David Neto22f144c2017-06-12 14:26:21 -04002270}
2271
David Neto6ad93232018-06-07 15:42:58 -07002272bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002273
2274 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2275 //
2276 // %u = load i32 %ptr
2277 // %fxy = call <2 x float> Unpack2xHalf(u)
2278 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002279 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002280 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2281 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2282 "_Z20__clspv_vloada_half2jPKj", // private
2283 };
2284
Kévin Petite8edce32019-04-10 14:23:32 +01002285 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2286 auto Index = CI->getOperand(0);
2287 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 auto IntTy = Type::getInt32Ty(M.getContext());
2290 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2291 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002292
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002293 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002294 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002295
Kévin Petite8edce32019-04-10 14:23:32 +01002296 // Our intrinsic to unpack a float2 from an int.
2297 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002298
Kévin Petite8edce32019-04-10 14:23:32 +01002299 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002300
Kévin Petite8edce32019-04-10 14:23:32 +01002301 // Get our final float2.
2302 return CallInst::Create(NewF, Load, "", CI);
2303 });
David Neto6ad93232018-06-07 15:42:58 -07002304}
2305
2306bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002307
2308 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2309 //
2310 // %u2 = load <2 x i32> %ptr
2311 // %u2xy = extractelement %u2, 0
2312 // %u2zw = extractelement %u2, 1
2313 // %fxy = call <2 x float> Unpack2xHalf(uint)
2314 // %fzw = call <2 x float> Unpack2xHalf(uint)
2315 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002316 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002317 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2318 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2319 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2320 };
2321
Kévin Petite8edce32019-04-10 14:23:32 +01002322 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2323 auto Index = CI->getOperand(0);
2324 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 auto IntTy = Type::getInt32Ty(M.getContext());
2327 auto Int2Ty = VectorType::get(IntTy, 2);
2328 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2329 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002330
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002331 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002332 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002333
Kévin Petite8edce32019-04-10 14:23:32 +01002334 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002335 auto X =
2336 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2337 auto Y =
2338 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002339
Kévin Petite8edce32019-04-10 14:23:32 +01002340 // Our intrinsic to unpack a float2 from an int.
2341 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002342
Kévin Petite8edce32019-04-10 14:23:32 +01002343 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002344
Kévin Petite8edce32019-04-10 14:23:32 +01002345 // Get the lower (x & y) components of our final float4.
2346 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002347
Kévin Petite8edce32019-04-10 14:23:32 +01002348 // Get the higher (z & w) components of our final float4.
2349 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002350
Kévin Petite8edce32019-04-10 14:23:32 +01002351 Constant *ShuffleMask[4] = {
2352 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2353 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002354
Kévin Petite8edce32019-04-10 14:23:32 +01002355 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002356 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2357 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002358 });
David Neto6ad93232018-06-07 15:42:58 -07002359}
2360
David Neto22f144c2017-06-12 14:26:21 -04002361bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002362
Kévin Petite8edce32019-04-10 14:23:32 +01002363 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2364 "_Z15vstore_half_rtefjPU3AS1Dh",
2365 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002366
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002367 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002368 // The value to store.
2369 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002370
Kévin Petite8edce32019-04-10 14:23:32 +01002371 // The index argument from vstore_half.
2372 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002373
Kévin Petite8edce32019-04-10 14:23:32 +01002374 // The pointer argument from vstore_half.
2375 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002376
Kévin Petite8edce32019-04-10 14:23:32 +01002377 auto IntTy = Type::getInt32Ty(M.getContext());
2378 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2379 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2380 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002381
Kévin Petite8edce32019-04-10 14:23:32 +01002382 // Our intrinsic to pack a float2 to an int.
2383 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002384
Kévin Petite8edce32019-04-10 14:23:32 +01002385 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002386
Kévin Petite8edce32019-04-10 14:23:32 +01002387 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002388 auto TempVec = InsertElementInst::Create(
2389 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002390
Kévin Petite8edce32019-04-10 14:23:32 +01002391 // Pack the float2 -> half2 (in an int).
2392 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002393
Kévin Petite8edce32019-04-10 14:23:32 +01002394 Value *Ret;
2395 if (clspv::Option::F16BitStorage()) {
2396 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002397 auto ShortPointerTy =
2398 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002399
Kévin Petite8edce32019-04-10 14:23:32 +01002400 // Truncate our i32 to an i16.
2401 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002402
Kévin Petite8edce32019-04-10 14:23:32 +01002403 // Cast the half* pointer to short*.
2404 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002405
Kévin Petite8edce32019-04-10 14:23:32 +01002406 // Index into the correct address of the casted pointer.
2407 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002408
Kévin Petite8edce32019-04-10 14:23:32 +01002409 // Store to the int* we casted to.
2410 Ret = new StoreInst(Trunc, Index, CI);
2411 } else {
2412 // We can only write to 32-bit aligned words.
2413 //
2414 // Assuming base is aligned to 32-bits, replace the equivalent of
2415 // vstore_half(value, index, base)
2416 // with:
2417 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2418 // uint32_t write_to_upper_half = index & 1u;
2419 // uint32_t shift = write_to_upper_half << 4;
2420 //
2421 // // Pack the float value as a half number in bottom 16 bits
2422 // // of an i32.
2423 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2424 //
2425 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2426 // ^ ((packed & 0xffff) << shift)
2427 // // We only need relaxed consistency, but OpenCL 1.2 only has
2428 // // sequentially consistent atomics.
2429 // // TODO(dneto): Use relaxed consistency.
2430 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002431 auto IntPointerTy =
2432 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002433
Kévin Petite8edce32019-04-10 14:23:32 +01002434 auto Four = ConstantInt::get(IntTy, 4);
2435 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002436
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002437 auto IndexIsOdd =
2438 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002439 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002440 auto IndexIntoI32 =
2441 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2442 auto BaseI32Ptr =
2443 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2444 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2445 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002446 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2447 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002448 auto MaskBitsToWrite =
2449 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2450 auto MaskedCurrent = BinaryOperator::CreateAnd(
2451 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002452
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002453 auto XLowerBits =
2454 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2455 auto NewBitsToWrite =
2456 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2457 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2458 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 // Generate the call to atomi_xor.
2461 SmallVector<Type *, 5> ParamTypes;
2462 // The pointer type.
2463 ParamTypes.push_back(IntPointerTy);
2464 // The Types for memory scope, semantics, and value.
2465 ParamTypes.push_back(IntTy);
2466 ParamTypes.push_back(IntTy);
2467 ParamTypes.push_back(IntTy);
2468 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2469 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002470
Kévin Petite8edce32019-04-10 14:23:32 +01002471 const auto ConstantScopeDevice =
2472 ConstantInt::get(IntTy, spv::ScopeDevice);
2473 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2474 // (SPIR-V Workgroup).
2475 const auto AddrSpaceSemanticsBits =
2476 IntPointerTy->getPointerAddressSpace() == 1
2477 ? spv::MemorySemanticsUniformMemoryMask
2478 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002479
Kévin Petite8edce32019-04-10 14:23:32 +01002480 // We're using relaxed consistency here.
2481 const auto ConstantMemorySemantics =
2482 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2483 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002484
Kévin Petite8edce32019-04-10 14:23:32 +01002485 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2486 ConstantMemorySemantics, ValueToXor};
2487 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2488 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002489 }
David Neto22f144c2017-06-12 14:26:21 -04002490
Kévin Petite8edce32019-04-10 14:23:32 +01002491 return Ret;
2492 });
David Neto22f144c2017-06-12 14:26:21 -04002493}
2494
2495bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002496
Kévin Petite8edce32019-04-10 14:23:32 +01002497 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002498 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2499 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2500 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2501 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2502 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2503 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2504 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2505 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2506 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2507 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2508 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2509 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2510 };
David Neto22f144c2017-06-12 14:26:21 -04002511
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002512 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002513 // The value to store.
2514 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002515
Kévin Petite8edce32019-04-10 14:23:32 +01002516 // The index argument from vstore_half.
2517 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002518
Kévin Petite8edce32019-04-10 14:23:32 +01002519 // The pointer argument from vstore_half.
2520 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002521
Kévin Petite8edce32019-04-10 14:23:32 +01002522 auto IntTy = Type::getInt32Ty(M.getContext());
2523 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002524 auto NewPointerTy =
2525 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002526 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002527
Kévin Petite8edce32019-04-10 14:23:32 +01002528 // Our intrinsic to pack a float2 to an int.
2529 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002530
Kévin Petite8edce32019-04-10 14:23:32 +01002531 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002532
Kévin Petite8edce32019-04-10 14:23:32 +01002533 // Turn the packed x & y into the final packing.
2534 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002535
Kévin Petite8edce32019-04-10 14:23:32 +01002536 // Cast the half* pointer to int*.
2537 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002538
Kévin Petite8edce32019-04-10 14:23:32 +01002539 // Index into the correct address of the casted pointer.
2540 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002541
Kévin Petite8edce32019-04-10 14:23:32 +01002542 // Store to the int* we casted to.
2543 return new StoreInst(X, Index, CI);
2544 });
David Neto22f144c2017-06-12 14:26:21 -04002545}
2546
2547bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002548
Kévin Petite8edce32019-04-10 14:23:32 +01002549 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002550 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2551 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2552 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2553 "_Z13vstorea_half4Dv4_fjPDh", // private
2554 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2555 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2556 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2557 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2558 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2559 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2560 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2561 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2562 };
David Neto22f144c2017-06-12 14:26:21 -04002563
Kévin Petite8edce32019-04-10 14:23:32 +01002564 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2565 // The value to store.
2566 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002567
Kévin Petite8edce32019-04-10 14:23:32 +01002568 // The index argument from vstore_half.
2569 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002570
Kévin Petite8edce32019-04-10 14:23:32 +01002571 // The pointer argument from vstore_half.
2572 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002573
Kévin Petite8edce32019-04-10 14:23:32 +01002574 auto IntTy = Type::getInt32Ty(M.getContext());
2575 auto Int2Ty = VectorType::get(IntTy, 2);
2576 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002577 auto NewPointerTy =
2578 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002579 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002580
Kévin Petite8edce32019-04-10 14:23:32 +01002581 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2582 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002583
Kévin Petite8edce32019-04-10 14:23:32 +01002584 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002585 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2586 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002587
Kévin Petite8edce32019-04-10 14:23:32 +01002588 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2589 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002590
Kévin Petite8edce32019-04-10 14:23:32 +01002591 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002592 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2593 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002594
Kévin Petite8edce32019-04-10 14:23:32 +01002595 // Our intrinsic to pack a float2 to an int.
2596 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002597
Kévin Petite8edce32019-04-10 14:23:32 +01002598 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002599
Kévin Petite8edce32019-04-10 14:23:32 +01002600 // Turn the packed x & y into the final component of our int2.
2601 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002602
Kévin Petite8edce32019-04-10 14:23:32 +01002603 // Turn the packed z & w into the final component of our int2.
2604 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002605
Kévin Petite8edce32019-04-10 14:23:32 +01002606 auto Combine = InsertElementInst::Create(
2607 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002608 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2609 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002610
Kévin Petite8edce32019-04-10 14:23:32 +01002611 // Cast the half* pointer to int2*.
2612 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002613
Kévin Petite8edce32019-04-10 14:23:32 +01002614 // Index into the correct address of the casted pointer.
2615 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002616
Kévin Petite8edce32019-04-10 14:23:32 +01002617 // Store to the int2* we casted to.
2618 return new StoreInst(Combine, Index, CI);
2619 });
David Neto22f144c2017-06-12 14:26:21 -04002620}
2621
2622bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2623 bool Changed = false;
2624
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002625 const std::map<const char *, const char *> Map = {
2626 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2627 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2628 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2629 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002630
2631 for (auto Pair : Map) {
2632 // If we find a function with the matching name.
2633 if (auto F = M.getFunction(Pair.first)) {
2634 SmallVector<Instruction *, 4> ToRemoves;
2635
2636 // Walk the users of the function.
2637 for (auto &U : F->uses()) {
2638 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2639 // The image.
2640 auto Arg0 = CI->getOperand(0);
2641
2642 // The sampler.
2643 auto Arg1 = CI->getOperand(1);
2644
2645 // The coordinate (integer type that we can't handle).
2646 auto Arg2 = CI->getOperand(2);
2647
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002648 auto FloatVecTy =
2649 VectorType::get(Type::getFloatTy(M.getContext()),
2650 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002651
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002652 auto NewFType = FunctionType::get(
2653 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2654 false);
David Neto22f144c2017-06-12 14:26:21 -04002655
2656 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2657
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002658 auto Cast =
2659 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002660
2661 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2662
2663 CI->replaceAllUsesWith(NewCI);
2664
2665 // Lastly, remember to remove the user.
2666 ToRemoves.push_back(CI);
2667 }
2668 }
2669
2670 Changed = !ToRemoves.empty();
2671
2672 // And cleanup the calls we don't use anymore.
2673 for (auto V : ToRemoves) {
2674 V->eraseFromParent();
2675 }
2676
2677 // And remove the function we don't need either too.
2678 F->eraseFromParent();
2679 }
2680 }
2681
2682 return Changed;
2683}
2684
2685bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2686 bool Changed = false;
2687
2688 const std::map<const char *, const char *> Map = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002689 {"_Z8atom_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002690 {"_Z8atom_incPU3AS3Vi", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002691 {"_Z8atom_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002692 {"_Z8atom_incPU3AS3Vj", "spirv.atomic_inc"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002693 {"_Z8atom_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002694 {"_Z8atom_decPU3AS3Vi", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002695 {"_Z8atom_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002696 {"_Z8atom_decPU3AS3Vj", "spirv.atomic_dec"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002697 {"_Z12atom_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002698 {"_Z12atom_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002699 {"_Z12atom_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002700 {"_Z12atom_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"},
David Neto22f144c2017-06-12 14:26:21 -04002701 {"_Z10atomic_incPU3AS1Vi", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002702 {"_Z10atomic_incPU3AS3Vi", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002703 {"_Z10atomic_incPU3AS1Vj", "spirv.atomic_inc"},
Kévin Petita303dc62019-03-26 21:40:35 +00002704 {"_Z10atomic_incPU3AS3Vj", "spirv.atomic_inc"},
David Neto22f144c2017-06-12 14:26:21 -04002705 {"_Z10atomic_decPU3AS1Vi", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002706 {"_Z10atomic_decPU3AS3Vi", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002707 {"_Z10atomic_decPU3AS1Vj", "spirv.atomic_dec"},
Kévin Petita303dc62019-03-26 21:40:35 +00002708 {"_Z10atomic_decPU3AS3Vj", "spirv.atomic_dec"},
David Neto22f144c2017-06-12 14:26:21 -04002709 {"_Z14atomic_cmpxchgPU3AS1Viii", "spirv.atomic_compare_exchange"},
Kévin Petita303dc62019-03-26 21:40:35 +00002710 {"_Z14atomic_cmpxchgPU3AS3Viii", "spirv.atomic_compare_exchange"},
2711 {"_Z14atomic_cmpxchgPU3AS1Vjjj", "spirv.atomic_compare_exchange"},
2712 {"_Z14atomic_cmpxchgPU3AS3Vjjj", "spirv.atomic_compare_exchange"}};
David Neto22f144c2017-06-12 14:26:21 -04002713
2714 for (auto Pair : Map) {
2715 // If we find a function with the matching name.
2716 if (auto F = M.getFunction(Pair.first)) {
2717 SmallVector<Instruction *, 4> ToRemoves;
2718
2719 // Walk the users of the function.
2720 for (auto &U : F->uses()) {
2721 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2722 auto FType = F->getFunctionType();
2723 SmallVector<Type *, 5> ParamTypes;
2724
2725 // The pointer type.
2726 ParamTypes.push_back(FType->getParamType(0));
2727
2728 auto IntTy = Type::getInt32Ty(M.getContext());
2729
2730 // The memory scope type.
2731 ParamTypes.push_back(IntTy);
2732
2733 // The memory semantics type.
2734 ParamTypes.push_back(IntTy);
2735
2736 if (2 < CI->getNumArgOperands()) {
2737 // The unequal memory semantics type.
2738 ParamTypes.push_back(IntTy);
2739
2740 // The value type.
2741 ParamTypes.push_back(FType->getParamType(2));
2742
2743 // The comparator type.
2744 ParamTypes.push_back(FType->getParamType(1));
2745 } else if (1 < CI->getNumArgOperands()) {
2746 // The value type.
2747 ParamTypes.push_back(FType->getParamType(1));
2748 }
2749
2750 auto NewFType =
2751 FunctionType::get(FType->getReturnType(), ParamTypes, false);
2752 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2753
2754 // We need to map the OpenCL constants to the SPIR-V equivalents.
2755 const auto ConstantScopeDevice =
2756 ConstantInt::get(IntTy, spv::ScopeDevice);
2757 const auto ConstantMemorySemantics = ConstantInt::get(
2758 IntTy, spv::MemorySemanticsUniformMemoryMask |
2759 spv::MemorySemanticsSequentiallyConsistentMask);
2760
2761 SmallVector<Value *, 5> Params;
2762
2763 // The pointer.
2764 Params.push_back(CI->getArgOperand(0));
2765
2766 // The memory scope.
2767 Params.push_back(ConstantScopeDevice);
2768
2769 // The memory semantics.
2770 Params.push_back(ConstantMemorySemantics);
2771
2772 if (2 < CI->getNumArgOperands()) {
2773 // The unequal memory semantics.
2774 Params.push_back(ConstantMemorySemantics);
2775
2776 // The value.
2777 Params.push_back(CI->getArgOperand(2));
2778
2779 // The comparator.
2780 Params.push_back(CI->getArgOperand(1));
2781 } else if (1 < CI->getNumArgOperands()) {
2782 // The value.
2783 Params.push_back(CI->getArgOperand(1));
2784 }
2785
2786 auto NewCI = CallInst::Create(NewF, Params, "", CI);
2787
2788 CI->replaceAllUsesWith(NewCI);
2789
2790 // Lastly, remember to remove the user.
2791 ToRemoves.push_back(CI);
2792 }
2793 }
2794
2795 Changed = !ToRemoves.empty();
2796
2797 // And cleanup the calls we don't use anymore.
2798 for (auto V : ToRemoves) {
2799 V->eraseFromParent();
2800 }
2801
2802 // And remove the function we don't need either too.
2803 F->eraseFromParent();
2804 }
2805 }
2806
Neil Henning39672102017-09-29 14:33:13 +01002807 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002808 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002809 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002810 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002811 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002812 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002813 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002814 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002815 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002816 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002817 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002818 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002819 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002820 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002821 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002822 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002823 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002824 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002825 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002826 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002827 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002828 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002829 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002830 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002831 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002832 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002833 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002834 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002835 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002836 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002837 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002838 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002839 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002840 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002841 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002842 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002843 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002844 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002845 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002846 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002847 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002848 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002849 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002850 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002851 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002852 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002853 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002854 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002855 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002856 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002857 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002858 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002859 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002860 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002861 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002862 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002863 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002864 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002865 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002866 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002867 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002868 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002869 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2870 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2871 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002872
2873 for (auto Pair : Map2) {
2874 // If we find a function with the matching name.
2875 if (auto F = M.getFunction(Pair.first)) {
2876 SmallVector<Instruction *, 4> ToRemoves;
2877
2878 // Walk the users of the function.
2879 for (auto &U : F->uses()) {
2880 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2881 auto AtomicOp = new AtomicRMWInst(
2882 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2883 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2884
2885 CI->replaceAllUsesWith(AtomicOp);
2886
2887 // Lastly, remember to remove the user.
2888 ToRemoves.push_back(CI);
2889 }
2890 }
2891
2892 Changed = !ToRemoves.empty();
2893
2894 // And cleanup the calls we don't use anymore.
2895 for (auto V : ToRemoves) {
2896 V->eraseFromParent();
2897 }
2898
2899 // And remove the function we don't need either too.
2900 F->eraseFromParent();
2901 }
2902 }
2903
David Neto22f144c2017-06-12 14:26:21 -04002904 return Changed;
2905}
2906
2907bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002908
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002909 std::vector<const char *> Names = {
2910 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002911 };
2912
2913 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002914 auto IntTy = Type::getInt32Ty(M.getContext());
2915 auto FloatTy = Type::getFloatTy(M.getContext());
2916
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002917 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2918 ConstantInt::get(IntTy, 1),
2919 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002920
2921 Constant *UpShuffleMask[4] = {
2922 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2923 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2924
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002925 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2926 UndefValue::get(FloatTy),
2927 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002928
Kévin Petite8edce32019-04-10 14:23:32 +01002929 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002930 auto Arg0 =
2931 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2932 ConstantVector::get(DownShuffleMask), "", CI);
2933 auto Arg1 =
2934 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2935 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002936 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002937
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002938 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002939
Kévin Petite8edce32019-04-10 14:23:32 +01002940 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002941
Kévin Petite8edce32019-04-10 14:23:32 +01002942 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002943
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002944 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2945 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002946 });
David Neto22f144c2017-06-12 14:26:21 -04002947}
David Neto62653202017-10-16 19:05:18 -04002948
2949bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2950 bool Changed = false;
2951
2952 // OpenCL's float result = fract(float x, float* ptr)
2953 //
2954 // In the LLVM domain:
2955 //
2956 // %floor_result = call spir_func float @floor(float %x)
2957 // store float %floor_result, float * %ptr
2958 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2959 // %result = call spir_func float
2960 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2961 //
2962 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2963 // and clspv.fract occur in the SPIR-V generator pass:
2964 //
2965 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2966 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2967 // ...
2968 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2969 // OpStore %ptr %floor_result
2970 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2971 // %fract_result = OpExtInst %float
2972 // %glsl_ext Fmin %fract_intermediate %just_under_1
2973
David Neto62653202017-10-16 19:05:18 -04002974 using std::string;
2975
2976 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2977 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002978 using QuadType =
2979 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04002980 auto make_quad = [](const char *a, const char *b, const char *c,
2981 const char *d) {
2982 return std::tuple<const char *, const char *, const char *, const char *>(
2983 a, b, c, d);
2984 };
2985 const std::vector<QuadType> Functions = {
2986 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002987 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
2988 "clspv.fract.v2f"),
2989 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
2990 "clspv.fract.v3f"),
2991 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
2992 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04002993 };
2994
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002995 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04002996 const StringRef fract_name(std::get<0>(quad));
2997
2998 // If we find a function with the matching name.
2999 if (auto F = M.getFunction(fract_name)) {
3000 if (F->use_begin() == F->use_end())
3001 continue;
3002
3003 // We have some uses.
3004 Changed = true;
3005
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003006 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003007
3008 const StringRef floor_name(std::get<1>(quad));
3009 const StringRef fmin_name(std::get<2>(quad));
3010 const StringRef clspv_fract_name(std::get<3>(quad));
3011
3012 // This is either float or a float vector. All the float-like
3013 // types are this type.
3014 auto result_ty = F->getReturnType();
3015
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003016 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003017 if (!fmin_fn) {
3018 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003019 FunctionType *fn_ty =
3020 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003021 fmin_fn =
3022 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003023 fmin_fn->addFnAttr(Attribute::ReadNone);
3024 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3025 }
3026
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003027 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003028 if (!floor_fn) {
3029 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003030 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003031 floor_fn = cast<Function>(
3032 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003033 floor_fn->addFnAttr(Attribute::ReadNone);
3034 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3035 }
3036
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003037 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003038 if (!clspv_fract_fn) {
3039 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003040 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003041 clspv_fract_fn = cast<Function>(
3042 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003043 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3044 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3045 }
3046
3047 // Number of significand bits, counting the implicit (unstored) leading bit.
3048 unsigned num_significand_bits;
3049 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003050 case Type::HalfTyID:
3051 num_significand_bits = 11;
3052 break;
3053 case Type::FloatTyID:
3054 num_significand_bits = 24;
3055 break;
3056 case Type::DoubleTyID:
3057 num_significand_bits = 53;
3058 break;
3059 default:
3060 assert(false && "Unhandled float type when processing fract builtin");
3061 break;
David Neto62653202017-10-16 19:05:18 -04003062 }
3063 // Beware that the disassembler displays this value as
3064 // OpConstant %float 1
3065 // which is not quite right.
3066 const double kJustUnderOneScalar =
3067 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3068
3069 Constant *just_under_one =
3070 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3071 if (result_ty->isVectorTy()) {
3072 just_under_one = ConstantVector::getSplat(
3073 result_ty->getVectorNumElements(), just_under_one);
3074 }
3075
3076 IRBuilder<> Builder(Context);
3077
3078 SmallVector<Instruction *, 4> ToRemoves;
3079
3080 // Walk the users of the function.
3081 for (auto &U : F->uses()) {
3082 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3083
3084 Builder.SetInsertPoint(CI);
3085 auto arg = CI->getArgOperand(0);
3086 auto ptr = CI->getArgOperand(1);
3087
3088 // Compute floor result and store it.
3089 auto floor = Builder.CreateCall(floor_fn, {arg});
3090 Builder.CreateStore(floor, ptr);
3091
3092 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003093 auto fract_result =
3094 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003095
3096 CI->replaceAllUsesWith(fract_result);
3097
3098 // Lastly, remember to remove the user.
3099 ToRemoves.push_back(CI);
3100 }
3101 }
3102
3103 // And cleanup the calls we don't use anymore.
3104 for (auto V : ToRemoves) {
3105 V->eraseFromParent();
3106 }
3107
3108 // And remove the function too, since we no longer need it.
3109 F->eraseFromParent();
3110 }
3111 }
3112
3113 return Changed;
3114}