blob: 78de09f5414f4da85f2374c8662567f647c1b91b [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040032#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070033
Diego Novilloa4c44fa2019-04-11 10:56:15 -040034#include "Passes.h"
35#include "SPIRVOp.h"
36
David Neto22f144c2017-06-12 14:26:21 -040037using namespace llvm;
38
39#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
40
41namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000042
// Per-argument type facts recovered from a mangled builtin name.
struct ArgTypeInfo {
  // None is used for types with no integer signedness (the name parser in this
  // file assigns it to 'f').
  enum class SignedNess { None, Unsigned, Signed };

  // Defaulted so that a default-constructed ArgTypeInfo never carries an
  // indeterminate value that could later be copied or compared.
  SignedNess signedness = SignedNess::None;
};
47
48struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000049 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000050 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000051
Kévin Petit91bc72e2019-04-08 15:17:46 +010052 bool isArgSigned(size_t arg) const {
53 assert(argTypeInfos.size() > arg);
54 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000055 }
56
Kévin Petit91bc72e2019-04-08 15:17:46 +010057 static FunctionInfo getFromMangledName(StringRef name) {
58 FunctionInfo fi;
59 if (!getFromMangledNameCheck(name, &fi)) {
60 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000061 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010062 return fi;
63 }
Kévin Petit8a560882019-03-21 15:24:34 +000064
Kévin Petit91bc72e2019-04-08 15:17:46 +010065 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
66 if (!name.consume_front("_Z")) {
67 return false;
68 }
69 size_t nameLen;
70 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000071 return false;
72 }
73
Kévin Petit91bc72e2019-04-08 15:17:46 +010074 finfo->name = name.take_front(nameLen);
75 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000076
Kévin Petit91bc72e2019-04-08 15:17:46 +010077 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000078
Kévin Petit91bc72e2019-04-08 15:17:46 +010079 while (name.size() != 0) {
80
81 ArgTypeInfo ti;
82
83 // Try parsing a vector prefix
84 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040085 int numElems;
86 if (name.consumeInteger(10, numElems)) {
87 return false;
88 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010089
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040090 if (!name.consume_front("_")) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093 }
94
95 // Parse the base type
96 char typeCode = name.front();
97 name = name.drop_front(1);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040098 switch (typeCode) {
Kévin Petit91bc72e2019-04-08 15:17:46 +010099 case 'c': // char
100 case 'a': // signed char
101 case 's': // short
102 case 'i': // int
103 case 'l': // long
104 ti.signedness = ArgTypeInfo::SignedNess::Signed;
105 break;
106 case 'h': // unsigned char
107 case 't': // unsigned short
108 case 'j': // unsigned int
109 case 'm': // unsigned long
110 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
111 break;
112 case 'f':
113 ti.signedness = ArgTypeInfo::SignedNess::None;
114 break;
115 case 'S':
116 ti = prev_ti;
117 if (!name.consume_front("_")) {
118 return false;
119 }
120 break;
121 default:
122 return false;
123 }
124
125 finfo->argTypeInfos.push_back(ti);
126
127 prev_ti = ti;
128 }
129
130 return true;
131 };
Kévin Petit8a560882019-03-21 15:24:34 +0000132};
133
// Despite its name, this returns the zero-based index of the most significant
// set bit of |v| (i.e. floor(log2(v))), and 0 when |v| is 0. Callers in this
// file subtract two such results to obtain the shift distance between two
// single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;

  // Binary search over the bit positions: whenever anything is set above the
  // current step, discard the low part and record the step in the result.
  for (uint32_t step = 16; step != 0; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      highestBit |= step;
    }
  }

  return highestBit;
}
153
154Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
155 if (1 == elements) {
156 return Type::getInt1Ty(C);
157 } else {
158 return VectorType::get(Type::getInt1Ty(C), elements);
159 }
160}
161
162struct ReplaceOpenCLBuiltinPass final : public ModulePass {
163 static char ID;
164 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
165
166 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000167 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100168 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100169 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400170 bool replaceRecip(Module &M);
171 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100172 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400173 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100174 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400175 bool replaceLog10(Module &M);
176 bool replaceBarrier(Module &M);
177 bool replaceMemFence(Module &M);
178 bool replaceRelational(Module &M);
179 bool replaceIsInfAndIsNan(Module &M);
180 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000181 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000182 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000183 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000184 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000185 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000186 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000187 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400188 bool replaceSignbit(Module &M);
189 bool replaceMadandMad24andMul24(Module &M);
190 bool replaceVloadHalf(Module &M);
191 bool replaceVloadHalf2(Module &M);
192 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700193 bool replaceClspvVloadaHalf2(Module &M);
194 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400195 bool replaceVstoreHalf(Module &M);
196 bool replaceVstoreHalf2(Module &M);
197 bool replaceVstoreHalf4(Module &M);
198 bool replaceReadImageF(Module &M);
199 bool replaceAtomics(Module &M);
200 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400201 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700202 bool replaceVload(Module &M);
203 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400204};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100205} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400206
207char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400208INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
209 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400210
211namespace clspv {
212ModulePass *createReplaceOpenCLBuiltinPass() {
213 return new ReplaceOpenCLBuiltinPass();
214}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400215} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400216
217bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
218 bool Changed = false;
219
Kévin Petit2444e9b2018-11-09 14:14:37 +0000220 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100221 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100222 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400223 Changed |= replaceRecip(M);
224 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100225 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400226 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100227 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400228 Changed |= replaceLog10(M);
229 Changed |= replaceBarrier(M);
230 Changed |= replaceMemFence(M);
231 Changed |= replaceRelational(M);
232 Changed |= replaceIsInfAndIsNan(M);
233 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000234 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000235 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000236 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000237 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000238 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000239 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000240 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400241 Changed |= replaceSignbit(M);
242 Changed |= replaceMadandMad24andMul24(M);
243 Changed |= replaceVloadHalf(M);
244 Changed |= replaceVloadHalf2(M);
245 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700246 Changed |= replaceClspvVloadaHalf2(M);
247 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400248 Changed |= replaceVstoreHalf(M);
249 Changed |= replaceVstoreHalf2(M);
250 Changed |= replaceVstoreHalf4(M);
251 Changed |= replaceReadImageF(M);
252 Changed |= replaceAtomics(M);
253 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400254 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700255 Changed |= replaceVload(M);
256 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400257
258 return Changed;
259}
260
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400261bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
262 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000263
Kévin Petite8edce32019-04-10 14:23:32 +0100264 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000265
266 for (auto Name : Names) {
267 // If we find a function with the matching name.
268 if (auto F = M.getFunction(Name)) {
269 SmallVector<Instruction *, 4> ToRemoves;
270
271 // Walk the users of the function.
272 for (auto &U : F->uses()) {
273 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000274
Kévin Petite8edce32019-04-10 14:23:32 +0100275 auto NewValue = Replacer(CI);
276
277 if (NewValue != nullptr) {
278 CI->replaceAllUsesWith(NewValue);
279 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000280
281 // Lastly, remember to remove the user.
282 ToRemoves.push_back(CI);
283 }
284 }
285
286 Changed = !ToRemoves.empty();
287
288 // And cleanup the calls we don't use anymore.
289 for (auto V : ToRemoves) {
290 V->eraseFromParent();
291 }
292
293 // And remove the function we don't need either too.
294 F->eraseFromParent();
295 }
296 }
297
298 return Changed;
299}
300
Kévin Petite8edce32019-04-10 14:23:32 +0100301bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100302
Kévin Petite8edce32019-04-10 14:23:32 +0100303 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400304 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
305 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
306 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
307 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100308 };
309
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400310 return replaceCallsWithValue(M, Names,
311 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100312}
313
314bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
315
316 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400317 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
318 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
319 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
320 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
321 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
322 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
323 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
324 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
325 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
326 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
327 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100328 };
329
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400330 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100331 auto XValue = CI->getOperand(0);
332 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100333
Kévin Petite8edce32019-04-10 14:23:32 +0100334 IRBuilder<> Builder(CI);
335 auto XmY = Builder.CreateSub(XValue, YValue);
336 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100337
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400338 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100339 auto F = CI->getCalledFunction();
340 auto finfo = FunctionInfo::getFromMangledName(F->getName());
341 if (finfo.isArgSigned(0)) {
342 Cmp = Builder.CreateICmpSGT(YValue, XValue);
343 } else {
344 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100345 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100346
Kévin Petite8edce32019-04-10 14:23:32 +0100347 return Builder.CreateSelect(Cmp, YmX, XmY);
348 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100349}
350
Kévin Petit8c1be282019-04-02 19:34:25 +0100351bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100352
Kévin Petite8edce32019-04-10 14:23:32 +0100353 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400354 "_Z8copysignff",
355 "_Z8copysignDv2_fS_",
356 "_Z8copysignDv3_fS_",
357 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100358 };
359
Kévin Petite8edce32019-04-10 14:23:32 +0100360 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
361 auto XValue = CI->getOperand(0);
362 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100363
Kévin Petite8edce32019-04-10 14:23:32 +0100364 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100365
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400366 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100367 if (Ty->isVectorTy()) {
368 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100369 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100370
Kévin Petite8edce32019-04-10 14:23:32 +0100371 // Return X with the sign of Y
372
373 // Sign bit masks
374 auto SignBit = IntTy->getScalarSizeInBits() - 1;
375 auto SignBitMask = 1 << SignBit;
376 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
377 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
378
379 IRBuilder<> Builder(CI);
380
381 // Extract sign of Y
382 auto YInt = Builder.CreateBitCast(YValue, IntTy);
383 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
384
385 // Clear sign bit in X
386 auto XInt = Builder.CreateBitCast(XValue, IntTy);
387 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
388
389 // Insert sign bit of Y into X
390 auto NewXInt = Builder.CreateOr(XInt, YSign);
391
392 // And cast back to floating-point
393 return Builder.CreateBitCast(NewXInt, Ty);
394 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100395}
396
David Neto22f144c2017-06-12 14:26:21 -0400397bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400398
Kévin Petite8edce32019-04-10 14:23:32 +0100399 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400400 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
401 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
402 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
403 };
404
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400405 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100406 // Recip has one arg.
407 auto Arg = CI->getOperand(0);
408 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
409 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
410 });
David Neto22f144c2017-06-12 14:26:21 -0400411}
412
413bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400414
Kévin Petite8edce32019-04-10 14:23:32 +0100415 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400416 "_Z11half_divideff", "_Z13native_divideff",
417 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
418 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
419 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
420 };
421
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400422 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100423 auto Op0 = CI->getOperand(0);
424 auto Op1 = CI->getOperand(1);
425 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
426 });
David Neto22f144c2017-06-12 14:26:21 -0400427}
428
Kévin Petit1329a002019-06-15 05:54:05 +0100429bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
430
431 std::vector<const char *> Names = {
432 "_Z3dotff",
433 "_Z3dotDv2_fS_",
434 "_Z3dotDv3_fS_",
435 "_Z3dotDv4_fS_",
436 };
437
438 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
439 auto Op0 = CI->getOperand(0);
440 auto Op1 = CI->getOperand(1);
441
442 Value *V;
443 if (Op0->getType()->isVectorTy()) {
444 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
445 CI->getType(), {Op0, Op1});
446 } else {
447 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
448 }
449
450 return V;
451 });
452}
453
David Neto22f144c2017-06-12 14:26:21 -0400454bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
455 bool Changed = false;
456
457 const std::map<const char *, const char *> Map = {
458 {"_Z5exp10f", "_Z3expf"},
459 {"_Z10half_exp10f", "_Z8half_expf"},
460 {"_Z12native_exp10f", "_Z10native_expf"},
461 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
462 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
463 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
464 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
465 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
466 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
467 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
468 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
469 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
470
471 for (auto Pair : Map) {
472 // If we find a function with the matching name.
473 if (auto F = M.getFunction(Pair.first)) {
474 SmallVector<Instruction *, 4> ToRemoves;
475
476 // Walk the users of the function.
477 for (auto &U : F->uses()) {
478 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
479 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
480
481 auto Arg = CI->getOperand(0);
482
483 // Constant of the natural log of 10 (ln(10)).
484 const double Ln10 =
485 2.302585092994045684017991454684364207601101488628772976033;
486
487 auto Mul = BinaryOperator::Create(
488 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
489 CI);
490
491 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
492
493 CI->replaceAllUsesWith(NewCI);
494
495 // Lastly, remember to remove the user.
496 ToRemoves.push_back(CI);
497 }
498 }
499
500 Changed = !ToRemoves.empty();
501
502 // And cleanup the calls we don't use anymore.
503 for (auto V : ToRemoves) {
504 V->eraseFromParent();
505 }
506
507 // And remove the function we don't need either too.
508 F->eraseFromParent();
509 }
510 }
511
512 return Changed;
513}
514
Kévin Petit0644a9c2019-06-20 21:08:46 +0100515bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
516
517 std::vector<const char *> Names = {
518 "_Z4fmodff",
519 "_Z4fmodDv2_fS_",
520 "_Z4fmodDv3_fS_",
521 "_Z4fmodDv4_fS_",
522 };
523
524 // OpenCL fmod(x,y) is x - y * trunc(x/y)
525 // The sign for a non-zero result is taken from x.
526 // (Try an example.)
527 // So translate to FRem
528 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
529 auto Op0 = CI->getOperand(0);
530 auto Op1 = CI->getOperand(1);
531 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
532 });
533}
534
David Neto22f144c2017-06-12 14:26:21 -0400535bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
536 bool Changed = false;
537
538 const std::map<const char *, const char *> Map = {
539 {"_Z5log10f", "_Z3logf"},
540 {"_Z10half_log10f", "_Z8half_logf"},
541 {"_Z12native_log10f", "_Z10native_logf"},
542 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
543 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
544 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
545 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
546 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
547 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
548 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
549 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
550 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
551
552 for (auto Pair : Map) {
553 // If we find a function with the matching name.
554 if (auto F = M.getFunction(Pair.first)) {
555 SmallVector<Instruction *, 4> ToRemoves;
556
557 // Walk the users of the function.
558 for (auto &U : F->uses()) {
559 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
560 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
561
562 auto Arg = CI->getOperand(0);
563
564 // Constant of the reciprocal of the natural log of 10 (ln(10)).
565 const double Ln10 =
566 0.434294481903251827651128918916605082294397005803666566114;
567
568 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
569
570 auto Mul = BinaryOperator::Create(
571 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
572 "", CI);
573
574 CI->replaceAllUsesWith(Mul);
575
576 // Lastly, remember to remove the user.
577 ToRemoves.push_back(CI);
578 }
579 }
580
581 Changed = !ToRemoves.empty();
582
583 // And cleanup the calls we don't use anymore.
584 for (auto V : ToRemoves) {
585 V->eraseFromParent();
586 }
587
588 // And remove the function we don't need either too.
589 F->eraseFromParent();
590 }
591 }
592
593 return Changed;
594}
595
596bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400597
598 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
599
Kévin Petitc4643922019-06-17 19:32:05 +0100600 const std::vector<const char *> Names = {
601 {"_Z7barrierj"},
602 };
David Neto22f144c2017-06-12 14:26:21 -0400603
Kévin Petitc4643922019-06-17 19:32:05 +0100604 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
605 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400606
Kévin Petitc4643922019-06-17 19:32:05 +0100607 // We need to map the OpenCL constants to the SPIR-V equivalents.
608 const auto LocalMemFence =
609 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
610 const auto GlobalMemFence =
611 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
612 const auto ConstantSequentiallyConsistent = ConstantInt::get(
613 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
614 const auto ConstantScopeDevice =
615 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
616 const auto ConstantScopeWorkgroup =
617 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400618
Kévin Petitc4643922019-06-17 19:32:05 +0100619 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
620 const auto LocalMemFenceMask =
621 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
622 const auto WorkgroupShiftAmount =
623 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
624 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
625 Instruction::Shl, LocalMemFenceMask,
626 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400627
Kévin Petitc4643922019-06-17 19:32:05 +0100628 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
629 const auto GlobalMemFenceMask =
630 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
631 const auto UniformShiftAmount =
632 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
633 const auto MemorySemanticsUniform = BinaryOperator::Create(
634 Instruction::Shl, GlobalMemFenceMask,
635 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400636
Kévin Petitc4643922019-06-17 19:32:05 +0100637 // And combine the above together, also adding in
638 // MemorySemanticsSequentiallyConsistentMask.
639 auto MemorySemantics =
640 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
641 ConstantSequentiallyConsistent, "", CI);
642 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
643 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400644
Kévin Petitc4643922019-06-17 19:32:05 +0100645 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
646 // Device Scope, otherwise Workgroup Scope.
647 const auto Cmp =
648 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
649 GlobalMemFence, "", CI);
650 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
651 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400652
Kévin Petitc4643922019-06-17 19:32:05 +0100653 // Lastly, the Execution Scope is always Workgroup Scope.
654 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400655
Kévin Petitc4643922019-06-17 19:32:05 +0100656 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
657 {Attribute::NoDuplicate}, CI->getType(),
658 {ExecutionScope, MemoryScope, MemorySemantics});
659 });
David Neto22f144c2017-06-12 14:26:21 -0400660}
661
662bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
663 bool Changed = false;
664
665 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
666
Kévin Petitc4643922019-06-17 19:32:05 +0100667 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100668 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100669 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400670 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100671 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100672 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100673 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100674 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400675
676 for (auto Pair : Map) {
677 // If we find a function with the matching name.
678 if (auto F = M.getFunction(Pair.first)) {
679 SmallVector<Instruction *, 4> ToRemoves;
680
681 // Walk the users of the function.
682 for (auto &U : F->uses()) {
683 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400684
685 auto Arg = CI->getOperand(0);
686
687 // We need to map the OpenCL constants to the SPIR-V equivalents.
688 const auto LocalMemFence =
689 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
690 const auto GlobalMemFence =
691 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
692 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100693 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400694 const auto ConstantScopeDevice =
695 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
696
697 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
698 const auto LocalMemFenceMask = BinaryOperator::Create(
699 Instruction::And, LocalMemFence, Arg, "", CI);
700 const auto WorkgroupShiftAmount =
701 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
702 clz(CLK_LOCAL_MEM_FENCE);
703 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
704 Instruction::Shl, LocalMemFenceMask,
705 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
706
707 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
708 const auto GlobalMemFenceMask = BinaryOperator::Create(
709 Instruction::And, GlobalMemFence, Arg, "", CI);
710 const auto UniformShiftAmount =
711 clz(spv::MemorySemanticsUniformMemoryMask) -
712 clz(CLK_GLOBAL_MEM_FENCE);
713 const auto MemorySemanticsUniform = BinaryOperator::Create(
714 Instruction::Shl, GlobalMemFenceMask,
715 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
716
717 // And combine the above together, also adding in
718 // MemorySemanticsSequentiallyConsistentMask.
719 auto MemorySemantics =
720 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
721 ConstantMemorySemantics, "", CI);
722 MemorySemantics = BinaryOperator::Create(
723 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
724
725 // Memory Scope is always device.
726 const auto MemoryScope = ConstantScopeDevice;
727
Kévin Petitc4643922019-06-17 19:32:05 +0100728 const auto SPIRVOp = std::get<0>(Pair.second);
729 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
730 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400731
732 CI->replaceAllUsesWith(NewCI);
733
734 // Lastly, remember to remove the user.
735 ToRemoves.push_back(CI);
736 }
737 }
738
739 Changed = !ToRemoves.empty();
740
741 // And cleanup the calls we don't use anymore.
742 for (auto V : ToRemoves) {
743 V->eraseFromParent();
744 }
745
746 // And remove the function we don't need either too.
747 F->eraseFromParent();
748 }
749 }
750
751 return Changed;
752}
753
754bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
755 bool Changed = false;
756
757 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
758 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
759 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
760 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
761 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
762 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
763 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
764 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
765 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
766 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
767 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
768 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
769 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
770 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
771 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
772 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
773 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
774 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
775 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
776 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
777 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
778 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
779 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
780 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
781 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
782 };
783
784 for (auto Pair : Map) {
785 // If we find a function with the matching name.
786 if (auto F = M.getFunction(Pair.first)) {
787 SmallVector<Instruction *, 4> ToRemoves;
788
789 // Walk the users of the function.
790 for (auto &U : F->uses()) {
791 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
792 // The predicate to use in the CmpInst.
793 auto Predicate = Pair.second.first;
794
795 // The value to return for true.
796 auto TrueValue =
797 ConstantInt::getSigned(CI->getType(), Pair.second.second);
798
799 // The value to return for false.
800 auto FalseValue = Constant::getNullValue(CI->getType());
801
802 auto Arg1 = CI->getOperand(0);
803 auto Arg2 = CI->getOperand(1);
804
805 const auto Cmp =
806 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
807
808 const auto Select =
809 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
810
811 CI->replaceAllUsesWith(Select);
812
813 // Lastly, remember to remove the user.
814 ToRemoves.push_back(CI);
815 }
816 }
817
818 Changed = !ToRemoves.empty();
819
820 // And cleanup the calls we don't use anymore.
821 for (auto V : ToRemoves) {
822 V->eraseFromParent();
823 }
824
825 // And remove the function we don't need either too.
826 F->eraseFromParent();
827 }
828 }
829
830 return Changed;
831}
832
833bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
834 bool Changed = false;
835
Kévin Petitff03aee2019-06-12 19:39:03 +0100836 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
837 {"_Z5isinff", {spv::OpIsInf, 1}},
838 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
839 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
840 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
841 {"_Z5isnanf", {spv::OpIsNan, 1}},
842 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
843 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
844 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400845 };
846
847 for (auto Pair : Map) {
848 // If we find a function with the matching name.
849 if (auto F = M.getFunction(Pair.first)) {
850 SmallVector<Instruction *, 4> ToRemoves;
851
852 // Walk the users of the function.
853 for (auto &U : F->uses()) {
854 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
855 const auto CITy = CI->getType();
856
Kévin Petitff03aee2019-06-12 19:39:03 +0100857 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400858
859 // The value to return for true.
860 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
861
862 // The value to return for false.
863 auto FalseValue = Constant::getNullValue(CITy);
864
865 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
866 M.getContext(),
867 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
868
Kévin Petitff03aee2019-06-12 19:39:03 +0100869 auto NewCI =
870 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
871 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400872
873 const auto Select =
874 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
875
876 CI->replaceAllUsesWith(Select);
877
878 // Lastly, remember to remove the user.
879 ToRemoves.push_back(CI);
880 }
881 }
882
883 Changed = !ToRemoves.empty();
884
885 // And cleanup the calls we don't use anymore.
886 for (auto V : ToRemoves) {
887 V->eraseFromParent();
888 }
889
890 // And remove the function we don't need either too.
891 F->eraseFromParent();
892 }
893 }
894
895 return Changed;
896}
897
898bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
899 bool Changed = false;
900
Kévin Petitff03aee2019-06-12 19:39:03 +0100901 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000902 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100903 {"_Z3allc", spv::OpNop},
904 {"_Z3allDv2_c", spv::OpAll},
905 {"_Z3allDv3_c", spv::OpAll},
906 {"_Z3allDv4_c", spv::OpAll},
907 {"_Z3alls", spv::OpNop},
908 {"_Z3allDv2_s", spv::OpAll},
909 {"_Z3allDv3_s", spv::OpAll},
910 {"_Z3allDv4_s", spv::OpAll},
911 {"_Z3alli", spv::OpNop},
912 {"_Z3allDv2_i", spv::OpAll},
913 {"_Z3allDv3_i", spv::OpAll},
914 {"_Z3allDv4_i", spv::OpAll},
915 {"_Z3alll", spv::OpNop},
916 {"_Z3allDv2_l", spv::OpAll},
917 {"_Z3allDv3_l", spv::OpAll},
918 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000919
920 // any
Kévin Petitff03aee2019-06-12 19:39:03 +0100921 {"_Z3anyc", spv::OpNop},
922 {"_Z3anyDv2_c", spv::OpAny},
923 {"_Z3anyDv3_c", spv::OpAny},
924 {"_Z3anyDv4_c", spv::OpAny},
925 {"_Z3anys", spv::OpNop},
926 {"_Z3anyDv2_s", spv::OpAny},
927 {"_Z3anyDv3_s", spv::OpAny},
928 {"_Z3anyDv4_s", spv::OpAny},
929 {"_Z3anyi", spv::OpNop},
930 {"_Z3anyDv2_i", spv::OpAny},
931 {"_Z3anyDv3_i", spv::OpAny},
932 {"_Z3anyDv4_i", spv::OpAny},
933 {"_Z3anyl", spv::OpNop},
934 {"_Z3anyDv2_l", spv::OpAny},
935 {"_Z3anyDv3_l", spv::OpAny},
936 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -0400937 };
938
939 for (auto Pair : Map) {
940 // If we find a function with the matching name.
941 if (auto F = M.getFunction(Pair.first)) {
942 SmallVector<Instruction *, 4> ToRemoves;
943
944 // Walk the users of the function.
945 for (auto &U : F->uses()) {
946 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400947
948 auto Arg = CI->getOperand(0);
949
950 Value *V;
951
Kévin Petitfd27cca2018-10-31 13:00:17 +0000952 // If the argument is a 32-bit int, just use a shift
953 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
954 V = BinaryOperator::Create(Instruction::LShr, Arg,
955 ConstantInt::get(Arg->getType(), 31), "",
956 CI);
957 } else {
David Neto22f144c2017-06-12 14:26:21 -0400958 // The value for zero to compare against.
959 const auto ZeroValue = Constant::getNullValue(Arg->getType());
960
David Neto22f144c2017-06-12 14:26:21 -0400961 // The value to return for true.
962 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
963
964 // The value to return for false.
965 const auto FalseValue = Constant::getNullValue(CI->getType());
966
Kévin Petitfd27cca2018-10-31 13:00:17 +0000967 const auto Cmp = CmpInst::Create(
968 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
969
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400970 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000971
972 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +0100973 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000974
Kévin Petitff03aee2019-06-12 19:39:03 +0100975 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000976
Kévin Petitff03aee2019-06-12 19:39:03 +0100977 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +0000978
Kévin Petitff03aee2019-06-12 19:39:03 +0100979 const auto NewCI = clspv::InsertSPIRVOp(
980 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +0000981 SelectSource = NewCI;
982
983 } else {
984 SelectSource = Cmp;
985 }
986
987 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400988 }
989
990 CI->replaceAllUsesWith(V);
991
992 // Lastly, remember to remove the user.
993 ToRemoves.push_back(CI);
994 }
995 }
996
997 Changed = !ToRemoves.empty();
998
999 // And cleanup the calls we don't use anymore.
1000 for (auto V : ToRemoves) {
1001 V->eraseFromParent();
1002 }
1003
1004 // And remove the function we don't need either too.
1005 F->eraseFromParent();
1006 }
1007 }
1008
1009 return Changed;
1010}
1011
Kévin Petitbf0036c2019-03-06 13:57:10 +00001012bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1013 bool Changed = false;
1014
1015 for (auto const &SymVal : M.getValueSymbolTable()) {
1016 // Skip symbols whose name doesn't match
1017 if (!SymVal.getKey().startswith("_Z8upsample")) {
1018 continue;
1019 }
1020 // Is there a function going by that name?
1021 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1022
1023 SmallVector<Instruction *, 4> ToRemoves;
1024
1025 // Walk the users of the function.
1026 for (auto &U : F->uses()) {
1027 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1028
1029 // Get arguments
1030 auto HiValue = CI->getOperand(0);
1031 auto LoValue = CI->getOperand(1);
1032
1033 // Don't touch overloads that aren't in OpenCL C
1034 auto HiType = HiValue->getType();
1035 auto LoType = LoValue->getType();
1036
1037 if (HiType != LoType) {
1038 continue;
1039 }
1040
1041 if (!HiType->isIntOrIntVectorTy()) {
1042 continue;
1043 }
1044
1045 if (HiType->getScalarSizeInBits() * 2 !=
1046 CI->getType()->getScalarSizeInBits()) {
1047 continue;
1048 }
1049
1050 if ((HiType->getScalarSizeInBits() != 8) &&
1051 (HiType->getScalarSizeInBits() != 16) &&
1052 (HiType->getScalarSizeInBits() != 32)) {
1053 continue;
1054 }
1055
1056 if (HiType->isVectorTy()) {
1057 if ((HiType->getVectorNumElements() != 2) &&
1058 (HiType->getVectorNumElements() != 3) &&
1059 (HiType->getVectorNumElements() != 4) &&
1060 (HiType->getVectorNumElements() != 8) &&
1061 (HiType->getVectorNumElements() != 16)) {
1062 continue;
1063 }
1064 }
1065
1066 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001067 auto HiCast =
1068 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1069 auto LoCast =
1070 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001071
1072 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001073 auto ShiftAmount =
1074 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001075 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1076 ShiftAmount, "", CI);
1077
1078 // OR both results
1079 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1080 "", CI);
1081
1082 // Replace call with the expression
1083 CI->replaceAllUsesWith(V);
1084
1085 // Lastly, remember to remove the user.
1086 ToRemoves.push_back(CI);
1087 }
1088 }
1089
1090 Changed = !ToRemoves.empty();
1091
1092 // And cleanup the calls we don't use anymore.
1093 for (auto V : ToRemoves) {
1094 V->eraseFromParent();
1095 }
1096
1097 // And remove the function we don't need either too.
1098 F->eraseFromParent();
1099 }
1100 }
1101
1102 return Changed;
1103}
1104
Kévin Petitd44eef52019-03-08 13:22:14 +00001105bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1106 bool Changed = false;
1107
1108 for (auto const &SymVal : M.getValueSymbolTable()) {
1109 // Skip symbols whose name doesn't match
1110 if (!SymVal.getKey().startswith("_Z6rotate")) {
1111 continue;
1112 }
1113 // Is there a function going by that name?
1114 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1115
1116 SmallVector<Instruction *, 4> ToRemoves;
1117
1118 // Walk the users of the function.
1119 for (auto &U : F->uses()) {
1120 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1121
1122 // Get arguments
1123 auto SrcValue = CI->getOperand(0);
1124 auto RotAmount = CI->getOperand(1);
1125
1126 // Don't touch overloads that aren't in OpenCL C
1127 auto SrcType = SrcValue->getType();
1128 auto RotType = RotAmount->getType();
1129
1130 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1131 continue;
1132 }
1133
1134 if (!SrcType->isIntOrIntVectorTy()) {
1135 continue;
1136 }
1137
1138 if ((SrcType->getScalarSizeInBits() != 8) &&
1139 (SrcType->getScalarSizeInBits() != 16) &&
1140 (SrcType->getScalarSizeInBits() != 32) &&
1141 (SrcType->getScalarSizeInBits() != 64)) {
1142 continue;
1143 }
1144
1145 if (SrcType->isVectorTy()) {
1146 if ((SrcType->getVectorNumElements() != 2) &&
1147 (SrcType->getVectorNumElements() != 3) &&
1148 (SrcType->getVectorNumElements() != 4) &&
1149 (SrcType->getVectorNumElements() != 8) &&
1150 (SrcType->getVectorNumElements() != 16)) {
1151 continue;
1152 }
1153 }
1154
1155 // The approach used is to shift the top bits down, the bottom bits up
1156 // and OR the two shifted values.
1157
1158 // The rotation amount is to be treated modulo the element size.
1159 // Since SPIR-V shift ops don't support this, let's apply the
1160 // modulo ahead of shifting. The element size is always a power of
1161 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001162 auto ModMask =
1163 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001164 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1165 ModMask, "", CI);
1166
1167 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001168 auto ScalarSize =
1169 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001170 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1171 RotAmount, "", CI);
1172
1173 // Now shift the bottom bits up and the top bits down
1174 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1175 RotAmount, "", CI);
1176 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1177 DownAmount, "", CI);
1178
1179 // Finally OR the two shifted values
1180 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1181 HiRotated, "", CI);
1182
1183 // Replace call with the expression
1184 CI->replaceAllUsesWith(V);
1185
1186 // Lastly, remember to remove the user.
1187 ToRemoves.push_back(CI);
1188 }
1189 }
1190
1191 Changed = !ToRemoves.empty();
1192
1193 // And cleanup the calls we don't use anymore.
1194 for (auto V : ToRemoves) {
1195 V->eraseFromParent();
1196 }
1197
1198 // And remove the function we don't need either too.
1199 F->eraseFromParent();
1200 }
1201 }
1202
1203 return Changed;
1204}
1205
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001206bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1207 bool Changed = false;
1208
1209 for (auto const &SymVal : M.getValueSymbolTable()) {
1210
1211 // Skip symbols whose name obviously doesn't match
1212 if (!SymVal.getKey().contains("convert_")) {
1213 continue;
1214 }
1215
1216 // Is there a function going by that name?
1217 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1218
1219 // Get info from the mangled name
1220 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001221 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001222
1223 // All functions of interest are handled by our mangled name parser
1224 if (!parsed) {
1225 continue;
1226 }
1227
1228 // Move on if this isn't a call to convert_
1229 if (!finfo.name.startswith("convert_")) {
1230 continue;
1231 }
1232
1233 // Extract the destination type from the function name
1234 StringRef DstTypeName = finfo.name;
1235 DstTypeName.consume_front("convert_");
1236
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001237 auto DstSignedNess =
1238 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1239 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1240 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1241 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1242 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1243 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1244 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1245 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1246 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1247 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001248
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001249 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001250 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001251
1252 SmallVector<Instruction *, 4> ToRemoves;
1253
1254 // Walk the users of the function.
1255 for (auto &U : F->uses()) {
1256 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1257
1258 // Get arguments
1259 auto SrcValue = CI->getOperand(0);
1260
1261 // Don't touch overloads that aren't in OpenCL C
1262 auto SrcType = SrcValue->getType();
1263 auto DstType = CI->getType();
1264
1265 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1266 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1267 continue;
1268 }
1269
1270 if (SrcType->isVectorTy()) {
1271
1272 if (SrcType->getVectorNumElements() !=
1273 DstType->getVectorNumElements()) {
1274 continue;
1275 }
1276
1277 if ((SrcType->getVectorNumElements() != 2) &&
1278 (SrcType->getVectorNumElements() != 3) &&
1279 (SrcType->getVectorNumElements() != 4) &&
1280 (SrcType->getVectorNumElements() != 8) &&
1281 (SrcType->getVectorNumElements() != 16)) {
1282 continue;
1283 }
1284 }
1285
1286 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1287 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1288
1289 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1290 bool DstIsInt = DstType->isIntOrIntVectorTy();
1291
1292 Value *V;
1293 if (SrcIsFloat && DstIsFloat) {
1294 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1295 } else if (SrcIsFloat && DstIsInt) {
1296 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001297 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1298 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001299 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001300 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1301 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001302 }
1303 } else if (SrcIsInt && DstIsFloat) {
1304 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001305 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1306 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001307 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001308 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1309 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001310 }
1311 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001312 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1313 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001314 } else {
1315 // Not something we're supposed to handle, just move on
1316 continue;
1317 }
1318
1319 // Replace call with the expression
1320 CI->replaceAllUsesWith(V);
1321
1322 // Lastly, remember to remove the user.
1323 ToRemoves.push_back(CI);
1324 }
1325 }
1326
1327 Changed = !ToRemoves.empty();
1328
1329 // And cleanup the calls we don't use anymore.
1330 for (auto V : ToRemoves) {
1331 V->eraseFromParent();
1332 }
1333
1334 // And remove the function we don't need either too.
1335 F->eraseFromParent();
1336 }
1337 }
1338
1339 return Changed;
1340}
1341
Kévin Petit8a560882019-03-21 15:24:34 +00001342bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1343 bool Changed = false;
1344
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001345 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001346
Kévin Petit617a76d2019-04-04 13:54:16 +01001347 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001348 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1349 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1350
1351 // Skip symbols whose name doesn't match
1352 if (!isMad && !isMul) {
1353 continue;
1354 }
1355
1356 // Is there a function going by that name?
1357 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001358 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001359 }
1360 }
1361
Kévin Petit617a76d2019-04-04 13:54:16 +01001362 for (auto F : FnWorklist) {
1363 SmallVector<Instruction *, 4> ToRemoves;
1364
1365 bool isMad = F->getName().startswith("_Z6mad_hi");
1366 // Walk the users of the function.
1367 for (auto &U : F->uses()) {
1368 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1369
1370 // Get arguments
1371 auto AValue = CI->getOperand(0);
1372 auto BValue = CI->getOperand(1);
1373 auto CValue = CI->getOperand(2);
1374
1375 // Don't touch overloads that aren't in OpenCL C
1376 auto AType = AValue->getType();
1377 auto BType = BValue->getType();
1378 auto CType = CValue->getType();
1379
1380 if ((AType != BType) || (CI->getType() != AType) ||
1381 (isMad && (AType != CType))) {
1382 continue;
1383 }
1384
1385 if (!AType->isIntOrIntVectorTy()) {
1386 continue;
1387 }
1388
1389 if ((AType->getScalarSizeInBits() != 8) &&
1390 (AType->getScalarSizeInBits() != 16) &&
1391 (AType->getScalarSizeInBits() != 32) &&
1392 (AType->getScalarSizeInBits() != 64)) {
1393 continue;
1394 }
1395
1396 if (AType->isVectorTy()) {
1397 if ((AType->getVectorNumElements() != 2) &&
1398 (AType->getVectorNumElements() != 3) &&
1399 (AType->getVectorNumElements() != 4) &&
1400 (AType->getVectorNumElements() != 8) &&
1401 (AType->getVectorNumElements() != 16)) {
1402 continue;
1403 }
1404 }
1405
1406 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001407 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001408
1409 // Select the appropriate signed/unsigned SPIR-V op
1410 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001411 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001412 opcode = spv::OpSMulExtended;
1413 } else {
1414 opcode = spv::OpUMulExtended;
1415 }
1416
1417 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001418 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001419 auto ExMulRetType = StructType::create(TwoValueType);
1420
1421 // Call the SPIR-V op
1422 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1423 ExMulRetType, {AValue, BValue});
1424
1425 // Get the high part of the result
1426 unsigned Idxs[] = {1};
1427 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1428
1429 // If we're handling a mad_hi, add the third argument to the result
1430 if (isMad) {
1431 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1432 }
1433
1434 // Replace call with the expression
1435 CI->replaceAllUsesWith(V);
1436
1437 // Lastly, remember to remove the user.
1438 ToRemoves.push_back(CI);
1439 }
1440 }
1441
1442 Changed = !ToRemoves.empty();
1443
1444 // And cleanup the calls we don't use anymore.
1445 for (auto V : ToRemoves) {
1446 V->eraseFromParent();
1447 }
1448
1449 // And remove the function we don't need either too.
1450 F->eraseFromParent();
1451 }
1452
Kévin Petit8a560882019-03-21 15:24:34 +00001453 return Changed;
1454}
1455
Kévin Petitf5b78a22018-10-25 14:32:17 +00001456bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1457 bool Changed = false;
1458
1459 for (auto const &SymVal : M.getValueSymbolTable()) {
1460 // Skip symbols whose name doesn't match
1461 if (!SymVal.getKey().startswith("_Z6select")) {
1462 continue;
1463 }
1464 // Is there a function going by that name?
1465 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1466
1467 SmallVector<Instruction *, 4> ToRemoves;
1468
1469 // Walk the users of the function.
1470 for (auto &U : F->uses()) {
1471 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1472
1473 // Get arguments
1474 auto FalseValue = CI->getOperand(0);
1475 auto TrueValue = CI->getOperand(1);
1476 auto PredicateValue = CI->getOperand(2);
1477
1478 // Don't touch overloads that aren't in OpenCL C
1479 auto FalseType = FalseValue->getType();
1480 auto TrueType = TrueValue->getType();
1481 auto PredicateType = PredicateValue->getType();
1482
1483 if (FalseType != TrueType) {
1484 continue;
1485 }
1486
1487 if (!PredicateType->isIntOrIntVectorTy()) {
1488 continue;
1489 }
1490
1491 if (!FalseType->isIntOrIntVectorTy() &&
1492 !FalseType->getScalarType()->isFloatingPointTy()) {
1493 continue;
1494 }
1495
1496 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1497 continue;
1498 }
1499
1500 if (FalseType->getScalarSizeInBits() !=
1501 PredicateType->getScalarSizeInBits()) {
1502 continue;
1503 }
1504
1505 if (FalseType->isVectorTy()) {
1506 if (FalseType->getVectorNumElements() !=
1507 PredicateType->getVectorNumElements()) {
1508 continue;
1509 }
1510
1511 if ((FalseType->getVectorNumElements() != 2) &&
1512 (FalseType->getVectorNumElements() != 3) &&
1513 (FalseType->getVectorNumElements() != 4) &&
1514 (FalseType->getVectorNumElements() != 8) &&
1515 (FalseType->getVectorNumElements() != 16)) {
1516 continue;
1517 }
1518 }
1519
1520 // Create constant
1521 const auto ZeroValue = Constant::getNullValue(PredicateType);
1522
1523 // Scalar and vector are to be treated differently
1524 CmpInst::Predicate Pred;
1525 if (PredicateType->isVectorTy()) {
1526 Pred = CmpInst::ICMP_SLT;
1527 } else {
1528 Pred = CmpInst::ICMP_NE;
1529 }
1530
1531 // Create comparison instruction
1532 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1533 ZeroValue, "", CI);
1534
1535 // Create select
1536 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1537
1538 // Replace call with the selection
1539 CI->replaceAllUsesWith(V);
1540
1541 // Lastly, remember to remove the user.
1542 ToRemoves.push_back(CI);
1543 }
1544 }
1545
1546 Changed = !ToRemoves.empty();
1547
1548 // And cleanup the calls we don't use anymore.
1549 for (auto V : ToRemoves) {
1550 V->eraseFromParent();
1551 }
1552
1553 // And remove the function we don't need either too.
1554 F->eraseFromParent();
1555 }
1556 }
1557
1558 return Changed;
1559}
1560
Kévin Petite7d0cce2018-10-31 12:38:56 +00001561bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1562 bool Changed = false;
1563
1564 for (auto const &SymVal : M.getValueSymbolTable()) {
1565 // Skip symbols whose name doesn't match
1566 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1567 continue;
1568 }
1569 // Is there a function going by that name?
1570 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1571
1572 SmallVector<Instruction *, 4> ToRemoves;
1573
1574 // Walk the users of the function.
1575 for (auto &U : F->uses()) {
1576 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1577
1578 if (CI->getNumOperands() != 4) {
1579 continue;
1580 }
1581
1582 // Get arguments
1583 auto FalseValue = CI->getOperand(0);
1584 auto TrueValue = CI->getOperand(1);
1585 auto PredicateValue = CI->getOperand(2);
1586
1587 // Don't touch overloads that aren't in OpenCL C
1588 auto FalseType = FalseValue->getType();
1589 auto TrueType = TrueValue->getType();
1590 auto PredicateType = PredicateValue->getType();
1591
1592 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1593 continue;
1594 }
1595
1596 if (TrueType->isVectorTy()) {
1597 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1598 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001599 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001600 }
1601 if ((TrueType->getVectorNumElements() != 2) &&
1602 (TrueType->getVectorNumElements() != 3) &&
1603 (TrueType->getVectorNumElements() != 4) &&
1604 (TrueType->getVectorNumElements() != 8) &&
1605 (TrueType->getVectorNumElements() != 16)) {
1606 continue;
1607 }
1608 }
1609
1610 // Remember the type of the operands
1611 auto OpType = TrueType;
1612
1613 // The actual bit selection will always be done on an integer type,
1614 // declare it here
1615 Type *BitType;
1616
1617 // If the operands are float, then bitcast them to int
1618 if (OpType->getScalarType()->isFloatingPointTy()) {
1619
1620 // First create the new type
1621 auto ScalarSize = OpType->getScalarType()->getPrimitiveSizeInBits();
1622 BitType = Type::getIntNTy(M.getContext(), ScalarSize);
1623 if (OpType->isVectorTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001624 BitType =
1625 VectorType::get(BitType, OpType->getVectorNumElements());
Kévin Petite7d0cce2018-10-31 12:38:56 +00001626 }
1627
1628 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001629 PredicateValue =
1630 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1631 FalseValue =
1632 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1633 TrueValue =
1634 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001635
1636 } else {
1637 // The operands have an integer type, use it directly
1638 BitType = OpType;
1639 }
1640
1641 // All the operands are now always integers
1642 // implement as (c & b) | (~c & a)
1643
1644 // Create our negated predicate value
1645 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001646 auto NotPredicateValue = BinaryOperator::Create(
1647 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001648
1649 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001650 auto BitsFalse = BinaryOperator::Create(
1651 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1652 auto BitsTrue = BinaryOperator::Create(
1653 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001654
1655 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1656 BitsTrue, "", CI);
1657
1658 // If we were dealing with a floating point type, we must bitcast
1659 // the result back to that
1660 if (OpType->getScalarType()->isFloatingPointTy()) {
1661 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1662 }
1663
1664 // Replace call with our new code
1665 CI->replaceAllUsesWith(V);
1666
1667 // Lastly, remember to remove the user.
1668 ToRemoves.push_back(CI);
1669 }
1670 }
1671
1672 Changed = !ToRemoves.empty();
1673
1674 // And cleanup the calls we don't use anymore.
1675 for (auto V : ToRemoves) {
1676 V->eraseFromParent();
1677 }
1678
1679 // And remove the function we don't need either too.
1680 F->eraseFromParent();
1681 }
1682 }
1683
1684 return Changed;
1685}
1686
Kévin Petit6b0a9532018-10-30 20:00:39 +00001687bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1688 bool Changed = false;
1689
1690 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001691 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1692 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1693 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1694 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1695 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1696 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001697 };
1698
1699 for (auto Pair : Map) {
1700 // If we find a function with the matching name.
1701 if (auto F = M.getFunction(Pair.first)) {
1702 SmallVector<Instruction *, 4> ToRemoves;
1703
1704 // Walk the users of the function.
1705 for (auto &U : F->uses()) {
1706 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1707
1708 auto ReplacementFn = Pair.second;
1709
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001710 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001711 Value *VectorArg;
1712
1713 // First figure out which function we're dealing with
1714 if (F->getName().startswith("_Z10smoothstep")) {
1715 ArgsToSplat.push_back(CI->getOperand(1));
1716 VectorArg = CI->getOperand(2);
1717 } else {
1718 VectorArg = CI->getOperand(1);
1719 }
1720
1721 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001722 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001723 auto VecType = VectorArg->getType();
1724
1725 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001726 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001727 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001728 auto index =
1729 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1730 NewVectorArg =
1731 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001732 }
1733 SplatArgs.push_back(NewVectorArg);
1734 }
1735
1736 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001737 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1738 const auto NewFType =
1739 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001740
1741 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1742
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001743 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001744 for (auto arg : SplatArgs) {
1745 NewArgs.push_back(arg);
1746 }
1747 NewArgs.push_back(VectorArg);
1748
1749 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1750
1751 CI->replaceAllUsesWith(NewCI);
1752
1753 // Lastly, remember to remove the user.
1754 ToRemoves.push_back(CI);
1755 }
1756 }
1757
1758 Changed = !ToRemoves.empty();
1759
1760 // And cleanup the calls we don't use anymore.
1761 for (auto V : ToRemoves) {
1762 V->eraseFromParent();
1763 }
1764
1765 // And remove the function we don't need either too.
1766 F->eraseFromParent();
1767 }
1768 }
1769
1770 return Changed;
1771}
1772
David Neto22f144c2017-06-12 14:26:21 -04001773bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1774 bool Changed = false;
1775
1776 const std::map<const char *, Instruction::BinaryOps> Map = {
1777 {"_Z7signbitf", Instruction::LShr},
1778 {"_Z7signbitDv2_f", Instruction::AShr},
1779 {"_Z7signbitDv3_f", Instruction::AShr},
1780 {"_Z7signbitDv4_f", Instruction::AShr},
1781 };
1782
1783 for (auto Pair : Map) {
1784 // If we find a function with the matching name.
1785 if (auto F = M.getFunction(Pair.first)) {
1786 SmallVector<Instruction *, 4> ToRemoves;
1787
1788 // Walk the users of the function.
1789 for (auto &U : F->uses()) {
1790 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1791 auto Arg = CI->getOperand(0);
1792
1793 auto Bitcast =
1794 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1795
1796 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1797 ConstantInt::get(CI->getType(), 31),
1798 "", CI);
1799
1800 CI->replaceAllUsesWith(Shr);
1801
1802 // Lastly, remember to remove the user.
1803 ToRemoves.push_back(CI);
1804 }
1805 }
1806
1807 Changed = !ToRemoves.empty();
1808
1809 // And cleanup the calls we don't use anymore.
1810 for (auto V : ToRemoves) {
1811 V->eraseFromParent();
1812 }
1813
1814 // And remove the function we don't need either too.
1815 F->eraseFromParent();
1816 }
1817 }
1818
1819 return Changed;
1820}
1821
1822bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1823 bool Changed = false;
1824
1825 const std::map<const char *,
1826 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1827 Map = {
1828 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1829 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1830 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1831 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1832 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1833 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1834 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1835 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1836 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1837 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1838 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1839 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1840 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1841 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1842 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1843 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1844 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1845 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1846 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1847 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1848 };
1849
1850 for (auto Pair : Map) {
1851 // If we find a function with the matching name.
1852 if (auto F = M.getFunction(Pair.first)) {
1853 SmallVector<Instruction *, 4> ToRemoves;
1854
1855 // Walk the users of the function.
1856 for (auto &U : F->uses()) {
1857 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1858 // The multiply instruction to use.
1859 auto MulInst = Pair.second.first;
1860
1861 // The add instruction to use.
1862 auto AddInst = Pair.second.second;
1863
1864 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1865
1866 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1867 CI->getArgOperand(1), "", CI);
1868
1869 if (Instruction::BinaryOpsEnd != AddInst) {
1870 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1871 CI);
1872 }
1873
1874 CI->replaceAllUsesWith(I);
1875
1876 // Lastly, remember to remove the user.
1877 ToRemoves.push_back(CI);
1878 }
1879 }
1880
1881 Changed = !ToRemoves.empty();
1882
1883 // And cleanup the calls we don't use anymore.
1884 for (auto V : ToRemoves) {
1885 V->eraseFromParent();
1886 }
1887
1888 // And remove the function we don't need either too.
1889 F->eraseFromParent();
1890 }
1891 }
1892
1893 return Changed;
1894}
1895
Derek Chowcfd368b2017-10-19 20:58:45 -07001896bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1897 bool Changed = false;
1898
alan-bakerf795f392019-06-11 18:24:34 -04001899 for (auto const &SymVal : M.getValueSymbolTable()) {
1900 if (!SymVal.getKey().contains("vstore"))
1901 continue;
1902 if (SymVal.getKey().contains("vstore_"))
1903 continue;
1904 if (SymVal.getKey().contains("vstorea"))
1905 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001906
alan-bakerf795f392019-06-11 18:24:34 -04001907 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001908 SmallVector<Instruction *, 4> ToRemoves;
1909
alan-bakerf795f392019-06-11 18:24:34 -04001910 auto fname = F->getName();
1911 if (!fname.consume_front("_Z"))
1912 continue;
1913 size_t name_len;
1914 if (fname.consumeInteger(10, name_len))
1915 continue;
1916 std::string name = fname.take_front(name_len);
1917
1918 bool ok = StringSwitch<bool>(name)
1919 .Case("vstore2", true)
1920 .Case("vstore3", true)
1921 .Case("vstore4", true)
1922 .Case("vstore8", true)
1923 .Case("vstore16", true)
1924 .Default(false);
1925 if (!ok)
1926 continue;
1927
Derek Chowcfd368b2017-10-19 20:58:45 -07001928 for (auto &U : F->uses()) {
1929 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04001930 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001931
alan-bakerf795f392019-06-11 18:24:34 -04001932 auto data_type = data->getType();
1933 if (!data_type->isVectorTy())
1934 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001935
alan-bakerf795f392019-06-11 18:24:34 -04001936 auto elems = data_type->getVectorNumElements();
1937 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
1938 elems != 16)
1939 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001940
alan-bakerf795f392019-06-11 18:24:34 -04001941 auto offset = CI->getOperand(1);
1942 auto ptr = CI->getOperand(2);
1943 auto ptr_type = ptr->getType();
1944 auto pointee_type = ptr_type->getPointerElementType();
1945 if (pointee_type != data_type->getVectorElementType())
1946 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001947
alan-bakerf795f392019-06-11 18:24:34 -04001948 // Avoid pointer casts. Instead generate the correct number of stores
1949 // and rely on drivers to coalesce appropriately.
1950 IRBuilder<> builder(CI);
1951 auto elems_const = builder.getInt32(elems);
1952 auto adjust = builder.CreateMul(offset, elems_const);
1953 for (auto i = 0; i < elems; ++i) {
1954 auto idx = builder.getInt32(i);
1955 auto add = builder.CreateAdd(adjust, idx);
1956 auto gep = builder.CreateGEP(ptr, add);
1957 auto extract = builder.CreateExtractElement(data, i);
1958 auto store = builder.CreateStore(extract, gep);
1959 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001960
Derek Chowcfd368b2017-10-19 20:58:45 -07001961 ToRemoves.push_back(CI);
1962 }
1963 }
1964
1965 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07001966 for (auto V : ToRemoves) {
1967 V->eraseFromParent();
1968 }
Derek Chowcfd368b2017-10-19 20:58:45 -07001969 F->eraseFromParent();
1970 }
1971 }
1972
1973 return Changed;
1974}
1975
1976bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
1977 bool Changed = false;
1978
alan-bakerf795f392019-06-11 18:24:34 -04001979 for (auto const &SymVal : M.getValueSymbolTable()) {
1980 if (!SymVal.getKey().contains("vload"))
1981 continue;
1982 if (SymVal.getKey().contains("vload_"))
1983 continue;
1984 if (SymVal.getKey().contains("vloada"))
1985 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001986
alan-bakerf795f392019-06-11 18:24:34 -04001987 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001988 SmallVector<Instruction *, 4> ToRemoves;
1989
alan-bakerf795f392019-06-11 18:24:34 -04001990 auto fname = F->getName();
1991 if (!fname.consume_front("_Z"))
1992 continue;
1993 size_t name_len;
1994 if (fname.consumeInteger(10, name_len))
1995 continue;
1996 std::string name = fname.take_front(name_len);
1997
1998 bool ok = StringSwitch<bool>(name)
1999 .Case("vload2", true)
2000 .Case("vload3", true)
2001 .Case("vload4", true)
2002 .Case("vload8", true)
2003 .Case("vload16", true)
2004 .Default(false);
2005 if (!ok)
2006 continue;
2007
Derek Chowcfd368b2017-10-19 20:58:45 -07002008 for (auto &U : F->uses()) {
2009 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002010 auto ret_type = F->getReturnType();
2011 if (!ret_type->isVectorTy())
2012 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002013
alan-bakerf795f392019-06-11 18:24:34 -04002014 auto elems = ret_type->getVectorNumElements();
2015 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2016 elems != 16)
2017 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002018
alan-bakerf795f392019-06-11 18:24:34 -04002019 auto offset = CI->getOperand(0);
2020 auto ptr = CI->getOperand(1);
2021 auto ptr_type = ptr->getType();
2022 auto pointee_type = ptr_type->getPointerElementType();
2023 if (pointee_type != ret_type->getVectorElementType())
2024 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002025
alan-bakerf795f392019-06-11 18:24:34 -04002026 // Avoid pointer casts. Instead generate the correct number of loads
2027 // and rely on drivers to coalesce appropriately.
2028 IRBuilder<> builder(CI);
2029 auto elems_const = builder.getInt32(elems);
2030 Value *insert = UndefValue::get(ret_type);
2031 auto adjust = builder.CreateMul(offset, elems_const);
2032 for (auto i = 0; i < elems; ++i) {
2033 auto idx = builder.getInt32(i);
2034 auto add = builder.CreateAdd(adjust, idx);
2035 auto gep = builder.CreateGEP(ptr, add);
2036 auto load = builder.CreateLoad(gep);
2037 insert = builder.CreateInsertElement(insert, load, i);
2038 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002039
alan-bakerf795f392019-06-11 18:24:34 -04002040 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002041 ToRemoves.push_back(CI);
2042 }
2043 }
2044
2045 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002046 for (auto V : ToRemoves) {
2047 V->eraseFromParent();
2048 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002049 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002050 }
2051 }
2052
2053 return Changed;
2054}
2055
David Neto22f144c2017-06-12 14:26:21 -04002056bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2057 bool Changed = false;
2058
2059 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2060 "_Z10vload_halfjPU3AS2KDh"};
2061
2062 for (auto Name : Map) {
2063 // If we find a function with the matching name.
2064 if (auto F = M.getFunction(Name)) {
2065 SmallVector<Instruction *, 4> ToRemoves;
2066
2067 // Walk the users of the function.
2068 for (auto &U : F->uses()) {
2069 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2070 // The index argument from vload_half.
2071 auto Arg0 = CI->getOperand(0);
2072
2073 // The pointer argument from vload_half.
2074 auto Arg1 = CI->getOperand(1);
2075
David Neto22f144c2017-06-12 14:26:21 -04002076 auto IntTy = Type::getInt32Ty(M.getContext());
2077 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002078 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2079
David Neto22f144c2017-06-12 14:26:21 -04002080 // Our intrinsic to unpack a float2 from an int.
2081 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2082
2083 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2084
David Neto482550a2018-03-24 05:21:07 -07002085 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002086 auto ShortTy = Type::getInt16Ty(M.getContext());
2087 auto ShortPointerTy = PointerType::get(
2088 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002089
David Netoac825b82017-05-30 12:49:01 -04002090 // Cast the half* pointer to short*.
2091 auto Cast =
2092 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002093
David Netoac825b82017-05-30 12:49:01 -04002094 // Index into the correct address of the casted pointer.
2095 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2096
2097 // Load from the short* we casted to.
2098 auto Load = new LoadInst(Index, "", CI);
2099
2100 // ZExt the short -> int.
2101 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2102
2103 // Get our float2.
2104 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2105
2106 // Extract out the bottom element which is our float result.
2107 auto Extract = ExtractElementInst::Create(
2108 Call, ConstantInt::get(IntTy, 0), "", CI);
2109
2110 CI->replaceAllUsesWith(Extract);
2111 } else {
2112 // Assume the pointer argument points to storage aligned to 32bits
2113 // or more.
2114 // TODO(dneto): Do more analysis to make sure this is true?
2115 //
2116 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2117 // with:
2118 //
2119 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2120 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2121 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2122 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2123 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2124 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2125 // x float> %converted, %index_is_odd32
2126
2127 auto IntPointerTy = PointerType::get(
2128 IntTy, Arg1->getType()->getPointerAddressSpace());
2129
David Neto973e6a82017-05-30 13:48:18 -04002130 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002131 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002132 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002133 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2134
2135 auto One = ConstantInt::get(IntTy, 1);
2136 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2137 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2138
2139 // Index into the correct address of the casted pointer.
2140 auto Ptr =
2141 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2142
2143 // Load from the int* we casted to.
2144 auto Load = new LoadInst(Ptr, "", CI);
2145
2146 // Get our float2.
2147 auto Call = CallInst::Create(NewF, Load, "", CI);
2148
2149 // Extract out the float result, where the element number is
2150 // determined by whether the original index was even or odd.
2151 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2152
2153 CI->replaceAllUsesWith(Extract);
2154 }
David Neto22f144c2017-06-12 14:26:21 -04002155
2156 // Lastly, remember to remove the user.
2157 ToRemoves.push_back(CI);
2158 }
2159 }
2160
2161 Changed = !ToRemoves.empty();
2162
2163 // And cleanup the calls we don't use anymore.
2164 for (auto V : ToRemoves) {
2165 V->eraseFromParent();
2166 }
2167
2168 // And remove the function we don't need either too.
2169 F->eraseFromParent();
2170 }
2171 }
2172
2173 return Changed;
2174}
2175
2176bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002177
Kévin Petite8edce32019-04-10 14:23:32 +01002178 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002179 "_Z11vload_half2jPU3AS1KDh",
2180 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2181 "_Z11vload_half2jPU3AS2KDh",
2182 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2183 };
David Neto22f144c2017-06-12 14:26:21 -04002184
Kévin Petite8edce32019-04-10 14:23:32 +01002185 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2186 // The index argument from vload_half.
2187 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002188
Kévin Petite8edce32019-04-10 14:23:32 +01002189 // The pointer argument from vload_half.
2190 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002191
Kévin Petite8edce32019-04-10 14:23:32 +01002192 auto IntTy = Type::getInt32Ty(M.getContext());
2193 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002194 auto NewPointerTy =
2195 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002196 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002197
Kévin Petite8edce32019-04-10 14:23:32 +01002198 // Cast the half* pointer to int*.
2199 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002200
Kévin Petite8edce32019-04-10 14:23:32 +01002201 // Index into the correct address of the casted pointer.
2202 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002203
Kévin Petite8edce32019-04-10 14:23:32 +01002204 // Load from the int* we casted to.
2205 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002206
Kévin Petite8edce32019-04-10 14:23:32 +01002207 // Our intrinsic to unpack a float2 from an int.
2208 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002209
Kévin Petite8edce32019-04-10 14:23:32 +01002210 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002211
Kévin Petite8edce32019-04-10 14:23:32 +01002212 // Get our float2.
2213 return CallInst::Create(NewF, Load, "", CI);
2214 });
David Neto22f144c2017-06-12 14:26:21 -04002215}
2216
2217bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002218
Kévin Petite8edce32019-04-10 14:23:32 +01002219 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002220 "_Z11vload_half4jPU3AS1KDh",
2221 "_Z12vloada_half4jPU3AS1KDh",
2222 "_Z11vload_half4jPU3AS2KDh",
2223 "_Z12vloada_half4jPU3AS2KDh",
2224 };
David Neto22f144c2017-06-12 14:26:21 -04002225
Kévin Petite8edce32019-04-10 14:23:32 +01002226 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2227 // The index argument from vload_half.
2228 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002229
Kévin Petite8edce32019-04-10 14:23:32 +01002230 // The pointer argument from vload_half.
2231 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002232
Kévin Petite8edce32019-04-10 14:23:32 +01002233 auto IntTy = Type::getInt32Ty(M.getContext());
2234 auto Int2Ty = VectorType::get(IntTy, 2);
2235 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002236 auto NewPointerTy =
2237 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002238 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002239
Kévin Petite8edce32019-04-10 14:23:32 +01002240 // Cast the half* pointer to int2*.
2241 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002242
Kévin Petite8edce32019-04-10 14:23:32 +01002243 // Index into the correct address of the casted pointer.
2244 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002245
Kévin Petite8edce32019-04-10 14:23:32 +01002246 // Load from the int2* we casted to.
2247 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002248
Kévin Petite8edce32019-04-10 14:23:32 +01002249 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002250 auto X =
2251 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2252 auto Y =
2253 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002254
Kévin Petite8edce32019-04-10 14:23:32 +01002255 // Our intrinsic to unpack a float2 from an int.
2256 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002257
Kévin Petite8edce32019-04-10 14:23:32 +01002258 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002259
Kévin Petite8edce32019-04-10 14:23:32 +01002260 // Get the lower (x & y) components of our final float4.
2261 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002262
Kévin Petite8edce32019-04-10 14:23:32 +01002263 // Get the higher (z & w) components of our final float4.
2264 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002265
Kévin Petite8edce32019-04-10 14:23:32 +01002266 Constant *ShuffleMask[4] = {
2267 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2268 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002269
Kévin Petite8edce32019-04-10 14:23:32 +01002270 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002271 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2272 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002273 });
David Neto22f144c2017-06-12 14:26:21 -04002274}
2275
David Neto6ad93232018-06-07 15:42:58 -07002276bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002277
2278 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2279 //
2280 // %u = load i32 %ptr
2281 // %fxy = call <2 x float> Unpack2xHalf(u)
2282 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002283 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002284 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2285 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2286 "_Z20__clspv_vloada_half2jPKj", // private
2287 };
2288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2290 auto Index = CI->getOperand(0);
2291 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002292
Kévin Petite8edce32019-04-10 14:23:32 +01002293 auto IntTy = Type::getInt32Ty(M.getContext());
2294 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2295 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002296
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002297 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002298 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002299
Kévin Petite8edce32019-04-10 14:23:32 +01002300 // Our intrinsic to unpack a float2 from an int.
2301 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002302
Kévin Petite8edce32019-04-10 14:23:32 +01002303 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002304
Kévin Petite8edce32019-04-10 14:23:32 +01002305 // Get our final float2.
2306 return CallInst::Create(NewF, Load, "", CI);
2307 });
David Neto6ad93232018-06-07 15:42:58 -07002308}
2309
2310bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002311
2312 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2313 //
2314 // %u2 = load <2 x i32> %ptr
2315 // %u2xy = extractelement %u2, 0
2316 // %u2zw = extractelement %u2, 1
2317 // %fxy = call <2 x float> Unpack2xHalf(uint)
2318 // %fzw = call <2 x float> Unpack2xHalf(uint)
2319 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002320 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002321 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2322 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2323 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2324 };
2325
Kévin Petite8edce32019-04-10 14:23:32 +01002326 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2327 auto Index = CI->getOperand(0);
2328 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002329
Kévin Petite8edce32019-04-10 14:23:32 +01002330 auto IntTy = Type::getInt32Ty(M.getContext());
2331 auto Int2Ty = VectorType::get(IntTy, 2);
2332 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2333 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002334
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002335 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002336 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002337
Kévin Petite8edce32019-04-10 14:23:32 +01002338 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002339 auto X =
2340 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2341 auto Y =
2342 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002343
Kévin Petite8edce32019-04-10 14:23:32 +01002344 // Our intrinsic to unpack a float2 from an int.
2345 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002346
Kévin Petite8edce32019-04-10 14:23:32 +01002347 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002348
Kévin Petite8edce32019-04-10 14:23:32 +01002349 // Get the lower (x & y) components of our final float4.
2350 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002351
Kévin Petite8edce32019-04-10 14:23:32 +01002352 // Get the higher (z & w) components of our final float4.
2353 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002354
Kévin Petite8edce32019-04-10 14:23:32 +01002355 Constant *ShuffleMask[4] = {
2356 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2357 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002358
Kévin Petite8edce32019-04-10 14:23:32 +01002359 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002360 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2361 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002362 });
David Neto6ad93232018-06-07 15:42:58 -07002363}
2364
David Neto22f144c2017-06-12 14:26:21 -04002365bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002366
Kévin Petite8edce32019-04-10 14:23:32 +01002367 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2368 "_Z15vstore_half_rtefjPU3AS1Dh",
2369 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002370
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002371 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002372 // The value to store.
2373 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 // The index argument from vstore_half.
2376 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002377
Kévin Petite8edce32019-04-10 14:23:32 +01002378 // The pointer argument from vstore_half.
2379 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002380
Kévin Petite8edce32019-04-10 14:23:32 +01002381 auto IntTy = Type::getInt32Ty(M.getContext());
2382 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2383 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2384 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002385
Kévin Petite8edce32019-04-10 14:23:32 +01002386 // Our intrinsic to pack a float2 to an int.
2387 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002388
Kévin Petite8edce32019-04-10 14:23:32 +01002389 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002390
Kévin Petite8edce32019-04-10 14:23:32 +01002391 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002392 auto TempVec = InsertElementInst::Create(
2393 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002394
Kévin Petite8edce32019-04-10 14:23:32 +01002395 // Pack the float2 -> half2 (in an int).
2396 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002397
Kévin Petite8edce32019-04-10 14:23:32 +01002398 Value *Ret;
2399 if (clspv::Option::F16BitStorage()) {
2400 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002401 auto ShortPointerTy =
2402 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002403
Kévin Petite8edce32019-04-10 14:23:32 +01002404 // Truncate our i32 to an i16.
2405 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002406
Kévin Petite8edce32019-04-10 14:23:32 +01002407 // Cast the half* pointer to short*.
2408 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002409
Kévin Petite8edce32019-04-10 14:23:32 +01002410 // Index into the correct address of the casted pointer.
2411 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002412
Kévin Petite8edce32019-04-10 14:23:32 +01002413 // Store to the int* we casted to.
2414 Ret = new StoreInst(Trunc, Index, CI);
2415 } else {
2416 // We can only write to 32-bit aligned words.
2417 //
2418 // Assuming base is aligned to 32-bits, replace the equivalent of
2419 // vstore_half(value, index, base)
2420 // with:
2421 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2422 // uint32_t write_to_upper_half = index & 1u;
2423 // uint32_t shift = write_to_upper_half << 4;
2424 //
2425 // // Pack the float value as a half number in bottom 16 bits
2426 // // of an i32.
2427 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2428 //
2429 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2430 // ^ ((packed & 0xffff) << shift)
2431 // // We only need relaxed consistency, but OpenCL 1.2 only has
2432 // // sequentially consistent atomics.
2433 // // TODO(dneto): Use relaxed consistency.
2434 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002435 auto IntPointerTy =
2436 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002437
Kévin Petite8edce32019-04-10 14:23:32 +01002438 auto Four = ConstantInt::get(IntTy, 4);
2439 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002440
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002441 auto IndexIsOdd =
2442 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002443 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002444 auto IndexIntoI32 =
2445 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2446 auto BaseI32Ptr =
2447 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2448 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2449 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002450 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2451 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002452 auto MaskBitsToWrite =
2453 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2454 auto MaskedCurrent = BinaryOperator::CreateAnd(
2455 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002456
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002457 auto XLowerBits =
2458 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2459 auto NewBitsToWrite =
2460 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2461 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2462 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002463
Kévin Petite8edce32019-04-10 14:23:32 +01002464 // Generate the call to atomi_xor.
2465 SmallVector<Type *, 5> ParamTypes;
2466 // The pointer type.
2467 ParamTypes.push_back(IntPointerTy);
2468 // The Types for memory scope, semantics, and value.
2469 ParamTypes.push_back(IntTy);
2470 ParamTypes.push_back(IntTy);
2471 ParamTypes.push_back(IntTy);
2472 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2473 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002474
Kévin Petite8edce32019-04-10 14:23:32 +01002475 const auto ConstantScopeDevice =
2476 ConstantInt::get(IntTy, spv::ScopeDevice);
2477 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2478 // (SPIR-V Workgroup).
2479 const auto AddrSpaceSemanticsBits =
2480 IntPointerTy->getPointerAddressSpace() == 1
2481 ? spv::MemorySemanticsUniformMemoryMask
2482 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002483
Kévin Petite8edce32019-04-10 14:23:32 +01002484 // We're using relaxed consistency here.
2485 const auto ConstantMemorySemantics =
2486 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2487 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002488
Kévin Petite8edce32019-04-10 14:23:32 +01002489 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2490 ConstantMemorySemantics, ValueToXor};
2491 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2492 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002493 }
David Neto22f144c2017-06-12 14:26:21 -04002494
Kévin Petite8edce32019-04-10 14:23:32 +01002495 return Ret;
2496 });
David Neto22f144c2017-06-12 14:26:21 -04002497}
2498
2499bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002500
Kévin Petite8edce32019-04-10 14:23:32 +01002501 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002502 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2503 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2504 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2505 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2506 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2507 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2508 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2509 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2510 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2511 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2512 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2513 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2514 };
David Neto22f144c2017-06-12 14:26:21 -04002515
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002516 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002517 // The value to store.
2518 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002519
Kévin Petite8edce32019-04-10 14:23:32 +01002520 // The index argument from vstore_half.
2521 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002522
Kévin Petite8edce32019-04-10 14:23:32 +01002523 // The pointer argument from vstore_half.
2524 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002525
Kévin Petite8edce32019-04-10 14:23:32 +01002526 auto IntTy = Type::getInt32Ty(M.getContext());
2527 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002528 auto NewPointerTy =
2529 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002530 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002531
Kévin Petite8edce32019-04-10 14:23:32 +01002532 // Our intrinsic to pack a float2 to an int.
2533 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002534
Kévin Petite8edce32019-04-10 14:23:32 +01002535 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002536
Kévin Petite8edce32019-04-10 14:23:32 +01002537 // Turn the packed x & y into the final packing.
2538 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002539
Kévin Petite8edce32019-04-10 14:23:32 +01002540 // Cast the half* pointer to int*.
2541 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002542
Kévin Petite8edce32019-04-10 14:23:32 +01002543 // Index into the correct address of the casted pointer.
2544 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002545
Kévin Petite8edce32019-04-10 14:23:32 +01002546 // Store to the int* we casted to.
2547 return new StoreInst(X, Index, CI);
2548 });
David Neto22f144c2017-06-12 14:26:21 -04002549}
2550
2551bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002552
Kévin Petite8edce32019-04-10 14:23:32 +01002553 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002554 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2555 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2556 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2557 "_Z13vstorea_half4Dv4_fjPDh", // private
2558 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2559 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2560 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2561 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2562 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2563 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2564 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2565 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2566 };
David Neto22f144c2017-06-12 14:26:21 -04002567
Kévin Petite8edce32019-04-10 14:23:32 +01002568 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2569 // The value to store.
2570 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002571
Kévin Petite8edce32019-04-10 14:23:32 +01002572 // The index argument from vstore_half.
2573 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002574
Kévin Petite8edce32019-04-10 14:23:32 +01002575 // The pointer argument from vstore_half.
2576 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002577
Kévin Petite8edce32019-04-10 14:23:32 +01002578 auto IntTy = Type::getInt32Ty(M.getContext());
2579 auto Int2Ty = VectorType::get(IntTy, 2);
2580 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002581 auto NewPointerTy =
2582 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002583 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002584
Kévin Petite8edce32019-04-10 14:23:32 +01002585 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2586 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002587
Kévin Petite8edce32019-04-10 14:23:32 +01002588 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002589 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2590 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002591
Kévin Petite8edce32019-04-10 14:23:32 +01002592 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2593 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002594
Kévin Petite8edce32019-04-10 14:23:32 +01002595 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002596 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2597 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002598
Kévin Petite8edce32019-04-10 14:23:32 +01002599 // Our intrinsic to pack a float2 to an int.
2600 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002601
Kévin Petite8edce32019-04-10 14:23:32 +01002602 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002603
Kévin Petite8edce32019-04-10 14:23:32 +01002604 // Turn the packed x & y into the final component of our int2.
2605 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002606
Kévin Petite8edce32019-04-10 14:23:32 +01002607 // Turn the packed z & w into the final component of our int2.
2608 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002609
Kévin Petite8edce32019-04-10 14:23:32 +01002610 auto Combine = InsertElementInst::Create(
2611 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002612 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2613 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002614
Kévin Petite8edce32019-04-10 14:23:32 +01002615 // Cast the half* pointer to int2*.
2616 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002617
Kévin Petite8edce32019-04-10 14:23:32 +01002618 // Index into the correct address of the casted pointer.
2619 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002620
Kévin Petite8edce32019-04-10 14:23:32 +01002621 // Store to the int2* we casted to.
2622 return new StoreInst(Combine, Index, CI);
2623 });
David Neto22f144c2017-06-12 14:26:21 -04002624}
2625
2626bool ReplaceOpenCLBuiltinPass::replaceReadImageF(Module &M) {
2627 bool Changed = false;
2628
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002629 const std::map<const char *, const char *> Map = {
2630 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2631 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2632 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_i",
2633 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002634
2635 for (auto Pair : Map) {
2636 // If we find a function with the matching name.
2637 if (auto F = M.getFunction(Pair.first)) {
2638 SmallVector<Instruction *, 4> ToRemoves;
2639
2640 // Walk the users of the function.
2641 for (auto &U : F->uses()) {
2642 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2643 // The image.
2644 auto Arg0 = CI->getOperand(0);
2645
2646 // The sampler.
2647 auto Arg1 = CI->getOperand(1);
2648
2649 // The coordinate (integer type that we can't handle).
2650 auto Arg2 = CI->getOperand(2);
2651
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002652 auto FloatVecTy =
2653 VectorType::get(Type::getFloatTy(M.getContext()),
2654 Arg2->getType()->getVectorNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002655
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002656 auto NewFType = FunctionType::get(
2657 CI->getType(), {Arg0->getType(), Arg1->getType(), FloatVecTy},
2658 false);
David Neto22f144c2017-06-12 14:26:21 -04002659
2660 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2661
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002662 auto Cast =
2663 CastInst::Create(Instruction::SIToFP, Arg2, FloatVecTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002664
2665 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2666
2667 CI->replaceAllUsesWith(NewCI);
2668
2669 // Lastly, remember to remove the user.
2670 ToRemoves.push_back(CI);
2671 }
2672 }
2673
2674 Changed = !ToRemoves.empty();
2675
2676 // And cleanup the calls we don't use anymore.
2677 for (auto V : ToRemoves) {
2678 V->eraseFromParent();
2679 }
2680
2681 // And remove the function we don't need either too.
2682 F->eraseFromParent();
2683 }
2684 }
2685
2686 return Changed;
2687}
2688
2689bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
2690 bool Changed = false;
2691
Kévin Petit9b340262019-06-19 18:31:11 +01002692 const std::map<const char *, spv::Op> Map = {
2693 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
2694 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
2695 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
2696 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
2697 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
2698 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
2699 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
2700 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
2701 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2702 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2703 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2704 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
2705 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
2706 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
2707 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
2708 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
2709 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
2710 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
2711 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
2712 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
2713 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
2714 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
2715 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
2716 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04002717
2718 for (auto Pair : Map) {
2719 // If we find a function with the matching name.
2720 if (auto F = M.getFunction(Pair.first)) {
2721 SmallVector<Instruction *, 4> ToRemoves;
2722
2723 // Walk the users of the function.
2724 for (auto &U : F->uses()) {
2725 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04002726
2727 auto IntTy = Type::getInt32Ty(M.getContext());
2728
David Neto22f144c2017-06-12 14:26:21 -04002729 // We need to map the OpenCL constants to the SPIR-V equivalents.
2730 const auto ConstantScopeDevice =
2731 ConstantInt::get(IntTy, spv::ScopeDevice);
2732 const auto ConstantMemorySemantics = ConstantInt::get(
2733 IntTy, spv::MemorySemanticsUniformMemoryMask |
2734 spv::MemorySemanticsSequentiallyConsistentMask);
2735
2736 SmallVector<Value *, 5> Params;
2737
2738 // The pointer.
2739 Params.push_back(CI->getArgOperand(0));
2740
2741 // The memory scope.
2742 Params.push_back(ConstantScopeDevice);
2743
2744 // The memory semantics.
2745 Params.push_back(ConstantMemorySemantics);
2746
2747 if (2 < CI->getNumArgOperands()) {
2748 // The unequal memory semantics.
2749 Params.push_back(ConstantMemorySemantics);
2750
2751 // The value.
2752 Params.push_back(CI->getArgOperand(2));
2753
2754 // The comparator.
2755 Params.push_back(CI->getArgOperand(1));
2756 } else if (1 < CI->getNumArgOperands()) {
2757 // The value.
2758 Params.push_back(CI->getArgOperand(1));
2759 }
2760
Kévin Petit9b340262019-06-19 18:31:11 +01002761 auto NewCI =
2762 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04002763
2764 CI->replaceAllUsesWith(NewCI);
2765
2766 // Lastly, remember to remove the user.
2767 ToRemoves.push_back(CI);
2768 }
2769 }
2770
2771 Changed = !ToRemoves.empty();
2772
2773 // And cleanup the calls we don't use anymore.
2774 for (auto V : ToRemoves) {
2775 V->eraseFromParent();
2776 }
2777
2778 // And remove the function we don't need either too.
2779 F->eraseFromParent();
2780 }
2781 }
2782
Neil Henning39672102017-09-29 14:33:13 +01002783 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002784 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002785 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002786 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002787 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002788 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002789 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002790 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002791 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002792 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002793 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002794 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002795 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002796 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002797 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002798 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002799 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002800 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002801 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002802 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002803 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002804 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002805 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002806 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002807 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002808 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002809 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002810 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002811 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002812 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002813 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00002814 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002815 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01002816 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002817 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002818 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00002819 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01002820 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002821 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002822 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00002823 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01002824 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002825 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002826 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00002827 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01002828 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00002829 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01002830 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00002831 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01002832 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00002833 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01002834 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00002835 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01002836 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002837 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002838 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00002839 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01002840 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002841 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002842 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00002843 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01002844 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00002845 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
2846 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
2847 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01002848
2849 for (auto Pair : Map2) {
2850 // If we find a function with the matching name.
2851 if (auto F = M.getFunction(Pair.first)) {
2852 SmallVector<Instruction *, 4> ToRemoves;
2853
2854 // Walk the users of the function.
2855 for (auto &U : F->uses()) {
2856 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2857 auto AtomicOp = new AtomicRMWInst(
2858 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
2859 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
2860
2861 CI->replaceAllUsesWith(AtomicOp);
2862
2863 // Lastly, remember to remove the user.
2864 ToRemoves.push_back(CI);
2865 }
2866 }
2867
2868 Changed = !ToRemoves.empty();
2869
2870 // And cleanup the calls we don't use anymore.
2871 for (auto V : ToRemoves) {
2872 V->eraseFromParent();
2873 }
2874
2875 // And remove the function we don't need either too.
2876 F->eraseFromParent();
2877 }
2878 }
2879
David Neto22f144c2017-06-12 14:26:21 -04002880 return Changed;
2881}
2882
2883bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002884
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002885 std::vector<const char *> Names = {
2886 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01002887 };
2888
2889 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002890 auto IntTy = Type::getInt32Ty(M.getContext());
2891 auto FloatTy = Type::getFloatTy(M.getContext());
2892
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002893 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2894 ConstantInt::get(IntTy, 1),
2895 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002896
2897 Constant *UpShuffleMask[4] = {
2898 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2899 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2900
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002901 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2902 UndefValue::get(FloatTy),
2903 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002904
Kévin Petite8edce32019-04-10 14:23:32 +01002905 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002906 auto Arg0 =
2907 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2908 ConstantVector::get(DownShuffleMask), "", CI);
2909 auto Arg1 =
2910 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2911 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002912 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002913
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002914 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04002915
Kévin Petite8edce32019-04-10 14:23:32 +01002916 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002917
Kévin Petite8edce32019-04-10 14:23:32 +01002918 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002919
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002920 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2921 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002922 });
David Neto22f144c2017-06-12 14:26:21 -04002923}
David Neto62653202017-10-16 19:05:18 -04002924
2925bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
2926 bool Changed = false;
2927
2928 // OpenCL's float result = fract(float x, float* ptr)
2929 //
2930 // In the LLVM domain:
2931 //
2932 // %floor_result = call spir_func float @floor(float %x)
2933 // store float %floor_result, float * %ptr
2934 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2935 // %result = call spir_func float
2936 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2937 //
2938 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2939 // and clspv.fract occur in the SPIR-V generator pass:
2940 //
2941 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2942 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2943 // ...
2944 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2945 // OpStore %ptr %floor_result
2946 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2947 // %fract_result = OpExtInst %float
2948 // %glsl_ext Fmin %fract_intermediate %just_under_1
2949
David Neto62653202017-10-16 19:05:18 -04002950 using std::string;
2951
2952 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2953 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002954 using QuadType =
2955 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04002956 auto make_quad = [](const char *a, const char *b, const char *c,
2957 const char *d) {
2958 return std::tuple<const char *, const char *, const char *, const char *>(
2959 a, b, c, d);
2960 };
2961 const std::vector<QuadType> Functions = {
2962 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002963 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
2964 "clspv.fract.v2f"),
2965 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
2966 "clspv.fract.v3f"),
2967 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
2968 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04002969 };
2970
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002971 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04002972 const StringRef fract_name(std::get<0>(quad));
2973
2974 // If we find a function with the matching name.
2975 if (auto F = M.getFunction(fract_name)) {
2976 if (F->use_begin() == F->use_end())
2977 continue;
2978
2979 // We have some uses.
2980 Changed = true;
2981
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002982 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04002983
2984 const StringRef floor_name(std::get<1>(quad));
2985 const StringRef fmin_name(std::get<2>(quad));
2986 const StringRef clspv_fract_name(std::get<3>(quad));
2987
2988 // This is either float or a float vector. All the float-like
2989 // types are this type.
2990 auto result_ty = F->getReturnType();
2991
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002992 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04002993 if (!fmin_fn) {
2994 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002995 FunctionType *fn_ty =
2996 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04002997 fmin_fn =
2998 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04002999 fmin_fn->addFnAttr(Attribute::ReadNone);
3000 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3001 }
3002
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003003 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003004 if (!floor_fn) {
3005 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003006 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003007 floor_fn = cast<Function>(
3008 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003009 floor_fn->addFnAttr(Attribute::ReadNone);
3010 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3011 }
3012
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003013 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003014 if (!clspv_fract_fn) {
3015 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003016 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003017 clspv_fract_fn = cast<Function>(
3018 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003019 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3020 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3021 }
3022
3023 // Number of significand bits of precision, counting the leading bit even when it is implicit (not stored).
3024 unsigned num_significand_bits;
3025 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003026 case Type::HalfTyID:
3027 num_significand_bits = 11;
3028 break;
3029 case Type::FloatTyID:
3030 num_significand_bits = 24;
3031 break;
3032 case Type::DoubleTyID:
3033 num_significand_bits = 53;
3034 break;
3035 default:
3036 assert(false && "Unhandled float type when processing fract builtin");
3037 break;
David Neto62653202017-10-16 19:05:18 -04003038 }
3039 // Beware that the disassembler displays this value as
3040 // OpConstant %float 1
3041 // which is not quite right.
3042 const double kJustUnderOneScalar =
3043 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3044
3045 Constant *just_under_one =
3046 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3047 if (result_ty->isVectorTy()) {
3048 just_under_one = ConstantVector::getSplat(
3049 result_ty->getVectorNumElements(), just_under_one);
3050 }
3051
3052 IRBuilder<> Builder(Context);
3053
3054 SmallVector<Instruction *, 4> ToRemoves;
3055
3056 // Walk the users of the function.
3057 for (auto &U : F->uses()) {
3058 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3059
3060 Builder.SetInsertPoint(CI);
3061 auto arg = CI->getArgOperand(0);
3062 auto ptr = CI->getArgOperand(1);
3063
3064 // Compute floor result and store it.
3065 auto floor = Builder.CreateCall(floor_fn, {arg});
3066 Builder.CreateStore(floor, ptr);
3067
3068 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003069 auto fract_result =
3070 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003071
3072 CI->replaceAllUsesWith(fract_result);
3073
3074 // Lastly, remember to remove the user.
3075 ToRemoves.push_back(CI);
3076 }
3077 }
3078
3079 // And cleanup the calls we don't use anymore.
3080 for (auto V : ToRemoves) {
3081 V->eraseFromParent();
3082 }
3083
3084 // And remove the function itself, which we no longer need either.
3085 F->eraseFromParent();
3086 }
3087 }
3088
3089 return Changed;
3090}