blob: 10ae485ce56955872ba9d09e6bf89267f1a7ddc4 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
David Neto118188e2018-08-24 11:27:54 -040030#include "spirv/1.0/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
James Pricec05f6052020-01-14 13:37:20 -050033#include "clspv/DescriptorMap.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040034#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070035
alan-baker931d18a2019-12-12 08:21:32 -050036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
38#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050039#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040
David Neto22f144c2017-06-12 14:26:21 -040041using namespace llvm;
42
43#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
44
45namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000046
// Per-argument type information recovered from a mangled builtin name.
struct ArgTypeInfo {
  // Signedness class of the argument's scalar type. `None` is what the name
  // parser in this file assigns to floating-point codes ('f' and "Dh").
  enum class SignedNess { None, Unsigned, Signed };

  // Defaults to None so that a default-constructed ArgTypeInfo never carries
  // an indeterminate value (the parser copies such objects around).
  SignedNess signedness = SignedNess::None;
};
51
52struct FunctionInfo {
Kévin Petit9d1a9d12019-03-25 15:23:46 +000053 StringRef name;
Kévin Petit8a560882019-03-21 15:24:34 +000054 std::vector<ArgTypeInfo> argTypeInfos;
Kévin Petit8a560882019-03-21 15:24:34 +000055
Kévin Petit91bc72e2019-04-08 15:17:46 +010056 bool isArgSigned(size_t arg) const {
57 assert(argTypeInfos.size() > arg);
58 return argTypeInfos[arg].signedness == ArgTypeInfo::SignedNess::Signed;
Kévin Petit8a560882019-03-21 15:24:34 +000059 }
60
Kévin Petit91bc72e2019-04-08 15:17:46 +010061 static FunctionInfo getFromMangledName(StringRef name) {
62 FunctionInfo fi;
63 if (!getFromMangledNameCheck(name, &fi)) {
64 llvm_unreachable("Can't parse mangled function name!");
Kévin Petit8a560882019-03-21 15:24:34 +000065 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010066 return fi;
67 }
Kévin Petit8a560882019-03-21 15:24:34 +000068
Kévin Petit91bc72e2019-04-08 15:17:46 +010069 static bool getFromMangledNameCheck(StringRef name, FunctionInfo *finfo) {
70 if (!name.consume_front("_Z")) {
71 return false;
72 }
73 size_t nameLen;
74 if (name.consumeInteger(10, nameLen)) {
Kévin Petit8a560882019-03-21 15:24:34 +000075 return false;
76 }
77
Kévin Petit91bc72e2019-04-08 15:17:46 +010078 finfo->name = name.take_front(nameLen);
79 name = name.drop_front(nameLen);
Kévin Petit8a560882019-03-21 15:24:34 +000080
Kévin Petit91bc72e2019-04-08 15:17:46 +010081 ArgTypeInfo prev_ti;
Kévin Petit8a560882019-03-21 15:24:34 +000082
Kévin Petit91bc72e2019-04-08 15:17:46 +010083 while (name.size() != 0) {
84
85 ArgTypeInfo ti;
86
87 // Try parsing a vector prefix
88 if (name.consume_front("Dv")) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040089 int numElems;
90 if (name.consumeInteger(10, numElems)) {
91 return false;
92 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010093
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040094 if (!name.consume_front("_")) {
95 return false;
96 }
Kévin Petit91bc72e2019-04-08 15:17:46 +010097 }
98
99 // Parse the base type
alan-baker4092cc52020-01-15 16:42:57 -0500100 if (name.consume_front("Dh")) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100101 ti.signedness = ArgTypeInfo::SignedNess::None;
alan-baker4092cc52020-01-15 16:42:57 -0500102 } else {
103 char typeCode = name.front();
104 name = name.drop_front(1);
105 switch (typeCode) {
106 case 'c': // char
107 case 'a': // signed char
108 case 's': // short
109 case 'i': // int
110 case 'l': // long
111 ti.signedness = ArgTypeInfo::SignedNess::Signed;
112 break;
113 case 'h': // unsigned char
114 case 't': // unsigned short
115 case 'j': // unsigned int
116 case 'm': // unsigned long
117 ti.signedness = ArgTypeInfo::SignedNess::Unsigned;
118 break;
119 case 'f':
120 ti.signedness = ArgTypeInfo::SignedNess::None;
121 break;
122 case 'S':
123 ti = prev_ti;
124 if (!name.consume_front("_")) {
125 return false;
126 }
127 break;
128 default:
Kévin Petit91bc72e2019-04-08 15:17:46 +0100129 return false;
130 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100131 }
132
133 finfo->argTypeInfos.push_back(ti);
134
135 prev_ti = ti;
136 }
137
138 return true;
139 };
Kévin Petit8a560882019-03-21 15:24:34 +0000140};
141
// Returns the index of the most significant set bit of `v` (that is,
// floor(log2(v))), and 0 when `v` is 0. Despite its name this is not a
// count of leading zeros; callers in this file subtract two results to
// obtain the shift distance between two single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t highestBit = 0;
  // Each shift that still leaves a non-zero value means there is a set bit
  // one position higher than what we have counted so far.
  while ((v >>= 1) != 0) {
    ++highestBit;
  }
  return highestBit;
}
161
162Type *getBoolOrBoolVectorTy(LLVMContext &C, unsigned elements) {
163 if (1 == elements) {
164 return Type::getInt1Ty(C);
165 } else {
166 return VectorType::get(Type::getInt1Ty(C), elements);
167 }
168}
169
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100170Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
171 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
172 if (Ty->isVectorTy()) {
173 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
174 }
175 return IntTy;
176}
177
David Neto22f144c2017-06-12 14:26:21 -0400178struct ReplaceOpenCLBuiltinPass final : public ModulePass {
179 static char ID;
180 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
181
182 bool runOnModule(Module &M) override;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000183 bool replaceAbs(Module &M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100184 bool replaceAbsDiff(Module &M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100185 bool replaceCopysign(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400186 bool replaceRecip(Module &M);
187 bool replaceDivide(Module &M);
Kévin Petit1329a002019-06-15 05:54:05 +0100188 bool replaceDot(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400189 bool replaceExp10(Module &M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100190 bool replaceFmod(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400191 bool replaceLog10(Module &M);
192 bool replaceBarrier(Module &M);
193 bool replaceMemFence(Module &M);
194 bool replaceRelational(Module &M);
195 bool replaceIsInfAndIsNan(Module &M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100196 bool replaceIsFinite(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400197 bool replaceAllAndAny(Module &M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000198 bool replaceUpsample(Module &M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000199 bool replaceRotate(Module &M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000200 bool replaceConvert(Module &M);
Kévin Petit8a560882019-03-21 15:24:34 +0000201 bool replaceMulHiMadHi(Module &M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000202 bool replaceSelect(Module &M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000203 bool replaceBitSelect(Module &M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000204 bool replaceStepSmoothStep(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400205 bool replaceSignbit(Module &M);
206 bool replaceMadandMad24andMul24(Module &M);
207 bool replaceVloadHalf(Module &M);
208 bool replaceVloadHalf2(Module &M);
209 bool replaceVloadHalf4(Module &M);
David Neto6ad93232018-06-07 15:42:58 -0700210 bool replaceClspvVloadaHalf2(Module &M);
211 bool replaceClspvVloadaHalf4(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400212 bool replaceVstoreHalf(Module &M);
213 bool replaceVstoreHalf2(Module &M);
214 bool replaceVstoreHalf4(Module &M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500215 bool replaceHalfReadImage(Module &M);
216 bool replaceHalfWriteImage(Module &M);
alan-baker931d18a2019-12-12 08:21:32 -0500217 bool replaceUnsampledReadImage(Module &M);
Kévin Petit06517a12019-12-09 19:40:31 +0000218 bool replaceSampledReadImageWithIntCoords(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400219 bool replaceAtomics(Module &M);
220 bool replaceCross(Module &M);
David Neto62653202017-10-16 19:05:18 -0400221 bool replaceFract(Module &M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700222 bool replaceVload(Module &M);
223 bool replaceVstore(Module &M);
David Neto22f144c2017-06-12 14:26:21 -0400224};
Kévin Petit91bc72e2019-04-08 15:17:46 +0100225} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400226
227char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400228INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
229 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400230
231namespace clspv {
232ModulePass *createReplaceOpenCLBuiltinPass() {
233 return new ReplaceOpenCLBuiltinPass();
234}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400235} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400236
237bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
238 bool Changed = false;
239
Kévin Petit2444e9b2018-11-09 14:14:37 +0000240 Changed |= replaceAbs(M);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100241 Changed |= replaceAbsDiff(M);
Kévin Petit8c1be282019-04-02 19:34:25 +0100242 Changed |= replaceCopysign(M);
David Neto22f144c2017-06-12 14:26:21 -0400243 Changed |= replaceRecip(M);
244 Changed |= replaceDivide(M);
Kévin Petit1329a002019-06-15 05:54:05 +0100245 Changed |= replaceDot(M);
David Neto22f144c2017-06-12 14:26:21 -0400246 Changed |= replaceExp10(M);
Kévin Petit0644a9c2019-06-20 21:08:46 +0100247 Changed |= replaceFmod(M);
David Neto22f144c2017-06-12 14:26:21 -0400248 Changed |= replaceLog10(M);
249 Changed |= replaceBarrier(M);
250 Changed |= replaceMemFence(M);
251 Changed |= replaceRelational(M);
252 Changed |= replaceIsInfAndIsNan(M);
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100253 Changed |= replaceIsFinite(M);
David Neto22f144c2017-06-12 14:26:21 -0400254 Changed |= replaceAllAndAny(M);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000255 Changed |= replaceUpsample(M);
Kévin Petitd44eef52019-03-08 13:22:14 +0000256 Changed |= replaceRotate(M);
Kévin Petit9d1a9d12019-03-25 15:23:46 +0000257 Changed |= replaceConvert(M);
Kévin Petit8a560882019-03-21 15:24:34 +0000258 Changed |= replaceMulHiMadHi(M);
Kévin Petitf5b78a22018-10-25 14:32:17 +0000259 Changed |= replaceSelect(M);
Kévin Petite7d0cce2018-10-31 12:38:56 +0000260 Changed |= replaceBitSelect(M);
Kévin Petit6b0a9532018-10-30 20:00:39 +0000261 Changed |= replaceStepSmoothStep(M);
David Neto22f144c2017-06-12 14:26:21 -0400262 Changed |= replaceSignbit(M);
263 Changed |= replaceMadandMad24andMul24(M);
264 Changed |= replaceVloadHalf(M);
265 Changed |= replaceVloadHalf2(M);
266 Changed |= replaceVloadHalf4(M);
David Neto6ad93232018-06-07 15:42:58 -0700267 Changed |= replaceClspvVloadaHalf2(M);
268 Changed |= replaceClspvVloadaHalf4(M);
David Neto22f144c2017-06-12 14:26:21 -0400269 Changed |= replaceVstoreHalf(M);
270 Changed |= replaceVstoreHalf2(M);
271 Changed |= replaceVstoreHalf4(M);
alan-bakerf7e17cb2020-01-02 07:29:59 -0500272 // Replace the half image builtins before handling other image builtins.
273 Changed |= replaceHalfReadImage(M);
274 Changed |= replaceHalfWriteImage(M);
alan-baker931d18a2019-12-12 08:21:32 -0500275 // Replace unsampled reads before converting sampled read coordinates.
276 Changed |= replaceUnsampledReadImage(M);
Kévin Petit06517a12019-12-09 19:40:31 +0000277 Changed |= replaceSampledReadImageWithIntCoords(M);
David Neto22f144c2017-06-12 14:26:21 -0400278 Changed |= replaceAtomics(M);
279 Changed |= replaceCross(M);
David Neto62653202017-10-16 19:05:18 -0400280 Changed |= replaceFract(M);
Derek Chowcfd368b2017-10-19 20:58:45 -0700281 Changed |= replaceVload(M);
282 Changed |= replaceVstore(M);
David Neto22f144c2017-06-12 14:26:21 -0400283
284 return Changed;
285}
286
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400287bool replaceCallsWithValue(Module &M, std::vector<const char *> Names,
288 std::function<Value *(CallInst *)> Replacer) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000289
Kévin Petite8edce32019-04-10 14:23:32 +0100290 bool Changed = false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000291
292 for (auto Name : Names) {
293 // If we find a function with the matching name.
294 if (auto F = M.getFunction(Name)) {
295 SmallVector<Instruction *, 4> ToRemoves;
296
297 // Walk the users of the function.
298 for (auto &U : F->uses()) {
299 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
Kévin Petit2444e9b2018-11-09 14:14:37 +0000300
Kévin Petite8edce32019-04-10 14:23:32 +0100301 auto NewValue = Replacer(CI);
302
303 if (NewValue != nullptr) {
304 CI->replaceAllUsesWith(NewValue);
305 }
Kévin Petit2444e9b2018-11-09 14:14:37 +0000306
307 // Lastly, remember to remove the user.
308 ToRemoves.push_back(CI);
309 }
310 }
311
312 Changed = !ToRemoves.empty();
313
314 // And cleanup the calls we don't use anymore.
315 for (auto V : ToRemoves) {
316 V->eraseFromParent();
317 }
318
319 // And remove the function we don't need either too.
320 F->eraseFromParent();
321 }
322 }
323
324 return Changed;
325}
326
Kévin Petite8edce32019-04-10 14:23:32 +0100327bool ReplaceOpenCLBuiltinPass::replaceAbs(Module &M) {
Kévin Petit91bc72e2019-04-08 15:17:46 +0100328
Kévin Petite8edce32019-04-10 14:23:32 +0100329 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400330 "_Z3absh", "_Z3absDv2_h", "_Z3absDv3_h", "_Z3absDv4_h",
331 "_Z3abst", "_Z3absDv2_t", "_Z3absDv3_t", "_Z3absDv4_t",
332 "_Z3absj", "_Z3absDv2_j", "_Z3absDv3_j", "_Z3absDv4_j",
333 "_Z3absm", "_Z3absDv2_m", "_Z3absDv3_m", "_Z3absDv4_m",
Kévin Petite8edce32019-04-10 14:23:32 +0100334 };
335
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400336 return replaceCallsWithValue(M, Names,
337 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100338}
339
340bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Module &M) {
341
342 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400343 "_Z8abs_diffcc", "_Z8abs_diffDv2_cS_", "_Z8abs_diffDv3_cS_",
344 "_Z8abs_diffDv4_cS_", "_Z8abs_diffhh", "_Z8abs_diffDv2_hS_",
345 "_Z8abs_diffDv3_hS_", "_Z8abs_diffDv4_hS_", "_Z8abs_diffss",
346 "_Z8abs_diffDv2_sS_", "_Z8abs_diffDv3_sS_", "_Z8abs_diffDv4_sS_",
347 "_Z8abs_difftt", "_Z8abs_diffDv2_tS_", "_Z8abs_diffDv3_tS_",
348 "_Z8abs_diffDv4_tS_", "_Z8abs_diffii", "_Z8abs_diffDv2_iS_",
349 "_Z8abs_diffDv3_iS_", "_Z8abs_diffDv4_iS_", "_Z8abs_diffjj",
350 "_Z8abs_diffDv2_jS_", "_Z8abs_diffDv3_jS_", "_Z8abs_diffDv4_jS_",
351 "_Z8abs_diffll", "_Z8abs_diffDv2_lS_", "_Z8abs_diffDv3_lS_",
352 "_Z8abs_diffDv4_lS_", "_Z8abs_diffmm", "_Z8abs_diffDv2_mS_",
353 "_Z8abs_diffDv3_mS_", "_Z8abs_diffDv4_mS_",
Kévin Petit91bc72e2019-04-08 15:17:46 +0100354 };
355
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400356 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100357 auto XValue = CI->getOperand(0);
358 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100359
Kévin Petite8edce32019-04-10 14:23:32 +0100360 IRBuilder<> Builder(CI);
361 auto XmY = Builder.CreateSub(XValue, YValue);
362 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100363
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400364 Value *Cmp;
Kévin Petite8edce32019-04-10 14:23:32 +0100365 auto F = CI->getCalledFunction();
366 auto finfo = FunctionInfo::getFromMangledName(F->getName());
367 if (finfo.isArgSigned(0)) {
368 Cmp = Builder.CreateICmpSGT(YValue, XValue);
369 } else {
370 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100371 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100372
Kévin Petite8edce32019-04-10 14:23:32 +0100373 return Builder.CreateSelect(Cmp, YmX, XmY);
374 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100375}
376
Kévin Petit8c1be282019-04-02 19:34:25 +0100377bool ReplaceOpenCLBuiltinPass::replaceCopysign(Module &M) {
Kévin Petit8c1be282019-04-02 19:34:25 +0100378
Kévin Petite8edce32019-04-10 14:23:32 +0100379 std::vector<const char *> Names = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400380 "_Z8copysignff",
381 "_Z8copysignDv2_fS_",
382 "_Z8copysignDv3_fS_",
383 "_Z8copysignDv4_fS_",
Kévin Petit8c1be282019-04-02 19:34:25 +0100384 };
385
Kévin Petite8edce32019-04-10 14:23:32 +0100386 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
387 auto XValue = CI->getOperand(0);
388 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100389
Kévin Petite8edce32019-04-10 14:23:32 +0100390 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100391
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400392 Type *IntTy = Type::getIntNTy(M.getContext(), Ty->getScalarSizeInBits());
Kévin Petite8edce32019-04-10 14:23:32 +0100393 if (Ty->isVectorTy()) {
394 IntTy = VectorType::get(IntTy, Ty->getVectorNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100395 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100396
Kévin Petite8edce32019-04-10 14:23:32 +0100397 // Return X with the sign of Y
398
399 // Sign bit masks
400 auto SignBit = IntTy->getScalarSizeInBits() - 1;
401 auto SignBitMask = 1 << SignBit;
402 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
403 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
404
405 IRBuilder<> Builder(CI);
406
407 // Extract sign of Y
408 auto YInt = Builder.CreateBitCast(YValue, IntTy);
409 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
410
411 // Clear sign bit in X
412 auto XInt = Builder.CreateBitCast(XValue, IntTy);
413 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
414
415 // Insert sign bit of Y into X
416 auto NewXInt = Builder.CreateOr(XInt, YSign);
417
418 // And cast back to floating-point
419 return Builder.CreateBitCast(NewXInt, Ty);
420 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100421}
422
David Neto22f144c2017-06-12 14:26:21 -0400423bool ReplaceOpenCLBuiltinPass::replaceRecip(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400424
Kévin Petite8edce32019-04-10 14:23:32 +0100425 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400426 "_Z10half_recipf", "_Z12native_recipf", "_Z10half_recipDv2_f",
427 "_Z12native_recipDv2_f", "_Z10half_recipDv3_f", "_Z12native_recipDv3_f",
428 "_Z10half_recipDv4_f", "_Z12native_recipDv4_f",
429 };
430
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400431 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100432 // Recip has one arg.
433 auto Arg = CI->getOperand(0);
434 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
435 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
436 });
David Neto22f144c2017-06-12 14:26:21 -0400437}
438
439bool ReplaceOpenCLBuiltinPass::replaceDivide(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400440
Kévin Petite8edce32019-04-10 14:23:32 +0100441 std::vector<const char *> Names = {
David Neto22f144c2017-06-12 14:26:21 -0400442 "_Z11half_divideff", "_Z13native_divideff",
443 "_Z11half_divideDv2_fS_", "_Z13native_divideDv2_fS_",
444 "_Z11half_divideDv3_fS_", "_Z13native_divideDv3_fS_",
445 "_Z11half_divideDv4_fS_", "_Z13native_divideDv4_fS_",
446 };
447
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400448 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100449 auto Op0 = CI->getOperand(0);
450 auto Op1 = CI->getOperand(1);
451 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
452 });
David Neto22f144c2017-06-12 14:26:21 -0400453}
454
Kévin Petit1329a002019-06-15 05:54:05 +0100455bool ReplaceOpenCLBuiltinPass::replaceDot(Module &M) {
456
457 std::vector<const char *> Names = {
458 "_Z3dotff",
459 "_Z3dotDv2_fS_",
460 "_Z3dotDv3_fS_",
461 "_Z3dotDv4_fS_",
462 };
463
464 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
465 auto Op0 = CI->getOperand(0);
466 auto Op1 = CI->getOperand(1);
467
468 Value *V;
469 if (Op0->getType()->isVectorTy()) {
470 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
471 CI->getType(), {Op0, Op1});
472 } else {
473 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
474 }
475
476 return V;
477 });
478}
479
David Neto22f144c2017-06-12 14:26:21 -0400480bool ReplaceOpenCLBuiltinPass::replaceExp10(Module &M) {
481 bool Changed = false;
482
483 const std::map<const char *, const char *> Map = {
484 {"_Z5exp10f", "_Z3expf"},
485 {"_Z10half_exp10f", "_Z8half_expf"},
486 {"_Z12native_exp10f", "_Z10native_expf"},
487 {"_Z5exp10Dv2_f", "_Z3expDv2_f"},
488 {"_Z10half_exp10Dv2_f", "_Z8half_expDv2_f"},
489 {"_Z12native_exp10Dv2_f", "_Z10native_expDv2_f"},
490 {"_Z5exp10Dv3_f", "_Z3expDv3_f"},
491 {"_Z10half_exp10Dv3_f", "_Z8half_expDv3_f"},
492 {"_Z12native_exp10Dv3_f", "_Z10native_expDv3_f"},
493 {"_Z5exp10Dv4_f", "_Z3expDv4_f"},
494 {"_Z10half_exp10Dv4_f", "_Z8half_expDv4_f"},
495 {"_Z12native_exp10Dv4_f", "_Z10native_expDv4_f"}};
496
497 for (auto Pair : Map) {
498 // If we find a function with the matching name.
499 if (auto F = M.getFunction(Pair.first)) {
500 SmallVector<Instruction *, 4> ToRemoves;
501
502 // Walk the users of the function.
503 for (auto &U : F->uses()) {
504 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
505 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
506
507 auto Arg = CI->getOperand(0);
508
509 // Constant of the natural log of 10 (ln(10)).
510 const double Ln10 =
511 2.302585092994045684017991454684364207601101488628772976033;
512
513 auto Mul = BinaryOperator::Create(
514 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "",
515 CI);
516
517 auto NewCI = CallInst::Create(NewF, Mul, "", CI);
518
519 CI->replaceAllUsesWith(NewCI);
520
521 // Lastly, remember to remove the user.
522 ToRemoves.push_back(CI);
523 }
524 }
525
526 Changed = !ToRemoves.empty();
527
528 // And cleanup the calls we don't use anymore.
529 for (auto V : ToRemoves) {
530 V->eraseFromParent();
531 }
532
533 // And remove the function we don't need either too.
534 F->eraseFromParent();
535 }
536 }
537
538 return Changed;
539}
540
Kévin Petit0644a9c2019-06-20 21:08:46 +0100541bool ReplaceOpenCLBuiltinPass::replaceFmod(Module &M) {
542
543 std::vector<const char *> Names = {
544 "_Z4fmodff",
545 "_Z4fmodDv2_fS_",
546 "_Z4fmodDv3_fS_",
547 "_Z4fmodDv4_fS_",
548 };
549
550 // OpenCL fmod(x,y) is x - y * trunc(x/y)
551 // The sign for a non-zero result is taken from x.
552 // (Try an example.)
553 // So translate to FRem
554 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
555 auto Op0 = CI->getOperand(0);
556 auto Op1 = CI->getOperand(1);
557 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
558 });
559}
560
David Neto22f144c2017-06-12 14:26:21 -0400561bool ReplaceOpenCLBuiltinPass::replaceLog10(Module &M) {
562 bool Changed = false;
563
564 const std::map<const char *, const char *> Map = {
565 {"_Z5log10f", "_Z3logf"},
566 {"_Z10half_log10f", "_Z8half_logf"},
567 {"_Z12native_log10f", "_Z10native_logf"},
568 {"_Z5log10Dv2_f", "_Z3logDv2_f"},
569 {"_Z10half_log10Dv2_f", "_Z8half_logDv2_f"},
570 {"_Z12native_log10Dv2_f", "_Z10native_logDv2_f"},
571 {"_Z5log10Dv3_f", "_Z3logDv3_f"},
572 {"_Z10half_log10Dv3_f", "_Z8half_logDv3_f"},
573 {"_Z12native_log10Dv3_f", "_Z10native_logDv3_f"},
574 {"_Z5log10Dv4_f", "_Z3logDv4_f"},
575 {"_Z10half_log10Dv4_f", "_Z8half_logDv4_f"},
576 {"_Z12native_log10Dv4_f", "_Z10native_logDv4_f"}};
577
578 for (auto Pair : Map) {
579 // If we find a function with the matching name.
580 if (auto F = M.getFunction(Pair.first)) {
581 SmallVector<Instruction *, 4> ToRemoves;
582
583 // Walk the users of the function.
584 for (auto &U : F->uses()) {
585 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
586 auto NewF = M.getOrInsertFunction(Pair.second, F->getFunctionType());
587
588 auto Arg = CI->getOperand(0);
589
590 // Constant of the reciprocal of the natural log of 10 (ln(10)).
591 const double Ln10 =
592 0.434294481903251827651128918916605082294397005803666566114;
593
594 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
595
596 auto Mul = BinaryOperator::Create(
597 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), NewCI,
598 "", CI);
599
600 CI->replaceAllUsesWith(Mul);
601
602 // Lastly, remember to remove the user.
603 ToRemoves.push_back(CI);
604 }
605 }
606
607 Changed = !ToRemoves.empty();
608
609 // And cleanup the calls we don't use anymore.
610 for (auto V : ToRemoves) {
611 V->eraseFromParent();
612 }
613
614 // And remove the function we don't need either too.
615 F->eraseFromParent();
616 }
617 }
618
619 return Changed;
620}
621
622bool ReplaceOpenCLBuiltinPass::replaceBarrier(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -0400623
624 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
625
alan-bakerb60b1fc2019-12-13 19:09:38 -0500626 const std::vector<const char *> Names = {"_Z7barrierj",
627 // OpenCL 2.0 alias for barrier.
628 "_Z18work_group_barrierj"};
David Neto22f144c2017-06-12 14:26:21 -0400629
Kévin Petitc4643922019-06-17 19:32:05 +0100630 return replaceCallsWithValue(M, Names, [](CallInst *CI) {
631 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400632
Kévin Petitc4643922019-06-17 19:32:05 +0100633 // We need to map the OpenCL constants to the SPIR-V equivalents.
634 const auto LocalMemFence =
635 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
636 const auto GlobalMemFence =
637 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
638 const auto ConstantSequentiallyConsistent = ConstantInt::get(
639 Arg->getType(), spv::MemorySemanticsSequentiallyConsistentMask);
640 const auto ConstantScopeDevice =
641 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
642 const auto ConstantScopeWorkgroup =
643 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400644
Kévin Petitc4643922019-06-17 19:32:05 +0100645 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
646 const auto LocalMemFenceMask =
647 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
648 const auto WorkgroupShiftAmount =
649 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
650 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
651 Instruction::Shl, LocalMemFenceMask,
652 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400653
Kévin Petitc4643922019-06-17 19:32:05 +0100654 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
655 const auto GlobalMemFenceMask =
656 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
657 const auto UniformShiftAmount =
658 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
659 const auto MemorySemanticsUniform = BinaryOperator::Create(
660 Instruction::Shl, GlobalMemFenceMask,
661 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400662
Kévin Petitc4643922019-06-17 19:32:05 +0100663 // And combine the above together, also adding in
664 // MemorySemanticsSequentiallyConsistentMask.
665 auto MemorySemantics =
666 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
667 ConstantSequentiallyConsistent, "", CI);
668 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
669 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400670
Kévin Petitc4643922019-06-17 19:32:05 +0100671 // For Memory Scope if we used CLK_GLOBAL_MEM_FENCE, we need to use
672 // Device Scope, otherwise Workgroup Scope.
673 const auto Cmp =
674 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, GlobalMemFenceMask,
675 GlobalMemFence, "", CI);
676 const auto MemoryScope = SelectInst::Create(Cmp, ConstantScopeDevice,
677 ConstantScopeWorkgroup, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400678
Kévin Petitc4643922019-06-17 19:32:05 +0100679 // Lastly, the Execution Scope is always Workgroup Scope.
680 const auto ExecutionScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400681
Kévin Petitc4643922019-06-17 19:32:05 +0100682 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
683 {Attribute::NoDuplicate}, CI->getType(),
684 {ExecutionScope, MemoryScope, MemorySemantics});
685 });
David Neto22f144c2017-06-12 14:26:21 -0400686}
687
688bool ReplaceOpenCLBuiltinPass::replaceMemFence(Module &M) {
689 bool Changed = false;
690
691 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
692
Kévin Petitc4643922019-06-17 19:32:05 +0100693 using Tuple = std::tuple<spv::Op, unsigned>;
Neil Henning39672102017-09-29 14:33:13 +0100694 const std::map<const char *, Tuple> Map = {
Kévin Petitc4643922019-06-17 19:32:05 +0100695 {"_Z9mem_fencej", Tuple(spv::OpMemoryBarrier,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400696 spv::MemorySemanticsSequentiallyConsistentMask)},
Neil Henning39672102017-09-29 14:33:13 +0100697 {"_Z14read_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100698 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsAcquireMask)},
Neil Henning39672102017-09-29 14:33:13 +0100699 {"_Z15write_mem_fencej",
Kévin Petitc4643922019-06-17 19:32:05 +0100700 Tuple(spv::OpMemoryBarrier, spv::MemorySemanticsReleaseMask)}};
David Neto22f144c2017-06-12 14:26:21 -0400701
702 for (auto Pair : Map) {
703 // If we find a function with the matching name.
704 if (auto F = M.getFunction(Pair.first)) {
705 SmallVector<Instruction *, 4> ToRemoves;
706
707 // Walk the users of the function.
708 for (auto &U : F->uses()) {
709 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -0400710
711 auto Arg = CI->getOperand(0);
712
713 // We need to map the OpenCL constants to the SPIR-V equivalents.
714 const auto LocalMemFence =
715 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
716 const auto GlobalMemFence =
717 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
718 const auto ConstantMemorySemantics =
Neil Henning39672102017-09-29 14:33:13 +0100719 ConstantInt::get(Arg->getType(), std::get<1>(Pair.second));
David Neto22f144c2017-06-12 14:26:21 -0400720 const auto ConstantScopeDevice =
721 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
722
723 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
724 const auto LocalMemFenceMask = BinaryOperator::Create(
725 Instruction::And, LocalMemFence, Arg, "", CI);
726 const auto WorkgroupShiftAmount =
727 clz(spv::MemorySemanticsWorkgroupMemoryMask) -
728 clz(CLK_LOCAL_MEM_FENCE);
729 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
730 Instruction::Shl, LocalMemFenceMask,
731 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
732
733 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
734 const auto GlobalMemFenceMask = BinaryOperator::Create(
735 Instruction::And, GlobalMemFence, Arg, "", CI);
736 const auto UniformShiftAmount =
737 clz(spv::MemorySemanticsUniformMemoryMask) -
738 clz(CLK_GLOBAL_MEM_FENCE);
739 const auto MemorySemanticsUniform = BinaryOperator::Create(
740 Instruction::Shl, GlobalMemFenceMask,
741 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
742
743 // And combine the above together, also adding in
744 // MemorySemanticsSequentiallyConsistentMask.
745 auto MemorySemantics =
746 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
747 ConstantMemorySemantics, "", CI);
748 MemorySemantics = BinaryOperator::Create(
749 Instruction::Or, MemorySemantics, MemorySemanticsUniform, "", CI);
750
751 // Memory Scope is always device.
752 const auto MemoryScope = ConstantScopeDevice;
753
Kévin Petitc4643922019-06-17 19:32:05 +0100754 const auto SPIRVOp = std::get<0>(Pair.second);
755 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {}, CI->getType(),
756 {MemoryScope, MemorySemantics});
David Neto22f144c2017-06-12 14:26:21 -0400757
758 CI->replaceAllUsesWith(NewCI);
759
760 // Lastly, remember to remove the user.
761 ToRemoves.push_back(CI);
762 }
763 }
764
765 Changed = !ToRemoves.empty();
766
767 // And cleanup the calls we don't use anymore.
768 for (auto V : ToRemoves) {
769 V->eraseFromParent();
770 }
771
772 // And remove the function we don't need either too.
773 F->eraseFromParent();
774 }
775 }
776
777 return Changed;
778}
779
780bool ReplaceOpenCLBuiltinPass::replaceRelational(Module &M) {
781 bool Changed = false;
782
783 const std::map<const char *, std::pair<CmpInst::Predicate, int32_t>> Map = {
784 {"_Z7isequalff", {CmpInst::FCMP_OEQ, 1}},
785 {"_Z7isequalDv2_fS_", {CmpInst::FCMP_OEQ, -1}},
786 {"_Z7isequalDv3_fS_", {CmpInst::FCMP_OEQ, -1}},
787 {"_Z7isequalDv4_fS_", {CmpInst::FCMP_OEQ, -1}},
788 {"_Z9isgreaterff", {CmpInst::FCMP_OGT, 1}},
789 {"_Z9isgreaterDv2_fS_", {CmpInst::FCMP_OGT, -1}},
790 {"_Z9isgreaterDv3_fS_", {CmpInst::FCMP_OGT, -1}},
791 {"_Z9isgreaterDv4_fS_", {CmpInst::FCMP_OGT, -1}},
792 {"_Z14isgreaterequalff", {CmpInst::FCMP_OGE, 1}},
793 {"_Z14isgreaterequalDv2_fS_", {CmpInst::FCMP_OGE, -1}},
794 {"_Z14isgreaterequalDv3_fS_", {CmpInst::FCMP_OGE, -1}},
795 {"_Z14isgreaterequalDv4_fS_", {CmpInst::FCMP_OGE, -1}},
796 {"_Z6islessff", {CmpInst::FCMP_OLT, 1}},
797 {"_Z6islessDv2_fS_", {CmpInst::FCMP_OLT, -1}},
798 {"_Z6islessDv3_fS_", {CmpInst::FCMP_OLT, -1}},
799 {"_Z6islessDv4_fS_", {CmpInst::FCMP_OLT, -1}},
800 {"_Z11islessequalff", {CmpInst::FCMP_OLE, 1}},
801 {"_Z11islessequalDv2_fS_", {CmpInst::FCMP_OLE, -1}},
802 {"_Z11islessequalDv3_fS_", {CmpInst::FCMP_OLE, -1}},
803 {"_Z11islessequalDv4_fS_", {CmpInst::FCMP_OLE, -1}},
804 {"_Z10isnotequalff", {CmpInst::FCMP_ONE, 1}},
805 {"_Z10isnotequalDv2_fS_", {CmpInst::FCMP_ONE, -1}},
806 {"_Z10isnotequalDv3_fS_", {CmpInst::FCMP_ONE, -1}},
807 {"_Z10isnotequalDv4_fS_", {CmpInst::FCMP_ONE, -1}},
808 };
809
810 for (auto Pair : Map) {
811 // If we find a function with the matching name.
812 if (auto F = M.getFunction(Pair.first)) {
813 SmallVector<Instruction *, 4> ToRemoves;
814
815 // Walk the users of the function.
816 for (auto &U : F->uses()) {
817 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
818 // The predicate to use in the CmpInst.
819 auto Predicate = Pair.second.first;
820
821 // The value to return for true.
822 auto TrueValue =
823 ConstantInt::getSigned(CI->getType(), Pair.second.second);
824
825 // The value to return for false.
826 auto FalseValue = Constant::getNullValue(CI->getType());
827
828 auto Arg1 = CI->getOperand(0);
829 auto Arg2 = CI->getOperand(1);
830
831 const auto Cmp =
832 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
833
834 const auto Select =
835 SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
836
837 CI->replaceAllUsesWith(Select);
838
839 // Lastly, remember to remove the user.
840 ToRemoves.push_back(CI);
841 }
842 }
843
844 Changed = !ToRemoves.empty();
845
846 // And cleanup the calls we don't use anymore.
847 for (auto V : ToRemoves) {
848 V->eraseFromParent();
849 }
850
851 // And remove the function we don't need either too.
852 F->eraseFromParent();
853 }
854 }
855
856 return Changed;
857}
858
859bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Module &M) {
860 bool Changed = false;
861
Kévin Petitff03aee2019-06-12 19:39:03 +0100862 const std::map<const char *, std::pair<spv::Op, int32_t>> Map = {
863 {"_Z5isinff", {spv::OpIsInf, 1}},
864 {"_Z5isinfDv2_f", {spv::OpIsInf, -1}},
865 {"_Z5isinfDv3_f", {spv::OpIsInf, -1}},
866 {"_Z5isinfDv4_f", {spv::OpIsInf, -1}},
867 {"_Z5isnanf", {spv::OpIsNan, 1}},
868 {"_Z5isnanDv2_f", {spv::OpIsNan, -1}},
869 {"_Z5isnanDv3_f", {spv::OpIsNan, -1}},
870 {"_Z5isnanDv4_f", {spv::OpIsNan, -1}},
David Neto22f144c2017-06-12 14:26:21 -0400871 };
872
873 for (auto Pair : Map) {
874 // If we find a function with the matching name.
875 if (auto F = M.getFunction(Pair.first)) {
876 SmallVector<Instruction *, 4> ToRemoves;
877
878 // Walk the users of the function.
879 for (auto &U : F->uses()) {
880 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
881 const auto CITy = CI->getType();
882
Kévin Petitff03aee2019-06-12 19:39:03 +0100883 auto SPIRVOp = Pair.second.first;
David Neto22f144c2017-06-12 14:26:21 -0400884
885 // The value to return for true.
886 auto TrueValue = ConstantInt::getSigned(CITy, Pair.second.second);
887
888 // The value to return for false.
889 auto FalseValue = Constant::getNullValue(CITy);
890
891 const auto CorrespondingBoolTy = getBoolOrBoolVectorTy(
892 M.getContext(),
893 CITy->isVectorTy() ? CITy->getVectorNumElements() : 1);
894
Kévin Petitff03aee2019-06-12 19:39:03 +0100895 auto NewCI =
896 clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
897 CorrespondingBoolTy, {CI->getOperand(0)});
David Neto22f144c2017-06-12 14:26:21 -0400898
899 const auto Select =
900 SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
901
902 CI->replaceAllUsesWith(Select);
903
904 // Lastly, remember to remove the user.
905 ToRemoves.push_back(CI);
906 }
907 }
908
909 Changed = !ToRemoves.empty();
910
911 // And cleanup the calls we don't use anymore.
912 for (auto V : ToRemoves) {
913 V->eraseFromParent();
914 }
915
916 // And remove the function we don't need either too.
917 F->eraseFromParent();
918 }
919 }
920
921 return Changed;
922}
923
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100924bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Module &M) {
925 std::vector<const char *> Names = {
926 "_Z8isfiniteh", "_Z8isfiniteDv2_h", "_Z8isfiniteDv3_h",
927 "_Z8isfiniteDv4_h", "_Z8isfinitef", "_Z8isfiniteDv2_f",
928 "_Z8isfiniteDv3_f", "_Z8isfiniteDv4_f", "_Z8isfinited",
929 "_Z8isfiniteDv2_d", "_Z8isfiniteDv3_d", "_Z8isfiniteDv4_d",
930 };
931
932 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
933 auto &C = M.getContext();
934 auto Val = CI->getOperand(0);
935 auto ValTy = Val->getType();
936 auto RetTy = CI->getType();
937
938 // Get a suitable integer type to represent the number
939 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
940
941 // Create Mask
942 auto ScalarSize = ValTy->getScalarSizeInBits();
943 Value *InfMask;
944 switch (ScalarSize) {
945 case 16:
946 InfMask = ConstantInt::get(IntTy, 0x7C00U);
947 break;
948 case 32:
949 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
950 break;
951 case 64:
952 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
953 break;
954 default:
955 llvm_unreachable("Unsupported floating-point type");
956 }
957
958 IRBuilder<> Builder(CI);
959
960 // Bitcast to int
961 auto ValInt = Builder.CreateBitCast(Val, IntTy);
962
963 // Mask and compare
964 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
965 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
966
967 auto RetFalse = ConstantInt::get(RetTy, 0);
968 Value *RetTrue;
969 if (ValTy->isVectorTy()) {
970 RetTrue = ConstantInt::getSigned(RetTy, -1);
971 } else {
972 RetTrue = ConstantInt::get(RetTy, 1);
973 }
974 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
975 });
976}
977
David Neto22f144c2017-06-12 14:26:21 -0400978bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Module &M) {
979 bool Changed = false;
980
Kévin Petitff03aee2019-06-12 19:39:03 +0100981 const std::map<const char *, spv::Op> Map = {
Kévin Petitfd27cca2018-10-31 13:00:17 +0000982 // all
Kévin Petitff03aee2019-06-12 19:39:03 +0100983 {"_Z3allc", spv::OpNop},
984 {"_Z3allDv2_c", spv::OpAll},
985 {"_Z3allDv3_c", spv::OpAll},
986 {"_Z3allDv4_c", spv::OpAll},
987 {"_Z3alls", spv::OpNop},
988 {"_Z3allDv2_s", spv::OpAll},
989 {"_Z3allDv3_s", spv::OpAll},
990 {"_Z3allDv4_s", spv::OpAll},
991 {"_Z3alli", spv::OpNop},
992 {"_Z3allDv2_i", spv::OpAll},
993 {"_Z3allDv3_i", spv::OpAll},
994 {"_Z3allDv4_i", spv::OpAll},
995 {"_Z3alll", spv::OpNop},
996 {"_Z3allDv2_l", spv::OpAll},
997 {"_Z3allDv3_l", spv::OpAll},
998 {"_Z3allDv4_l", spv::OpAll},
Kévin Petitfd27cca2018-10-31 13:00:17 +0000999
1000 // any
Kévin Petitff03aee2019-06-12 19:39:03 +01001001 {"_Z3anyc", spv::OpNop},
1002 {"_Z3anyDv2_c", spv::OpAny},
1003 {"_Z3anyDv3_c", spv::OpAny},
1004 {"_Z3anyDv4_c", spv::OpAny},
1005 {"_Z3anys", spv::OpNop},
1006 {"_Z3anyDv2_s", spv::OpAny},
1007 {"_Z3anyDv3_s", spv::OpAny},
1008 {"_Z3anyDv4_s", spv::OpAny},
1009 {"_Z3anyi", spv::OpNop},
1010 {"_Z3anyDv2_i", spv::OpAny},
1011 {"_Z3anyDv3_i", spv::OpAny},
1012 {"_Z3anyDv4_i", spv::OpAny},
1013 {"_Z3anyl", spv::OpNop},
1014 {"_Z3anyDv2_l", spv::OpAny},
1015 {"_Z3anyDv3_l", spv::OpAny},
1016 {"_Z3anyDv4_l", spv::OpAny},
David Neto22f144c2017-06-12 14:26:21 -04001017 };
1018
1019 for (auto Pair : Map) {
1020 // If we find a function with the matching name.
1021 if (auto F = M.getFunction(Pair.first)) {
1022 SmallVector<Instruction *, 4> ToRemoves;
1023
1024 // Walk the users of the function.
1025 for (auto &U : F->uses()) {
1026 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04001027
1028 auto Arg = CI->getOperand(0);
1029
1030 Value *V;
1031
Kévin Petitfd27cca2018-10-31 13:00:17 +00001032 // If the argument is a 32-bit int, just use a shift
1033 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
1034 V = BinaryOperator::Create(Instruction::LShr, Arg,
1035 ConstantInt::get(Arg->getType(), 31), "",
1036 CI);
1037 } else {
David Neto22f144c2017-06-12 14:26:21 -04001038 // The value for zero to compare against.
1039 const auto ZeroValue = Constant::getNullValue(Arg->getType());
1040
David Neto22f144c2017-06-12 14:26:21 -04001041 // The value to return for true.
1042 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
1043
1044 // The value to return for false.
1045 const auto FalseValue = Constant::getNullValue(CI->getType());
1046
Kévin Petitfd27cca2018-10-31 13:00:17 +00001047 const auto Cmp = CmpInst::Create(
1048 Instruction::ICmp, CmpInst::ICMP_SLT, Arg, ZeroValue, "", CI);
1049
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001050 Value *SelectSource;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001051
1052 // If we have a function to call, call it!
Kévin Petitff03aee2019-06-12 19:39:03 +01001053 const auto SPIRVOp = Pair.second;
Kévin Petitfd27cca2018-10-31 13:00:17 +00001054
Kévin Petitff03aee2019-06-12 19:39:03 +01001055 if (SPIRVOp != spv::OpNop) {
Kévin Petitfd27cca2018-10-31 13:00:17 +00001056
Kévin Petitff03aee2019-06-12 19:39:03 +01001057 const auto BoolTy = Type::getInt1Ty(M.getContext());
Kévin Petitfd27cca2018-10-31 13:00:17 +00001058
Kévin Petitff03aee2019-06-12 19:39:03 +01001059 const auto NewCI = clspv::InsertSPIRVOp(
1060 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
Kévin Petitfd27cca2018-10-31 13:00:17 +00001061 SelectSource = NewCI;
1062
1063 } else {
1064 SelectSource = Cmp;
1065 }
1066
1067 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001068 }
1069
1070 CI->replaceAllUsesWith(V);
1071
1072 // Lastly, remember to remove the user.
1073 ToRemoves.push_back(CI);
1074 }
1075 }
1076
1077 Changed = !ToRemoves.empty();
1078
1079 // And cleanup the calls we don't use anymore.
1080 for (auto V : ToRemoves) {
1081 V->eraseFromParent();
1082 }
1083
1084 // And remove the function we don't need either too.
1085 F->eraseFromParent();
1086 }
1087 }
1088
1089 return Changed;
1090}
1091
Kévin Petitbf0036c2019-03-06 13:57:10 +00001092bool ReplaceOpenCLBuiltinPass::replaceUpsample(Module &M) {
1093 bool Changed = false;
1094
1095 for (auto const &SymVal : M.getValueSymbolTable()) {
1096 // Skip symbols whose name doesn't match
1097 if (!SymVal.getKey().startswith("_Z8upsample")) {
1098 continue;
1099 }
1100 // Is there a function going by that name?
1101 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1102
1103 SmallVector<Instruction *, 4> ToRemoves;
1104
1105 // Walk the users of the function.
1106 for (auto &U : F->uses()) {
1107 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1108
1109 // Get arguments
1110 auto HiValue = CI->getOperand(0);
1111 auto LoValue = CI->getOperand(1);
1112
1113 // Don't touch overloads that aren't in OpenCL C
1114 auto HiType = HiValue->getType();
1115 auto LoType = LoValue->getType();
1116
1117 if (HiType != LoType) {
1118 continue;
1119 }
1120
1121 if (!HiType->isIntOrIntVectorTy()) {
1122 continue;
1123 }
1124
1125 if (HiType->getScalarSizeInBits() * 2 !=
1126 CI->getType()->getScalarSizeInBits()) {
1127 continue;
1128 }
1129
1130 if ((HiType->getScalarSizeInBits() != 8) &&
1131 (HiType->getScalarSizeInBits() != 16) &&
1132 (HiType->getScalarSizeInBits() != 32)) {
1133 continue;
1134 }
1135
1136 if (HiType->isVectorTy()) {
1137 if ((HiType->getVectorNumElements() != 2) &&
1138 (HiType->getVectorNumElements() != 3) &&
1139 (HiType->getVectorNumElements() != 4) &&
1140 (HiType->getVectorNumElements() != 8) &&
1141 (HiType->getVectorNumElements() != 16)) {
1142 continue;
1143 }
1144 }
1145
1146 // Convert both operands to the result type
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001147 auto HiCast =
1148 CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
1149 auto LoCast =
1150 CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
Kévin Petitbf0036c2019-03-06 13:57:10 +00001151
1152 // Shift high operand
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001153 auto ShiftAmount =
1154 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
Kévin Petitbf0036c2019-03-06 13:57:10 +00001155 auto HiShifted = BinaryOperator::Create(Instruction::Shl, HiCast,
1156 ShiftAmount, "", CI);
1157
1158 // OR both results
1159 Value *V = BinaryOperator::Create(Instruction::Or, HiShifted, LoCast,
1160 "", CI);
1161
1162 // Replace call with the expression
1163 CI->replaceAllUsesWith(V);
1164
1165 // Lastly, remember to remove the user.
1166 ToRemoves.push_back(CI);
1167 }
1168 }
1169
1170 Changed = !ToRemoves.empty();
1171
1172 // And cleanup the calls we don't use anymore.
1173 for (auto V : ToRemoves) {
1174 V->eraseFromParent();
1175 }
1176
1177 // And remove the function we don't need either too.
1178 F->eraseFromParent();
1179 }
1180 }
1181
1182 return Changed;
1183}
1184
Kévin Petitd44eef52019-03-08 13:22:14 +00001185bool ReplaceOpenCLBuiltinPass::replaceRotate(Module &M) {
1186 bool Changed = false;
1187
1188 for (auto const &SymVal : M.getValueSymbolTable()) {
1189 // Skip symbols whose name doesn't match
1190 if (!SymVal.getKey().startswith("_Z6rotate")) {
1191 continue;
1192 }
1193 // Is there a function going by that name?
1194 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1195
1196 SmallVector<Instruction *, 4> ToRemoves;
1197
1198 // Walk the users of the function.
1199 for (auto &U : F->uses()) {
1200 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1201
1202 // Get arguments
1203 auto SrcValue = CI->getOperand(0);
1204 auto RotAmount = CI->getOperand(1);
1205
1206 // Don't touch overloads that aren't in OpenCL C
1207 auto SrcType = SrcValue->getType();
1208 auto RotType = RotAmount->getType();
1209
1210 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1211 continue;
1212 }
1213
1214 if (!SrcType->isIntOrIntVectorTy()) {
1215 continue;
1216 }
1217
1218 if ((SrcType->getScalarSizeInBits() != 8) &&
1219 (SrcType->getScalarSizeInBits() != 16) &&
1220 (SrcType->getScalarSizeInBits() != 32) &&
1221 (SrcType->getScalarSizeInBits() != 64)) {
1222 continue;
1223 }
1224
1225 if (SrcType->isVectorTy()) {
1226 if ((SrcType->getVectorNumElements() != 2) &&
1227 (SrcType->getVectorNumElements() != 3) &&
1228 (SrcType->getVectorNumElements() != 4) &&
1229 (SrcType->getVectorNumElements() != 8) &&
1230 (SrcType->getVectorNumElements() != 16)) {
1231 continue;
1232 }
1233 }
1234
1235 // The approach used is to shift the top bits down, the bottom bits up
1236 // and OR the two shifted values.
1237
1238 // The rotation amount is to be treated modulo the element size.
1239 // Since SPIR-V shift ops don't support this, let's apply the
1240 // modulo ahead of shifting. The element size is always a power of
1241 // two so we can just AND with a mask.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001242 auto ModMask =
1243 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
Kévin Petitd44eef52019-03-08 13:22:14 +00001244 RotAmount = BinaryOperator::Create(Instruction::And, RotAmount,
1245 ModMask, "", CI);
1246
1247 // Let's calc the amount by which to shift top bits down
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001248 auto ScalarSize =
1249 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
Kévin Petitd44eef52019-03-08 13:22:14 +00001250 auto DownAmount = BinaryOperator::Create(Instruction::Sub, ScalarSize,
1251 RotAmount, "", CI);
1252
1253 // Now shift the bottom bits up and the top bits down
1254 auto LoRotated = BinaryOperator::Create(Instruction::Shl, SrcValue,
1255 RotAmount, "", CI);
1256 auto HiRotated = BinaryOperator::Create(Instruction::LShr, SrcValue,
1257 DownAmount, "", CI);
1258
1259 // Finally OR the two shifted values
1260 Value *V = BinaryOperator::Create(Instruction::Or, LoRotated,
1261 HiRotated, "", CI);
1262
1263 // Replace call with the expression
1264 CI->replaceAllUsesWith(V);
1265
1266 // Lastly, remember to remove the user.
1267 ToRemoves.push_back(CI);
1268 }
1269 }
1270
1271 Changed = !ToRemoves.empty();
1272
1273 // And cleanup the calls we don't use anymore.
1274 for (auto V : ToRemoves) {
1275 V->eraseFromParent();
1276 }
1277
1278 // And remove the function we don't need either too.
1279 F->eraseFromParent();
1280 }
1281 }
1282
1283 return Changed;
1284}
1285
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001286bool ReplaceOpenCLBuiltinPass::replaceConvert(Module &M) {
1287 bool Changed = false;
1288
1289 for (auto const &SymVal : M.getValueSymbolTable()) {
1290
1291 // Skip symbols whose name obviously doesn't match
1292 if (!SymVal.getKey().contains("convert_")) {
1293 continue;
1294 }
1295
1296 // Is there a function going by that name?
1297 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1298
1299 // Get info from the mangled name
1300 FunctionInfo finfo;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001301 bool parsed = FunctionInfo::getFromMangledNameCheck(F->getName(), &finfo);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001302
1303 // All functions of interest are handled by our mangled name parser
1304 if (!parsed) {
1305 continue;
1306 }
1307
1308 // Move on if this isn't a call to convert_
1309 if (!finfo.name.startswith("convert_")) {
1310 continue;
1311 }
1312
1313 // Extract the destination type from the function name
1314 StringRef DstTypeName = finfo.name;
1315 DstTypeName.consume_front("convert_");
1316
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001317 auto DstSignedNess =
1318 StringSwitch<ArgTypeInfo::SignedNess>(DstTypeName)
1319 .StartsWith("char", ArgTypeInfo::SignedNess::Signed)
1320 .StartsWith("short", ArgTypeInfo::SignedNess::Signed)
1321 .StartsWith("int", ArgTypeInfo::SignedNess::Signed)
1322 .StartsWith("long", ArgTypeInfo::SignedNess::Signed)
1323 .StartsWith("uchar", ArgTypeInfo::SignedNess::Unsigned)
1324 .StartsWith("ushort", ArgTypeInfo::SignedNess::Unsigned)
1325 .StartsWith("uint", ArgTypeInfo::SignedNess::Unsigned)
1326 .StartsWith("ulong", ArgTypeInfo::SignedNess::Unsigned)
1327 .Default(ArgTypeInfo::SignedNess::None);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001328
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001329 bool DstIsSigned = DstSignedNess == ArgTypeInfo::SignedNess::Signed;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001330 bool SrcIsSigned = finfo.isArgSigned(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001331
1332 SmallVector<Instruction *, 4> ToRemoves;
1333
1334 // Walk the users of the function.
1335 for (auto &U : F->uses()) {
1336 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1337
1338 // Get arguments
1339 auto SrcValue = CI->getOperand(0);
1340
1341 // Don't touch overloads that aren't in OpenCL C
1342 auto SrcType = SrcValue->getType();
1343 auto DstType = CI->getType();
1344
1345 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1346 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1347 continue;
1348 }
1349
1350 if (SrcType->isVectorTy()) {
1351
1352 if (SrcType->getVectorNumElements() !=
1353 DstType->getVectorNumElements()) {
1354 continue;
1355 }
1356
1357 if ((SrcType->getVectorNumElements() != 2) &&
1358 (SrcType->getVectorNumElements() != 3) &&
1359 (SrcType->getVectorNumElements() != 4) &&
1360 (SrcType->getVectorNumElements() != 8) &&
1361 (SrcType->getVectorNumElements() != 16)) {
1362 continue;
1363 }
1364 }
1365
1366 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1367 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1368
1369 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1370 bool DstIsInt = DstType->isIntOrIntVectorTy();
1371
1372 Value *V;
alan-baker4092cc52020-01-15 16:42:57 -05001373 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1374 // Unnecessary cast operation.
1375 V = SrcValue;
1376 } else if (SrcIsFloat && DstIsFloat) {
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001377 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1378 } else if (SrcIsFloat && DstIsInt) {
1379 if (DstIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001380 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "",
1381 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001382 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001383 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "",
1384 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001385 }
1386 } else if (SrcIsInt && DstIsFloat) {
1387 if (SrcIsSigned) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001388 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "",
1389 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001390 } else {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001391 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "",
1392 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001393 }
1394 } else if (SrcIsInt && DstIsInt) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001395 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "",
1396 CI);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001397 } else {
1398 // Not something we're supposed to handle, just move on
1399 continue;
1400 }
1401
1402 // Replace call with the expression
1403 CI->replaceAllUsesWith(V);
1404
1405 // Lastly, remember to remove the user.
1406 ToRemoves.push_back(CI);
1407 }
1408 }
1409
1410 Changed = !ToRemoves.empty();
1411
1412 // And cleanup the calls we don't use anymore.
1413 for (auto V : ToRemoves) {
1414 V->eraseFromParent();
1415 }
1416
1417 // And remove the function we don't need either too.
1418 F->eraseFromParent();
1419 }
1420 }
1421
1422 return Changed;
1423}
1424
Kévin Petit8a560882019-03-21 15:24:34 +00001425bool ReplaceOpenCLBuiltinPass::replaceMulHiMadHi(Module &M) {
1426 bool Changed = false;
1427
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001428 SmallVector<Function *, 4> FnWorklist;
Kévin Petit8a560882019-03-21 15:24:34 +00001429
Kévin Petit617a76d2019-04-04 13:54:16 +01001430 for (auto const &SymVal : M.getValueSymbolTable()) {
Kévin Petit8a560882019-03-21 15:24:34 +00001431 bool isMad = SymVal.getKey().startswith("_Z6mad_hi");
1432 bool isMul = SymVal.getKey().startswith("_Z6mul_hi");
1433
1434 // Skip symbols whose name doesn't match
1435 if (!isMad && !isMul) {
1436 continue;
1437 }
1438
1439 // Is there a function going by that name?
1440 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001441 FnWorklist.push_back(F);
Kévin Petit8a560882019-03-21 15:24:34 +00001442 }
1443 }
1444
Kévin Petit617a76d2019-04-04 13:54:16 +01001445 for (auto F : FnWorklist) {
1446 SmallVector<Instruction *, 4> ToRemoves;
1447
1448 bool isMad = F->getName().startswith("_Z6mad_hi");
1449 // Walk the users of the function.
1450 for (auto &U : F->uses()) {
1451 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1452
1453 // Get arguments
1454 auto AValue = CI->getOperand(0);
1455 auto BValue = CI->getOperand(1);
1456 auto CValue = CI->getOperand(2);
1457
1458 // Don't touch overloads that aren't in OpenCL C
1459 auto AType = AValue->getType();
1460 auto BType = BValue->getType();
1461 auto CType = CValue->getType();
1462
1463 if ((AType != BType) || (CI->getType() != AType) ||
1464 (isMad && (AType != CType))) {
1465 continue;
1466 }
1467
1468 if (!AType->isIntOrIntVectorTy()) {
1469 continue;
1470 }
1471
1472 if ((AType->getScalarSizeInBits() != 8) &&
1473 (AType->getScalarSizeInBits() != 16) &&
1474 (AType->getScalarSizeInBits() != 32) &&
1475 (AType->getScalarSizeInBits() != 64)) {
1476 continue;
1477 }
1478
1479 if (AType->isVectorTy()) {
1480 if ((AType->getVectorNumElements() != 2) &&
1481 (AType->getVectorNumElements() != 3) &&
1482 (AType->getVectorNumElements() != 4) &&
1483 (AType->getVectorNumElements() != 8) &&
1484 (AType->getVectorNumElements() != 16)) {
1485 continue;
1486 }
1487 }
1488
1489 // Get infos from the mangled OpenCL built-in function name
Kévin Petit91bc72e2019-04-08 15:17:46 +01001490 auto finfo = FunctionInfo::getFromMangledName(F->getName());
Kévin Petit617a76d2019-04-04 13:54:16 +01001491
1492 // Select the appropriate signed/unsigned SPIR-V op
1493 spv::Op opcode;
Kévin Petit91bc72e2019-04-08 15:17:46 +01001494 if (finfo.isArgSigned(0)) {
Kévin Petit617a76d2019-04-04 13:54:16 +01001495 opcode = spv::OpSMulExtended;
1496 } else {
1497 opcode = spv::OpUMulExtended;
1498 }
1499
1500 // Our SPIR-V op returns a struct, create a type for it
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001501 SmallVector<Type *, 2> TwoValueType = {AType, AType};
Kévin Petit617a76d2019-04-04 13:54:16 +01001502 auto ExMulRetType = StructType::create(TwoValueType);
1503
1504 // Call the SPIR-V op
1505 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1506 ExMulRetType, {AValue, BValue});
1507
1508 // Get the high part of the result
1509 unsigned Idxs[] = {1};
1510 Value *V = ExtractValueInst::Create(Call, Idxs, "", CI);
1511
1512 // If we're handling a mad_hi, add the third argument to the result
1513 if (isMad) {
1514 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
1515 }
1516
1517 // Replace call with the expression
1518 CI->replaceAllUsesWith(V);
1519
1520 // Lastly, remember to remove the user.
1521 ToRemoves.push_back(CI);
1522 }
1523 }
1524
1525 Changed = !ToRemoves.empty();
1526
1527 // And cleanup the calls we don't use anymore.
1528 for (auto V : ToRemoves) {
1529 V->eraseFromParent();
1530 }
1531
1532 // And remove the function we don't need either too.
1533 F->eraseFromParent();
1534 }
1535
Kévin Petit8a560882019-03-21 15:24:34 +00001536 return Changed;
1537}
1538
Kévin Petitf5b78a22018-10-25 14:32:17 +00001539bool ReplaceOpenCLBuiltinPass::replaceSelect(Module &M) {
1540 bool Changed = false;
1541
1542 for (auto const &SymVal : M.getValueSymbolTable()) {
1543 // Skip symbols whose name doesn't match
1544 if (!SymVal.getKey().startswith("_Z6select")) {
1545 continue;
1546 }
1547 // Is there a function going by that name?
1548 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1549
1550 SmallVector<Instruction *, 4> ToRemoves;
1551
1552 // Walk the users of the function.
1553 for (auto &U : F->uses()) {
1554 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1555
1556 // Get arguments
1557 auto FalseValue = CI->getOperand(0);
1558 auto TrueValue = CI->getOperand(1);
1559 auto PredicateValue = CI->getOperand(2);
1560
1561 // Don't touch overloads that aren't in OpenCL C
1562 auto FalseType = FalseValue->getType();
1563 auto TrueType = TrueValue->getType();
1564 auto PredicateType = PredicateValue->getType();
1565
1566 if (FalseType != TrueType) {
1567 continue;
1568 }
1569
1570 if (!PredicateType->isIntOrIntVectorTy()) {
1571 continue;
1572 }
1573
1574 if (!FalseType->isIntOrIntVectorTy() &&
1575 !FalseType->getScalarType()->isFloatingPointTy()) {
1576 continue;
1577 }
1578
1579 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1580 continue;
1581 }
1582
1583 if (FalseType->getScalarSizeInBits() !=
1584 PredicateType->getScalarSizeInBits()) {
1585 continue;
1586 }
1587
1588 if (FalseType->isVectorTy()) {
1589 if (FalseType->getVectorNumElements() !=
1590 PredicateType->getVectorNumElements()) {
1591 continue;
1592 }
1593
1594 if ((FalseType->getVectorNumElements() != 2) &&
1595 (FalseType->getVectorNumElements() != 3) &&
1596 (FalseType->getVectorNumElements() != 4) &&
1597 (FalseType->getVectorNumElements() != 8) &&
1598 (FalseType->getVectorNumElements() != 16)) {
1599 continue;
1600 }
1601 }
1602
1603 // Create constant
1604 const auto ZeroValue = Constant::getNullValue(PredicateType);
1605
1606 // Scalar and vector are to be treated differently
1607 CmpInst::Predicate Pred;
1608 if (PredicateType->isVectorTy()) {
1609 Pred = CmpInst::ICMP_SLT;
1610 } else {
1611 Pred = CmpInst::ICMP_NE;
1612 }
1613
1614 // Create comparison instruction
1615 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1616 ZeroValue, "", CI);
1617
1618 // Create select
1619 Value *V = SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1620
1621 // Replace call with the selection
1622 CI->replaceAllUsesWith(V);
1623
1624 // Lastly, remember to remove the user.
1625 ToRemoves.push_back(CI);
1626 }
1627 }
1628
1629 Changed = !ToRemoves.empty();
1630
1631 // And cleanup the calls we don't use anymore.
1632 for (auto V : ToRemoves) {
1633 V->eraseFromParent();
1634 }
1635
1636 // And remove the function we don't need either too.
1637 F->eraseFromParent();
1638 }
1639 }
1640
1641 return Changed;
1642}
1643
Kévin Petite7d0cce2018-10-31 12:38:56 +00001644bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Module &M) {
1645 bool Changed = false;
1646
1647 for (auto const &SymVal : M.getValueSymbolTable()) {
1648 // Skip symbols whose name doesn't match
1649 if (!SymVal.getKey().startswith("_Z9bitselect")) {
1650 continue;
1651 }
1652 // Is there a function going by that name?
1653 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
1654
1655 SmallVector<Instruction *, 4> ToRemoves;
1656
1657 // Walk the users of the function.
1658 for (auto &U : F->uses()) {
1659 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1660
1661 if (CI->getNumOperands() != 4) {
1662 continue;
1663 }
1664
1665 // Get arguments
1666 auto FalseValue = CI->getOperand(0);
1667 auto TrueValue = CI->getOperand(1);
1668 auto PredicateValue = CI->getOperand(2);
1669
1670 // Don't touch overloads that aren't in OpenCL C
1671 auto FalseType = FalseValue->getType();
1672 auto TrueType = TrueValue->getType();
1673 auto PredicateType = PredicateValue->getType();
1674
1675 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1676 continue;
1677 }
1678
1679 if (TrueType->isVectorTy()) {
1680 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1681 !TrueType->getScalarType()->isIntegerTy()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001682 continue;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001683 }
1684 if ((TrueType->getVectorNumElements() != 2) &&
1685 (TrueType->getVectorNumElements() != 3) &&
1686 (TrueType->getVectorNumElements() != 4) &&
1687 (TrueType->getVectorNumElements() != 8) &&
1688 (TrueType->getVectorNumElements() != 16)) {
1689 continue;
1690 }
1691 }
1692
1693 // Remember the type of the operands
1694 auto OpType = TrueType;
1695
1696 // The actual bit selection will always be done on an integer type,
1697 // declare it here
1698 Type *BitType;
1699
1700 // If the operands are float, then bitcast them to int
1701 if (OpType->getScalarType()->isFloatingPointTy()) {
1702
1703 // First create the new type
Kévin Petitfdfa92e2019-09-25 14:20:58 +01001704 BitType = getIntOrIntVectorTyForCast(M.getContext(), OpType);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001705
1706 // Then bitcast all operands
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001707 PredicateValue =
1708 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1709 FalseValue =
1710 CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1711 TrueValue =
1712 CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001713
1714 } else {
1715 // The operands have an integer type, use it directly
1716 BitType = OpType;
1717 }
1718
1719 // All the operands are now always integers
1720 // implement as (c & b) | (~c & a)
1721
1722 // Create our negated predicate value
1723 auto AllOnes = Constant::getAllOnesValue(BitType);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001724 auto NotPredicateValue = BinaryOperator::Create(
1725 Instruction::Xor, PredicateValue, AllOnes, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001726
1727 // Then put everything together
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001728 auto BitsFalse = BinaryOperator::Create(
1729 Instruction::And, NotPredicateValue, FalseValue, "", CI);
1730 auto BitsTrue = BinaryOperator::Create(
1731 Instruction::And, PredicateValue, TrueValue, "", CI);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001732
1733 Value *V = BinaryOperator::Create(Instruction::Or, BitsFalse,
1734 BitsTrue, "", CI);
1735
1736 // If we were dealing with a floating point type, we must bitcast
1737 // the result back to that
1738 if (OpType->getScalarType()->isFloatingPointTy()) {
1739 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1740 }
1741
1742 // Replace call with our new code
1743 CI->replaceAllUsesWith(V);
1744
1745 // Lastly, remember to remove the user.
1746 ToRemoves.push_back(CI);
1747 }
1748 }
1749
1750 Changed = !ToRemoves.empty();
1751
1752 // And cleanup the calls we don't use anymore.
1753 for (auto V : ToRemoves) {
1754 V->eraseFromParent();
1755 }
1756
1757 // And remove the function we don't need either too.
1758 F->eraseFromParent();
1759 }
1760 }
1761
1762 return Changed;
1763}
1764
Kévin Petit6b0a9532018-10-30 20:00:39 +00001765bool ReplaceOpenCLBuiltinPass::replaceStepSmoothStep(Module &M) {
1766 bool Changed = false;
1767
1768 const std::map<const char *, const char *> Map = {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001769 {"_Z4stepfDv2_f", "_Z4stepDv2_fS_"},
1770 {"_Z4stepfDv3_f", "_Z4stepDv3_fS_"},
1771 {"_Z4stepfDv4_f", "_Z4stepDv4_fS_"},
1772 {"_Z10smoothstepffDv2_f", "_Z10smoothstepDv2_fS_S_"},
1773 {"_Z10smoothstepffDv3_f", "_Z10smoothstepDv3_fS_S_"},
1774 {"_Z10smoothstepffDv4_f", "_Z10smoothstepDv4_fS_S_"},
Kévin Petit6b0a9532018-10-30 20:00:39 +00001775 };
1776
1777 for (auto Pair : Map) {
1778 // If we find a function with the matching name.
1779 if (auto F = M.getFunction(Pair.first)) {
1780 SmallVector<Instruction *, 4> ToRemoves;
1781
1782 // Walk the users of the function.
1783 for (auto &U : F->uses()) {
1784 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1785
1786 auto ReplacementFn = Pair.second;
1787
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001788 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
Kévin Petit6b0a9532018-10-30 20:00:39 +00001789 Value *VectorArg;
1790
1791 // First figure out which function we're dealing with
1792 if (F->getName().startswith("_Z10smoothstep")) {
1793 ArgsToSplat.push_back(CI->getOperand(1));
1794 VectorArg = CI->getOperand(2);
1795 } else {
1796 VectorArg = CI->getOperand(1);
1797 }
1798
1799 // Splat arguments that need to be
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001800 SmallVector<Value *, 2> SplatArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001801 auto VecType = VectorArg->getType();
1802
1803 for (auto arg : ArgsToSplat) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001804 Value *NewVectorArg = UndefValue::get(VecType);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001805 for (auto i = 0; i < VecType->getVectorNumElements(); i++) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001806 auto index =
1807 ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1808 NewVectorArg =
1809 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001810 }
1811 SplatArgs.push_back(NewVectorArg);
1812 }
1813
1814 // Replace the call with the vector/vector flavour
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001815 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1816 const auto NewFType =
1817 FunctionType::get(CI->getType(), NewArgTypes, false);
Kévin Petit6b0a9532018-10-30 20:00:39 +00001818
1819 const auto NewF = M.getOrInsertFunction(ReplacementFn, NewFType);
1820
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001821 SmallVector<Value *, 3> NewArgs;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001822 for (auto arg : SplatArgs) {
1823 NewArgs.push_back(arg);
1824 }
1825 NewArgs.push_back(VectorArg);
1826
1827 const auto NewCI = CallInst::Create(NewF, NewArgs, "", CI);
1828
1829 CI->replaceAllUsesWith(NewCI);
1830
1831 // Lastly, remember to remove the user.
1832 ToRemoves.push_back(CI);
1833 }
1834 }
1835
1836 Changed = !ToRemoves.empty();
1837
1838 // And cleanup the calls we don't use anymore.
1839 for (auto V : ToRemoves) {
1840 V->eraseFromParent();
1841 }
1842
1843 // And remove the function we don't need either too.
1844 F->eraseFromParent();
1845 }
1846 }
1847
1848 return Changed;
1849}
1850
David Neto22f144c2017-06-12 14:26:21 -04001851bool ReplaceOpenCLBuiltinPass::replaceSignbit(Module &M) {
1852 bool Changed = false;
1853
1854 const std::map<const char *, Instruction::BinaryOps> Map = {
1855 {"_Z7signbitf", Instruction::LShr},
1856 {"_Z7signbitDv2_f", Instruction::AShr},
1857 {"_Z7signbitDv3_f", Instruction::AShr},
1858 {"_Z7signbitDv4_f", Instruction::AShr},
1859 };
1860
1861 for (auto Pair : Map) {
1862 // If we find a function with the matching name.
1863 if (auto F = M.getFunction(Pair.first)) {
1864 SmallVector<Instruction *, 4> ToRemoves;
1865
1866 // Walk the users of the function.
1867 for (auto &U : F->uses()) {
1868 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1869 auto Arg = CI->getOperand(0);
1870
1871 auto Bitcast =
1872 CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
1873
1874 auto Shr = BinaryOperator::Create(Pair.second, Bitcast,
1875 ConstantInt::get(CI->getType(), 31),
1876 "", CI);
1877
1878 CI->replaceAllUsesWith(Shr);
1879
1880 // Lastly, remember to remove the user.
1881 ToRemoves.push_back(CI);
1882 }
1883 }
1884
1885 Changed = !ToRemoves.empty();
1886
1887 // And cleanup the calls we don't use anymore.
1888 for (auto V : ToRemoves) {
1889 V->eraseFromParent();
1890 }
1891
1892 // And remove the function we don't need either too.
1893 F->eraseFromParent();
1894 }
1895 }
1896
1897 return Changed;
1898}
1899
1900bool ReplaceOpenCLBuiltinPass::replaceMadandMad24andMul24(Module &M) {
1901 bool Changed = false;
1902
1903 const std::map<const char *,
1904 std::pair<Instruction::BinaryOps, Instruction::BinaryOps>>
1905 Map = {
1906 {"_Z3madfff", {Instruction::FMul, Instruction::FAdd}},
1907 {"_Z3madDv2_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1908 {"_Z3madDv3_fS_S_", {Instruction::FMul, Instruction::FAdd}},
1909 {"_Z3madDv4_fS_S_", {Instruction::FMul, Instruction::FAdd}},
alan-bakerc21a65e2020-01-15 14:19:39 -05001910 {"_Z3madDhDhDh", {Instruction::FMul, Instruction::FAdd}},
1911 {"_Z3madDv2_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
1912 {"_Z3madDv3_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
1913 {"_Z3madDv4_DhS_S_", {Instruction::FMul, Instruction::FAdd}},
David Neto22f144c2017-06-12 14:26:21 -04001914 {"_Z5mad24iii", {Instruction::Mul, Instruction::Add}},
1915 {"_Z5mad24Dv2_iS_S_", {Instruction::Mul, Instruction::Add}},
1916 {"_Z5mad24Dv3_iS_S_", {Instruction::Mul, Instruction::Add}},
1917 {"_Z5mad24Dv4_iS_S_", {Instruction::Mul, Instruction::Add}},
1918 {"_Z5mad24jjj", {Instruction::Mul, Instruction::Add}},
1919 {"_Z5mad24Dv2_jS_S_", {Instruction::Mul, Instruction::Add}},
1920 {"_Z5mad24Dv3_jS_S_", {Instruction::Mul, Instruction::Add}},
1921 {"_Z5mad24Dv4_jS_S_", {Instruction::Mul, Instruction::Add}},
1922 {"_Z5mul24ii", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1923 {"_Z5mul24Dv2_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1924 {"_Z5mul24Dv3_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1925 {"_Z5mul24Dv4_iS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1926 {"_Z5mul24jj", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1927 {"_Z5mul24Dv2_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1928 {"_Z5mul24Dv3_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1929 {"_Z5mul24Dv4_jS_", {Instruction::Mul, Instruction::BinaryOpsEnd}},
1930 };
1931
1932 for (auto Pair : Map) {
1933 // If we find a function with the matching name.
1934 if (auto F = M.getFunction(Pair.first)) {
1935 SmallVector<Instruction *, 4> ToRemoves;
1936
1937 // Walk the users of the function.
1938 for (auto &U : F->uses()) {
1939 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
1940 // The multiply instruction to use.
1941 auto MulInst = Pair.second.first;
1942
1943 // The add instruction to use.
1944 auto AddInst = Pair.second.second;
1945
1946 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
1947
1948 auto I = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1949 CI->getArgOperand(1), "", CI);
1950
1951 if (Instruction::BinaryOpsEnd != AddInst) {
1952 I = BinaryOperator::Create(AddInst, I, CI->getArgOperand(2), "",
1953 CI);
1954 }
1955
1956 CI->replaceAllUsesWith(I);
1957
1958 // Lastly, remember to remove the user.
1959 ToRemoves.push_back(CI);
1960 }
1961 }
1962
1963 Changed = !ToRemoves.empty();
1964
1965 // And cleanup the calls we don't use anymore.
1966 for (auto V : ToRemoves) {
1967 V->eraseFromParent();
1968 }
1969
1970 // And remove the function we don't need either too.
1971 F->eraseFromParent();
1972 }
1973 }
1974
1975 return Changed;
1976}
1977
Derek Chowcfd368b2017-10-19 20:58:45 -07001978bool ReplaceOpenCLBuiltinPass::replaceVstore(Module &M) {
1979 bool Changed = false;
1980
alan-bakerf795f392019-06-11 18:24:34 -04001981 for (auto const &SymVal : M.getValueSymbolTable()) {
1982 if (!SymVal.getKey().contains("vstore"))
1983 continue;
1984 if (SymVal.getKey().contains("vstore_"))
1985 continue;
1986 if (SymVal.getKey().contains("vstorea"))
1987 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07001988
alan-bakerf795f392019-06-11 18:24:34 -04001989 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07001990 SmallVector<Instruction *, 4> ToRemoves;
1991
alan-bakerf795f392019-06-11 18:24:34 -04001992 auto fname = F->getName();
1993 if (!fname.consume_front("_Z"))
1994 continue;
1995 size_t name_len;
1996 if (fname.consumeInteger(10, name_len))
1997 continue;
1998 std::string name = fname.take_front(name_len);
1999
2000 bool ok = StringSwitch<bool>(name)
2001 .Case("vstore2", true)
2002 .Case("vstore3", true)
2003 .Case("vstore4", true)
2004 .Case("vstore8", true)
2005 .Case("vstore16", true)
2006 .Default(false);
2007 if (!ok)
2008 continue;
2009
Derek Chowcfd368b2017-10-19 20:58:45 -07002010 for (auto &U : F->uses()) {
2011 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002012 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07002013
alan-bakerf795f392019-06-11 18:24:34 -04002014 auto data_type = data->getType();
2015 if (!data_type->isVectorTy())
2016 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002017
alan-bakerf795f392019-06-11 18:24:34 -04002018 auto elems = data_type->getVectorNumElements();
2019 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2020 elems != 16)
2021 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002022
alan-bakerf795f392019-06-11 18:24:34 -04002023 auto offset = CI->getOperand(1);
2024 auto ptr = CI->getOperand(2);
2025 auto ptr_type = ptr->getType();
2026 auto pointee_type = ptr_type->getPointerElementType();
2027 if (pointee_type != data_type->getVectorElementType())
2028 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002029
alan-bakerf795f392019-06-11 18:24:34 -04002030 // Avoid pointer casts. Instead generate the correct number of stores
2031 // and rely on drivers to coalesce appropriately.
2032 IRBuilder<> builder(CI);
2033 auto elems_const = builder.getInt32(elems);
2034 auto adjust = builder.CreateMul(offset, elems_const);
2035 for (auto i = 0; i < elems; ++i) {
2036 auto idx = builder.getInt32(i);
2037 auto add = builder.CreateAdd(adjust, idx);
2038 auto gep = builder.CreateGEP(ptr, add);
2039 auto extract = builder.CreateExtractElement(data, i);
2040 auto store = builder.CreateStore(extract, gep);
2041 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002042
Derek Chowcfd368b2017-10-19 20:58:45 -07002043 ToRemoves.push_back(CI);
2044 }
2045 }
2046
2047 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002048 for (auto V : ToRemoves) {
2049 V->eraseFromParent();
2050 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002051 F->eraseFromParent();
2052 }
2053 }
2054
2055 return Changed;
2056}
2057
2058bool ReplaceOpenCLBuiltinPass::replaceVload(Module &M) {
2059 bool Changed = false;
2060
alan-bakerf795f392019-06-11 18:24:34 -04002061 for (auto const &SymVal : M.getValueSymbolTable()) {
2062 if (!SymVal.getKey().contains("vload"))
2063 continue;
2064 if (SymVal.getKey().contains("vload_"))
2065 continue;
2066 if (SymVal.getKey().contains("vloada"))
2067 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002068
alan-bakerf795f392019-06-11 18:24:34 -04002069 if (auto F = dyn_cast<Function>(SymVal.getValue())) {
Derek Chowcfd368b2017-10-19 20:58:45 -07002070 SmallVector<Instruction *, 4> ToRemoves;
2071
alan-bakerf795f392019-06-11 18:24:34 -04002072 auto fname = F->getName();
2073 if (!fname.consume_front("_Z"))
2074 continue;
2075 size_t name_len;
2076 if (fname.consumeInteger(10, name_len))
2077 continue;
2078 std::string name = fname.take_front(name_len);
2079
2080 bool ok = StringSwitch<bool>(name)
2081 .Case("vload2", true)
2082 .Case("vload3", true)
2083 .Case("vload4", true)
2084 .Case("vload8", true)
2085 .Case("vload16", true)
2086 .Default(false);
2087 if (!ok)
2088 continue;
2089
Derek Chowcfd368b2017-10-19 20:58:45 -07002090 for (auto &U : F->uses()) {
2091 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
alan-bakerf795f392019-06-11 18:24:34 -04002092 auto ret_type = F->getReturnType();
2093 if (!ret_type->isVectorTy())
2094 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002095
alan-bakerf795f392019-06-11 18:24:34 -04002096 auto elems = ret_type->getVectorNumElements();
2097 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 &&
2098 elems != 16)
2099 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002100
alan-bakerf795f392019-06-11 18:24:34 -04002101 auto offset = CI->getOperand(0);
2102 auto ptr = CI->getOperand(1);
2103 auto ptr_type = ptr->getType();
2104 auto pointee_type = ptr_type->getPointerElementType();
2105 if (pointee_type != ret_type->getVectorElementType())
2106 continue;
Derek Chowcfd368b2017-10-19 20:58:45 -07002107
alan-bakerf795f392019-06-11 18:24:34 -04002108 // Avoid pointer casts. Instead generate the correct number of loads
2109 // and rely on drivers to coalesce appropriately.
2110 IRBuilder<> builder(CI);
2111 auto elems_const = builder.getInt32(elems);
2112 Value *insert = UndefValue::get(ret_type);
2113 auto adjust = builder.CreateMul(offset, elems_const);
2114 for (auto i = 0; i < elems; ++i) {
2115 auto idx = builder.getInt32(i);
2116 auto add = builder.CreateAdd(adjust, idx);
2117 auto gep = builder.CreateGEP(ptr, add);
2118 auto load = builder.CreateLoad(gep);
2119 insert = builder.CreateInsertElement(insert, load, i);
2120 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002121
alan-bakerf795f392019-06-11 18:24:34 -04002122 CI->replaceAllUsesWith(insert);
Derek Chowcfd368b2017-10-19 20:58:45 -07002123 ToRemoves.push_back(CI);
2124 }
2125 }
2126
2127 Changed = !ToRemoves.empty();
Derek Chowcfd368b2017-10-19 20:58:45 -07002128 for (auto V : ToRemoves) {
2129 V->eraseFromParent();
2130 }
Derek Chowcfd368b2017-10-19 20:58:45 -07002131 F->eraseFromParent();
Derek Chowcfd368b2017-10-19 20:58:45 -07002132 }
2133 }
2134
2135 return Changed;
2136}
2137
David Neto22f144c2017-06-12 14:26:21 -04002138bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Module &M) {
2139 bool Changed = false;
2140
2141 const std::vector<const char *> Map = {"_Z10vload_halfjPU3AS1KDh",
2142 "_Z10vload_halfjPU3AS2KDh"};
2143
2144 for (auto Name : Map) {
2145 // If we find a function with the matching name.
2146 if (auto F = M.getFunction(Name)) {
2147 SmallVector<Instruction *, 4> ToRemoves;
2148
2149 // Walk the users of the function.
2150 for (auto &U : F->uses()) {
2151 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2152 // The index argument from vload_half.
2153 auto Arg0 = CI->getOperand(0);
2154
2155 // The pointer argument from vload_half.
2156 auto Arg1 = CI->getOperand(1);
2157
David Neto22f144c2017-06-12 14:26:21 -04002158 auto IntTy = Type::getInt32Ty(M.getContext());
2159 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
David Neto22f144c2017-06-12 14:26:21 -04002160 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
2161
David Neto22f144c2017-06-12 14:26:21 -04002162 // Our intrinsic to unpack a float2 from an int.
2163 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
2164
2165 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
2166
David Neto482550a2018-03-24 05:21:07 -07002167 if (clspv::Option::F16BitStorage()) {
David Netoac825b82017-05-30 12:49:01 -04002168 auto ShortTy = Type::getInt16Ty(M.getContext());
2169 auto ShortPointerTy = PointerType::get(
2170 ShortTy, Arg1->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002171
David Netoac825b82017-05-30 12:49:01 -04002172 // Cast the half* pointer to short*.
2173 auto Cast =
2174 CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002175
David Netoac825b82017-05-30 12:49:01 -04002176 // Index into the correct address of the casted pointer.
2177 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
2178
2179 // Load from the short* we casted to.
2180 auto Load = new LoadInst(Index, "", CI);
2181
2182 // ZExt the short -> int.
2183 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
2184
2185 // Get our float2.
2186 auto Call = CallInst::Create(NewF, ZExt, "", CI);
2187
2188 // Extract out the bottom element which is our float result.
2189 auto Extract = ExtractElementInst::Create(
2190 Call, ConstantInt::get(IntTy, 0), "", CI);
2191
2192 CI->replaceAllUsesWith(Extract);
2193 } else {
2194 // Assume the pointer argument points to storage aligned to 32bits
2195 // or more.
2196 // TODO(dneto): Do more analysis to make sure this is true?
2197 //
2198 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
2199 // with:
2200 //
2201 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
2202 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
2203 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
2204 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
2205 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
2206 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
2207 // x float> %converted, %index_is_odd32
2208
2209 auto IntPointerTy = PointerType::get(
2210 IntTy, Arg1->getType()->getPointerAddressSpace());
2211
David Neto973e6a82017-05-30 13:48:18 -04002212 // Cast the base pointer to int*.
David Netoac825b82017-05-30 12:49:01 -04002213 // In a valid call (according to assumptions), this should get
David Neto973e6a82017-05-30 13:48:18 -04002214 // optimized away in the simplify GEP pass.
David Netoac825b82017-05-30 12:49:01 -04002215 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
2216
2217 auto One = ConstantInt::get(IntTy, 1);
2218 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
2219 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
2220
2221 // Index into the correct address of the casted pointer.
2222 auto Ptr =
2223 GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
2224
2225 // Load from the int* we casted to.
2226 auto Load = new LoadInst(Ptr, "", CI);
2227
2228 // Get our float2.
2229 auto Call = CallInst::Create(NewF, Load, "", CI);
2230
2231 // Extract out the float result, where the element number is
2232 // determined by whether the original index was even or odd.
2233 auto Extract = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
2234
2235 CI->replaceAllUsesWith(Extract);
2236 }
David Neto22f144c2017-06-12 14:26:21 -04002237
2238 // Lastly, remember to remove the user.
2239 ToRemoves.push_back(CI);
2240 }
2241 }
2242
2243 Changed = !ToRemoves.empty();
2244
2245 // And cleanup the calls we don't use anymore.
2246 for (auto V : ToRemoves) {
2247 V->eraseFromParent();
2248 }
2249
2250 // And remove the function we don't need either too.
2251 F->eraseFromParent();
2252 }
2253 }
2254
2255 return Changed;
2256}
2257
2258bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002259
Kévin Petite8edce32019-04-10 14:23:32 +01002260 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002261 "_Z11vload_half2jPU3AS1KDh",
2262 "_Z12vloada_half2jPU3AS1KDh", // vloada_half2 global
2263 "_Z11vload_half2jPU3AS2KDh",
2264 "_Z12vloada_half2jPU3AS2KDh", // vloada_half2 constant
2265 };
David Neto22f144c2017-06-12 14:26:21 -04002266
Kévin Petite8edce32019-04-10 14:23:32 +01002267 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2268 // The index argument from vload_half.
2269 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002270
Kévin Petite8edce32019-04-10 14:23:32 +01002271 // The pointer argument from vload_half.
2272 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002273
Kévin Petite8edce32019-04-10 14:23:32 +01002274 auto IntTy = Type::getInt32Ty(M.getContext());
2275 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002276 auto NewPointerTy =
2277 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002278 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002279
Kévin Petite8edce32019-04-10 14:23:32 +01002280 // Cast the half* pointer to int*.
2281 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002282
Kévin Petite8edce32019-04-10 14:23:32 +01002283 // Index into the correct address of the casted pointer.
2284 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002285
Kévin Petite8edce32019-04-10 14:23:32 +01002286 // Load from the int* we casted to.
2287 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002288
Kévin Petite8edce32019-04-10 14:23:32 +01002289 // Our intrinsic to unpack a float2 from an int.
2290 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002291
Kévin Petite8edce32019-04-10 14:23:32 +01002292 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002293
Kévin Petite8edce32019-04-10 14:23:32 +01002294 // Get our float2.
2295 return CallInst::Create(NewF, Load, "", CI);
2296 });
David Neto22f144c2017-06-12 14:26:21 -04002297}
2298
2299bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002300
Kévin Petite8edce32019-04-10 14:23:32 +01002301 const std::vector<const char *> Names = {
David Neto556c7e62018-06-08 13:45:55 -07002302 "_Z11vload_half4jPU3AS1KDh",
2303 "_Z12vloada_half4jPU3AS1KDh",
2304 "_Z11vload_half4jPU3AS2KDh",
2305 "_Z12vloada_half4jPU3AS2KDh",
2306 };
David Neto22f144c2017-06-12 14:26:21 -04002307
Kévin Petite8edce32019-04-10 14:23:32 +01002308 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2309 // The index argument from vload_half.
2310 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002311
Kévin Petite8edce32019-04-10 14:23:32 +01002312 // The pointer argument from vload_half.
2313 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002314
Kévin Petite8edce32019-04-10 14:23:32 +01002315 auto IntTy = Type::getInt32Ty(M.getContext());
2316 auto Int2Ty = VectorType::get(IntTy, 2);
2317 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002318 auto NewPointerTy =
2319 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002320 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04002321
Kévin Petite8edce32019-04-10 14:23:32 +01002322 // Cast the half* pointer to int2*.
2323 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002324
Kévin Petite8edce32019-04-10 14:23:32 +01002325 // Index into the correct address of the casted pointer.
2326 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002327
Kévin Petite8edce32019-04-10 14:23:32 +01002328 // Load from the int2* we casted to.
2329 auto Load = new LoadInst(Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002330
Kévin Petite8edce32019-04-10 14:23:32 +01002331 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002332 auto X =
2333 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2334 auto Y =
2335 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002336
Kévin Petite8edce32019-04-10 14:23:32 +01002337 // Our intrinsic to unpack a float2 from an int.
2338 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002339
Kévin Petite8edce32019-04-10 14:23:32 +01002340 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002341
Kévin Petite8edce32019-04-10 14:23:32 +01002342 // Get the lower (x & y) components of our final float4.
2343 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002344
Kévin Petite8edce32019-04-10 14:23:32 +01002345 // Get the higher (z & w) components of our final float4.
2346 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002347
Kévin Petite8edce32019-04-10 14:23:32 +01002348 Constant *ShuffleMask[4] = {
2349 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2350 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002351
Kévin Petite8edce32019-04-10 14:23:32 +01002352 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002353 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2354 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002355 });
David Neto22f144c2017-06-12 14:26:21 -04002356}
2357
David Neto6ad93232018-06-07 15:42:58 -07002358bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002359
2360 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
2361 //
2362 // %u = load i32 %ptr
2363 // %fxy = call <2 x float> Unpack2xHalf(u)
2364 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002365 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002366 "_Z20__clspv_vloada_half2jPU3AS1Kj", // global
2367 "_Z20__clspv_vloada_half2jPU3AS3Kj", // local
2368 "_Z20__clspv_vloada_half2jPKj", // private
2369 };
2370
Kévin Petite8edce32019-04-10 14:23:32 +01002371 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2372 auto Index = CI->getOperand(0);
2373 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002374
Kévin Petite8edce32019-04-10 14:23:32 +01002375 auto IntTy = Type::getInt32Ty(M.getContext());
2376 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2377 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002378
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002379 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002380 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002381
Kévin Petite8edce32019-04-10 14:23:32 +01002382 // Our intrinsic to unpack a float2 from an int.
2383 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002384
Kévin Petite8edce32019-04-10 14:23:32 +01002385 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002386
Kévin Petite8edce32019-04-10 14:23:32 +01002387 // Get our final float2.
2388 return CallInst::Create(NewF, Load, "", CI);
2389 });
David Neto6ad93232018-06-07 15:42:58 -07002390}
2391
2392bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Module &M) {
David Neto6ad93232018-06-07 15:42:58 -07002393
2394 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
2395 //
2396 // %u2 = load <2 x i32> %ptr
2397 // %u2xy = extractelement %u2, 0
2398 // %u2zw = extractelement %u2, 1
2399 // %fxy = call <2 x float> Unpack2xHalf(uint)
2400 // %fzw = call <2 x float> Unpack2xHalf(uint)
2401 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
Kévin Petite8edce32019-04-10 14:23:32 +01002402 const std::vector<const char *> Names = {
David Neto6ad93232018-06-07 15:42:58 -07002403 "_Z20__clspv_vloada_half4jPU3AS1KDv2_j", // global
2404 "_Z20__clspv_vloada_half4jPU3AS3KDv2_j", // local
2405 "_Z20__clspv_vloada_half4jPKDv2_j", // private
2406 };
2407
Kévin Petite8edce32019-04-10 14:23:32 +01002408 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2409 auto Index = CI->getOperand(0);
2410 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07002411
Kévin Petite8edce32019-04-10 14:23:32 +01002412 auto IntTy = Type::getInt32Ty(M.getContext());
2413 auto Int2Ty = VectorType::get(IntTy, 2);
2414 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2415 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07002416
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002417 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002418 auto Load = new LoadInst(IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002419
Kévin Petite8edce32019-04-10 14:23:32 +01002420 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002421 auto X =
2422 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
2423 auto Y =
2424 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002425
Kévin Petite8edce32019-04-10 14:23:32 +01002426 // Our intrinsic to unpack a float2 from an int.
2427 auto SPIRVIntrinsic = "spirv.unpack.v2f16";
David Neto6ad93232018-06-07 15:42:58 -07002428
Kévin Petite8edce32019-04-10 14:23:32 +01002429 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07002430
Kévin Petite8edce32019-04-10 14:23:32 +01002431 // Get the lower (x & y) components of our final float4.
2432 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002433
Kévin Petite8edce32019-04-10 14:23:32 +01002434 // Get the higher (z & w) components of our final float4.
2435 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07002436
Kévin Petite8edce32019-04-10 14:23:32 +01002437 Constant *ShuffleMask[4] = {
2438 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2439 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07002440
Kévin Petite8edce32019-04-10 14:23:32 +01002441 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002442 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
2443 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002444 });
David Neto6ad93232018-06-07 15:42:58 -07002445}
2446
David Neto22f144c2017-06-12 14:26:21 -04002447bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002448
Kévin Petite8edce32019-04-10 14:23:32 +01002449 const std::vector<const char *> Names = {"_Z11vstore_halffjPU3AS1Dh",
2450 "_Z15vstore_half_rtefjPU3AS1Dh",
2451 "_Z15vstore_half_rtzfjPU3AS1Dh"};
David Neto22f144c2017-06-12 14:26:21 -04002452
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002453 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002454 // The value to store.
2455 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002456
Kévin Petite8edce32019-04-10 14:23:32 +01002457 // The index argument from vstore_half.
2458 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002459
Kévin Petite8edce32019-04-10 14:23:32 +01002460 // The pointer argument from vstore_half.
2461 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002462
Kévin Petite8edce32019-04-10 14:23:32 +01002463 auto IntTy = Type::getInt32Ty(M.getContext());
2464 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
2465 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
2466 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04002467
Kévin Petite8edce32019-04-10 14:23:32 +01002468 // Our intrinsic to pack a float2 to an int.
2469 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002470
Kévin Petite8edce32019-04-10 14:23:32 +01002471 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002472
Kévin Petite8edce32019-04-10 14:23:32 +01002473 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002474 auto TempVec = InsertElementInst::Create(
2475 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002476
Kévin Petite8edce32019-04-10 14:23:32 +01002477 // Pack the float2 -> half2 (in an int).
2478 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002479
Kévin Petite8edce32019-04-10 14:23:32 +01002480 Value *Ret;
2481 if (clspv::Option::F16BitStorage()) {
2482 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002483 auto ShortPointerTy =
2484 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002485
Kévin Petite8edce32019-04-10 14:23:32 +01002486 // Truncate our i32 to an i16.
2487 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002488
Kévin Petite8edce32019-04-10 14:23:32 +01002489 // Cast the half* pointer to short*.
2490 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002491
Kévin Petite8edce32019-04-10 14:23:32 +01002492 // Index into the correct address of the casted pointer.
2493 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002494
Kévin Petite8edce32019-04-10 14:23:32 +01002495 // Store to the int* we casted to.
2496 Ret = new StoreInst(Trunc, Index, CI);
2497 } else {
2498 // We can only write to 32-bit aligned words.
2499 //
2500 // Assuming base is aligned to 32-bits, replace the equivalent of
2501 // vstore_half(value, index, base)
2502 // with:
2503 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
2504 // uint32_t write_to_upper_half = index & 1u;
2505 // uint32_t shift = write_to_upper_half << 4;
2506 //
2507 // // Pack the float value as a half number in bottom 16 bits
2508 // // of an i32.
2509 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
2510 //
2511 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
2512 // ^ ((packed & 0xffff) << shift)
2513 // // We only need relaxed consistency, but OpenCL 1.2 only has
2514 // // sequentially consistent atomics.
2515 // // TODO(dneto): Use relaxed consistency.
2516 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002517 auto IntPointerTy =
2518 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04002519
Kévin Petite8edce32019-04-10 14:23:32 +01002520 auto Four = ConstantInt::get(IntTy, 4);
2521 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04002522
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002523 auto IndexIsOdd =
2524 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002525 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002526 auto IndexIntoI32 =
2527 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
2528 auto BaseI32Ptr =
2529 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
2530 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
2531 "base_i32_ptr", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002532 auto CurrentValue = new LoadInst(OutPtr, "current_value", CI);
2533 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002534 auto MaskBitsToWrite =
2535 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
2536 auto MaskedCurrent = BinaryOperator::CreateAnd(
2537 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04002538
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002539 auto XLowerBits =
2540 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
2541 auto NewBitsToWrite =
2542 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
2543 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
2544 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04002545
Kévin Petite8edce32019-04-10 14:23:32 +01002546 // Generate the call to atomi_xor.
2547 SmallVector<Type *, 5> ParamTypes;
2548 // The pointer type.
2549 ParamTypes.push_back(IntPointerTy);
2550 // The Types for memory scope, semantics, and value.
2551 ParamTypes.push_back(IntTy);
2552 ParamTypes.push_back(IntTy);
2553 ParamTypes.push_back(IntTy);
2554 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
2555 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04002556
Kévin Petite8edce32019-04-10 14:23:32 +01002557 const auto ConstantScopeDevice =
2558 ConstantInt::get(IntTy, spv::ScopeDevice);
2559 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
2560 // (SPIR-V Workgroup).
2561 const auto AddrSpaceSemanticsBits =
2562 IntPointerTy->getPointerAddressSpace() == 1
2563 ? spv::MemorySemanticsUniformMemoryMask
2564 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04002565
Kévin Petite8edce32019-04-10 14:23:32 +01002566 // We're using relaxed consistency here.
2567 const auto ConstantMemorySemantics =
2568 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
2569 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04002570
Kévin Petite8edce32019-04-10 14:23:32 +01002571 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
2572 ConstantMemorySemantics, ValueToXor};
2573 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
2574 Ret = nullptr;
David Neto22f144c2017-06-12 14:26:21 -04002575 }
David Neto22f144c2017-06-12 14:26:21 -04002576
Kévin Petite8edce32019-04-10 14:23:32 +01002577 return Ret;
2578 });
David Neto22f144c2017-06-12 14:26:21 -04002579}
2580
2581bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002582
Kévin Petite8edce32019-04-10 14:23:32 +01002583 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002584 "_Z12vstore_half2Dv2_fjPU3AS1Dh",
2585 "_Z13vstorea_half2Dv2_fjPU3AS1Dh", // vstorea global
2586 "_Z13vstorea_half2Dv2_fjPU3AS3Dh", // vstorea local
2587 "_Z13vstorea_half2Dv2_fjPDh", // vstorea private
2588 "_Z16vstore_half2_rteDv2_fjPU3AS1Dh",
2589 "_Z17vstorea_half2_rteDv2_fjPU3AS1Dh", // vstorea global
2590 "_Z17vstorea_half2_rteDv2_fjPU3AS3Dh", // vstorea local
2591 "_Z17vstorea_half2_rteDv2_fjPDh", // vstorea private
2592 "_Z16vstore_half2_rtzDv2_fjPU3AS1Dh",
2593 "_Z17vstorea_half2_rtzDv2_fjPU3AS1Dh", // vstorea global
2594 "_Z17vstorea_half2_rtzDv2_fjPU3AS3Dh", // vstorea local
2595 "_Z17vstorea_half2_rtzDv2_fjPDh", // vstorea private
2596 };
David Neto22f144c2017-06-12 14:26:21 -04002597
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002598 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01002599 // The value to store.
2600 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002601
Kévin Petite8edce32019-04-10 14:23:32 +01002602 // The index argument from vstore_half.
2603 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002604
Kévin Petite8edce32019-04-10 14:23:32 +01002605 // The pointer argument from vstore_half.
2606 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002607
Kévin Petite8edce32019-04-10 14:23:32 +01002608 auto IntTy = Type::getInt32Ty(M.getContext());
2609 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002610 auto NewPointerTy =
2611 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002612 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002613
Kévin Petite8edce32019-04-10 14:23:32 +01002614 // Our intrinsic to pack a float2 to an int.
2615 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002616
Kévin Petite8edce32019-04-10 14:23:32 +01002617 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002618
Kévin Petite8edce32019-04-10 14:23:32 +01002619 // Turn the packed x & y into the final packing.
2620 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002621
Kévin Petite8edce32019-04-10 14:23:32 +01002622 // Cast the half* pointer to int*.
2623 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002624
Kévin Petite8edce32019-04-10 14:23:32 +01002625 // Index into the correct address of the casted pointer.
2626 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002627
Kévin Petite8edce32019-04-10 14:23:32 +01002628 // Store to the int* we casted to.
2629 return new StoreInst(X, Index, CI);
2630 });
David Neto22f144c2017-06-12 14:26:21 -04002631}
2632
2633bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002634
Kévin Petite8edce32019-04-10 14:23:32 +01002635 const std::vector<const char *> Names = {
David Netoe2871522018-06-08 11:09:54 -07002636 "_Z12vstore_half4Dv4_fjPU3AS1Dh",
2637 "_Z13vstorea_half4Dv4_fjPU3AS1Dh", // global
2638 "_Z13vstorea_half4Dv4_fjPU3AS3Dh", // local
2639 "_Z13vstorea_half4Dv4_fjPDh", // private
2640 "_Z16vstore_half4_rteDv4_fjPU3AS1Dh",
2641 "_Z17vstorea_half4_rteDv4_fjPU3AS1Dh", // global
2642 "_Z17vstorea_half4_rteDv4_fjPU3AS3Dh", // local
2643 "_Z17vstorea_half4_rteDv4_fjPDh", // private
2644 "_Z16vstore_half4_rtzDv4_fjPU3AS1Dh",
2645 "_Z17vstorea_half4_rtzDv4_fjPU3AS1Dh", // global
2646 "_Z17vstorea_half4_rtzDv4_fjPU3AS3Dh", // local
2647 "_Z17vstorea_half4_rtzDv4_fjPDh", // private
2648 };
David Neto22f144c2017-06-12 14:26:21 -04002649
Kévin Petite8edce32019-04-10 14:23:32 +01002650 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
2651 // The value to store.
2652 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002653
Kévin Petite8edce32019-04-10 14:23:32 +01002654 // The index argument from vstore_half.
2655 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002656
Kévin Petite8edce32019-04-10 14:23:32 +01002657 // The pointer argument from vstore_half.
2658 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002659
Kévin Petite8edce32019-04-10 14:23:32 +01002660 auto IntTy = Type::getInt32Ty(M.getContext());
2661 auto Int2Ty = VectorType::get(IntTy, 2);
2662 auto Float2Ty = VectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002663 auto NewPointerTy =
2664 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002665 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002666
Kévin Petite8edce32019-04-10 14:23:32 +01002667 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2668 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002669
Kévin Petite8edce32019-04-10 14:23:32 +01002670 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002671 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2672 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002673
Kévin Petite8edce32019-04-10 14:23:32 +01002674 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2675 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002676
Kévin Petite8edce32019-04-10 14:23:32 +01002677 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002678 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2679 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002680
Kévin Petite8edce32019-04-10 14:23:32 +01002681 // Our intrinsic to pack a float2 to an int.
2682 auto SPIRVIntrinsic = "spirv.pack.v2f16";
David Neto22f144c2017-06-12 14:26:21 -04002683
Kévin Petite8edce32019-04-10 14:23:32 +01002684 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002685
Kévin Petite8edce32019-04-10 14:23:32 +01002686 // Turn the packed x & y into the final component of our int2.
2687 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002688
Kévin Petite8edce32019-04-10 14:23:32 +01002689 // Turn the packed z & w into the final component of our int2.
2690 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002691
Kévin Petite8edce32019-04-10 14:23:32 +01002692 auto Combine = InsertElementInst::Create(
2693 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002694 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2695 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002696
Kévin Petite8edce32019-04-10 14:23:32 +01002697 // Cast the half* pointer to int2*.
2698 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002699
Kévin Petite8edce32019-04-10 14:23:32 +01002700 // Index into the correct address of the casted pointer.
2701 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002702
Kévin Petite8edce32019-04-10 14:23:32 +01002703 // Store to the int2* we casted to.
2704 return new StoreInst(Combine, Index, CI);
2705 });
David Neto22f144c2017-06-12 14:26:21 -04002706}
2707
alan-bakerf7e17cb2020-01-02 07:29:59 -05002708bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Module &M) {
2709 bool Changed = false;
2710 const std::map<const char *, const char *> Map = {
2711 // 1D
2712 {"_Z11read_imageh14ocl_image1d_roi", "_Z11read_imagef14ocl_image1d_roi"},
2713 {"_Z11read_imageh14ocl_image1d_ro11ocl_sampleri",
2714 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2715 {"_Z11read_imageh14ocl_image1d_ro11ocl_samplerf",
2716 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2717 // TODO 1D array
2718 // 2D
2719 {"_Z11read_imageh14ocl_image2d_roDv2_i",
2720 "_Z11read_imagef14ocl_image2d_roDv2_i"},
2721 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_i",
2722 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2723 {"_Z11read_imageh14ocl_image2d_ro11ocl_samplerDv2_f",
2724 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
2725 // TODO 2D array
2726 // 3D
2727 {"_Z11read_imageh14ocl_image3d_roDv4_i",
2728 "_Z11read_imagef14ocl_image3d_roDv4_i"},
2729 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_i",
2730 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2731 {"_Z11read_imageh14ocl_image3d_ro11ocl_samplerDv4_f",
2732 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
2733
2734 for (auto Pair : Map) {
2735 // If we find a function with the matching name.
2736 if (auto F = M.getFunction(Pair.first)) {
2737 SmallVector<Instruction *, 4> ToRemoves;
2738
2739 // Walk the users of the function.
2740 for (auto &U : F->uses()) {
2741 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2742 SmallVector<Type *, 3> types;
2743 SmallVector<Value *, 3> args;
2744 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2745 types.push_back(CI->getArgOperand(i)->getType());
2746 args.push_back(CI->getArgOperand(i));
2747 }
2748
2749 auto NewFType = FunctionType::get(
2750 VectorType::get(Type::getFloatTy(M.getContext()),
2751 CI->getType()->getVectorNumElements()),
2752 types, false);
2753
2754 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2755
2756 auto NewCI = CallInst::Create(NewF, args, "", CI);
2757
2758 // Convert to the half type.
2759 auto Cast = CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2760
2761 CI->replaceAllUsesWith(Cast);
2762
2763 // Lastly, remember to remove the user.
2764 ToRemoves.push_back(CI);
2765 }
2766 }
2767
2768 Changed = !ToRemoves.empty();
2769
2770 // And cleanup the calls we don't use anymore.
2771 for (auto V : ToRemoves) {
2772 V->eraseFromParent();
2773 }
2774
2775 // And remove the function we don't need either too.
2776 F->eraseFromParent();
2777 }
2778 }
2779
2780 return Changed;
2781}
2782
2783bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Module &M) {
2784 bool Changed = false;
2785 const std::map<const char *, const char *> Map = {
2786 // 1D
2787 {"_Z12write_imageh14ocl_image1d_woiDv4_Dh",
2788 "_Z12write_imagef14ocl_image1d_woiDv4_f"},
2789 // TODO 1D array
2790 // 2D
2791 {"_Z12write_imageh14ocl_image2d_woDv2_iDv4_Dh",
2792 "_Z12write_imagef14ocl_image2d_woDv2_iDv4_f"},
2793 // TODO 2D array
2794 // 3D
2795 {"_Z12write_imageh14ocl_image3d_woDv4_iDv4_Dh",
2796 "_Z12write_imagef14ocl_image3d_woDv4_iDv4_f"}};
2797
2798 for (auto Pair : Map) {
2799 // If we find a function with the matching name.
2800 if (auto F = M.getFunction(Pair.first)) {
2801 SmallVector<Instruction *, 4> ToRemoves;
2802
2803 // Walk the users of the function.
2804 for (auto &U : F->uses()) {
2805 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2806 SmallVector<Type *, 3> types(3);
2807 SmallVector<Value *, 3> args(3);
2808
2809 // Image
2810 types[0] = CI->getArgOperand(0)->getType();
2811 args[0] = CI->getArgOperand(0);
2812
2813 // Coord
2814 types[1] = CI->getArgOperand(1)->getType();
2815 args[1] = CI->getArgOperand(1);
2816
2817 // Data
2818 types[2] = VectorType::get(
2819 Type::getFloatTy(M.getContext()),
2820 CI->getArgOperand(2)->getType()->getVectorNumElements());
2821
2822 auto NewFType =
2823 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
2824
2825 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2826
2827 // Convert data to the float type.
2828 auto Cast =
2829 CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2830 args[2] = Cast;
2831
2832 auto NewCI = CallInst::Create(NewF, args, "", CI);
2833
2834 // Lastly, remember to remove the user.
2835 ToRemoves.push_back(CI);
2836 }
2837 }
2838
2839 Changed = !ToRemoves.empty();
2840
2841 // And cleanup the calls we don't use anymore.
2842 for (auto V : ToRemoves) {
2843 V->eraseFromParent();
2844 }
2845
2846 // And remove the function we don't need either too.
2847 F->eraseFromParent();
2848 }
2849 }
2850
2851 return Changed;
2852}
2853
alan-baker931d18a2019-12-12 08:21:32 -05002854bool ReplaceOpenCLBuiltinPass::replaceUnsampledReadImage(Module &M) {
2855 bool Changed = false;
2856 const std::map<const char *, const char *> Map = {
2857 // 1D
2858 {"_Z11read_imagef14ocl_image1d_roi",
2859 "_Z11read_imagef14ocl_image1d_ro11ocl_sampleri"},
2860 {"_Z11read_imagei14ocl_image1d_roi",
2861 "_Z11read_imagei14ocl_image1d_ro11ocl_sampleri"},
2862 {"_Z12read_imageui14ocl_image1d_roi",
2863 "_Z12read_imageui14ocl_image1d_ro11ocl_sampleri"},
2864 // TODO 1D array
2865 // 2D
2866 {"_Z11read_imagef14ocl_image2d_roDv2_i",
2867 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i"},
2868 {"_Z11read_imagei14ocl_image2d_roDv2_i",
2869 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i"},
2870 {"_Z12read_imageui14ocl_image2d_roDv2_i",
2871 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i"},
2872 // TODO 2D array
2873 // 3D
2874 {"_Z11read_imagef14ocl_image3d_roDv4_i",
2875 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i"},
2876 {"_Z11read_imagei14ocl_image3d_roDv4_i",
2877 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i"},
2878 {"_Z12read_imageui14ocl_image3d_roDv4_i",
2879 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i"}};
2880
2881 Function *translate_sampler =
2882 M.getFunction(clspv::TranslateSamplerInitializerFunction());
2883 Type *sampler_type = M.getTypeByName("opencl.sampler_t");
alan-bakerf7e17cb2020-01-02 07:29:59 -05002884 if (sampler_type) {
2885 sampler_type = sampler_type->getPointerTo(clspv::AddressSpace::Constant);
2886 }
alan-baker931d18a2019-12-12 08:21:32 -05002887 for (auto Pair : Map) {
2888 // If we find a function with the matching name.
2889 if (auto F = M.getFunction(Pair.first)) {
2890 SmallVector<Instruction *, 4> ToRemoves;
2891
2892 // Walk the users of the function.
2893 for (auto &U : F->uses()) {
2894 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2895 // The image.
2896 auto Image = CI->getOperand(0);
2897
2898 // The coordinate.
2899 auto Coord = CI->getOperand(1);
2900
2901 // Create the sampler translation function if necessary.
2902 if (!translate_sampler) {
2903 // Create the sampler type if necessary.
2904 if (!sampler_type) {
2905 sampler_type =
2906 StructType::create(M.getContext(), "opencl.sampler_t");
2907 sampler_type =
2908 sampler_type->getPointerTo(clspv::AddressSpace::Constant);
2909 }
2910 auto fn_type = FunctionType::get(
2911 sampler_type, {Type::getInt32Ty(M.getContext())}, false);
2912 auto callee = M.getOrInsertFunction(
2913 clspv::TranslateSamplerInitializerFunction(), fn_type);
2914 translate_sampler = cast<Function>(callee.getCallee());
2915 }
2916
2917 auto NewFType = FunctionType::get(
2918 CI->getType(), {Image->getType(), sampler_type, Coord->getType()},
2919 false);
2920
2921 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
2922
James Pricec05f6052020-01-14 13:37:20 -05002923 const uint64_t data_mask =
2924 clspv::version0::CLK_ADDRESS_NONE |
2925 clspv::version0::CLK_FILTER_NEAREST |
2926 clspv::version0::CLK_NORMALIZED_COORDS_FALSE;
alan-baker931d18a2019-12-12 08:21:32 -05002927 auto NewSamplerCI = CallInst::Create(
2928 translate_sampler,
2929 {ConstantInt::get(Type::getInt32Ty(M.getContext()), data_mask)},
2930 "", CI);
2931 auto NewCI =
2932 CallInst::Create(NewF, {Image, NewSamplerCI, Coord}, "", CI);
2933
2934 CI->replaceAllUsesWith(NewCI);
2935
2936 // Lastly, remember to remove the user.
2937 ToRemoves.push_back(CI);
2938 }
2939 }
2940
2941 Changed = !ToRemoves.empty();
2942
2943 // And cleanup the calls we don't use anymore.
2944 for (auto V : ToRemoves) {
2945 V->eraseFromParent();
2946 }
2947
2948 // And remove the function we don't need either too.
2949 F->eraseFromParent();
2950 }
2951 }
2952
2953 return Changed;
2954}
2955
Kévin Petit06517a12019-12-09 19:40:31 +00002956bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04002957 bool Changed = false;
2958
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002959 const std::map<const char *, const char *> Map = {
alan-bakerf906d2b2019-12-10 11:26:23 -05002960 // 1D
2961 {"_Z11read_imagei14ocl_image1d_ro11ocl_sampleri",
2962 "_Z11read_imagei14ocl_image1d_ro11ocl_samplerf"},
2963 {"_Z12read_imageui14ocl_image1d_ro11ocl_sampleri",
2964 "_Z12read_imageui14ocl_image1d_ro11ocl_samplerf"},
2965 {"_Z11read_imagef14ocl_image1d_ro11ocl_sampleri",
2966 "_Z11read_imagef14ocl_image1d_ro11ocl_samplerf"},
2967 // TODO 1Darray
Kévin Petit06517a12019-12-09 19:40:31 +00002968 // 2D
2969 {"_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_i",
2970 "_Z11read_imagei14ocl_image2d_ro11ocl_samplerDv2_f"},
2971 {"_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_i",
2972 "_Z12read_imageui14ocl_image2d_ro11ocl_samplerDv2_f"},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002973 {"_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_i",
2974 "_Z11read_imagef14ocl_image2d_ro11ocl_samplerDv2_f"},
Kévin Petit06517a12019-12-09 19:40:31 +00002975 // TODO 2D array
2976 // 3D
2977 {"_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_i",
2978 "_Z11read_imagei14ocl_image3d_ro11ocl_samplerDv4_f"},
2979 {"_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_i",
2980 "_Z12read_imageui14ocl_image3d_ro11ocl_samplerDv4_f"},
2981 {"_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_i",
2982 "_Z11read_imagef14ocl_image3d_ro11ocl_samplerDv4_f"}};
David Neto22f144c2017-06-12 14:26:21 -04002983
2984 for (auto Pair : Map) {
2985 // If we find a function with the matching name.
2986 if (auto F = M.getFunction(Pair.first)) {
2987 SmallVector<Instruction *, 4> ToRemoves;
2988
2989 // Walk the users of the function.
2990 for (auto &U : F->uses()) {
2991 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
2992 // The image.
2993 auto Arg0 = CI->getOperand(0);
2994
2995 // The sampler.
2996 auto Arg1 = CI->getOperand(1);
2997
2998 // The coordinate (integer type that we can't handle).
2999 auto Arg2 = CI->getOperand(2);
3000
alan-bakerf906d2b2019-12-10 11:26:23 -05003001 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
3002 // TODO(alan-baker): when arrayed images are supported fix component
3003 // calculation.
3004 uint32_t components = dim;
3005 Type *float_ty = nullptr;
3006 if (components == 1) {
3007 float_ty = Type::getFloatTy(M.getContext());
3008 } else {
3009 float_ty = VectorType::get(Type::getFloatTy(M.getContext()),
3010 Arg2->getType()->getVectorNumElements());
3011 }
David Neto22f144c2017-06-12 14:26:21 -04003012
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003013 auto NewFType = FunctionType::get(
alan-bakerf906d2b2019-12-10 11:26:23 -05003014 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty},
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003015 false);
David Neto22f144c2017-06-12 14:26:21 -04003016
3017 auto NewF = M.getOrInsertFunction(Pair.second, NewFType);
3018
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003019 auto Cast =
alan-bakerf906d2b2019-12-10 11:26:23 -05003020 CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003021
3022 auto NewCI = CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
3023
3024 CI->replaceAllUsesWith(NewCI);
3025
3026 // Lastly, remember to remove the user.
3027 ToRemoves.push_back(CI);
3028 }
3029 }
3030
3031 Changed = !ToRemoves.empty();
3032
3033 // And cleanup the calls we don't use anymore.
3034 for (auto V : ToRemoves) {
3035 V->eraseFromParent();
3036 }
3037
3038 // And remove the function we don't need either too.
3039 F->eraseFromParent();
3040 }
3041 }
3042
3043 return Changed;
3044}
3045
3046bool ReplaceOpenCLBuiltinPass::replaceAtomics(Module &M) {
3047 bool Changed = false;
3048
Kévin Petit9b340262019-06-19 18:31:11 +01003049 const std::map<const char *, spv::Op> Map = {
3050 {"_Z8atom_incPU3AS1Vi", spv::OpAtomicIIncrement},
3051 {"_Z8atom_incPU3AS3Vi", spv::OpAtomicIIncrement},
3052 {"_Z8atom_incPU3AS1Vj", spv::OpAtomicIIncrement},
3053 {"_Z8atom_incPU3AS3Vj", spv::OpAtomicIIncrement},
3054 {"_Z8atom_decPU3AS1Vi", spv::OpAtomicIDecrement},
3055 {"_Z8atom_decPU3AS3Vi", spv::OpAtomicIDecrement},
3056 {"_Z8atom_decPU3AS1Vj", spv::OpAtomicIDecrement},
3057 {"_Z8atom_decPU3AS3Vj", spv::OpAtomicIDecrement},
3058 {"_Z12atom_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
3059 {"_Z12atom_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
3060 {"_Z12atom_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
3061 {"_Z12atom_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange},
3062 {"_Z10atomic_incPU3AS1Vi", spv::OpAtomicIIncrement},
3063 {"_Z10atomic_incPU3AS3Vi", spv::OpAtomicIIncrement},
3064 {"_Z10atomic_incPU3AS1Vj", spv::OpAtomicIIncrement},
3065 {"_Z10atomic_incPU3AS3Vj", spv::OpAtomicIIncrement},
3066 {"_Z10atomic_decPU3AS1Vi", spv::OpAtomicIDecrement},
3067 {"_Z10atomic_decPU3AS3Vi", spv::OpAtomicIDecrement},
3068 {"_Z10atomic_decPU3AS1Vj", spv::OpAtomicIDecrement},
3069 {"_Z10atomic_decPU3AS3Vj", spv::OpAtomicIDecrement},
3070 {"_Z14atomic_cmpxchgPU3AS1Viii", spv::OpAtomicCompareExchange},
3071 {"_Z14atomic_cmpxchgPU3AS3Viii", spv::OpAtomicCompareExchange},
3072 {"_Z14atomic_cmpxchgPU3AS1Vjjj", spv::OpAtomicCompareExchange},
3073 {"_Z14atomic_cmpxchgPU3AS3Vjjj", spv::OpAtomicCompareExchange}};
David Neto22f144c2017-06-12 14:26:21 -04003074
3075 for (auto Pair : Map) {
3076 // If we find a function with the matching name.
3077 if (auto F = M.getFunction(Pair.first)) {
3078 SmallVector<Instruction *, 4> ToRemoves;
3079
3080 // Walk the users of the function.
3081 for (auto &U : F->uses()) {
3082 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
David Neto22f144c2017-06-12 14:26:21 -04003083
3084 auto IntTy = Type::getInt32Ty(M.getContext());
3085
David Neto22f144c2017-06-12 14:26:21 -04003086 // We need to map the OpenCL constants to the SPIR-V equivalents.
3087 const auto ConstantScopeDevice =
3088 ConstantInt::get(IntTy, spv::ScopeDevice);
3089 const auto ConstantMemorySemantics = ConstantInt::get(
3090 IntTy, spv::MemorySemanticsUniformMemoryMask |
3091 spv::MemorySemanticsSequentiallyConsistentMask);
3092
3093 SmallVector<Value *, 5> Params;
3094
3095 // The pointer.
3096 Params.push_back(CI->getArgOperand(0));
3097
3098 // The memory scope.
3099 Params.push_back(ConstantScopeDevice);
3100
3101 // The memory semantics.
3102 Params.push_back(ConstantMemorySemantics);
3103
3104 if (2 < CI->getNumArgOperands()) {
3105 // The unequal memory semantics.
3106 Params.push_back(ConstantMemorySemantics);
3107
3108 // The value.
3109 Params.push_back(CI->getArgOperand(2));
3110
3111 // The comparator.
3112 Params.push_back(CI->getArgOperand(1));
3113 } else if (1 < CI->getNumArgOperands()) {
3114 // The value.
3115 Params.push_back(CI->getArgOperand(1));
3116 }
3117
Kévin Petit9b340262019-06-19 18:31:11 +01003118 auto NewCI =
3119 clspv::InsertSPIRVOp(CI, Pair.second, {}, CI->getType(), Params);
David Neto22f144c2017-06-12 14:26:21 -04003120
3121 CI->replaceAllUsesWith(NewCI);
3122
3123 // Lastly, remember to remove the user.
3124 ToRemoves.push_back(CI);
3125 }
3126 }
3127
3128 Changed = !ToRemoves.empty();
3129
3130 // And cleanup the calls we don't use anymore.
3131 for (auto V : ToRemoves) {
3132 V->eraseFromParent();
3133 }
3134
3135 // And remove the function we don't need either too.
3136 F->eraseFromParent();
3137 }
3138 }
3139
Neil Henning39672102017-09-29 14:33:13 +01003140 const std::map<const char *, llvm::AtomicRMWInst::BinOp> Map2 = {
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003141 {"_Z8atom_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003142 {"_Z8atom_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003143 {"_Z8atom_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003144 {"_Z8atom_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003145 {"_Z8atom_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003146 {"_Z8atom_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003147 {"_Z8atom_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003148 {"_Z8atom_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003149 {"_Z9atom_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003150 {"_Z9atom_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003151 {"_Z9atom_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003152 {"_Z9atom_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003153 {"_Z8atom_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003154 {"_Z8atom_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003155 {"_Z8atom_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003156 {"_Z8atom_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003157 {"_Z8atom_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003158 {"_Z8atom_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003159 {"_Z8atom_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003160 {"_Z8atom_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003161 {"_Z8atom_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003162 {"_Z8atom_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003163 {"_Z8atom_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003164 {"_Z8atom_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003165 {"_Z7atom_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003166 {"_Z7atom_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003167 {"_Z7atom_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003168 {"_Z7atom_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003169 {"_Z8atom_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003170 {"_Z8atom_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
Kévin Petit4f6c6b02018-10-25 18:56:55 +00003171 {"_Z8atom_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003172 {"_Z8atom_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor},
Neil Henning39672102017-09-29 14:33:13 +01003173 {"_Z10atomic_addPU3AS1Vii", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003174 {"_Z10atomic_addPU3AS3Vii", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003175 {"_Z10atomic_addPU3AS1Vjj", llvm::AtomicRMWInst::Add},
Kévin Petita303dc62019-03-26 21:40:35 +00003176 {"_Z10atomic_addPU3AS3Vjj", llvm::AtomicRMWInst::Add},
Neil Henning39672102017-09-29 14:33:13 +01003177 {"_Z10atomic_subPU3AS1Vii", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003178 {"_Z10atomic_subPU3AS3Vii", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003179 {"_Z10atomic_subPU3AS1Vjj", llvm::AtomicRMWInst::Sub},
Kévin Petita303dc62019-03-26 21:40:35 +00003180 {"_Z10atomic_subPU3AS3Vjj", llvm::AtomicRMWInst::Sub},
Neil Henning39672102017-09-29 14:33:13 +01003181 {"_Z11atomic_xchgPU3AS1Vii", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003182 {"_Z11atomic_xchgPU3AS3Vii", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003183 {"_Z11atomic_xchgPU3AS1Vjj", llvm::AtomicRMWInst::Xchg},
Kévin Petita303dc62019-03-26 21:40:35 +00003184 {"_Z11atomic_xchgPU3AS3Vjj", llvm::AtomicRMWInst::Xchg},
Neil Henning39672102017-09-29 14:33:13 +01003185 {"_Z10atomic_minPU3AS1Vii", llvm::AtomicRMWInst::Min},
Kévin Petita303dc62019-03-26 21:40:35 +00003186 {"_Z10atomic_minPU3AS3Vii", llvm::AtomicRMWInst::Min},
Neil Henning39672102017-09-29 14:33:13 +01003187 {"_Z10atomic_minPU3AS1Vjj", llvm::AtomicRMWInst::UMin},
Kévin Petita303dc62019-03-26 21:40:35 +00003188 {"_Z10atomic_minPU3AS3Vjj", llvm::AtomicRMWInst::UMin},
Neil Henning39672102017-09-29 14:33:13 +01003189 {"_Z10atomic_maxPU3AS1Vii", llvm::AtomicRMWInst::Max},
Kévin Petita303dc62019-03-26 21:40:35 +00003190 {"_Z10atomic_maxPU3AS3Vii", llvm::AtomicRMWInst::Max},
Neil Henning39672102017-09-29 14:33:13 +01003191 {"_Z10atomic_maxPU3AS1Vjj", llvm::AtomicRMWInst::UMax},
Kévin Petita303dc62019-03-26 21:40:35 +00003192 {"_Z10atomic_maxPU3AS3Vjj", llvm::AtomicRMWInst::UMax},
Neil Henning39672102017-09-29 14:33:13 +01003193 {"_Z10atomic_andPU3AS1Vii", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003194 {"_Z10atomic_andPU3AS3Vii", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003195 {"_Z10atomic_andPU3AS1Vjj", llvm::AtomicRMWInst::And},
Kévin Petita303dc62019-03-26 21:40:35 +00003196 {"_Z10atomic_andPU3AS3Vjj", llvm::AtomicRMWInst::And},
Neil Henning39672102017-09-29 14:33:13 +01003197 {"_Z9atomic_orPU3AS1Vii", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003198 {"_Z9atomic_orPU3AS3Vii", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003199 {"_Z9atomic_orPU3AS1Vjj", llvm::AtomicRMWInst::Or},
Kévin Petita303dc62019-03-26 21:40:35 +00003200 {"_Z9atomic_orPU3AS3Vjj", llvm::AtomicRMWInst::Or},
Neil Henning39672102017-09-29 14:33:13 +01003201 {"_Z10atomic_xorPU3AS1Vii", llvm::AtomicRMWInst::Xor},
Kévin Petita303dc62019-03-26 21:40:35 +00003202 {"_Z10atomic_xorPU3AS3Vii", llvm::AtomicRMWInst::Xor},
3203 {"_Z10atomic_xorPU3AS1Vjj", llvm::AtomicRMWInst::Xor},
3204 {"_Z10atomic_xorPU3AS3Vjj", llvm::AtomicRMWInst::Xor}};
Neil Henning39672102017-09-29 14:33:13 +01003205
3206 for (auto Pair : Map2) {
3207 // If we find a function with the matching name.
3208 if (auto F = M.getFunction(Pair.first)) {
3209 SmallVector<Instruction *, 4> ToRemoves;
3210
3211 // Walk the users of the function.
3212 for (auto &U : F->uses()) {
3213 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3214 auto AtomicOp = new AtomicRMWInst(
3215 Pair.second, CI->getArgOperand(0), CI->getArgOperand(1),
3216 AtomicOrdering::SequentiallyConsistent, SyncScope::System, CI);
3217
3218 CI->replaceAllUsesWith(AtomicOp);
3219
3220 // Lastly, remember to remove the user.
3221 ToRemoves.push_back(CI);
3222 }
3223 }
3224
3225 Changed = !ToRemoves.empty();
3226
3227 // And cleanup the calls we don't use anymore.
3228 for (auto V : ToRemoves) {
3229 V->eraseFromParent();
3230 }
3231
3232 // And remove the function we don't need either too.
3233 F->eraseFromParent();
3234 }
3235 }
3236
David Neto22f144c2017-06-12 14:26:21 -04003237 return Changed;
3238}
3239
3240bool ReplaceOpenCLBuiltinPass::replaceCross(Module &M) {
David Neto22f144c2017-06-12 14:26:21 -04003241
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003242 std::vector<const char *> Names = {
3243 "_Z5crossDv4_fS_",
Kévin Petite8edce32019-04-10 14:23:32 +01003244 };
3245
3246 return replaceCallsWithValue(M, Names, [&M](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04003247 auto IntTy = Type::getInt32Ty(M.getContext());
3248 auto FloatTy = Type::getFloatTy(M.getContext());
3249
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003250 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
3251 ConstantInt::get(IntTy, 1),
3252 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04003253
3254 Constant *UpShuffleMask[4] = {
3255 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
3256 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
3257
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003258 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
3259 UndefValue::get(FloatTy),
3260 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04003261
Kévin Petite8edce32019-04-10 14:23:32 +01003262 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003263 auto Arg0 =
3264 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
3265 ConstantVector::get(DownShuffleMask), "", CI);
3266 auto Arg1 =
3267 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
3268 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003269 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04003270
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003271 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
David Neto22f144c2017-06-12 14:26:21 -04003272
Kévin Petite8edce32019-04-10 14:23:32 +01003273 auto Cross3Func = M.getOrInsertFunction("_Z5crossDv3_fS_", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04003274
Kévin Petite8edce32019-04-10 14:23:32 +01003275 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04003276
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003277 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
3278 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01003279 });
David Neto22f144c2017-06-12 14:26:21 -04003280}
David Neto62653202017-10-16 19:05:18 -04003281
3282bool ReplaceOpenCLBuiltinPass::replaceFract(Module &M) {
3283 bool Changed = false;
3284
3285 // OpenCL's float result = fract(float x, float* ptr)
3286 //
3287 // In the LLVM domain:
3288 //
3289 // %floor_result = call spir_func float @floor(float %x)
3290 // store float %floor_result, float * %ptr
3291 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
3292 // %result = call spir_func float
3293 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
3294 //
3295 // Becomes in the SPIR-V domain, where translations of floor, fmin,
3296 // and clspv.fract occur in the SPIR-V generator pass:
3297 //
3298 // %glsl_ext = OpExtInstImport "GLSL.std.450"
3299 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
3300 // ...
3301 // %floor_result = OpExtInst %float %glsl_ext Floor %x
3302 // OpStore %ptr %floor_result
3303 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
3304 // %fract_result = OpExtInst %float
3305 // %glsl_ext Fmin %fract_intermediate %just_under_1
3306
David Neto62653202017-10-16 19:05:18 -04003307 using std::string;
3308
3309 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
3310 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003311 using QuadType =
3312 std::tuple<const char *, const char *, const char *, const char *>;
David Neto62653202017-10-16 19:05:18 -04003313 auto make_quad = [](const char *a, const char *b, const char *c,
3314 const char *d) {
3315 return std::tuple<const char *, const char *, const char *, const char *>(
3316 a, b, c, d);
3317 };
3318 const std::vector<QuadType> Functions = {
3319 make_quad("_Z5fractfPf", "_Z5floorff", "_Z4fminff", "clspv.fract.f"),
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003320 make_quad("_Z5fractDv2_fPS_", "_Z5floorDv2_f", "_Z4fminDv2_ff",
3321 "clspv.fract.v2f"),
3322 make_quad("_Z5fractDv3_fPS_", "_Z5floorDv3_f", "_Z4fminDv3_ff",
3323 "clspv.fract.v3f"),
3324 make_quad("_Z5fractDv4_fPS_", "_Z5floorDv4_f", "_Z4fminDv4_ff",
3325 "clspv.fract.v4f"),
David Neto62653202017-10-16 19:05:18 -04003326 };
3327
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003328 for (auto &quad : Functions) {
David Neto62653202017-10-16 19:05:18 -04003329 const StringRef fract_name(std::get<0>(quad));
3330
3331 // If we find a function with the matching name.
3332 if (auto F = M.getFunction(fract_name)) {
3333 if (F->use_begin() == F->use_end())
3334 continue;
3335
3336 // We have some uses.
3337 Changed = true;
3338
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003339 auto &Context = M.getContext();
David Neto62653202017-10-16 19:05:18 -04003340
3341 const StringRef floor_name(std::get<1>(quad));
3342 const StringRef fmin_name(std::get<2>(quad));
3343 const StringRef clspv_fract_name(std::get<3>(quad));
3344
3345 // This is either float or a float vector. All the float-like
3346 // types are this type.
3347 auto result_ty = F->getReturnType();
3348
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003349 Function *fmin_fn = M.getFunction(fmin_name);
David Neto62653202017-10-16 19:05:18 -04003350 if (!fmin_fn) {
3351 // Make the fmin function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003352 FunctionType *fn_ty =
3353 FunctionType::get(result_ty, {result_ty, result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003354 fmin_fn =
3355 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003356 fmin_fn->addFnAttr(Attribute::ReadNone);
3357 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
3358 }
3359
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003360 Function *floor_fn = M.getFunction(floor_name);
David Neto62653202017-10-16 19:05:18 -04003361 if (!floor_fn) {
3362 // Make the floor function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003363 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003364 floor_fn = cast<Function>(
3365 M.getOrInsertFunction(floor_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003366 floor_fn->addFnAttr(Attribute::ReadNone);
3367 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
3368 }
3369
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003370 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
David Neto62653202017-10-16 19:05:18 -04003371 if (!clspv_fract_fn) {
3372 // Make the clspv_fract function.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003373 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
alan-bakerbccf62c2019-03-29 10:32:41 -04003374 clspv_fract_fn = cast<Function>(
3375 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
David Neto62653202017-10-16 19:05:18 -04003376 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
3377 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
3378 }
3379
3380 // Number of significant significand bits, whether represented or not.
3381 unsigned num_significand_bits;
3382 switch (result_ty->getScalarType()->getTypeID()) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003383 case Type::HalfTyID:
3384 num_significand_bits = 11;
3385 break;
3386 case Type::FloatTyID:
3387 num_significand_bits = 24;
3388 break;
3389 case Type::DoubleTyID:
3390 num_significand_bits = 53;
3391 break;
3392 default:
3393 assert(false && "Unhandled float type when processing fract builtin");
3394 break;
David Neto62653202017-10-16 19:05:18 -04003395 }
3396 // Beware that the disassembler displays this value as
3397 // OpConstant %float 1
3398 // which is not quite right.
3399 const double kJustUnderOneScalar =
3400 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
3401
3402 Constant *just_under_one =
3403 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
3404 if (result_ty->isVectorTy()) {
3405 just_under_one = ConstantVector::getSplat(
3406 result_ty->getVectorNumElements(), just_under_one);
3407 }
3408
3409 IRBuilder<> Builder(Context);
3410
3411 SmallVector<Instruction *, 4> ToRemoves;
3412
3413 // Walk the users of the function.
3414 for (auto &U : F->uses()) {
3415 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
3416
3417 Builder.SetInsertPoint(CI);
3418 auto arg = CI->getArgOperand(0);
3419 auto ptr = CI->getArgOperand(1);
3420
3421 // Compute floor result and store it.
3422 auto floor = Builder.CreateCall(floor_fn, {arg});
3423 Builder.CreateStore(floor, ptr);
3424
3425 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04003426 auto fract_result =
3427 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
David Neto62653202017-10-16 19:05:18 -04003428
3429 CI->replaceAllUsesWith(fract_result);
3430
3431 // Lastly, remember to remove the user.
3432 ToRemoves.push_back(CI);
3433 }
3434 }
3435
3436 // And cleanup the calls we don't use anymore.
3437 for (auto V : ToRemoves) {
3438 V->eraseFromParent();
3439 }
3440
3441 // And remove the function we don't need either too.
3442 F->eraseFromParent();
3443 }
3444 }
3445
3446 return Changed;
3447}