blob: 252a2bfc744cf7ba526b9310736d5ea269bf22c7 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
alan-bakere0902602020-03-23 08:43:40 -040030#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
SJW2c317da2020-03-23 07:39:13 -050035#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
38#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050039#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040
SJW2c317da2020-03-23 07:39:13 -050041using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040042using namespace llvm;
43
44#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
45
46namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
// Returns the zero-based index of the highest set bit of |v| (i.e.
// floor(log2(v))), and 0 when |v| is 0.
//
// Note: despite its name this does not count leading zeros. Callers in this
// file subtract two results to obtain the distance between two single-bit
// masks, which works with a bit index just as well.
uint32_t clz(uint32_t v) {
  uint32_t bit_index = 0;

  // Binary search for the top set bit: at each step test whether anything
  // lives above the lower |step| bits and, if so, discard those lower bits.
  for (uint32_t step = 16; step != 0; step >>= 1) {
    if ((v >> step) != 0) {
      v >>= step;
      bit_index |= step;
    }
  }

  return bit_index;
}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -040071 IntTy = FixedVectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010072 }
73 return IntTy;
74}
75
SJW2c317da2020-03-23 07:39:13 -050076bool replaceCallsWithValue(Function &F,
77 std::function<Value *(CallInst *)> Replacer) {
78
79 bool Changed = false;
80
81 SmallVector<Instruction *, 4> ToRemoves;
82
83 // Walk the users of the function.
84 for (auto &U : F.uses()) {
85 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
86
87 auto NewValue = Replacer(CI);
88
89 if (NewValue != nullptr) {
90 CI->replaceAllUsesWith(NewValue);
91
92 // Lastly, remember to remove the user.
93 ToRemoves.push_back(CI);
94 }
95 }
96 }
97
98 Changed = !ToRemoves.empty();
99
100 // And cleanup the calls we don't use anymore.
101 for (auto V : ToRemoves) {
102 V->eraseFromParent();
103 }
104
105 return Changed;
106}
107
David Neto22f144c2017-06-12 14:26:21 -0400108struct ReplaceOpenCLBuiltinPass final : public ModulePass {
109 static char ID;
110 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
111
112 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500113 bool runOnFunction(Function &F);
114 bool replaceAbs(Function &F);
115 bool replaceAbsDiff(Function &F, bool is_signed);
116 bool replaceCopysign(Function &F);
117 bool replaceRecip(Function &F);
118 bool replaceDivide(Function &F);
119 bool replaceDot(Function &F);
120 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500121 bool replaceExp10(Function &F, const std::string &basename);
122 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100123 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400124 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500125 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100126 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500127 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
128 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
129 bool replaceIsFinite(Function &F);
130 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
131 bool replaceUpsample(Function &F);
132 bool replaceRotate(Function &F);
133 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
134 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
135 bool replaceSelect(Function &F);
136 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500137 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500138 bool replaceSignbit(Function &F, bool is_vec);
139 bool replaceMul(Function &F, bool is_float, bool is_mad);
140 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
141 bool replaceVloadHalf(Function &F);
142 bool replaceVloadHalf2(Function &F);
143 bool replaceVloadHalf4(Function &F);
144 bool replaceClspvVloadaHalf2(Function &F);
145 bool replaceClspvVloadaHalf4(Function &F);
146 bool replaceVstoreHalf(Function &F, int vec_size);
147 bool replaceVstoreHalf(Function &F);
148 bool replaceVstoreHalf2(Function &F);
149 bool replaceVstoreHalf4(Function &F);
150 bool replaceHalfReadImage(Function &F);
151 bool replaceHalfWriteImage(Function &F);
152 bool replaceSampledReadImageWithIntCoords(Function &F);
153 bool replaceAtomics(Function &F, spv::Op Op);
154 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
155 bool replaceCross(Function &F);
156 bool replaceFract(Function &F, int vec_size);
157 bool replaceVload(Function &F);
158 bool replaceVstore(Function &F);
David Neto22f144c2017-06-12 14:26:21 -0400159};
SJW2c317da2020-03-23 07:39:13 -0500160
Kévin Petit91bc72e2019-04-08 15:17:46 +0100161} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400162
163char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400164INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
165 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400166
167namespace clspv {
168ModulePass *createReplaceOpenCLBuiltinPass() {
169 return new ReplaceOpenCLBuiltinPass();
170}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400171} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400172
173bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500174 std::list<Function *> func_list;
175 for (auto &F : M.getFunctionList()) {
176 // process only function declarations
177 if (F.isDeclaration() && runOnFunction(F)) {
178 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000179 }
180 }
SJW2c317da2020-03-23 07:39:13 -0500181 if (func_list.size() != 0) {
182 // recursively convert functions, but first remove dead
183 for (auto *F : func_list) {
184 if (F->use_empty()) {
185 F->eraseFromParent();
186 }
187 }
188 runOnModule(M);
189 return true;
190 }
191 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000192}
193
SJW2c317da2020-03-23 07:39:13 -0500194bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
195 auto &FI = Builtins::Lookup(&F);
196 switch (FI.getType()) {
197 case Builtins::kAbs:
198 if (!FI.getParameter(0).is_signed) {
199 return replaceAbs(F);
200 }
201 break;
202 case Builtins::kAbsDiff:
203 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
204 case Builtins::kCopysign:
205 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100206
SJW2c317da2020-03-23 07:39:13 -0500207 case Builtins::kHalfRecip:
208 case Builtins::kNativeRecip:
209 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100210
SJW2c317da2020-03-23 07:39:13 -0500211 case Builtins::kHalfDivide:
212 case Builtins::kNativeDivide:
213 return replaceDivide(F);
214
215 case Builtins::kDot:
216 return replaceDot(F);
217
218 case Builtins::kExp10:
219 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500220 case Builtins::kNativeExp10:
221 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500222
223 case Builtins::kLog10:
224 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500225 case Builtins::kNativeLog10:
226 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500227
gnl21636e7992020-09-09 16:08:16 +0100228 case Builtins::kLog1p:
229 return replaceLog1p(F);
230
SJW2c317da2020-03-23 07:39:13 -0500231 case Builtins::kFmod:
232 return replaceFmod(F);
233
234 case Builtins::kBarrier:
235 case Builtins::kWorkGroupBarrier:
236 return replaceBarrier(F);
237
alan-baker12d2c182020-07-20 08:22:42 -0400238 case Builtins::kSubGroupBarrier:
239 return replaceBarrier(F, true);
240
SJW2c317da2020-03-23 07:39:13 -0500241 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400242 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500243 case Builtins::kReadMemFence:
244 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
245 case Builtins::kWriteMemFence:
246 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
247
248 // Relational
249 case Builtins::kIsequal:
250 return replaceRelational(F, CmpInst::FCMP_OEQ,
251 FI.getParameter(0).vector_size ? -1 : 1);
252 case Builtins::kIsgreater:
253 return replaceRelational(F, CmpInst::FCMP_OGT,
254 FI.getParameter(0).vector_size ? -1 : 1);
255 case Builtins::kIsgreaterequal:
256 return replaceRelational(F, CmpInst::FCMP_OGE,
257 FI.getParameter(0).vector_size ? -1 : 1);
258 case Builtins::kIsless:
259 return replaceRelational(F, CmpInst::FCMP_OLT,
260 FI.getParameter(0).vector_size ? -1 : 1);
261 case Builtins::kIslessequal:
262 return replaceRelational(F, CmpInst::FCMP_OLE,
263 FI.getParameter(0).vector_size ? -1 : 1);
264 case Builtins::kIsnotequal:
265 return replaceRelational(F, CmpInst::FCMP_ONE,
266 FI.getParameter(0).vector_size ? -1 : 1);
267
268 case Builtins::kIsinf: {
269 bool is_vec = FI.getParameter(0).vector_size != 0;
270 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
271 }
272 case Builtins::kIsnan: {
273 bool is_vec = FI.getParameter(0).vector_size != 0;
274 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
275 }
276
277 case Builtins::kIsfinite:
278 return replaceIsFinite(F);
279
280 case Builtins::kAll: {
281 bool is_vec = FI.getParameter(0).vector_size != 0;
282 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
283 }
284 case Builtins::kAny: {
285 bool is_vec = FI.getParameter(0).vector_size != 0;
286 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
287 }
288
289 case Builtins::kUpsample:
290 return replaceUpsample(F);
291
292 case Builtins::kRotate:
293 return replaceRotate(F);
294
295 case Builtins::kConvert:
296 return replaceConvert(F, FI.getParameter(0).is_signed,
297 FI.getReturnType().is_signed);
298
299 case Builtins::kAtomicInc:
300 return replaceAtomics(F, spv::OpAtomicIIncrement);
301 case Builtins::kAtomicDec:
302 return replaceAtomics(F, spv::OpAtomicIDecrement);
303 case Builtins::kAtomicCmpxchg:
304 return replaceAtomics(F, spv::OpAtomicCompareExchange);
305 case Builtins::kAtomicAdd:
306 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
307 case Builtins::kAtomicSub:
308 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
309 case Builtins::kAtomicXchg:
310 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
311 case Builtins::kAtomicMin:
312 return replaceAtomics(F, FI.getParameter(0).is_signed
313 ? llvm::AtomicRMWInst::Min
314 : llvm::AtomicRMWInst::UMin);
315 case Builtins::kAtomicMax:
316 return replaceAtomics(F, FI.getParameter(0).is_signed
317 ? llvm::AtomicRMWInst::Max
318 : llvm::AtomicRMWInst::UMax);
319 case Builtins::kAtomicAnd:
320 return replaceAtomics(F, llvm::AtomicRMWInst::And);
321 case Builtins::kAtomicOr:
322 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
323 case Builtins::kAtomicXor:
324 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
325
326 case Builtins::kCross:
327 if (FI.getParameter(0).vector_size == 4) {
328 return replaceCross(F);
329 }
330 break;
331
332 case Builtins::kFract:
333 if (FI.getParameterCount()) {
334 return replaceFract(F, FI.getParameter(0).vector_size);
335 }
336 break;
337
338 case Builtins::kMadHi:
339 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
340 case Builtins::kMulHi:
341 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
342
343 case Builtins::kMad:
344 case Builtins::kMad24:
345 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
346 true);
347 case Builtins::kMul24:
348 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
349 false);
350
351 case Builtins::kSelect:
352 return replaceSelect(F);
353
354 case Builtins::kBitselect:
355 return replaceBitSelect(F);
356
357 case Builtins::kVload:
358 return replaceVload(F);
359
360 case Builtins::kVloadaHalf:
361 case Builtins::kVloadHalf:
362 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
363
364 case Builtins::kVstore:
365 return replaceVstore(F);
366
367 case Builtins::kVstoreHalf:
368 case Builtins::kVstoreaHalf:
369 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
370
371 case Builtins::kSmoothstep: {
372 int vec_size = FI.getLastParameter().vector_size;
373 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500374 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500375 }
376 break;
377 }
378 case Builtins::kStep: {
379 int vec_size = FI.getLastParameter().vector_size;
380 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500381 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500382 }
383 break;
384 }
385
386 case Builtins::kSignbit:
387 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
388
389 case Builtins::kReadImageh:
390 return replaceHalfReadImage(F);
391 case Builtins::kReadImagef:
392 case Builtins::kReadImagei:
393 case Builtins::kReadImageui: {
394 if (FI.getParameter(1).isSampler() &&
395 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
396 return replaceSampledReadImageWithIntCoords(F);
397 }
398 break;
399 }
400
401 case Builtins::kWriteImageh:
402 return replaceHalfWriteImage(F);
403
Kévin Petit1cb45112020-04-27 18:55:48 +0100404 case Builtins::kPrefetch:
405 return replacePrefetch(F);
406
SJW2c317da2020-03-23 07:39:13 -0500407 default:
408 break;
409 }
410
411 return false;
412}
413
414bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
415 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400416 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100417}
418
SJW2c317da2020-03-23 07:39:13 -0500419bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
420 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100421 auto XValue = CI->getOperand(0);
422 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100423
Kévin Petite8edce32019-04-10 14:23:32 +0100424 IRBuilder<> Builder(CI);
425 auto XmY = Builder.CreateSub(XValue, YValue);
426 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100427
SJW2c317da2020-03-23 07:39:13 -0500428 Value *Cmp = nullptr;
429 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100430 Cmp = Builder.CreateICmpSGT(YValue, XValue);
431 } else {
432 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100433 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100434
Kévin Petite8edce32019-04-10 14:23:32 +0100435 return Builder.CreateSelect(Cmp, YmX, XmY);
436 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100437}
438
SJW2c317da2020-03-23 07:39:13 -0500439bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
440 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100441 auto XValue = CI->getOperand(0);
442 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100443
Kévin Petite8edce32019-04-10 14:23:32 +0100444 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100445
SJW2c317da2020-03-23 07:39:13 -0500446 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400447 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400448 IntTy = FixedVectorType::get(IntTy, vec_ty->getNumElements());
Kévin Petit8c1be282019-04-02 19:34:25 +0100449 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100450
Kévin Petite8edce32019-04-10 14:23:32 +0100451 // Return X with the sign of Y
452
453 // Sign bit masks
454 auto SignBit = IntTy->getScalarSizeInBits() - 1;
455 auto SignBitMask = 1 << SignBit;
456 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
457 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
458
459 IRBuilder<> Builder(CI);
460
461 // Extract sign of Y
462 auto YInt = Builder.CreateBitCast(YValue, IntTy);
463 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
464
465 // Clear sign bit in X
466 auto XInt = Builder.CreateBitCast(XValue, IntTy);
467 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
468
469 // Insert sign bit of Y into X
470 auto NewXInt = Builder.CreateOr(XInt, YSign);
471
472 // And cast back to floating-point
473 return Builder.CreateBitCast(NewXInt, Ty);
474 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100475}
476
SJW2c317da2020-03-23 07:39:13 -0500477bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
478 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100479 // Recip has one arg.
480 auto Arg = CI->getOperand(0);
481 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
482 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
483 });
David Neto22f144c2017-06-12 14:26:21 -0400484}
485
SJW2c317da2020-03-23 07:39:13 -0500486bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
487 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100488 auto Op0 = CI->getOperand(0);
489 auto Op1 = CI->getOperand(1);
490 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
491 });
David Neto22f144c2017-06-12 14:26:21 -0400492}
493
SJW2c317da2020-03-23 07:39:13 -0500494bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
495 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100496 auto Op0 = CI->getOperand(0);
497 auto Op1 = CI->getOperand(1);
498
SJW2c317da2020-03-23 07:39:13 -0500499 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100500 if (Op0->getType()->isVectorTy()) {
501 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
502 CI->getType(), {Op0, Op1});
503 } else {
504 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
505 }
506
507 return V;
508 });
509}
510
SJW2c317da2020-03-23 07:39:13 -0500511bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500512 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500513 // convert to natural
514 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500515 std::string NewFName = basename.substr(0, slen);
516 NewFName =
517 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400518
SJW2c317da2020-03-23 07:39:13 -0500519 Module &M = *F.getParent();
520 return replaceCallsWithValue(F, [&](CallInst *CI) {
521 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
522
523 auto Arg = CI->getOperand(0);
524
525 // Constant of the natural log of 10 (ln(10)).
526 const double Ln10 =
527 2.302585092994045684017991454684364207601101488628772976033;
528
529 auto Mul = BinaryOperator::Create(
530 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
531
532 return CallInst::Create(NewF, Mul, "", CI);
533 });
David Neto22f144c2017-06-12 14:26:21 -0400534}
535
SJW2c317da2020-03-23 07:39:13 -0500536bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100537 // OpenCL fmod(x,y) is x - y * trunc(x/y)
538 // The sign for a non-zero result is taken from x.
539 // (Try an example.)
540 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500541 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100542 auto Op0 = CI->getOperand(0);
543 auto Op1 = CI->getOperand(1);
544 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
545 });
546}
547
SJW2c317da2020-03-23 07:39:13 -0500548bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500549 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500550 // convert to natural
551 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500552 std::string NewFName = basename.substr(0, slen);
553 NewFName =
554 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400555
SJW2c317da2020-03-23 07:39:13 -0500556 Module &M = *F.getParent();
557 return replaceCallsWithValue(F, [&](CallInst *CI) {
558 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
559
560 auto Arg = CI->getOperand(0);
561
562 // Constant of the reciprocal of the natural log of 10 (ln(10)).
563 const double Ln10 =
564 0.434294481903251827651128918916605082294397005803666566114;
565
566 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
567
568 return BinaryOperator::Create(Instruction::FMul,
569 ConstantFP::get(Arg->getType(), Ln10), NewCI,
570 "", CI);
571 });
David Neto22f144c2017-06-12 14:26:21 -0400572}
573
gnl21636e7992020-09-09 16:08:16 +0100574bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
575 // convert to natural
576 std::string NewFName =
577 Builtins::GetMangledFunctionName("log", F.getFunctionType());
578
579 Module &M = *F.getParent();
580 return replaceCallsWithValue(F, [&](CallInst *CI) {
581 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
582
583 auto Arg = CI->getOperand(0);
584
585 auto ArgP1 = BinaryOperator::Create(
586 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
587
588 return CallInst::Create(NewF, ArgP1, "", CI);
589 });
590}
591
alan-baker12d2c182020-07-20 08:22:42 -0400592bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400593
594 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
595
alan-baker12d2c182020-07-20 08:22:42 -0400596 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100597 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400598
Kévin Petitc4643922019-06-17 19:32:05 +0100599 // We need to map the OpenCL constants to the SPIR-V equivalents.
600 const auto LocalMemFence =
601 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
602 const auto GlobalMemFence =
603 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400604 const auto ConstantAcquireRelease = ConstantInt::get(
605 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100606 const auto ConstantScopeDevice =
607 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
608 const auto ConstantScopeWorkgroup =
609 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400610 const auto ConstantScopeSubgroup =
611 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400612
Kévin Petitc4643922019-06-17 19:32:05 +0100613 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
614 const auto LocalMemFenceMask =
615 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
616 const auto WorkgroupShiftAmount =
617 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
618 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
619 Instruction::Shl, LocalMemFenceMask,
620 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400621
Kévin Petitc4643922019-06-17 19:32:05 +0100622 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
623 const auto GlobalMemFenceMask =
624 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
625 const auto UniformShiftAmount =
626 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
627 const auto MemorySemanticsUniform = BinaryOperator::Create(
628 Instruction::Shl, GlobalMemFenceMask,
629 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400630
Kévin Petitc4643922019-06-17 19:32:05 +0100631 // And combine the above together, also adding in
alan-baker12d2c182020-07-20 08:22:42 -0400632 // MemorySemanticsAcquireReleaseMask.
Kévin Petitc4643922019-06-17 19:32:05 +0100633 auto MemorySemantics =
634 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400635 ConstantAcquireRelease, "", CI);
Kévin Petitc4643922019-06-17 19:32:05 +0100636 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
637 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400638
alan-baker12d2c182020-07-20 08:22:42 -0400639 // If the memory scope is not specified explicitly, it is either Subgroup
640 // or Workgroup depending on the type of barrier.
641 Value *MemoryScope =
642 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
643 if (CI->data_operands_size() > 1) {
644 enum {
645 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
646 CL_MEMORY_SCOPE_DEVICE = 0x2,
647 CL_MEMORY_SCOPE_SUBGROUP = 0x4
648 };
649 // The call was given an explicit memory scope.
650 const auto MemoryScopeSubgroup =
651 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
652 const auto MemoryScopeDevice =
653 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400654
alan-baker12d2c182020-07-20 08:22:42 -0400655 auto Cmp =
656 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
657 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
658 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
659 ConstantScopeWorkgroup, "", CI);
660 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
661 MemoryScopeDevice, CI->getOperand(1), "", CI);
662 MemoryScope =
663 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
664 }
665
666 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
667 // the type of barrier;
668 const auto ExecutionScope =
669 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400670
Kévin Petitc4643922019-06-17 19:32:05 +0100671 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
672 {Attribute::NoDuplicate}, CI->getType(),
673 {ExecutionScope, MemoryScope, MemorySemantics});
674 });
David Neto22f144c2017-06-12 14:26:21 -0400675}
676
SJW2c317da2020-03-23 07:39:13 -0500677bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
678 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400679
SJW2c317da2020-03-23 07:39:13 -0500680 return replaceCallsWithValue(F, [&](CallInst *CI) {
681 enum { CLK_LOCAL_MEM_FENCE = 0x01, CLK_GLOBAL_MEM_FENCE = 0x02 };
David Neto22f144c2017-06-12 14:26:21 -0400682
SJW2c317da2020-03-23 07:39:13 -0500683 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400684
SJW2c317da2020-03-23 07:39:13 -0500685 // We need to map the OpenCL constants to the SPIR-V equivalents.
686 const auto LocalMemFence =
687 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
688 const auto GlobalMemFence =
689 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
690 const auto ConstantMemorySemantics =
691 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400692 const auto ConstantScopeWorkgroup =
693 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400694
SJW2c317da2020-03-23 07:39:13 -0500695 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
696 const auto LocalMemFenceMask =
697 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
698 const auto WorkgroupShiftAmount =
699 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
700 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
701 Instruction::Shl, LocalMemFenceMask,
702 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400703
SJW2c317da2020-03-23 07:39:13 -0500704 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
705 const auto GlobalMemFenceMask =
706 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
707 const auto UniformShiftAmount =
708 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
709 const auto MemorySemanticsUniform = BinaryOperator::Create(
710 Instruction::Shl, GlobalMemFenceMask,
711 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400712
SJW2c317da2020-03-23 07:39:13 -0500713 // And combine the above together, also adding in
714 // MemorySemanticsSequentiallyConsistentMask.
715 auto MemorySemantics =
716 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
717 ConstantMemorySemantics, "", CI);
718 MemorySemantics = BinaryOperator::Create(Instruction::Or, MemorySemantics,
719 MemorySemanticsUniform, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400720
alan-baker12d2c182020-07-20 08:22:42 -0400721 // Memory Scope is always workgroup.
722 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400723
SJW2c317da2020-03-23 07:39:13 -0500724 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier, {}, CI->getType(),
725 {MemoryScope, MemorySemantics});
726 });
David Neto22f144c2017-06-12 14:26:21 -0400727}
728
Kévin Petit1cb45112020-04-27 18:55:48 +0100729bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
730 bool Changed = false;
731
732 SmallVector<Instruction *, 4> ToRemoves;
733
734 // Find all calls to the function
735 for (auto &U : F.uses()) {
736 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
737 ToRemoves.push_back(CI);
738 }
739 }
740
741 Changed = !ToRemoves.empty();
742
743 // Delete them
744 for (auto V : ToRemoves) {
745 V->eraseFromParent();
746 }
747
748 return Changed;
749}
750
SJW2c317da2020-03-23 07:39:13 -0500751bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
752 CmpInst::Predicate P,
753 int32_t C) {
754 return replaceCallsWithValue(F, [&](CallInst *CI) {
755 // The predicate to use in the CmpInst.
756 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400757
SJW2c317da2020-03-23 07:39:13 -0500758 // The value to return for true.
759 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400760
SJW2c317da2020-03-23 07:39:13 -0500761 // The value to return for false.
762 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400763
SJW2c317da2020-03-23 07:39:13 -0500764 auto Arg1 = CI->getOperand(0);
765 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400766
SJW2c317da2020-03-23 07:39:13 -0500767 const auto Cmp =
768 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400769
SJW2c317da2020-03-23 07:39:13 -0500770 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
771 });
David Neto22f144c2017-06-12 14:26:21 -0400772}
773
SJW2c317da2020-03-23 07:39:13 -0500774bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
775 spv::Op SPIRVOp,
776 int32_t C) {
777 Module &M = *F.getParent();
778 return replaceCallsWithValue(F, [&](CallInst *CI) {
779 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400780
SJW2c317da2020-03-23 07:39:13 -0500781 // The value to return for true.
782 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400783
SJW2c317da2020-03-23 07:39:13 -0500784 // The value to return for false.
785 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -0400786
SJW2c317da2020-03-23 07:39:13 -0500787 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -0400788 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400789 CorrespondingBoolTy = FixedVectorType::get(
790 Type::getInt1Ty(M.getContext()), CIVecTy->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -0400791 }
David Neto22f144c2017-06-12 14:26:21 -0400792
SJW2c317da2020-03-23 07:39:13 -0500793 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
794 CorrespondingBoolTy, {CI->getOperand(0)});
795
796 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
797 });
David Neto22f144c2017-06-12 14:26:21 -0400798}
799
SJW2c317da2020-03-23 07:39:13 -0500800bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
801 Module &M = *F.getParent();
802 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100803 auto &C = M.getContext();
804 auto Val = CI->getOperand(0);
805 auto ValTy = Val->getType();
806 auto RetTy = CI->getType();
807
808 // Get a suitable integer type to represent the number
809 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
810
811 // Create Mask
812 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -0500813 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100814 switch (ScalarSize) {
815 case 16:
816 InfMask = ConstantInt::get(IntTy, 0x7C00U);
817 break;
818 case 32:
819 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
820 break;
821 case 64:
822 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
823 break;
824 default:
825 llvm_unreachable("Unsupported floating-point type");
826 }
827
828 IRBuilder<> Builder(CI);
829
830 // Bitcast to int
831 auto ValInt = Builder.CreateBitCast(Val, IntTy);
832
833 // Mask and compare
834 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
835 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
836
837 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -0500838 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100839 if (ValTy->isVectorTy()) {
840 RetTrue = ConstantInt::getSigned(RetTy, -1);
841 } else {
842 RetTrue = ConstantInt::get(RetTy, 1);
843 }
844 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
845 });
846}
847
SJW2c317da2020-03-23 07:39:13 -0500848bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
849 Module &M = *F.getParent();
850 return replaceCallsWithValue(F, [&](CallInst *CI) {
851 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400852
SJW2c317da2020-03-23 07:39:13 -0500853 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000854
SJW2c317da2020-03-23 07:39:13 -0500855 // If the argument is a 32-bit int, just use a shift
856 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
857 V = BinaryOperator::Create(Instruction::LShr, Arg,
858 ConstantInt::get(Arg->getType(), 31), "", CI);
859 } else {
860 // The value for zero to compare against.
861 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -0400862
SJW2c317da2020-03-23 07:39:13 -0500863 // The value to return for true.
864 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -0400865
SJW2c317da2020-03-23 07:39:13 -0500866 // The value to return for false.
867 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400868
SJW2c317da2020-03-23 07:39:13 -0500869 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
870 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400871
SJW2c317da2020-03-23 07:39:13 -0500872 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -0400873
SJW2c317da2020-03-23 07:39:13 -0500874 // If we have a function to call, call it!
875 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -0400876
SJW2c317da2020-03-23 07:39:13 -0500877 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -0400878
SJW2c317da2020-03-23 07:39:13 -0500879 const auto NewCI = clspv::InsertSPIRVOp(
880 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
881 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -0400882
SJW2c317da2020-03-23 07:39:13 -0500883 } else {
884 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -0400885 }
886
SJW2c317da2020-03-23 07:39:13 -0500887 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400888 }
SJW2c317da2020-03-23 07:39:13 -0500889 return V;
890 });
David Neto22f144c2017-06-12 14:26:21 -0400891}
892
SJW2c317da2020-03-23 07:39:13 -0500893bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
894 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
895 // Get arguments
896 auto HiValue = CI->getOperand(0);
897 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000898
SJW2c317da2020-03-23 07:39:13 -0500899 // Don't touch overloads that aren't in OpenCL C
900 auto HiType = HiValue->getType();
901 auto LoType = LoValue->getType();
902
903 if (HiType != LoType) {
904 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000905 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000906
SJW2c317da2020-03-23 07:39:13 -0500907 if (!HiType->isIntOrIntVectorTy()) {
908 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000909 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000910
SJW2c317da2020-03-23 07:39:13 -0500911 if (HiType->getScalarSizeInBits() * 2 !=
912 CI->getType()->getScalarSizeInBits()) {
913 return nullptr;
914 }
915
916 if ((HiType->getScalarSizeInBits() != 8) &&
917 (HiType->getScalarSizeInBits() != 16) &&
918 (HiType->getScalarSizeInBits() != 32)) {
919 return nullptr;
920 }
921
James Pricecf53df42020-04-20 14:41:24 -0400922 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
923 unsigned NumElements = HiVecType->getNumElements();
924 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
925 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500926 return nullptr;
927 }
928 }
929
930 // Convert both operands to the result type
931 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
932 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
933
934 // Shift high operand
935 auto ShiftAmount =
936 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
937 auto HiShifted =
938 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
939
940 // OR both results
941 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
942 });
Kévin Petitbf0036c2019-03-06 13:57:10 +0000943}
944
SJW2c317da2020-03-23 07:39:13 -0500945bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
946 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
947 // Get arguments
948 auto SrcValue = CI->getOperand(0);
949 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +0000950
SJW2c317da2020-03-23 07:39:13 -0500951 // Don't touch overloads that aren't in OpenCL C
952 auto SrcType = SrcValue->getType();
953 auto RotType = RotAmount->getType();
954
955 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
956 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000957 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000958
SJW2c317da2020-03-23 07:39:13 -0500959 if (!SrcType->isIntOrIntVectorTy()) {
960 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000961 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000962
SJW2c317da2020-03-23 07:39:13 -0500963 if ((SrcType->getScalarSizeInBits() != 8) &&
964 (SrcType->getScalarSizeInBits() != 16) &&
965 (SrcType->getScalarSizeInBits() != 32) &&
966 (SrcType->getScalarSizeInBits() != 64)) {
967 return nullptr;
968 }
969
James Pricecf53df42020-04-20 14:41:24 -0400970 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
971 unsigned NumElements = SrcVecType->getNumElements();
972 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
973 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500974 return nullptr;
975 }
976 }
977
978 // The approach used is to shift the top bits down, the bottom bits up
979 // and OR the two shifted values.
980
981 // The rotation amount is to be treated modulo the element size.
982 // Since SPIR-V shift ops don't support this, let's apply the
983 // modulo ahead of shifting. The element size is always a power of
984 // two so we can just AND with a mask.
985 auto ModMask =
986 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
987 RotAmount =
988 BinaryOperator::Create(Instruction::And, RotAmount, ModMask, "", CI);
989
990 // Let's calc the amount by which to shift top bits down
991 auto ScalarSize = ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
992 auto DownAmount =
993 BinaryOperator::Create(Instruction::Sub, ScalarSize, RotAmount, "", CI);
994
995 // Now shift the bottom bits up and the top bits down
996 auto LoRotated =
997 BinaryOperator::Create(Instruction::Shl, SrcValue, RotAmount, "", CI);
998 auto HiRotated =
999 BinaryOperator::Create(Instruction::LShr, SrcValue, DownAmount, "", CI);
1000
1001 // Finally OR the two shifted values
1002 return BinaryOperator::Create(Instruction::Or, LoRotated, HiRotated, "",
1003 CI);
1004 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001005}
1006
SJW2c317da2020-03-23 07:39:13 -05001007bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1008 bool DstIsSigned) {
1009 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1010 Value *V = nullptr;
1011 // Get arguments
1012 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001013
SJW2c317da2020-03-23 07:39:13 -05001014 // Don't touch overloads that aren't in OpenCL C
1015 auto SrcType = SrcValue->getType();
1016 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001017
SJW2c317da2020-03-23 07:39:13 -05001018 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1019 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1020 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001021 }
1022
James Pricecf53df42020-04-20 14:41:24 -04001023 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
1024 unsigned SrcNumElements = SrcVecType->getNumElements();
1025 unsigned DstNumElements = cast<VectorType>(DstType)->getNumElements();
1026 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001027 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001028 }
1029
James Pricecf53df42020-04-20 14:41:24 -04001030 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1031 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1032 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001033 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001034 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001035 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001036
SJW2c317da2020-03-23 07:39:13 -05001037 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1038 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1039
1040 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1041 bool DstIsInt = DstType->isIntOrIntVectorTy();
1042
1043 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1044 // Unnecessary cast operation.
1045 V = SrcValue;
1046 } else if (SrcIsFloat && DstIsFloat) {
1047 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1048 } else if (SrcIsFloat && DstIsInt) {
1049 if (DstIsSigned) {
1050 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1051 } else {
1052 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1053 }
1054 } else if (SrcIsInt && DstIsFloat) {
1055 if (SrcIsSigned) {
1056 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1057 } else {
1058 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1059 }
1060 } else if (SrcIsInt && DstIsInt) {
1061 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1062 } else {
1063 // Not something we're supposed to handle, just move on
1064 }
1065
1066 return V;
1067 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001068}
1069
SJW2c317da2020-03-23 07:39:13 -05001070bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1071 bool is_mad) {
1072 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1073 Value *V = nullptr;
1074 // Get arguments
1075 auto AValue = CI->getOperand(0);
1076 auto BValue = CI->getOperand(1);
1077 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001078
SJW2c317da2020-03-23 07:39:13 -05001079 // Don't touch overloads that aren't in OpenCL C
1080 auto AType = AValue->getType();
1081 auto BType = BValue->getType();
1082 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001083
SJW2c317da2020-03-23 07:39:13 -05001084 if ((AType != BType) || (CI->getType() != AType) ||
1085 (is_mad && (AType != CType))) {
1086 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001087 }
1088
SJW2c317da2020-03-23 07:39:13 -05001089 if (!AType->isIntOrIntVectorTy()) {
1090 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001091 }
Kévin Petit8a560882019-03-21 15:24:34 +00001092
SJW2c317da2020-03-23 07:39:13 -05001093 if ((AType->getScalarSizeInBits() != 8) &&
1094 (AType->getScalarSizeInBits() != 16) &&
1095 (AType->getScalarSizeInBits() != 32) &&
1096 (AType->getScalarSizeInBits() != 64)) {
1097 return V;
1098 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001099
James Pricecf53df42020-04-20 14:41:24 -04001100 if (auto AVecType = dyn_cast<VectorType>(AType)) {
1101 unsigned NumElements = AVecType->getNumElements();
1102 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1103 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001104 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001105 }
1106 }
1107
SJW2c317da2020-03-23 07:39:13 -05001108 // Our SPIR-V op returns a struct, create a type for it
1109 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1110 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001111
SJW2c317da2020-03-23 07:39:13 -05001112 // Select the appropriate signed/unsigned SPIR-V op
1113 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1114
1115 // Call the SPIR-V op
1116 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1117 ExMulRetType, {AValue, BValue});
1118
1119 // Get the high part of the result
1120 unsigned Idxs[] = {1};
1121 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1122
1123 // If we're handling a mad_hi, add the third argument to the result
1124 if (is_mad) {
1125 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001126 }
1127
SJW2c317da2020-03-23 07:39:13 -05001128 return V;
1129 });
Kévin Petit8a560882019-03-21 15:24:34 +00001130}
1131
SJW2c317da2020-03-23 07:39:13 -05001132bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1133 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1134 // Get arguments
1135 auto FalseValue = CI->getOperand(0);
1136 auto TrueValue = CI->getOperand(1);
1137 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001138
SJW2c317da2020-03-23 07:39:13 -05001139 // Don't touch overloads that aren't in OpenCL C
1140 auto FalseType = FalseValue->getType();
1141 auto TrueType = TrueValue->getType();
1142 auto PredicateType = PredicateValue->getType();
1143
1144 if (FalseType != TrueType) {
1145 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001146 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001147
SJW2c317da2020-03-23 07:39:13 -05001148 if (!PredicateType->isIntOrIntVectorTy()) {
1149 return nullptr;
1150 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001151
SJW2c317da2020-03-23 07:39:13 -05001152 if (!FalseType->isIntOrIntVectorTy() &&
1153 !FalseType->getScalarType()->isFloatingPointTy()) {
1154 return nullptr;
1155 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001156
SJW2c317da2020-03-23 07:39:13 -05001157 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1158 return nullptr;
1159 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001160
SJW2c317da2020-03-23 07:39:13 -05001161 if (FalseType->getScalarSizeInBits() !=
1162 PredicateType->getScalarSizeInBits()) {
1163 return nullptr;
1164 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001165
James Pricecf53df42020-04-20 14:41:24 -04001166 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
1167 unsigned NumElements = FalseVecType->getNumElements();
1168 if (NumElements != cast<VectorType>(PredicateType)->getNumElements()) {
SJW2c317da2020-03-23 07:39:13 -05001169 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001170 }
1171
James Pricecf53df42020-04-20 14:41:24 -04001172 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1173 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001174 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001175 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001176 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001177
SJW2c317da2020-03-23 07:39:13 -05001178 // Create constant
1179 const auto ZeroValue = Constant::getNullValue(PredicateType);
1180
1181 // Scalar and vector are to be treated differently
1182 CmpInst::Predicate Pred;
1183 if (PredicateType->isVectorTy()) {
1184 Pred = CmpInst::ICMP_SLT;
1185 } else {
1186 Pred = CmpInst::ICMP_NE;
1187 }
1188
1189 // Create comparison instruction
1190 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1191 ZeroValue, "", CI);
1192
1193 // Create select
1194 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1195 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001196}
1197
SJW2c317da2020-03-23 07:39:13 -05001198bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1199 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1200 Value *V = nullptr;
1201 if (CI->getNumOperands() != 4) {
1202 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001203 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001204
SJW2c317da2020-03-23 07:39:13 -05001205 // Get arguments
1206 auto FalseValue = CI->getOperand(0);
1207 auto TrueValue = CI->getOperand(1);
1208 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001209
SJW2c317da2020-03-23 07:39:13 -05001210 // Don't touch overloads that aren't in OpenCL C
1211 auto FalseType = FalseValue->getType();
1212 auto TrueType = TrueValue->getType();
1213 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001214
SJW2c317da2020-03-23 07:39:13 -05001215 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1216 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001217 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001218
James Pricecf53df42020-04-20 14:41:24 -04001219 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001220 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1221 !TrueType->getScalarType()->isIntegerTy()) {
1222 return V;
1223 }
James Pricecf53df42020-04-20 14:41:24 -04001224 unsigned NumElements = TrueVecType->getNumElements();
1225 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1226 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001227 return V;
1228 }
1229 }
1230
1231 // Remember the type of the operands
1232 auto OpType = TrueType;
1233
1234 // The actual bit selection will always be done on an integer type,
1235 // declare it here
1236 Type *BitType;
1237
1238 // If the operands are float, then bitcast them to int
1239 if (OpType->getScalarType()->isFloatingPointTy()) {
1240
1241 // First create the new type
1242 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1243
1244 // Then bitcast all operands
1245 PredicateValue =
1246 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1247 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1248 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1249
1250 } else {
1251 // The operands have an integer type, use it directly
1252 BitType = OpType;
1253 }
1254
1255 // All the operands are now always integers
1256 // implement as (c & b) | (~c & a)
1257
1258 // Create our negated predicate value
1259 auto AllOnes = Constant::getAllOnesValue(BitType);
1260 auto NotPredicateValue = BinaryOperator::Create(
1261 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1262
1263 // Then put everything together
1264 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1265 FalseValue, "", CI);
1266 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1267 TrueValue, "", CI);
1268
1269 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1270
1271 // If we were dealing with a floating point type, we must bitcast
1272 // the result back to that
1273 if (OpType->getScalarType()->isFloatingPointTy()) {
1274 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1275 }
1276
1277 return V;
1278 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001279}
1280
SJW61531372020-06-09 07:31:08 -05001281bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001282 // convert to vector versions
1283 Module &M = *F.getParent();
1284 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1285 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1286 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001287
SJW2c317da2020-03-23 07:39:13 -05001288 // First figure out which function we're dealing with
1289 if (is_smooth) {
1290 ArgsToSplat.push_back(CI->getOperand(1));
1291 VectorArg = CI->getOperand(2);
1292 } else {
1293 VectorArg = CI->getOperand(1);
1294 }
1295
1296 // Splat arguments that need to be
1297 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001298 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001299
1300 for (auto arg : ArgsToSplat) {
1301 Value *NewVectorArg = UndefValue::get(VecType);
James Pricecf53df42020-04-20 14:41:24 -04001302 for (auto i = 0; i < VecType->getNumElements(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001303 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1304 NewVectorArg =
1305 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1306 }
1307 SplatArgs.push_back(NewVectorArg);
1308 }
1309
1310 // Replace the call with the vector/vector flavour
1311 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1312 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1313
SJW61531372020-06-09 07:31:08 -05001314 std::string NewFName = Builtins::GetMangledFunctionName(
1315 is_smooth ? "smoothstep" : "step", NewFType);
1316
SJW2c317da2020-03-23 07:39:13 -05001317 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1318
1319 SmallVector<Value *, 3> NewArgs;
1320 for (auto arg : SplatArgs) {
1321 NewArgs.push_back(arg);
1322 }
1323 NewArgs.push_back(VectorArg);
1324
1325 return CallInst::Create(NewF, NewArgs, "", CI);
1326 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001327}
1328
SJW2c317da2020-03-23 07:39:13 -05001329bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001330 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1331 auto Arg = CI->getOperand(0);
1332 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001333
SJW2c317da2020-03-23 07:39:13 -05001334 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001335
SJW2c317da2020-03-23 07:39:13 -05001336 return BinaryOperator::Create(Op, Bitcast,
1337 ConstantInt::get(CI->getType(), 31), "", CI);
1338 });
David Neto22f144c2017-06-12 14:26:21 -04001339}
1340
SJW2c317da2020-03-23 07:39:13 -05001341bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1342 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001343 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1344 // The multiply instruction to use.
1345 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001346
SJW2c317da2020-03-23 07:39:13 -05001347 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001348
SJW2c317da2020-03-23 07:39:13 -05001349 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1350 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001351
SJW2c317da2020-03-23 07:39:13 -05001352 if (is_mad) {
1353 // The add instruction to use.
1354 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001355
SJW2c317da2020-03-23 07:39:13 -05001356 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001357 }
David Neto22f144c2017-06-12 14:26:21 -04001358
SJW2c317da2020-03-23 07:39:13 -05001359 return V;
1360 });
David Neto22f144c2017-06-12 14:26:21 -04001361}
1362
SJW2c317da2020-03-23 07:39:13 -05001363bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001364 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1365 Value *V = nullptr;
1366 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001367
SJW2c317da2020-03-23 07:39:13 -05001368 auto data_type = data->getType();
1369 if (!data_type->isVectorTy())
1370 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001371
James Pricecf53df42020-04-20 14:41:24 -04001372 auto vec_data_type = cast<VectorType>(data_type);
1373
1374 auto elems = vec_data_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001375 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1376 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001377
SJW2c317da2020-03-23 07:39:13 -05001378 auto offset = CI->getOperand(1);
1379 auto ptr = CI->getOperand(2);
1380 auto ptr_type = ptr->getType();
1381 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001382 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001383 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001384
SJW2c317da2020-03-23 07:39:13 -05001385 // Avoid pointer casts. Instead generate the correct number of stores
1386 // and rely on drivers to coalesce appropriately.
1387 IRBuilder<> builder(CI);
1388 auto elems_const = builder.getInt32(elems);
1389 auto adjust = builder.CreateMul(offset, elems_const);
1390 for (auto i = 0; i < elems; ++i) {
1391 auto idx = builder.getInt32(i);
1392 auto add = builder.CreateAdd(adjust, idx);
1393 auto gep = builder.CreateGEP(ptr, add);
1394 auto extract = builder.CreateExtractElement(data, i);
1395 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001396 }
SJW2c317da2020-03-23 07:39:13 -05001397 return V;
1398 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001399}
1400
SJW2c317da2020-03-23 07:39:13 -05001401bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001402 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1403 Value *V = nullptr;
1404 auto ret_type = F.getReturnType();
1405 if (!ret_type->isVectorTy())
1406 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001407
James Pricecf53df42020-04-20 14:41:24 -04001408 auto vec_ret_type = cast<VectorType>(ret_type);
1409
1410 auto elems = vec_ret_type->getNumElements();
SJW2c317da2020-03-23 07:39:13 -05001411 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1412 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001413
SJW2c317da2020-03-23 07:39:13 -05001414 auto offset = CI->getOperand(0);
1415 auto ptr = CI->getOperand(1);
1416 auto ptr_type = ptr->getType();
1417 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001418 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001419 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001420
SJW2c317da2020-03-23 07:39:13 -05001421 // Avoid pointer casts. Instead generate the correct number of loads
1422 // and rely on drivers to coalesce appropriately.
1423 IRBuilder<> builder(CI);
1424 auto elems_const = builder.getInt32(elems);
1425 V = UndefValue::get(ret_type);
1426 auto adjust = builder.CreateMul(offset, elems_const);
1427 for (auto i = 0; i < elems; ++i) {
1428 auto idx = builder.getInt32(i);
1429 auto add = builder.CreateAdd(adjust, idx);
1430 auto gep = builder.CreateGEP(ptr, add);
1431 auto load = builder.CreateLoad(gep);
1432 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001433 }
SJW2c317da2020-03-23 07:39:13 -05001434 return V;
1435 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001436}
1437
SJW2c317da2020-03-23 07:39:13 -05001438bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1439 const std::string &name,
1440 int vec_size) {
1441 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1442 if (!vec_size) {
1443 // deduce vec_size from last character of name (e.g. vload_half4)
1444 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001445 }
SJW2c317da2020-03-23 07:39:13 -05001446 switch (vec_size) {
1447 case 2:
1448 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1449 case 4:
1450 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1451 case 0:
1452 if (!is_clspv_version) {
1453 return replaceVloadHalf(F);
1454 }
1455 default:
1456 llvm_unreachable("Unsupported vload_half vector size");
1457 break;
1458 }
1459 return false;
David Neto22f144c2017-06-12 14:26:21 -04001460}
1461
SJW2c317da2020-03-23 07:39:13 -05001462bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1463 Module &M = *F.getParent();
1464 return replaceCallsWithValue(F, [&](CallInst *CI) {
1465 // The index argument from vload_half.
1466 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001467
SJW2c317da2020-03-23 07:39:13 -05001468 // The pointer argument from vload_half.
1469 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001470
SJW2c317da2020-03-23 07:39:13 -05001471 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001472 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001473 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1474
1475 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001476 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001477
1478 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1479
1480 Value *V = nullptr;
1481
alan-baker7efcaaa2020-05-06 19:33:27 -04001482 bool supports_16bit_storage = true;
1483 switch (Arg1->getType()->getPointerAddressSpace()) {
1484 case clspv::AddressSpace::Global:
1485 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1486 clspv::Option::StorageClass::kSSBO);
1487 break;
1488 case clspv::AddressSpace::Constant:
1489 if (clspv::Option::ConstantArgsInUniformBuffer())
1490 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1491 clspv::Option::StorageClass::kUBO);
1492 else
1493 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1494 clspv::Option::StorageClass::kSSBO);
1495 break;
1496 default:
1497 // Clspv will emit the Float16 capability if the half type is
1498 // encountered. That capability covers private and local addressspaces.
1499 break;
1500 }
1501
1502 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001503 auto ShortTy = Type::getInt16Ty(M.getContext());
1504 auto ShortPointerTy =
1505 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1506
1507 // Cast the half* pointer to short*.
1508 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1509
1510 // Index into the correct address of the casted pointer.
1511 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1512
1513 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001514 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001515
1516 // ZExt the short -> int.
1517 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1518
1519 // Get our float2.
1520 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1521
1522 // Extract out the bottom element which is our float result.
1523 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1524 } else {
1525 // Assume the pointer argument points to storage aligned to 32bits
1526 // or more.
1527 // TODO(dneto): Do more analysis to make sure this is true?
1528 //
1529 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1530 // with:
1531 //
1532 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1533 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1534 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1535 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1536 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1537 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1538 // x float> %converted, %index_is_odd32
1539
1540 auto IntPointerTy =
1541 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1542
1543 // Cast the base pointer to int*.
1544 // In a valid call (according to assumptions), this should get
1545 // optimized away in the simplify GEP pass.
1546 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1547
1548 auto One = ConstantInt::get(IntTy, 1);
1549 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1550 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1551
1552 // Index into the correct address of the casted pointer.
1553 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1554
1555 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001556 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001557
1558 // Get our float2.
1559 auto Call = CallInst::Create(NewF, Load, "", CI);
1560
1561 // Extract out the float result, where the element number is
1562 // determined by whether the original index was even or odd.
1563 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1564 }
1565 return V;
1566 });
1567}
1568
1569bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1570 Module &M = *F.getParent();
1571 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001572 // The index argument from vload_half.
1573 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001574
Kévin Petite8edce32019-04-10 14:23:32 +01001575 // The pointer argument from vload_half.
1576 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001577
Kévin Petite8edce32019-04-10 14:23:32 +01001578 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001579 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001580 auto NewPointerTy =
1581 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001582 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001583
Kévin Petite8edce32019-04-10 14:23:32 +01001584 // Cast the half* pointer to int*.
1585 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001586
Kévin Petite8edce32019-04-10 14:23:32 +01001587 // Index into the correct address of the casted pointer.
1588 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001589
Kévin Petite8edce32019-04-10 14:23:32 +01001590 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001591 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001592
Kévin Petite8edce32019-04-10 14:23:32 +01001593 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001594 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001595
Kévin Petite8edce32019-04-10 14:23:32 +01001596 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001597
Kévin Petite8edce32019-04-10 14:23:32 +01001598 // Get our float2.
1599 return CallInst::Create(NewF, Load, "", CI);
1600 });
David Neto22f144c2017-06-12 14:26:21 -04001601}
1602
SJW2c317da2020-03-23 07:39:13 -05001603bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1604 Module &M = *F.getParent();
1605 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001606 // The index argument from vload_half.
1607 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001608
Kévin Petite8edce32019-04-10 14:23:32 +01001609 // The pointer argument from vload_half.
1610 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001611
Kévin Petite8edce32019-04-10 14:23:32 +01001612 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001613 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1614 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001615 auto NewPointerTy =
1616 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001617 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001618
Kévin Petite8edce32019-04-10 14:23:32 +01001619 // Cast the half* pointer to int2*.
1620 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001621
Kévin Petite8edce32019-04-10 14:23:32 +01001622 // Index into the correct address of the casted pointer.
1623 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001624
Kévin Petite8edce32019-04-10 14:23:32 +01001625 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001626 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001627
Kévin Petite8edce32019-04-10 14:23:32 +01001628 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001629 auto X =
1630 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1631 auto Y =
1632 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001633
Kévin Petite8edce32019-04-10 14:23:32 +01001634 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001635 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001636
Kévin Petite8edce32019-04-10 14:23:32 +01001637 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001638
Kévin Petite8edce32019-04-10 14:23:32 +01001639 // Get the lower (x & y) components of our final float4.
1640 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001641
Kévin Petite8edce32019-04-10 14:23:32 +01001642 // Get the higher (z & w) components of our final float4.
1643 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001644
Kévin Petite8edce32019-04-10 14:23:32 +01001645 Constant *ShuffleMask[4] = {
1646 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1647 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001648
Kévin Petite8edce32019-04-10 14:23:32 +01001649 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001650 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1651 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001652 });
David Neto22f144c2017-06-12 14:26:21 -04001653}
1654
SJW2c317da2020-03-23 07:39:13 -05001655bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001656
1657 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1658 //
1659 // %u = load i32 %ptr
1660 // %fxy = call <2 x float> Unpack2xHalf(u)
1661 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001662 Module &M = *F.getParent();
1663 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001664 auto Index = CI->getOperand(0);
1665 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001666
Kévin Petite8edce32019-04-10 14:23:32 +01001667 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001668 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001669 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001670
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001671 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001672 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001673
Kévin Petite8edce32019-04-10 14:23:32 +01001674 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001675 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001676
Kévin Petite8edce32019-04-10 14:23:32 +01001677 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001678
Kévin Petite8edce32019-04-10 14:23:32 +01001679 // Get our final float2.
1680 return CallInst::Create(NewF, Load, "", CI);
1681 });
David Neto6ad93232018-06-07 15:42:58 -07001682}
1683
SJW2c317da2020-03-23 07:39:13 -05001684bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001685
1686 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1687 //
1688 // %u2 = load <2 x i32> %ptr
1689 // %u2xy = extractelement %u2, 0
1690 // %u2zw = extractelement %u2, 1
1691 // %fxy = call <2 x float> Unpack2xHalf(uint)
1692 // %fzw = call <2 x float> Unpack2xHalf(uint)
1693 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001694 Module &M = *F.getParent();
1695 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001696 auto Index = CI->getOperand(0);
1697 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001698
Kévin Petite8edce32019-04-10 14:23:32 +01001699 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001700 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1701 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001702 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001703
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001704 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001705 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001706
Kévin Petite8edce32019-04-10 14:23:32 +01001707 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001708 auto X =
1709 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1710 auto Y =
1711 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001712
Kévin Petite8edce32019-04-10 14:23:32 +01001713 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001714 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001715
Kévin Petite8edce32019-04-10 14:23:32 +01001716 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001717
Kévin Petite8edce32019-04-10 14:23:32 +01001718 // Get the lower (x & y) components of our final float4.
1719 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001720
Kévin Petite8edce32019-04-10 14:23:32 +01001721 // Get the higher (z & w) components of our final float4.
1722 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001723
Kévin Petite8edce32019-04-10 14:23:32 +01001724 Constant *ShuffleMask[4] = {
1725 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1726 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001727
Kévin Petite8edce32019-04-10 14:23:32 +01001728 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001729 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1730 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001731 });
David Neto6ad93232018-06-07 15:42:58 -07001732}
1733
SJW2c317da2020-03-23 07:39:13 -05001734bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1735 switch (vec_size) {
1736 case 0:
1737 return replaceVstoreHalf(F);
1738 case 2:
1739 return replaceVstoreHalf2(F);
1740 case 4:
1741 return replaceVstoreHalf4(F);
1742 default:
1743 llvm_unreachable("Unsupported vstore_half vector size");
1744 break;
1745 }
1746 return false;
1747}
David Neto22f144c2017-06-12 14:26:21 -04001748
SJW2c317da2020-03-23 07:39:13 -05001749bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1750 Module &M = *F.getParent();
1751 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001752 // The value to store.
1753 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001754
Kévin Petite8edce32019-04-10 14:23:32 +01001755 // The index argument from vstore_half.
1756 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001757
Kévin Petite8edce32019-04-10 14:23:32 +01001758 // The pointer argument from vstore_half.
1759 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001760
Kévin Petite8edce32019-04-10 14:23:32 +01001761 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001762 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001763 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1764 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001765
Kévin Petite8edce32019-04-10 14:23:32 +01001766 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001767 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001768
Kévin Petite8edce32019-04-10 14:23:32 +01001769 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001770
Kévin Petite8edce32019-04-10 14:23:32 +01001771 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001772 auto TempVec = InsertElementInst::Create(
1773 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001774
Kévin Petite8edce32019-04-10 14:23:32 +01001775 // Pack the float2 -> half2 (in an int).
1776 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001777
alan-baker7efcaaa2020-05-06 19:33:27 -04001778 bool supports_16bit_storage = true;
1779 switch (Arg2->getType()->getPointerAddressSpace()) {
1780 case clspv::AddressSpace::Global:
1781 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1782 clspv::Option::StorageClass::kSSBO);
1783 break;
1784 case clspv::AddressSpace::Constant:
1785 if (clspv::Option::ConstantArgsInUniformBuffer())
1786 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1787 clspv::Option::StorageClass::kUBO);
1788 else
1789 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1790 clspv::Option::StorageClass::kSSBO);
1791 break;
1792 default:
1793 // Clspv will emit the Float16 capability if the half type is
1794 // encountered. That capability covers private and local addressspaces.
1795 break;
1796 }
1797
SJW2c317da2020-03-23 07:39:13 -05001798 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04001799 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01001800 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001801 auto ShortPointerTy =
1802 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001803
Kévin Petite8edce32019-04-10 14:23:32 +01001804 // Truncate our i32 to an i16.
1805 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001806
Kévin Petite8edce32019-04-10 14:23:32 +01001807 // Cast the half* pointer to short*.
1808 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001809
Kévin Petite8edce32019-04-10 14:23:32 +01001810 // Index into the correct address of the casted pointer.
1811 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001812
Kévin Petite8edce32019-04-10 14:23:32 +01001813 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05001814 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001815 } else {
1816 // We can only write to 32-bit aligned words.
1817 //
1818 // Assuming base is aligned to 32-bits, replace the equivalent of
1819 // vstore_half(value, index, base)
1820 // with:
1821 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1822 // uint32_t write_to_upper_half = index & 1u;
1823 // uint32_t shift = write_to_upper_half << 4;
1824 //
1825 // // Pack the float value as a half number in bottom 16 bits
1826 // // of an i32.
1827 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1828 //
1829 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1830 // ^ ((packed & 0xffff) << shift)
1831 // // We only need relaxed consistency, but OpenCL 1.2 only has
1832 // // sequentially consistent atomics.
1833 // // TODO(dneto): Use relaxed consistency.
1834 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001835 auto IntPointerTy =
1836 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001837
Kévin Petite8edce32019-04-10 14:23:32 +01001838 auto Four = ConstantInt::get(IntTy, 4);
1839 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04001840
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001841 auto IndexIsOdd =
1842 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001843 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001844 auto IndexIntoI32 =
1845 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1846 auto BaseI32Ptr =
1847 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1848 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
1849 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001850 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001851 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001852 auto MaskBitsToWrite =
1853 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1854 auto MaskedCurrent = BinaryOperator::CreateAnd(
1855 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04001856
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001857 auto XLowerBits =
1858 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1859 auto NewBitsToWrite =
1860 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1861 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
1862 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04001863
Kévin Petite8edce32019-04-10 14:23:32 +01001864 // Generate the call to atomi_xor.
1865 SmallVector<Type *, 5> ParamTypes;
1866 // The pointer type.
1867 ParamTypes.push_back(IntPointerTy);
1868 // The Types for memory scope, semantics, and value.
1869 ParamTypes.push_back(IntTy);
1870 ParamTypes.push_back(IntTy);
1871 ParamTypes.push_back(IntTy);
1872 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1873 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04001874
Kévin Petite8edce32019-04-10 14:23:32 +01001875 const auto ConstantScopeDevice =
1876 ConstantInt::get(IntTy, spv::ScopeDevice);
1877 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1878 // (SPIR-V Workgroup).
1879 const auto AddrSpaceSemanticsBits =
1880 IntPointerTy->getPointerAddressSpace() == 1
1881 ? spv::MemorySemanticsUniformMemoryMask
1882 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04001883
Kévin Petite8edce32019-04-10 14:23:32 +01001884 // We're using relaxed consistency here.
1885 const auto ConstantMemorySemantics =
1886 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1887 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04001888
Kévin Petite8edce32019-04-10 14:23:32 +01001889 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1890 ConstantMemorySemantics, ValueToXor};
1891 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05001892
1893 // Return a Nop so the old Call is removed
1894 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
1895 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001896 }
David Neto22f144c2017-06-12 14:26:21 -04001897
SJW2c317da2020-03-23 07:39:13 -05001898 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01001899 });
David Neto22f144c2017-06-12 14:26:21 -04001900}
1901
SJW2c317da2020-03-23 07:39:13 -05001902bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
1903 Module &M = *F.getParent();
1904 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001905 // The value to store.
1906 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001907
Kévin Petite8edce32019-04-10 14:23:32 +01001908 // The index argument from vstore_half.
1909 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001910
Kévin Petite8edce32019-04-10 14:23:32 +01001911 // The pointer argument from vstore_half.
1912 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001913
Kévin Petite8edce32019-04-10 14:23:32 +01001914 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001915 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001916 auto NewPointerTy =
1917 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001918 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001919
Kévin Petite8edce32019-04-10 14:23:32 +01001920 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001921 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001922
Kévin Petite8edce32019-04-10 14:23:32 +01001923 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001924
Kévin Petite8edce32019-04-10 14:23:32 +01001925 // Turn the packed x & y into the final packing.
1926 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001927
Kévin Petite8edce32019-04-10 14:23:32 +01001928 // Cast the half* pointer to int*.
1929 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001930
Kévin Petite8edce32019-04-10 14:23:32 +01001931 // Index into the correct address of the casted pointer.
1932 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001933
Kévin Petite8edce32019-04-10 14:23:32 +01001934 // Store to the int* we casted to.
1935 return new StoreInst(X, Index, CI);
1936 });
David Neto22f144c2017-06-12 14:26:21 -04001937}
1938
SJW2c317da2020-03-23 07:39:13 -05001939bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
1940 Module &M = *F.getParent();
1941 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001942 // The value to store.
1943 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001944
Kévin Petite8edce32019-04-10 14:23:32 +01001945 // The index argument from vstore_half.
1946 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001947
Kévin Petite8edce32019-04-10 14:23:32 +01001948 // The pointer argument from vstore_half.
1949 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001950
Kévin Petite8edce32019-04-10 14:23:32 +01001951 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001952 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1953 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001954 auto NewPointerTy =
1955 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001956 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001957
Kévin Petite8edce32019-04-10 14:23:32 +01001958 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
1959 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04001960
Kévin Petite8edce32019-04-10 14:23:32 +01001961 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001962 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1963 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001964
Kévin Petite8edce32019-04-10 14:23:32 +01001965 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
1966 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001967
Kévin Petite8edce32019-04-10 14:23:32 +01001968 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001969 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
1970 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001971
Kévin Petite8edce32019-04-10 14:23:32 +01001972 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001973 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001974
Kévin Petite8edce32019-04-10 14:23:32 +01001975 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001976
Kévin Petite8edce32019-04-10 14:23:32 +01001977 // Turn the packed x & y into the final component of our int2.
1978 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001979
Kévin Petite8edce32019-04-10 14:23:32 +01001980 // Turn the packed z & w into the final component of our int2.
1981 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001982
Kévin Petite8edce32019-04-10 14:23:32 +01001983 auto Combine = InsertElementInst::Create(
1984 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001985 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
1986 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001987
Kévin Petite8edce32019-04-10 14:23:32 +01001988 // Cast the half* pointer to int2*.
1989 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001990
Kévin Petite8edce32019-04-10 14:23:32 +01001991 // Index into the correct address of the casted pointer.
1992 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001993
Kévin Petite8edce32019-04-10 14:23:32 +01001994 // Store to the int2* we casted to.
1995 return new StoreInst(Combine, Index, CI);
1996 });
David Neto22f144c2017-06-12 14:26:21 -04001997}
1998
SJW2c317da2020-03-23 07:39:13 -05001999bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2000 // convert half to float
2001 Module &M = *F.getParent();
2002 return replaceCallsWithValue(F, [&](CallInst *CI) {
2003 SmallVector<Type *, 3> types;
2004 SmallVector<Value *, 3> args;
2005 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2006 types.push_back(CI->getArgOperand(i)->getType());
2007 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002008 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002009
SJW2c317da2020-03-23 07:39:13 -05002010 auto NewFType = FunctionType::get(
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002011 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2012 cast<VectorType>(CI->getType())->getNumElements()),
SJW2c317da2020-03-23 07:39:13 -05002013 types, false);
2014
SJW61531372020-06-09 07:31:08 -05002015 std::string NewFName =
2016 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002017
2018 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2019
2020 auto NewCI = CallInst::Create(NewF, args, "", CI);
2021
2022 // Convert to the half type.
2023 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2024 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002025}
2026
SJW2c317da2020-03-23 07:39:13 -05002027bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2028 // convert half to float
2029 Module &M = *F.getParent();
2030 return replaceCallsWithValue(F, [&](CallInst *CI) {
2031 SmallVector<Type *, 3> types(3);
2032 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002033
SJW2c317da2020-03-23 07:39:13 -05002034 // Image
2035 types[0] = CI->getArgOperand(0)->getType();
2036 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002037
SJW2c317da2020-03-23 07:39:13 -05002038 // Coord
2039 types[1] = CI->getArgOperand(1)->getType();
2040 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002041
SJW2c317da2020-03-23 07:39:13 -05002042 // Data
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002043 types[2] = FixedVectorType::get(
SJW2c317da2020-03-23 07:39:13 -05002044 Type::getFloatTy(M.getContext()),
James Pricecf53df42020-04-20 14:41:24 -04002045 cast<VectorType>(CI->getArgOperand(2)->getType())->getNumElements());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002046
SJW2c317da2020-03-23 07:39:13 -05002047 auto NewFType =
2048 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002049
SJW61531372020-06-09 07:31:08 -05002050 std::string NewFName =
2051 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002052
SJW2c317da2020-03-23 07:39:13 -05002053 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002054
SJW2c317da2020-03-23 07:39:13 -05002055 // Convert data to the float type.
2056 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2057 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002058
SJW2c317da2020-03-23 07:39:13 -05002059 return CallInst::Create(NewF, args, "", CI);
2060 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002061}
2062
SJW2c317da2020-03-23 07:39:13 -05002063bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2064 Function &F) {
2065 // convert read_image with int coords to float coords
2066 Module &M = *F.getParent();
2067 return replaceCallsWithValue(F, [&](CallInst *CI) {
2068 // The image.
2069 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002070
SJW2c317da2020-03-23 07:39:13 -05002071 // The sampler.
2072 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002073
SJW2c317da2020-03-23 07:39:13 -05002074 // The coordinate (integer type that we can't handle).
2075 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002076
SJW2c317da2020-03-23 07:39:13 -05002077 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2078 uint32_t components =
2079 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2080 Type *float_ty = nullptr;
2081 if (components == 1) {
2082 float_ty = Type::getFloatTy(M.getContext());
2083 } else {
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002084 float_ty = FixedVectorType::get(
2085 Type::getFloatTy(M.getContext()),
2086 cast<VectorType>(Arg2->getType())->getNumElements());
David Neto22f144c2017-06-12 14:26:21 -04002087 }
David Neto22f144c2017-06-12 14:26:21 -04002088
SJW2c317da2020-03-23 07:39:13 -05002089 auto NewFType = FunctionType::get(
2090 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2091
2092 std::string NewFName = F.getName().str();
2093 NewFName[NewFName.length() - 1] = 'f';
2094
2095 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2096
2097 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2098
2099 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2100 });
David Neto22f144c2017-06-12 14:26:21 -04002101}
2102
SJW2c317da2020-03-23 07:39:13 -05002103bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2104 return replaceCallsWithValue(F, [&](CallInst *CI) {
2105 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002106
SJW2c317da2020-03-23 07:39:13 -05002107 // We need to map the OpenCL constants to the SPIR-V equivalents.
2108 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2109 const auto ConstantMemorySemantics = ConstantInt::get(
2110 IntTy, spv::MemorySemanticsUniformMemoryMask |
2111 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002112
SJW2c317da2020-03-23 07:39:13 -05002113 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002114
SJW2c317da2020-03-23 07:39:13 -05002115 // The pointer.
2116 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002117
SJW2c317da2020-03-23 07:39:13 -05002118 // The memory scope.
2119 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002120
SJW2c317da2020-03-23 07:39:13 -05002121 // The memory semantics.
2122 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002123
SJW2c317da2020-03-23 07:39:13 -05002124 if (2 < CI->getNumArgOperands()) {
2125 // The unequal memory semantics.
2126 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002127
SJW2c317da2020-03-23 07:39:13 -05002128 // The value.
2129 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002130
SJW2c317da2020-03-23 07:39:13 -05002131 // The comparator.
2132 Params.push_back(CI->getArgOperand(1));
2133 } else if (1 < CI->getNumArgOperands()) {
2134 // The value.
2135 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002136 }
David Neto22f144c2017-06-12 14:26:21 -04002137
SJW2c317da2020-03-23 07:39:13 -05002138 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2139 });
David Neto22f144c2017-06-12 14:26:21 -04002140}
2141
SJW2c317da2020-03-23 07:39:13 -05002142bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2143 llvm::AtomicRMWInst::BinOp Op) {
2144 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002145 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2146 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002147 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002148 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002149 SyncScope::System, CI);
2150 });
2151}
David Neto22f144c2017-06-12 14:26:21 -04002152
SJW2c317da2020-03-23 07:39:13 -05002153bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2154 Module &M = *F.getParent();
2155 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002156 auto IntTy = Type::getInt32Ty(M.getContext());
2157 auto FloatTy = Type::getFloatTy(M.getContext());
2158
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002159 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2160 ConstantInt::get(IntTy, 1),
2161 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002162
2163 Constant *UpShuffleMask[4] = {
2164 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2165 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2166
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002167 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2168 UndefValue::get(FloatTy),
2169 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002170
Kévin Petite8edce32019-04-10 14:23:32 +01002171 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002172 auto Arg0 =
2173 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2174 ConstantVector::get(DownShuffleMask), "", CI);
2175 auto Arg1 =
2176 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2177 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002178 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002179
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002180 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002181 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002182
SJW61531372020-06-09 07:31:08 -05002183 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002184
Kévin Petite8edce32019-04-10 14:23:32 +01002185 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002186
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002187 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2188 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002189 });
David Neto22f144c2017-06-12 14:26:21 -04002190}
David Neto62653202017-10-16 19:05:18 -04002191
SJW2c317da2020-03-23 07:39:13 -05002192bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002193 // OpenCL's float result = fract(float x, float* ptr)
2194 //
2195 // In the LLVM domain:
2196 //
2197 // %floor_result = call spir_func float @floor(float %x)
2198 // store float %floor_result, float * %ptr
2199 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2200 // %result = call spir_func float
2201 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2202 //
2203 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2204 // and clspv.fract occur in the SPIR-V generator pass:
2205 //
2206 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2207 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2208 // ...
2209 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2210 // OpStore %ptr %floor_result
2211 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2212 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002213 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002214
David Neto62653202017-10-16 19:05:18 -04002215 using std::string;
2216
2217 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2218 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002219
SJW2c317da2020-03-23 07:39:13 -05002220 Module &M = *F.getParent();
2221 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002222
SJW2c317da2020-03-23 07:39:13 -05002223 // This is either float or a float vector. All the float-like
2224 // types are this type.
2225 auto result_ty = F.getReturnType();
2226
SJW61531372020-06-09 07:31:08 -05002227 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002228 Function *fmin_fn = M.getFunction(fmin_name);
2229 if (!fmin_fn) {
2230 // Make the fmin function.
2231 FunctionType *fn_ty =
2232 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2233 fmin_fn =
2234 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2235 fmin_fn->addFnAttr(Attribute::ReadNone);
2236 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2237 }
2238
SJW61531372020-06-09 07:31:08 -05002239 std::string floor_name =
2240 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002241 Function *floor_fn = M.getFunction(floor_name);
2242 if (!floor_fn) {
2243 // Make the floor function.
2244 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2245 floor_fn =
2246 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2247 floor_fn->addFnAttr(Attribute::ReadNone);
2248 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2249 }
2250
SJW61531372020-06-09 07:31:08 -05002251 std::string clspv_fract_name =
2252 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002253 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2254 if (!clspv_fract_fn) {
2255 // Make the clspv_fract function.
2256 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2257 clspv_fract_fn = cast<Function>(
2258 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2259 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2260 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2261 }
2262
2263 // Number of significant significand bits, whether represented or not.
2264 unsigned num_significand_bits;
2265 switch (result_ty->getScalarType()->getTypeID()) {
2266 case Type::HalfTyID:
2267 num_significand_bits = 11;
2268 break;
2269 case Type::FloatTyID:
2270 num_significand_bits = 24;
2271 break;
2272 case Type::DoubleTyID:
2273 num_significand_bits = 53;
2274 break;
2275 default:
2276 llvm_unreachable("Unhandled float type when processing fract builtin");
2277 break;
2278 }
2279 // Beware that the disassembler displays this value as
2280 // OpConstant %float 1
2281 // which is not quite right.
2282 const double kJustUnderOneScalar =
2283 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2284
2285 Constant *just_under_one =
2286 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2287 if (result_ty->isVectorTy()) {
2288 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002289 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002290 }
2291
2292 IRBuilder<> Builder(CI);
2293
2294 auto arg = CI->getArgOperand(0);
2295 auto ptr = CI->getArgOperand(1);
2296
2297 // Compute floor result and store it.
2298 auto floor = Builder.CreateCall(floor_fn, {arg});
2299 Builder.CreateStore(floor, ptr);
2300
2301 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2302 auto fract_result =
2303 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2304
2305 return fract_result;
2306 });
David Neto62653202017-10-16 19:05:18 -04002307}