blob: e3080e2946395593815a68c3f3b7e9adc5027c28 [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
alan-bakere0902602020-03-23 08:43:40 -040030#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
SJW2c317da2020-03-23 07:39:13 -050035#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
38#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050039#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040
SJW2c317da2020-03-23 07:39:13 -050041using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040042using namespace llvm;
43
44#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
45
46namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)). Both 0 and 1 yield 0.
//
// NOTE: despite the name, this is not a count of leading zeros. The callers
// in this file subtract two results to obtain the shift distance between two
// single-bit masks.
uint32_t clz(uint32_t v) {
  uint32_t msb_index = 0;

  // Binary search over bit positions: whenever a bit is set at or above
  // |width|, drop the low |width| bits and record that jump in the result.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      msb_index |= width;
    }
  }

  return msb_index;
}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040071 IntTy = FixedVectorType::get(IntTy,
72 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
SJW2c317da2020-03-23 07:39:13 -050077bool replaceCallsWithValue(Function &F,
78 std::function<Value *(CallInst *)> Replacer) {
79
80 bool Changed = false;
81
82 SmallVector<Instruction *, 4> ToRemoves;
83
84 // Walk the users of the function.
85 for (auto &U : F.uses()) {
86 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
87
88 auto NewValue = Replacer(CI);
89
90 if (NewValue != nullptr) {
91 CI->replaceAllUsesWith(NewValue);
92
93 // Lastly, remember to remove the user.
94 ToRemoves.push_back(CI);
95 }
96 }
97 }
98
99 Changed = !ToRemoves.empty();
100
101 // And cleanup the calls we don't use anymore.
102 for (auto V : ToRemoves) {
103 V->eraseFromParent();
104 }
105
106 return Changed;
107}
108
David Neto22f144c2017-06-12 14:26:21 -0400109struct ReplaceOpenCLBuiltinPass final : public ModulePass {
110 static char ID;
111 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
112
113 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500114 bool runOnFunction(Function &F);
115 bool replaceAbs(Function &F);
116 bool replaceAbsDiff(Function &F, bool is_signed);
117 bool replaceCopysign(Function &F);
118 bool replaceRecip(Function &F);
119 bool replaceDivide(Function &F);
120 bool replaceDot(Function &F);
121 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500122 bool replaceExp10(Function &F, const std::string &basename);
123 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100124 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400125 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500126 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100127 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500128 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
129 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
130 bool replaceIsFinite(Function &F);
131 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
132 bool replaceUpsample(Function &F);
133 bool replaceRotate(Function &F);
134 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
135 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
136 bool replaceSelect(Function &F);
137 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500138 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500139 bool replaceSignbit(Function &F, bool is_vec);
140 bool replaceMul(Function &F, bool is_float, bool is_mad);
141 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
142 bool replaceVloadHalf(Function &F);
143 bool replaceVloadHalf2(Function &F);
144 bool replaceVloadHalf4(Function &F);
145 bool replaceClspvVloadaHalf2(Function &F);
146 bool replaceClspvVloadaHalf4(Function &F);
147 bool replaceVstoreHalf(Function &F, int vec_size);
148 bool replaceVstoreHalf(Function &F);
149 bool replaceVstoreHalf2(Function &F);
150 bool replaceVstoreHalf4(Function &F);
151 bool replaceHalfReadImage(Function &F);
152 bool replaceHalfWriteImage(Function &F);
153 bool replaceSampledReadImageWithIntCoords(Function &F);
154 bool replaceAtomics(Function &F, spv::Op Op);
155 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
156 bool replaceCross(Function &F);
157 bool replaceFract(Function &F, int vec_size);
158 bool replaceVload(Function &F);
159 bool replaceVstore(Function &F);
David Neto22f144c2017-06-12 14:26:21 -0400160};
SJW2c317da2020-03-23 07:39:13 -0500161
Kévin Petit91bc72e2019-04-08 15:17:46 +0100162} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400163
164char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400165INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
166 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400167
168namespace clspv {
169ModulePass *createReplaceOpenCLBuiltinPass() {
170 return new ReplaceOpenCLBuiltinPass();
171}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400172} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400173
174bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500175 std::list<Function *> func_list;
176 for (auto &F : M.getFunctionList()) {
177 // process only function declarations
178 if (F.isDeclaration() && runOnFunction(F)) {
179 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000180 }
181 }
SJW2c317da2020-03-23 07:39:13 -0500182 if (func_list.size() != 0) {
183 // recursively convert functions, but first remove dead
184 for (auto *F : func_list) {
185 if (F->use_empty()) {
186 F->eraseFromParent();
187 }
188 }
189 runOnModule(M);
190 return true;
191 }
192 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000193}
194
SJW2c317da2020-03-23 07:39:13 -0500195bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
196 auto &FI = Builtins::Lookup(&F);
197 switch (FI.getType()) {
198 case Builtins::kAbs:
199 if (!FI.getParameter(0).is_signed) {
200 return replaceAbs(F);
201 }
202 break;
203 case Builtins::kAbsDiff:
204 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
205 case Builtins::kCopysign:
206 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100207
SJW2c317da2020-03-23 07:39:13 -0500208 case Builtins::kHalfRecip:
209 case Builtins::kNativeRecip:
210 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100211
SJW2c317da2020-03-23 07:39:13 -0500212 case Builtins::kHalfDivide:
213 case Builtins::kNativeDivide:
214 return replaceDivide(F);
215
216 case Builtins::kDot:
217 return replaceDot(F);
218
219 case Builtins::kExp10:
220 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500221 case Builtins::kNativeExp10:
222 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500223
224 case Builtins::kLog10:
225 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500226 case Builtins::kNativeLog10:
227 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500228
gnl21636e7992020-09-09 16:08:16 +0100229 case Builtins::kLog1p:
230 return replaceLog1p(F);
231
SJW2c317da2020-03-23 07:39:13 -0500232 case Builtins::kFmod:
233 return replaceFmod(F);
234
235 case Builtins::kBarrier:
236 case Builtins::kWorkGroupBarrier:
237 return replaceBarrier(F);
238
alan-baker12d2c182020-07-20 08:22:42 -0400239 case Builtins::kSubGroupBarrier:
240 return replaceBarrier(F, true);
241
SJW2c317da2020-03-23 07:39:13 -0500242 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400243 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500244 case Builtins::kReadMemFence:
245 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
246 case Builtins::kWriteMemFence:
247 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
248
249 // Relational
250 case Builtins::kIsequal:
251 return replaceRelational(F, CmpInst::FCMP_OEQ,
252 FI.getParameter(0).vector_size ? -1 : 1);
253 case Builtins::kIsgreater:
254 return replaceRelational(F, CmpInst::FCMP_OGT,
255 FI.getParameter(0).vector_size ? -1 : 1);
256 case Builtins::kIsgreaterequal:
257 return replaceRelational(F, CmpInst::FCMP_OGE,
258 FI.getParameter(0).vector_size ? -1 : 1);
259 case Builtins::kIsless:
260 return replaceRelational(F, CmpInst::FCMP_OLT,
261 FI.getParameter(0).vector_size ? -1 : 1);
262 case Builtins::kIslessequal:
263 return replaceRelational(F, CmpInst::FCMP_OLE,
264 FI.getParameter(0).vector_size ? -1 : 1);
265 case Builtins::kIsnotequal:
266 return replaceRelational(F, CmpInst::FCMP_ONE,
267 FI.getParameter(0).vector_size ? -1 : 1);
268
269 case Builtins::kIsinf: {
270 bool is_vec = FI.getParameter(0).vector_size != 0;
271 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
272 }
273 case Builtins::kIsnan: {
274 bool is_vec = FI.getParameter(0).vector_size != 0;
275 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
276 }
277
278 case Builtins::kIsfinite:
279 return replaceIsFinite(F);
280
281 case Builtins::kAll: {
282 bool is_vec = FI.getParameter(0).vector_size != 0;
283 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
284 }
285 case Builtins::kAny: {
286 bool is_vec = FI.getParameter(0).vector_size != 0;
287 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
288 }
289
290 case Builtins::kUpsample:
291 return replaceUpsample(F);
292
293 case Builtins::kRotate:
294 return replaceRotate(F);
295
296 case Builtins::kConvert:
297 return replaceConvert(F, FI.getParameter(0).is_signed,
298 FI.getReturnType().is_signed);
299
300 case Builtins::kAtomicInc:
301 return replaceAtomics(F, spv::OpAtomicIIncrement);
302 case Builtins::kAtomicDec:
303 return replaceAtomics(F, spv::OpAtomicIDecrement);
304 case Builtins::kAtomicCmpxchg:
305 return replaceAtomics(F, spv::OpAtomicCompareExchange);
306 case Builtins::kAtomicAdd:
307 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
308 case Builtins::kAtomicSub:
309 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
310 case Builtins::kAtomicXchg:
311 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
312 case Builtins::kAtomicMin:
313 return replaceAtomics(F, FI.getParameter(0).is_signed
314 ? llvm::AtomicRMWInst::Min
315 : llvm::AtomicRMWInst::UMin);
316 case Builtins::kAtomicMax:
317 return replaceAtomics(F, FI.getParameter(0).is_signed
318 ? llvm::AtomicRMWInst::Max
319 : llvm::AtomicRMWInst::UMax);
320 case Builtins::kAtomicAnd:
321 return replaceAtomics(F, llvm::AtomicRMWInst::And);
322 case Builtins::kAtomicOr:
323 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
324 case Builtins::kAtomicXor:
325 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
326
327 case Builtins::kCross:
328 if (FI.getParameter(0).vector_size == 4) {
329 return replaceCross(F);
330 }
331 break;
332
333 case Builtins::kFract:
334 if (FI.getParameterCount()) {
335 return replaceFract(F, FI.getParameter(0).vector_size);
336 }
337 break;
338
339 case Builtins::kMadHi:
340 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
341 case Builtins::kMulHi:
342 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
343
344 case Builtins::kMad:
345 case Builtins::kMad24:
346 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
347 true);
348 case Builtins::kMul24:
349 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
350 false);
351
352 case Builtins::kSelect:
353 return replaceSelect(F);
354
355 case Builtins::kBitselect:
356 return replaceBitSelect(F);
357
358 case Builtins::kVload:
359 return replaceVload(F);
360
361 case Builtins::kVloadaHalf:
362 case Builtins::kVloadHalf:
363 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
364
365 case Builtins::kVstore:
366 return replaceVstore(F);
367
368 case Builtins::kVstoreHalf:
369 case Builtins::kVstoreaHalf:
370 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
371
372 case Builtins::kSmoothstep: {
373 int vec_size = FI.getLastParameter().vector_size;
374 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500375 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500376 }
377 break;
378 }
379 case Builtins::kStep: {
380 int vec_size = FI.getLastParameter().vector_size;
381 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500382 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500383 }
384 break;
385 }
386
387 case Builtins::kSignbit:
388 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
389
390 case Builtins::kReadImageh:
391 return replaceHalfReadImage(F);
392 case Builtins::kReadImagef:
393 case Builtins::kReadImagei:
394 case Builtins::kReadImageui: {
395 if (FI.getParameter(1).isSampler() &&
396 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
397 return replaceSampledReadImageWithIntCoords(F);
398 }
399 break;
400 }
401
402 case Builtins::kWriteImageh:
403 return replaceHalfWriteImage(F);
404
Kévin Petit1cb45112020-04-27 18:55:48 +0100405 case Builtins::kPrefetch:
406 return replacePrefetch(F);
407
SJW2c317da2020-03-23 07:39:13 -0500408 default:
409 break;
410 }
411
412 return false;
413}
414
415bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
416 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400417 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100418}
419
SJW2c317da2020-03-23 07:39:13 -0500420bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
421 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100422 auto XValue = CI->getOperand(0);
423 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100424
Kévin Petite8edce32019-04-10 14:23:32 +0100425 IRBuilder<> Builder(CI);
426 auto XmY = Builder.CreateSub(XValue, YValue);
427 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100428
SJW2c317da2020-03-23 07:39:13 -0500429 Value *Cmp = nullptr;
430 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100431 Cmp = Builder.CreateICmpSGT(YValue, XValue);
432 } else {
433 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100434 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100435
Kévin Petite8edce32019-04-10 14:23:32 +0100436 return Builder.CreateSelect(Cmp, YmX, XmY);
437 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100438}
439
SJW2c317da2020-03-23 07:39:13 -0500440bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
441 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100442 auto XValue = CI->getOperand(0);
443 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100444
Kévin Petite8edce32019-04-10 14:23:32 +0100445 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100446
SJW2c317da2020-03-23 07:39:13 -0500447 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400448 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400449 IntTy = FixedVectorType::get(
450 IntTy, vec_ty->getElementCount().getKnownMinValue());
Kévin Petit8c1be282019-04-02 19:34:25 +0100451 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100452
Kévin Petite8edce32019-04-10 14:23:32 +0100453 // Return X with the sign of Y
454
455 // Sign bit masks
456 auto SignBit = IntTy->getScalarSizeInBits() - 1;
457 auto SignBitMask = 1 << SignBit;
458 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
459 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
460
461 IRBuilder<> Builder(CI);
462
463 // Extract sign of Y
464 auto YInt = Builder.CreateBitCast(YValue, IntTy);
465 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
466
467 // Clear sign bit in X
468 auto XInt = Builder.CreateBitCast(XValue, IntTy);
469 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
470
471 // Insert sign bit of Y into X
472 auto NewXInt = Builder.CreateOr(XInt, YSign);
473
474 // And cast back to floating-point
475 return Builder.CreateBitCast(NewXInt, Ty);
476 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100477}
478
SJW2c317da2020-03-23 07:39:13 -0500479bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
480 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100481 // Recip has one arg.
482 auto Arg = CI->getOperand(0);
483 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
484 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
485 });
David Neto22f144c2017-06-12 14:26:21 -0400486}
487
SJW2c317da2020-03-23 07:39:13 -0500488bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
489 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100490 auto Op0 = CI->getOperand(0);
491 auto Op1 = CI->getOperand(1);
492 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
493 });
David Neto22f144c2017-06-12 14:26:21 -0400494}
495
SJW2c317da2020-03-23 07:39:13 -0500496bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
497 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100498 auto Op0 = CI->getOperand(0);
499 auto Op1 = CI->getOperand(1);
500
SJW2c317da2020-03-23 07:39:13 -0500501 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100502 if (Op0->getType()->isVectorTy()) {
503 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
504 CI->getType(), {Op0, Op1});
505 } else {
506 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
507 }
508
509 return V;
510 });
511}
512
SJW2c317da2020-03-23 07:39:13 -0500513bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500514 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500515 // convert to natural
516 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500517 std::string NewFName = basename.substr(0, slen);
518 NewFName =
519 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400520
SJW2c317da2020-03-23 07:39:13 -0500521 Module &M = *F.getParent();
522 return replaceCallsWithValue(F, [&](CallInst *CI) {
523 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
524
525 auto Arg = CI->getOperand(0);
526
527 // Constant of the natural log of 10 (ln(10)).
528 const double Ln10 =
529 2.302585092994045684017991454684364207601101488628772976033;
530
531 auto Mul = BinaryOperator::Create(
532 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
533
534 return CallInst::Create(NewF, Mul, "", CI);
535 });
David Neto22f144c2017-06-12 14:26:21 -0400536}
537
SJW2c317da2020-03-23 07:39:13 -0500538bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100539 // OpenCL fmod(x,y) is x - y * trunc(x/y)
540 // The sign for a non-zero result is taken from x.
541 // (Try an example.)
542 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500543 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100544 auto Op0 = CI->getOperand(0);
545 auto Op1 = CI->getOperand(1);
546 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
547 });
548}
549
SJW2c317da2020-03-23 07:39:13 -0500550bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500551 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500552 // convert to natural
553 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500554 std::string NewFName = basename.substr(0, slen);
555 NewFName =
556 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400557
SJW2c317da2020-03-23 07:39:13 -0500558 Module &M = *F.getParent();
559 return replaceCallsWithValue(F, [&](CallInst *CI) {
560 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
561
562 auto Arg = CI->getOperand(0);
563
564 // Constant of the reciprocal of the natural log of 10 (ln(10)).
565 const double Ln10 =
566 0.434294481903251827651128918916605082294397005803666566114;
567
568 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
569
570 return BinaryOperator::Create(Instruction::FMul,
571 ConstantFP::get(Arg->getType(), Ln10), NewCI,
572 "", CI);
573 });
David Neto22f144c2017-06-12 14:26:21 -0400574}
575
gnl21636e7992020-09-09 16:08:16 +0100576bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
577 // convert to natural
578 std::string NewFName =
579 Builtins::GetMangledFunctionName("log", F.getFunctionType());
580
581 Module &M = *F.getParent();
582 return replaceCallsWithValue(F, [&](CallInst *CI) {
583 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
584
585 auto Arg = CI->getOperand(0);
586
587 auto ArgP1 = BinaryOperator::Create(
588 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
589
590 return CallInst::Create(NewF, ArgP1, "", CI);
591 });
592}
593
alan-baker12d2c182020-07-20 08:22:42 -0400594bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400595
alan-bakerf6bc8252020-09-23 14:58:55 -0400596 enum {
597 CLK_LOCAL_MEM_FENCE = 0x01,
598 CLK_GLOBAL_MEM_FENCE = 0x02,
599 CLK_IMAGE_MEM_FENCE = 0x04
600 };
David Neto22f144c2017-06-12 14:26:21 -0400601
alan-baker12d2c182020-07-20 08:22:42 -0400602 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100603 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400604
Kévin Petitc4643922019-06-17 19:32:05 +0100605 // We need to map the OpenCL constants to the SPIR-V equivalents.
606 const auto LocalMemFence =
607 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
608 const auto GlobalMemFence =
609 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400610 const auto ImageMemFence =
611 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400612 const auto ConstantAcquireRelease = ConstantInt::get(
613 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100614 const auto ConstantScopeDevice =
615 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
616 const auto ConstantScopeWorkgroup =
617 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400618 const auto ConstantScopeSubgroup =
619 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400620
Kévin Petitc4643922019-06-17 19:32:05 +0100621 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
622 const auto LocalMemFenceMask =
623 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
624 const auto WorkgroupShiftAmount =
625 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
626 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
627 Instruction::Shl, LocalMemFenceMask,
628 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400629
Kévin Petitc4643922019-06-17 19:32:05 +0100630 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
631 const auto GlobalMemFenceMask =
632 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
633 const auto UniformShiftAmount =
634 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
635 const auto MemorySemanticsUniform = BinaryOperator::Create(
636 Instruction::Shl, GlobalMemFenceMask,
637 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400638
alan-bakerf6bc8252020-09-23 14:58:55 -0400639 // OpenCL 2.0
640 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
641 const auto ImageMemFenceMask =
642 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
643 const auto ImageShiftAmount =
644 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
645 const auto MemorySemanticsImage = BinaryOperator::Create(
646 Instruction::Shl, ImageMemFenceMask,
647 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
648
Kévin Petitc4643922019-06-17 19:32:05 +0100649 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400650 // MemorySemanticsSequentiallyConsistentMask.
651 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100652 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400653 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400654 auto MemorySemantics2 = BinaryOperator::Create(
655 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
656 auto MemorySemantics = BinaryOperator::Create(
657 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400658
alan-baker12d2c182020-07-20 08:22:42 -0400659 // If the memory scope is not specified explicitly, it is either Subgroup
660 // or Workgroup depending on the type of barrier.
661 Value *MemoryScope =
662 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
663 if (CI->data_operands_size() > 1) {
664 enum {
665 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
666 CL_MEMORY_SCOPE_DEVICE = 0x2,
667 CL_MEMORY_SCOPE_SUBGROUP = 0x4
668 };
669 // The call was given an explicit memory scope.
670 const auto MemoryScopeSubgroup =
671 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
672 const auto MemoryScopeDevice =
673 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400674
alan-baker12d2c182020-07-20 08:22:42 -0400675 auto Cmp =
676 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
677 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
678 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
679 ConstantScopeWorkgroup, "", CI);
680 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
681 MemoryScopeDevice, CI->getOperand(1), "", CI);
682 MemoryScope =
683 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
684 }
685
686 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
687 // the type of barrier;
688 const auto ExecutionScope =
689 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400690
Kévin Petitc4643922019-06-17 19:32:05 +0100691 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
692 {Attribute::NoDuplicate}, CI->getType(),
693 {ExecutionScope, MemoryScope, MemorySemantics});
694 });
David Neto22f144c2017-06-12 14:26:21 -0400695}
696
SJW2c317da2020-03-23 07:39:13 -0500697bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
698 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400699
SJW2c317da2020-03-23 07:39:13 -0500700 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400701 enum {
702 CLK_LOCAL_MEM_FENCE = 0x01,
703 CLK_GLOBAL_MEM_FENCE = 0x02,
704 CLK_IMAGE_MEM_FENCE = 0x04,
705 };
David Neto22f144c2017-06-12 14:26:21 -0400706
SJW2c317da2020-03-23 07:39:13 -0500707 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400708
SJW2c317da2020-03-23 07:39:13 -0500709 // We need to map the OpenCL constants to the SPIR-V equivalents.
710 const auto LocalMemFence =
711 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
712 const auto GlobalMemFence =
713 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400714 const auto ImageMemFence =
715 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500716 const auto ConstantMemorySemantics =
717 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400718 const auto ConstantScopeWorkgroup =
719 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400720
SJW2c317da2020-03-23 07:39:13 -0500721 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
722 const auto LocalMemFenceMask =
723 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
724 const auto WorkgroupShiftAmount =
725 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
726 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
727 Instruction::Shl, LocalMemFenceMask,
728 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400729
SJW2c317da2020-03-23 07:39:13 -0500730 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
731 const auto GlobalMemFenceMask =
732 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
733 const auto UniformShiftAmount =
734 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
735 const auto MemorySemanticsUniform = BinaryOperator::Create(
736 Instruction::Shl, GlobalMemFenceMask,
737 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400738
alan-bakerf6bc8252020-09-23 14:58:55 -0400739 // OpenCL 2.0
740 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
741 const auto ImageMemFenceMask =
742 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
743 const auto ImageShiftAmount =
744 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
745 const auto MemorySemanticsImage = BinaryOperator::Create(
746 Instruction::Shl, ImageMemFenceMask,
747 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
748
SJW2c317da2020-03-23 07:39:13 -0500749 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400750 // |semantics|.
751 auto MemorySemantics1 =
SJW2c317da2020-03-23 07:39:13 -0500752 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
753 ConstantMemorySemantics, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400754 auto MemorySemantics2 = BinaryOperator::Create(
755 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
756 auto MemorySemantics = BinaryOperator::Create(
757 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400758
alan-baker12d2c182020-07-20 08:22:42 -0400759 // Memory Scope is always workgroup.
760 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400761
SJW2c317da2020-03-23 07:39:13 -0500762 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier, {}, CI->getType(),
763 {MemoryScope, MemorySemantics});
764 });
David Neto22f144c2017-06-12 14:26:21 -0400765}
766
Kévin Petit1cb45112020-04-27 18:55:48 +0100767bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
768 bool Changed = false;
769
770 SmallVector<Instruction *, 4> ToRemoves;
771
772 // Find all calls to the function
773 for (auto &U : F.uses()) {
774 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
775 ToRemoves.push_back(CI);
776 }
777 }
778
779 Changed = !ToRemoves.empty();
780
781 // Delete them
782 for (auto V : ToRemoves) {
783 V->eraseFromParent();
784 }
785
786 return Changed;
787}
788
SJW2c317da2020-03-23 07:39:13 -0500789bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
790 CmpInst::Predicate P,
791 int32_t C) {
792 return replaceCallsWithValue(F, [&](CallInst *CI) {
793 // The predicate to use in the CmpInst.
794 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400795
SJW2c317da2020-03-23 07:39:13 -0500796 // The value to return for true.
797 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400798
SJW2c317da2020-03-23 07:39:13 -0500799 // The value to return for false.
800 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400801
SJW2c317da2020-03-23 07:39:13 -0500802 auto Arg1 = CI->getOperand(0);
803 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400804
SJW2c317da2020-03-23 07:39:13 -0500805 const auto Cmp =
806 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400807
SJW2c317da2020-03-23 07:39:13 -0500808 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
809 });
David Neto22f144c2017-06-12 14:26:21 -0400810}
811
SJW2c317da2020-03-23 07:39:13 -0500812bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
813 spv::Op SPIRVOp,
814 int32_t C) {
815 Module &M = *F.getParent();
816 return replaceCallsWithValue(F, [&](CallInst *CI) {
817 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400818
SJW2c317da2020-03-23 07:39:13 -0500819 // The value to return for true.
820 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400821
SJW2c317da2020-03-23 07:39:13 -0500822 // The value to return for false.
823 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -0400824
SJW2c317da2020-03-23 07:39:13 -0500825 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -0400826 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400827 CorrespondingBoolTy =
828 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
829 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -0400830 }
David Neto22f144c2017-06-12 14:26:21 -0400831
SJW2c317da2020-03-23 07:39:13 -0500832 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
833 CorrespondingBoolTy, {CI->getOperand(0)});
834
835 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
836 });
David Neto22f144c2017-06-12 14:26:21 -0400837}
838
SJW2c317da2020-03-23 07:39:13 -0500839bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
840 Module &M = *F.getParent();
841 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100842 auto &C = M.getContext();
843 auto Val = CI->getOperand(0);
844 auto ValTy = Val->getType();
845 auto RetTy = CI->getType();
846
847 // Get a suitable integer type to represent the number
848 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
849
850 // Create Mask
851 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -0500852 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100853 switch (ScalarSize) {
854 case 16:
855 InfMask = ConstantInt::get(IntTy, 0x7C00U);
856 break;
857 case 32:
858 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
859 break;
860 case 64:
861 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
862 break;
863 default:
864 llvm_unreachable("Unsupported floating-point type");
865 }
866
867 IRBuilder<> Builder(CI);
868
869 // Bitcast to int
870 auto ValInt = Builder.CreateBitCast(Val, IntTy);
871
872 // Mask and compare
873 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
874 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
875
876 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -0500877 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100878 if (ValTy->isVectorTy()) {
879 RetTrue = ConstantInt::getSigned(RetTy, -1);
880 } else {
881 RetTrue = ConstantInt::get(RetTy, 1);
882 }
883 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
884 });
885}
886
SJW2c317da2020-03-23 07:39:13 -0500887bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
888 Module &M = *F.getParent();
889 return replaceCallsWithValue(F, [&](CallInst *CI) {
890 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400891
SJW2c317da2020-03-23 07:39:13 -0500892 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000893
SJW2c317da2020-03-23 07:39:13 -0500894 // If the argument is a 32-bit int, just use a shift
895 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
896 V = BinaryOperator::Create(Instruction::LShr, Arg,
897 ConstantInt::get(Arg->getType(), 31), "", CI);
898 } else {
899 // The value for zero to compare against.
900 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -0400901
SJW2c317da2020-03-23 07:39:13 -0500902 // The value to return for true.
903 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -0400904
SJW2c317da2020-03-23 07:39:13 -0500905 // The value to return for false.
906 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400907
SJW2c317da2020-03-23 07:39:13 -0500908 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
909 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400910
SJW2c317da2020-03-23 07:39:13 -0500911 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -0400912
SJW2c317da2020-03-23 07:39:13 -0500913 // If we have a function to call, call it!
914 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -0400915
SJW2c317da2020-03-23 07:39:13 -0500916 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -0400917
SJW2c317da2020-03-23 07:39:13 -0500918 const auto NewCI = clspv::InsertSPIRVOp(
919 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
920 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -0400921
SJW2c317da2020-03-23 07:39:13 -0500922 } else {
923 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -0400924 }
925
SJW2c317da2020-03-23 07:39:13 -0500926 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400927 }
SJW2c317da2020-03-23 07:39:13 -0500928 return V;
929 });
David Neto22f144c2017-06-12 14:26:21 -0400930}
931
SJW2c317da2020-03-23 07:39:13 -0500932bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
933 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
934 // Get arguments
935 auto HiValue = CI->getOperand(0);
936 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000937
SJW2c317da2020-03-23 07:39:13 -0500938 // Don't touch overloads that aren't in OpenCL C
939 auto HiType = HiValue->getType();
940 auto LoType = LoValue->getType();
941
942 if (HiType != LoType) {
943 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000944 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000945
SJW2c317da2020-03-23 07:39:13 -0500946 if (!HiType->isIntOrIntVectorTy()) {
947 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000948 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000949
SJW2c317da2020-03-23 07:39:13 -0500950 if (HiType->getScalarSizeInBits() * 2 !=
951 CI->getType()->getScalarSizeInBits()) {
952 return nullptr;
953 }
954
955 if ((HiType->getScalarSizeInBits() != 8) &&
956 (HiType->getScalarSizeInBits() != 16) &&
957 (HiType->getScalarSizeInBits() != 32)) {
958 return nullptr;
959 }
960
James Pricecf53df42020-04-20 14:41:24 -0400961 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400962 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -0400963 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
964 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500965 return nullptr;
966 }
967 }
968
969 // Convert both operands to the result type
970 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
971 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
972
973 // Shift high operand
974 auto ShiftAmount =
975 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
976 auto HiShifted =
977 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
978
979 // OR both results
980 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
981 });
Kévin Petitbf0036c2019-03-06 13:57:10 +0000982}
983
SJW2c317da2020-03-23 07:39:13 -0500984bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
985 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
986 // Get arguments
987 auto SrcValue = CI->getOperand(0);
988 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +0000989
SJW2c317da2020-03-23 07:39:13 -0500990 // Don't touch overloads that aren't in OpenCL C
991 auto SrcType = SrcValue->getType();
992 auto RotType = RotAmount->getType();
993
994 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
995 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +0000996 }
Kévin Petitd44eef52019-03-08 13:22:14 +0000997
SJW2c317da2020-03-23 07:39:13 -0500998 if (!SrcType->isIntOrIntVectorTy()) {
999 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001000 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001001
SJW2c317da2020-03-23 07:39:13 -05001002 if ((SrcType->getScalarSizeInBits() != 8) &&
1003 (SrcType->getScalarSizeInBits() != 16) &&
1004 (SrcType->getScalarSizeInBits() != 32) &&
1005 (SrcType->getScalarSizeInBits() != 64)) {
1006 return nullptr;
1007 }
1008
James Pricecf53df42020-04-20 14:41:24 -04001009 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001010 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001011 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1012 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001013 return nullptr;
1014 }
1015 }
1016
1017 // The approach used is to shift the top bits down, the bottom bits up
1018 // and OR the two shifted values.
1019
1020 // The rotation amount is to be treated modulo the element size.
1021 // Since SPIR-V shift ops don't support this, let's apply the
1022 // modulo ahead of shifting. The element size is always a power of
1023 // two so we can just AND with a mask.
1024 auto ModMask =
1025 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
1026 RotAmount =
1027 BinaryOperator::Create(Instruction::And, RotAmount, ModMask, "", CI);
1028
1029 // Let's calc the amount by which to shift top bits down
1030 auto ScalarSize = ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
1031 auto DownAmount =
1032 BinaryOperator::Create(Instruction::Sub, ScalarSize, RotAmount, "", CI);
1033
1034 // Now shift the bottom bits up and the top bits down
1035 auto LoRotated =
1036 BinaryOperator::Create(Instruction::Shl, SrcValue, RotAmount, "", CI);
1037 auto HiRotated =
1038 BinaryOperator::Create(Instruction::LShr, SrcValue, DownAmount, "", CI);
1039
1040 // Finally OR the two shifted values
1041 return BinaryOperator::Create(Instruction::Or, LoRotated, HiRotated, "",
1042 CI);
1043 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001044}
1045
SJW2c317da2020-03-23 07:39:13 -05001046bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1047 bool DstIsSigned) {
1048 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1049 Value *V = nullptr;
1050 // Get arguments
1051 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001052
SJW2c317da2020-03-23 07:39:13 -05001053 // Don't touch overloads that aren't in OpenCL C
1054 auto SrcType = SrcValue->getType();
1055 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001056
SJW2c317da2020-03-23 07:39:13 -05001057 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1058 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1059 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001060 }
1061
James Pricecf53df42020-04-20 14:41:24 -04001062 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001063 unsigned SrcNumElements =
1064 SrcVecType->getElementCount().getKnownMinValue();
1065 unsigned DstNumElements =
1066 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001067 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001068 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001069 }
1070
James Pricecf53df42020-04-20 14:41:24 -04001071 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1072 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1073 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001074 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001075 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001076 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001077
SJW2c317da2020-03-23 07:39:13 -05001078 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1079 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1080
1081 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1082 bool DstIsInt = DstType->isIntOrIntVectorTy();
1083
1084 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1085 // Unnecessary cast operation.
1086 V = SrcValue;
1087 } else if (SrcIsFloat && DstIsFloat) {
1088 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1089 } else if (SrcIsFloat && DstIsInt) {
1090 if (DstIsSigned) {
1091 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1092 } else {
1093 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1094 }
1095 } else if (SrcIsInt && DstIsFloat) {
1096 if (SrcIsSigned) {
1097 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1098 } else {
1099 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1100 }
1101 } else if (SrcIsInt && DstIsInt) {
1102 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1103 } else {
1104 // Not something we're supposed to handle, just move on
1105 }
1106
1107 return V;
1108 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001109}
1110
SJW2c317da2020-03-23 07:39:13 -05001111bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1112 bool is_mad) {
1113 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1114 Value *V = nullptr;
1115 // Get arguments
1116 auto AValue = CI->getOperand(0);
1117 auto BValue = CI->getOperand(1);
1118 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001119
SJW2c317da2020-03-23 07:39:13 -05001120 // Don't touch overloads that aren't in OpenCL C
1121 auto AType = AValue->getType();
1122 auto BType = BValue->getType();
1123 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001124
SJW2c317da2020-03-23 07:39:13 -05001125 if ((AType != BType) || (CI->getType() != AType) ||
1126 (is_mad && (AType != CType))) {
1127 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001128 }
1129
SJW2c317da2020-03-23 07:39:13 -05001130 if (!AType->isIntOrIntVectorTy()) {
1131 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001132 }
Kévin Petit8a560882019-03-21 15:24:34 +00001133
SJW2c317da2020-03-23 07:39:13 -05001134 if ((AType->getScalarSizeInBits() != 8) &&
1135 (AType->getScalarSizeInBits() != 16) &&
1136 (AType->getScalarSizeInBits() != 32) &&
1137 (AType->getScalarSizeInBits() != 64)) {
1138 return V;
1139 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001140
James Pricecf53df42020-04-20 14:41:24 -04001141 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001142 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001143 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1144 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001145 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001146 }
1147 }
1148
SJW2c317da2020-03-23 07:39:13 -05001149 // Our SPIR-V op returns a struct, create a type for it
1150 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1151 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001152
SJW2c317da2020-03-23 07:39:13 -05001153 // Select the appropriate signed/unsigned SPIR-V op
1154 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1155
1156 // Call the SPIR-V op
1157 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1158 ExMulRetType, {AValue, BValue});
1159
1160 // Get the high part of the result
1161 unsigned Idxs[] = {1};
1162 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1163
1164 // If we're handling a mad_hi, add the third argument to the result
1165 if (is_mad) {
1166 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001167 }
1168
SJW2c317da2020-03-23 07:39:13 -05001169 return V;
1170 });
Kévin Petit8a560882019-03-21 15:24:34 +00001171}
1172
SJW2c317da2020-03-23 07:39:13 -05001173bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1174 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1175 // Get arguments
1176 auto FalseValue = CI->getOperand(0);
1177 auto TrueValue = CI->getOperand(1);
1178 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001179
SJW2c317da2020-03-23 07:39:13 -05001180 // Don't touch overloads that aren't in OpenCL C
1181 auto FalseType = FalseValue->getType();
1182 auto TrueType = TrueValue->getType();
1183 auto PredicateType = PredicateValue->getType();
1184
1185 if (FalseType != TrueType) {
1186 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001187 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001188
SJW2c317da2020-03-23 07:39:13 -05001189 if (!PredicateType->isIntOrIntVectorTy()) {
1190 return nullptr;
1191 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001192
SJW2c317da2020-03-23 07:39:13 -05001193 if (!FalseType->isIntOrIntVectorTy() &&
1194 !FalseType->getScalarType()->isFloatingPointTy()) {
1195 return nullptr;
1196 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001197
SJW2c317da2020-03-23 07:39:13 -05001198 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1199 return nullptr;
1200 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001201
SJW2c317da2020-03-23 07:39:13 -05001202 if (FalseType->getScalarSizeInBits() !=
1203 PredicateType->getScalarSizeInBits()) {
1204 return nullptr;
1205 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001206
James Pricecf53df42020-04-20 14:41:24 -04001207 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001208 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1209 if (NumElements != cast<VectorType>(PredicateType)
1210 ->getElementCount()
1211 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001212 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001213 }
1214
James Pricecf53df42020-04-20 14:41:24 -04001215 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1216 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001217 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001218 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001219 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001220
SJW2c317da2020-03-23 07:39:13 -05001221 // Create constant
1222 const auto ZeroValue = Constant::getNullValue(PredicateType);
1223
1224 // Scalar and vector are to be treated differently
1225 CmpInst::Predicate Pred;
1226 if (PredicateType->isVectorTy()) {
1227 Pred = CmpInst::ICMP_SLT;
1228 } else {
1229 Pred = CmpInst::ICMP_NE;
1230 }
1231
1232 // Create comparison instruction
1233 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1234 ZeroValue, "", CI);
1235
1236 // Create select
1237 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1238 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001239}
1240
SJW2c317da2020-03-23 07:39:13 -05001241bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1242 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1243 Value *V = nullptr;
1244 if (CI->getNumOperands() != 4) {
1245 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001246 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001247
SJW2c317da2020-03-23 07:39:13 -05001248 // Get arguments
1249 auto FalseValue = CI->getOperand(0);
1250 auto TrueValue = CI->getOperand(1);
1251 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001252
SJW2c317da2020-03-23 07:39:13 -05001253 // Don't touch overloads that aren't in OpenCL C
1254 auto FalseType = FalseValue->getType();
1255 auto TrueType = TrueValue->getType();
1256 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001257
SJW2c317da2020-03-23 07:39:13 -05001258 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1259 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001260 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001261
James Pricecf53df42020-04-20 14:41:24 -04001262 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001263 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1264 !TrueType->getScalarType()->isIntegerTy()) {
1265 return V;
1266 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001267 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001268 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1269 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001270 return V;
1271 }
1272 }
1273
1274 // Remember the type of the operands
1275 auto OpType = TrueType;
1276
1277 // The actual bit selection will always be done on an integer type,
1278 // declare it here
1279 Type *BitType;
1280
1281 // If the operands are float, then bitcast them to int
1282 if (OpType->getScalarType()->isFloatingPointTy()) {
1283
1284 // First create the new type
1285 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1286
1287 // Then bitcast all operands
1288 PredicateValue =
1289 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1290 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1291 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1292
1293 } else {
1294 // The operands have an integer type, use it directly
1295 BitType = OpType;
1296 }
1297
1298 // All the operands are now always integers
1299 // implement as (c & b) | (~c & a)
1300
1301 // Create our negated predicate value
1302 auto AllOnes = Constant::getAllOnesValue(BitType);
1303 auto NotPredicateValue = BinaryOperator::Create(
1304 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1305
1306 // Then put everything together
1307 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1308 FalseValue, "", CI);
1309 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1310 TrueValue, "", CI);
1311
1312 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1313
1314 // If we were dealing with a floating point type, we must bitcast
1315 // the result back to that
1316 if (OpType->getScalarType()->isFloatingPointTy()) {
1317 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1318 }
1319
1320 return V;
1321 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001322}
1323
SJW61531372020-06-09 07:31:08 -05001324bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001325 // convert to vector versions
1326 Module &M = *F.getParent();
1327 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1328 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1329 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001330
SJW2c317da2020-03-23 07:39:13 -05001331 // First figure out which function we're dealing with
1332 if (is_smooth) {
1333 ArgsToSplat.push_back(CI->getOperand(1));
1334 VectorArg = CI->getOperand(2);
1335 } else {
1336 VectorArg = CI->getOperand(1);
1337 }
1338
1339 // Splat arguments that need to be
1340 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001341 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001342
1343 for (auto arg : ArgsToSplat) {
1344 Value *NewVectorArg = UndefValue::get(VecType);
alan-baker5a8c3be2020-09-09 13:44:26 -04001345 for (auto i = 0; i < VecType->getElementCount().getKnownMinValue(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001346 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1347 NewVectorArg =
1348 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1349 }
1350 SplatArgs.push_back(NewVectorArg);
1351 }
1352
1353 // Replace the call with the vector/vector flavour
1354 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1355 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1356
SJW61531372020-06-09 07:31:08 -05001357 std::string NewFName = Builtins::GetMangledFunctionName(
1358 is_smooth ? "smoothstep" : "step", NewFType);
1359
SJW2c317da2020-03-23 07:39:13 -05001360 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1361
1362 SmallVector<Value *, 3> NewArgs;
1363 for (auto arg : SplatArgs) {
1364 NewArgs.push_back(arg);
1365 }
1366 NewArgs.push_back(VectorArg);
1367
1368 return CallInst::Create(NewF, NewArgs, "", CI);
1369 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001370}
1371
SJW2c317da2020-03-23 07:39:13 -05001372bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001373 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1374 auto Arg = CI->getOperand(0);
1375 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001376
SJW2c317da2020-03-23 07:39:13 -05001377 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001378
SJW2c317da2020-03-23 07:39:13 -05001379 return BinaryOperator::Create(Op, Bitcast,
1380 ConstantInt::get(CI->getType(), 31), "", CI);
1381 });
David Neto22f144c2017-06-12 14:26:21 -04001382}
1383
SJW2c317da2020-03-23 07:39:13 -05001384bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1385 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001386 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1387 // The multiply instruction to use.
1388 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001389
SJW2c317da2020-03-23 07:39:13 -05001390 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001391
SJW2c317da2020-03-23 07:39:13 -05001392 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1393 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001394
SJW2c317da2020-03-23 07:39:13 -05001395 if (is_mad) {
1396 // The add instruction to use.
1397 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001398
SJW2c317da2020-03-23 07:39:13 -05001399 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001400 }
David Neto22f144c2017-06-12 14:26:21 -04001401
SJW2c317da2020-03-23 07:39:13 -05001402 return V;
1403 });
David Neto22f144c2017-06-12 14:26:21 -04001404}
1405
SJW2c317da2020-03-23 07:39:13 -05001406bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001407 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1408 Value *V = nullptr;
1409 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001410
SJW2c317da2020-03-23 07:39:13 -05001411 auto data_type = data->getType();
1412 if (!data_type->isVectorTy())
1413 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001414
James Pricecf53df42020-04-20 14:41:24 -04001415 auto vec_data_type = cast<VectorType>(data_type);
1416
alan-baker5a8c3be2020-09-09 13:44:26 -04001417 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001418 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1419 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001420
SJW2c317da2020-03-23 07:39:13 -05001421 auto offset = CI->getOperand(1);
1422 auto ptr = CI->getOperand(2);
1423 auto ptr_type = ptr->getType();
1424 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001425 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001426 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001427
SJW2c317da2020-03-23 07:39:13 -05001428 // Avoid pointer casts. Instead generate the correct number of stores
1429 // and rely on drivers to coalesce appropriately.
1430 IRBuilder<> builder(CI);
1431 auto elems_const = builder.getInt32(elems);
1432 auto adjust = builder.CreateMul(offset, elems_const);
1433 for (auto i = 0; i < elems; ++i) {
1434 auto idx = builder.getInt32(i);
1435 auto add = builder.CreateAdd(adjust, idx);
1436 auto gep = builder.CreateGEP(ptr, add);
1437 auto extract = builder.CreateExtractElement(data, i);
1438 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001439 }
SJW2c317da2020-03-23 07:39:13 -05001440 return V;
1441 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001442}
1443
SJW2c317da2020-03-23 07:39:13 -05001444bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001445 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1446 Value *V = nullptr;
1447 auto ret_type = F.getReturnType();
1448 if (!ret_type->isVectorTy())
1449 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001450
James Pricecf53df42020-04-20 14:41:24 -04001451 auto vec_ret_type = cast<VectorType>(ret_type);
1452
alan-baker5a8c3be2020-09-09 13:44:26 -04001453 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001454 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1455 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001456
SJW2c317da2020-03-23 07:39:13 -05001457 auto offset = CI->getOperand(0);
1458 auto ptr = CI->getOperand(1);
1459 auto ptr_type = ptr->getType();
1460 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001461 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001462 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001463
SJW2c317da2020-03-23 07:39:13 -05001464 // Avoid pointer casts. Instead generate the correct number of loads
1465 // and rely on drivers to coalesce appropriately.
1466 IRBuilder<> builder(CI);
1467 auto elems_const = builder.getInt32(elems);
1468 V = UndefValue::get(ret_type);
1469 auto adjust = builder.CreateMul(offset, elems_const);
1470 for (auto i = 0; i < elems; ++i) {
1471 auto idx = builder.getInt32(i);
1472 auto add = builder.CreateAdd(adjust, idx);
1473 auto gep = builder.CreateGEP(ptr, add);
1474 auto load = builder.CreateLoad(gep);
1475 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001476 }
SJW2c317da2020-03-23 07:39:13 -05001477 return V;
1478 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001479}
1480
SJW2c317da2020-03-23 07:39:13 -05001481bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1482 const std::string &name,
1483 int vec_size) {
1484 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1485 if (!vec_size) {
1486 // deduce vec_size from last character of name (e.g. vload_half4)
1487 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001488 }
SJW2c317da2020-03-23 07:39:13 -05001489 switch (vec_size) {
1490 case 2:
1491 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1492 case 4:
1493 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1494 case 0:
1495 if (!is_clspv_version) {
1496 return replaceVloadHalf(F);
1497 }
1498 default:
1499 llvm_unreachable("Unsupported vload_half vector size");
1500 break;
1501 }
1502 return false;
David Neto22f144c2017-06-12 14:26:21 -04001503}
1504
SJW2c317da2020-03-23 07:39:13 -05001505bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1506 Module &M = *F.getParent();
1507 return replaceCallsWithValue(F, [&](CallInst *CI) {
1508 // The index argument from vload_half.
1509 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001510
SJW2c317da2020-03-23 07:39:13 -05001511 // The pointer argument from vload_half.
1512 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001513
SJW2c317da2020-03-23 07:39:13 -05001514 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001515 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001516 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1517
1518 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001519 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001520
1521 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1522
1523 Value *V = nullptr;
1524
alan-baker7efcaaa2020-05-06 19:33:27 -04001525 bool supports_16bit_storage = true;
1526 switch (Arg1->getType()->getPointerAddressSpace()) {
1527 case clspv::AddressSpace::Global:
1528 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1529 clspv::Option::StorageClass::kSSBO);
1530 break;
1531 case clspv::AddressSpace::Constant:
1532 if (clspv::Option::ConstantArgsInUniformBuffer())
1533 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1534 clspv::Option::StorageClass::kUBO);
1535 else
1536 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1537 clspv::Option::StorageClass::kSSBO);
1538 break;
1539 default:
1540 // Clspv will emit the Float16 capability if the half type is
1541 // encountered. That capability covers private and local addressspaces.
1542 break;
1543 }
1544
1545 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001546 auto ShortTy = Type::getInt16Ty(M.getContext());
1547 auto ShortPointerTy =
1548 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1549
1550 // Cast the half* pointer to short*.
1551 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1552
1553 // Index into the correct address of the casted pointer.
1554 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1555
1556 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001557 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001558
1559 // ZExt the short -> int.
1560 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1561
1562 // Get our float2.
1563 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1564
1565 // Extract out the bottom element which is our float result.
1566 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1567 } else {
1568 // Assume the pointer argument points to storage aligned to 32bits
1569 // or more.
1570 // TODO(dneto): Do more analysis to make sure this is true?
1571 //
1572 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1573 // with:
1574 //
1575 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1576 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1577 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1578 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1579 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1580 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1581 // x float> %converted, %index_is_odd32
1582
1583 auto IntPointerTy =
1584 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1585
1586 // Cast the base pointer to int*.
1587 // In a valid call (according to assumptions), this should get
1588 // optimized away in the simplify GEP pass.
1589 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1590
1591 auto One = ConstantInt::get(IntTy, 1);
1592 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1593 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1594
1595 // Index into the correct address of the casted pointer.
1596 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1597
1598 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001599 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001600
1601 // Get our float2.
1602 auto Call = CallInst::Create(NewF, Load, "", CI);
1603
1604 // Extract out the float result, where the element number is
1605 // determined by whether the original index was even or odd.
1606 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1607 }
1608 return V;
1609 });
1610}
1611
1612bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1613 Module &M = *F.getParent();
1614 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001615 // The index argument from vload_half.
1616 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001617
Kévin Petite8edce32019-04-10 14:23:32 +01001618 // The pointer argument from vload_half.
1619 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001620
Kévin Petite8edce32019-04-10 14:23:32 +01001621 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001622 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001623 auto NewPointerTy =
1624 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001625 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001626
Kévin Petite8edce32019-04-10 14:23:32 +01001627 // Cast the half* pointer to int*.
1628 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001629
Kévin Petite8edce32019-04-10 14:23:32 +01001630 // Index into the correct address of the casted pointer.
1631 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001632
Kévin Petite8edce32019-04-10 14:23:32 +01001633 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001634 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001635
Kévin Petite8edce32019-04-10 14:23:32 +01001636 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001637 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001638
Kévin Petite8edce32019-04-10 14:23:32 +01001639 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001640
Kévin Petite8edce32019-04-10 14:23:32 +01001641 // Get our float2.
1642 return CallInst::Create(NewF, Load, "", CI);
1643 });
David Neto22f144c2017-06-12 14:26:21 -04001644}
1645
SJW2c317da2020-03-23 07:39:13 -05001646bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1647 Module &M = *F.getParent();
1648 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001649 // The index argument from vload_half.
1650 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001651
Kévin Petite8edce32019-04-10 14:23:32 +01001652 // The pointer argument from vload_half.
1653 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001654
Kévin Petite8edce32019-04-10 14:23:32 +01001655 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001656 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1657 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001658 auto NewPointerTy =
1659 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001660 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001661
Kévin Petite8edce32019-04-10 14:23:32 +01001662 // Cast the half* pointer to int2*.
1663 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001664
Kévin Petite8edce32019-04-10 14:23:32 +01001665 // Index into the correct address of the casted pointer.
1666 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001667
Kévin Petite8edce32019-04-10 14:23:32 +01001668 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001669 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001670
Kévin Petite8edce32019-04-10 14:23:32 +01001671 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001672 auto X =
1673 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1674 auto Y =
1675 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001676
Kévin Petite8edce32019-04-10 14:23:32 +01001677 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001678 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001679
Kévin Petite8edce32019-04-10 14:23:32 +01001680 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001681
Kévin Petite8edce32019-04-10 14:23:32 +01001682 // Get the lower (x & y) components of our final float4.
1683 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001684
Kévin Petite8edce32019-04-10 14:23:32 +01001685 // Get the higher (z & w) components of our final float4.
1686 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001687
Kévin Petite8edce32019-04-10 14:23:32 +01001688 Constant *ShuffleMask[4] = {
1689 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1690 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001691
Kévin Petite8edce32019-04-10 14:23:32 +01001692 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001693 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1694 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001695 });
David Neto22f144c2017-06-12 14:26:21 -04001696}
1697
SJW2c317da2020-03-23 07:39:13 -05001698bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001699
1700 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1701 //
1702 // %u = load i32 %ptr
1703 // %fxy = call <2 x float> Unpack2xHalf(u)
1704 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001705 Module &M = *F.getParent();
1706 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001707 auto Index = CI->getOperand(0);
1708 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001709
Kévin Petite8edce32019-04-10 14:23:32 +01001710 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001711 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001712 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001713
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001714 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001715 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001716
Kévin Petite8edce32019-04-10 14:23:32 +01001717 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001718 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001719
Kévin Petite8edce32019-04-10 14:23:32 +01001720 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001721
Kévin Petite8edce32019-04-10 14:23:32 +01001722 // Get our final float2.
1723 return CallInst::Create(NewF, Load, "", CI);
1724 });
David Neto6ad93232018-06-07 15:42:58 -07001725}
1726
SJW2c317da2020-03-23 07:39:13 -05001727bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001728
1729 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1730 //
1731 // %u2 = load <2 x i32> %ptr
1732 // %u2xy = extractelement %u2, 0
1733 // %u2zw = extractelement %u2, 1
1734 // %fxy = call <2 x float> Unpack2xHalf(uint)
1735 // %fzw = call <2 x float> Unpack2xHalf(uint)
1736 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001737 Module &M = *F.getParent();
1738 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001739 auto Index = CI->getOperand(0);
1740 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001741
Kévin Petite8edce32019-04-10 14:23:32 +01001742 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001743 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1744 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001745 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001746
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001747 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001748 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001749
Kévin Petite8edce32019-04-10 14:23:32 +01001750 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001751 auto X =
1752 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1753 auto Y =
1754 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001755
Kévin Petite8edce32019-04-10 14:23:32 +01001756 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001757 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001758
Kévin Petite8edce32019-04-10 14:23:32 +01001759 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001760
Kévin Petite8edce32019-04-10 14:23:32 +01001761 // Get the lower (x & y) components of our final float4.
1762 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001763
Kévin Petite8edce32019-04-10 14:23:32 +01001764 // Get the higher (z & w) components of our final float4.
1765 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001766
Kévin Petite8edce32019-04-10 14:23:32 +01001767 Constant *ShuffleMask[4] = {
1768 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1769 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001770
Kévin Petite8edce32019-04-10 14:23:32 +01001771 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001772 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1773 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001774 });
David Neto6ad93232018-06-07 15:42:58 -07001775}
1776
SJW2c317da2020-03-23 07:39:13 -05001777bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1778 switch (vec_size) {
1779 case 0:
1780 return replaceVstoreHalf(F);
1781 case 2:
1782 return replaceVstoreHalf2(F);
1783 case 4:
1784 return replaceVstoreHalf4(F);
1785 default:
1786 llvm_unreachable("Unsupported vstore_half vector size");
1787 break;
1788 }
1789 return false;
1790}
David Neto22f144c2017-06-12 14:26:21 -04001791
SJW2c317da2020-03-23 07:39:13 -05001792bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1793 Module &M = *F.getParent();
1794 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001795 // The value to store.
1796 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001797
Kévin Petite8edce32019-04-10 14:23:32 +01001798 // The index argument from vstore_half.
1799 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001800
Kévin Petite8edce32019-04-10 14:23:32 +01001801 // The pointer argument from vstore_half.
1802 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001803
Kévin Petite8edce32019-04-10 14:23:32 +01001804 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001805 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001806 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1807 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001808
Kévin Petite8edce32019-04-10 14:23:32 +01001809 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001810 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001811
Kévin Petite8edce32019-04-10 14:23:32 +01001812 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001813
Kévin Petite8edce32019-04-10 14:23:32 +01001814 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001815 auto TempVec = InsertElementInst::Create(
1816 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001817
Kévin Petite8edce32019-04-10 14:23:32 +01001818 // Pack the float2 -> half2 (in an int).
1819 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001820
alan-baker7efcaaa2020-05-06 19:33:27 -04001821 bool supports_16bit_storage = true;
1822 switch (Arg2->getType()->getPointerAddressSpace()) {
1823 case clspv::AddressSpace::Global:
1824 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1825 clspv::Option::StorageClass::kSSBO);
1826 break;
1827 case clspv::AddressSpace::Constant:
1828 if (clspv::Option::ConstantArgsInUniformBuffer())
1829 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1830 clspv::Option::StorageClass::kUBO);
1831 else
1832 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1833 clspv::Option::StorageClass::kSSBO);
1834 break;
1835 default:
1836 // Clspv will emit the Float16 capability if the half type is
1837 // encountered. That capability covers private and local addressspaces.
1838 break;
1839 }
1840
SJW2c317da2020-03-23 07:39:13 -05001841 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04001842 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01001843 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001844 auto ShortPointerTy =
1845 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001846
Kévin Petite8edce32019-04-10 14:23:32 +01001847 // Truncate our i32 to an i16.
1848 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001849
Kévin Petite8edce32019-04-10 14:23:32 +01001850 // Cast the half* pointer to short*.
1851 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001852
Kévin Petite8edce32019-04-10 14:23:32 +01001853 // Index into the correct address of the casted pointer.
1854 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001855
Kévin Petite8edce32019-04-10 14:23:32 +01001856 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05001857 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001858 } else {
1859 // We can only write to 32-bit aligned words.
1860 //
1861 // Assuming base is aligned to 32-bits, replace the equivalent of
1862 // vstore_half(value, index, base)
1863 // with:
1864 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1865 // uint32_t write_to_upper_half = index & 1u;
1866 // uint32_t shift = write_to_upper_half << 4;
1867 //
1868 // // Pack the float value as a half number in bottom 16 bits
1869 // // of an i32.
1870 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1871 //
1872 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1873 // ^ ((packed & 0xffff) << shift)
1874 // // We only need relaxed consistency, but OpenCL 1.2 only has
1875 // // sequentially consistent atomics.
1876 // // TODO(dneto): Use relaxed consistency.
1877 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001878 auto IntPointerTy =
1879 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001880
Kévin Petite8edce32019-04-10 14:23:32 +01001881 auto Four = ConstantInt::get(IntTy, 4);
1882 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04001883
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001884 auto IndexIsOdd =
1885 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001886 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001887 auto IndexIntoI32 =
1888 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1889 auto BaseI32Ptr =
1890 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1891 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
1892 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001893 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001894 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001895 auto MaskBitsToWrite =
1896 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1897 auto MaskedCurrent = BinaryOperator::CreateAnd(
1898 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04001899
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001900 auto XLowerBits =
1901 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1902 auto NewBitsToWrite =
1903 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1904 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
1905 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04001906
Kévin Petite8edce32019-04-10 14:23:32 +01001907 // Generate the call to atomi_xor.
1908 SmallVector<Type *, 5> ParamTypes;
1909 // The pointer type.
1910 ParamTypes.push_back(IntPointerTy);
1911 // The Types for memory scope, semantics, and value.
1912 ParamTypes.push_back(IntTy);
1913 ParamTypes.push_back(IntTy);
1914 ParamTypes.push_back(IntTy);
1915 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1916 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04001917
Kévin Petite8edce32019-04-10 14:23:32 +01001918 const auto ConstantScopeDevice =
1919 ConstantInt::get(IntTy, spv::ScopeDevice);
1920 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1921 // (SPIR-V Workgroup).
1922 const auto AddrSpaceSemanticsBits =
1923 IntPointerTy->getPointerAddressSpace() == 1
1924 ? spv::MemorySemanticsUniformMemoryMask
1925 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04001926
Kévin Petite8edce32019-04-10 14:23:32 +01001927 // We're using relaxed consistency here.
1928 const auto ConstantMemorySemantics =
1929 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1930 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04001931
Kévin Petite8edce32019-04-10 14:23:32 +01001932 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1933 ConstantMemorySemantics, ValueToXor};
1934 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05001935
1936 // Return a Nop so the old Call is removed
1937 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
1938 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001939 }
David Neto22f144c2017-06-12 14:26:21 -04001940
SJW2c317da2020-03-23 07:39:13 -05001941 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01001942 });
David Neto22f144c2017-06-12 14:26:21 -04001943}
1944
SJW2c317da2020-03-23 07:39:13 -05001945bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
1946 Module &M = *F.getParent();
1947 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001948 // The value to store.
1949 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001950
Kévin Petite8edce32019-04-10 14:23:32 +01001951 // The index argument from vstore_half.
1952 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001953
Kévin Petite8edce32019-04-10 14:23:32 +01001954 // The pointer argument from vstore_half.
1955 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001956
Kévin Petite8edce32019-04-10 14:23:32 +01001957 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001958 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001959 auto NewPointerTy =
1960 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001961 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001962
Kévin Petite8edce32019-04-10 14:23:32 +01001963 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001964 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001965
Kévin Petite8edce32019-04-10 14:23:32 +01001966 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001967
Kévin Petite8edce32019-04-10 14:23:32 +01001968 // Turn the packed x & y into the final packing.
1969 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001970
Kévin Petite8edce32019-04-10 14:23:32 +01001971 // Cast the half* pointer to int*.
1972 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001973
Kévin Petite8edce32019-04-10 14:23:32 +01001974 // Index into the correct address of the casted pointer.
1975 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001976
Kévin Petite8edce32019-04-10 14:23:32 +01001977 // Store to the int* we casted to.
1978 return new StoreInst(X, Index, CI);
1979 });
David Neto22f144c2017-06-12 14:26:21 -04001980}
1981
SJW2c317da2020-03-23 07:39:13 -05001982bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
1983 Module &M = *F.getParent();
1984 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001985 // The value to store.
1986 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001987
Kévin Petite8edce32019-04-10 14:23:32 +01001988 // The index argument from vstore_half.
1989 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001990
Kévin Petite8edce32019-04-10 14:23:32 +01001991 // The pointer argument from vstore_half.
1992 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001993
Kévin Petite8edce32019-04-10 14:23:32 +01001994 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001995 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1996 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001997 auto NewPointerTy =
1998 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001999 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002000
Kévin Petite8edce32019-04-10 14:23:32 +01002001 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2002 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002003
Kévin Petite8edce32019-04-10 14:23:32 +01002004 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002005 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2006 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002007
Kévin Petite8edce32019-04-10 14:23:32 +01002008 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2009 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002010
Kévin Petite8edce32019-04-10 14:23:32 +01002011 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002012 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2013 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002014
Kévin Petite8edce32019-04-10 14:23:32 +01002015 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002016 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002017
Kévin Petite8edce32019-04-10 14:23:32 +01002018 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002019
Kévin Petite8edce32019-04-10 14:23:32 +01002020 // Turn the packed x & y into the final component of our int2.
2021 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002022
Kévin Petite8edce32019-04-10 14:23:32 +01002023 // Turn the packed z & w into the final component of our int2.
2024 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002025
Kévin Petite8edce32019-04-10 14:23:32 +01002026 auto Combine = InsertElementInst::Create(
2027 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002028 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2029 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002030
Kévin Petite8edce32019-04-10 14:23:32 +01002031 // Cast the half* pointer to int2*.
2032 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002033
Kévin Petite8edce32019-04-10 14:23:32 +01002034 // Index into the correct address of the casted pointer.
2035 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002036
Kévin Petite8edce32019-04-10 14:23:32 +01002037 // Store to the int2* we casted to.
2038 return new StoreInst(Combine, Index, CI);
2039 });
David Neto22f144c2017-06-12 14:26:21 -04002040}
2041
SJW2c317da2020-03-23 07:39:13 -05002042bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2043 // convert half to float
2044 Module &M = *F.getParent();
2045 return replaceCallsWithValue(F, [&](CallInst *CI) {
2046 SmallVector<Type *, 3> types;
2047 SmallVector<Value *, 3> args;
2048 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2049 types.push_back(CI->getArgOperand(i)->getType());
2050 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002051 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002052
alan-baker5a8c3be2020-09-09 13:44:26 -04002053 auto NewFType =
2054 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2055 cast<VectorType>(CI->getType())
2056 ->getElementCount()
2057 .getKnownMinValue()),
2058 types, false);
SJW2c317da2020-03-23 07:39:13 -05002059
SJW61531372020-06-09 07:31:08 -05002060 std::string NewFName =
2061 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002062
2063 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2064
2065 auto NewCI = CallInst::Create(NewF, args, "", CI);
2066
2067 // Convert to the half type.
2068 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2069 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002070}
2071
SJW2c317da2020-03-23 07:39:13 -05002072bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2073 // convert half to float
2074 Module &M = *F.getParent();
2075 return replaceCallsWithValue(F, [&](CallInst *CI) {
2076 SmallVector<Type *, 3> types(3);
2077 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002078
SJW2c317da2020-03-23 07:39:13 -05002079 // Image
2080 types[0] = CI->getArgOperand(0)->getType();
2081 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002082
SJW2c317da2020-03-23 07:39:13 -05002083 // Coord
2084 types[1] = CI->getArgOperand(1)->getType();
2085 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002086
SJW2c317da2020-03-23 07:39:13 -05002087 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002088 types[2] =
2089 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2090 cast<VectorType>(CI->getArgOperand(2)->getType())
2091 ->getElementCount()
2092 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002093
SJW2c317da2020-03-23 07:39:13 -05002094 auto NewFType =
2095 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002096
SJW61531372020-06-09 07:31:08 -05002097 std::string NewFName =
2098 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002099
SJW2c317da2020-03-23 07:39:13 -05002100 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002101
SJW2c317da2020-03-23 07:39:13 -05002102 // Convert data to the float type.
2103 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2104 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002105
SJW2c317da2020-03-23 07:39:13 -05002106 return CallInst::Create(NewF, args, "", CI);
2107 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002108}
2109
SJW2c317da2020-03-23 07:39:13 -05002110bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2111 Function &F) {
2112 // convert read_image with int coords to float coords
2113 Module &M = *F.getParent();
2114 return replaceCallsWithValue(F, [&](CallInst *CI) {
2115 // The image.
2116 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002117
SJW2c317da2020-03-23 07:39:13 -05002118 // The sampler.
2119 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002120
SJW2c317da2020-03-23 07:39:13 -05002121 // The coordinate (integer type that we can't handle).
2122 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002123
SJW2c317da2020-03-23 07:39:13 -05002124 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2125 uint32_t components =
2126 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2127 Type *float_ty = nullptr;
2128 if (components == 1) {
2129 float_ty = Type::getFloatTy(M.getContext());
2130 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002131 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2132 cast<VectorType>(Arg2->getType())
2133 ->getElementCount()
2134 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002135 }
David Neto22f144c2017-06-12 14:26:21 -04002136
SJW2c317da2020-03-23 07:39:13 -05002137 auto NewFType = FunctionType::get(
2138 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2139
2140 std::string NewFName = F.getName().str();
2141 NewFName[NewFName.length() - 1] = 'f';
2142
2143 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2144
2145 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2146
2147 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2148 });
David Neto22f144c2017-06-12 14:26:21 -04002149}
2150
SJW2c317da2020-03-23 07:39:13 -05002151bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2152 return replaceCallsWithValue(F, [&](CallInst *CI) {
2153 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002154
SJW2c317da2020-03-23 07:39:13 -05002155 // We need to map the OpenCL constants to the SPIR-V equivalents.
2156 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2157 const auto ConstantMemorySemantics = ConstantInt::get(
2158 IntTy, spv::MemorySemanticsUniformMemoryMask |
2159 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002160
SJW2c317da2020-03-23 07:39:13 -05002161 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002162
SJW2c317da2020-03-23 07:39:13 -05002163 // The pointer.
2164 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002165
SJW2c317da2020-03-23 07:39:13 -05002166 // The memory scope.
2167 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002168
SJW2c317da2020-03-23 07:39:13 -05002169 // The memory semantics.
2170 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002171
SJW2c317da2020-03-23 07:39:13 -05002172 if (2 < CI->getNumArgOperands()) {
2173 // The unequal memory semantics.
2174 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002175
SJW2c317da2020-03-23 07:39:13 -05002176 // The value.
2177 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002178
SJW2c317da2020-03-23 07:39:13 -05002179 // The comparator.
2180 Params.push_back(CI->getArgOperand(1));
2181 } else if (1 < CI->getNumArgOperands()) {
2182 // The value.
2183 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002184 }
David Neto22f144c2017-06-12 14:26:21 -04002185
SJW2c317da2020-03-23 07:39:13 -05002186 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2187 });
David Neto22f144c2017-06-12 14:26:21 -04002188}
2189
SJW2c317da2020-03-23 07:39:13 -05002190bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2191 llvm::AtomicRMWInst::BinOp Op) {
2192 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002193 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2194 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002195 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002196 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002197 SyncScope::System, CI);
2198 });
2199}
David Neto22f144c2017-06-12 14:26:21 -04002200
SJW2c317da2020-03-23 07:39:13 -05002201bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2202 Module &M = *F.getParent();
2203 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002204 auto IntTy = Type::getInt32Ty(M.getContext());
2205 auto FloatTy = Type::getFloatTy(M.getContext());
2206
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002207 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2208 ConstantInt::get(IntTy, 1),
2209 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002210
2211 Constant *UpShuffleMask[4] = {
2212 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2213 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2214
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002215 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2216 UndefValue::get(FloatTy),
2217 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002218
Kévin Petite8edce32019-04-10 14:23:32 +01002219 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002220 auto Arg0 =
2221 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2222 ConstantVector::get(DownShuffleMask), "", CI);
2223 auto Arg1 =
2224 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2225 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002226 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002227
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002228 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002229 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002230
SJW61531372020-06-09 07:31:08 -05002231 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002232
Kévin Petite8edce32019-04-10 14:23:32 +01002233 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002234
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002235 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2236 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002237 });
David Neto22f144c2017-06-12 14:26:21 -04002238}
David Neto62653202017-10-16 19:05:18 -04002239
SJW2c317da2020-03-23 07:39:13 -05002240bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002241 // OpenCL's float result = fract(float x, float* ptr)
2242 //
2243 // In the LLVM domain:
2244 //
2245 // %floor_result = call spir_func float @floor(float %x)
2246 // store float %floor_result, float * %ptr
2247 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2248 // %result = call spir_func float
2249 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2250 //
2251 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2252 // and clspv.fract occur in the SPIR-V generator pass:
2253 //
2254 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2255 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2256 // ...
2257 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2258 // OpStore %ptr %floor_result
2259 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2260 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002261 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002262
David Neto62653202017-10-16 19:05:18 -04002263 using std::string;
2264
2265 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2266 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002267
SJW2c317da2020-03-23 07:39:13 -05002268 Module &M = *F.getParent();
2269 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002270
SJW2c317da2020-03-23 07:39:13 -05002271 // This is either float or a float vector. All the float-like
2272 // types are this type.
2273 auto result_ty = F.getReturnType();
2274
SJW61531372020-06-09 07:31:08 -05002275 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002276 Function *fmin_fn = M.getFunction(fmin_name);
2277 if (!fmin_fn) {
2278 // Make the fmin function.
2279 FunctionType *fn_ty =
2280 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2281 fmin_fn =
2282 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2283 fmin_fn->addFnAttr(Attribute::ReadNone);
2284 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2285 }
2286
SJW61531372020-06-09 07:31:08 -05002287 std::string floor_name =
2288 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002289 Function *floor_fn = M.getFunction(floor_name);
2290 if (!floor_fn) {
2291 // Make the floor function.
2292 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2293 floor_fn =
2294 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2295 floor_fn->addFnAttr(Attribute::ReadNone);
2296 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2297 }
2298
SJW61531372020-06-09 07:31:08 -05002299 std::string clspv_fract_name =
2300 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002301 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2302 if (!clspv_fract_fn) {
2303 // Make the clspv_fract function.
2304 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2305 clspv_fract_fn = cast<Function>(
2306 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2307 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2308 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2309 }
2310
2311 // Number of significant significand bits, whether represented or not.
2312 unsigned num_significand_bits;
2313 switch (result_ty->getScalarType()->getTypeID()) {
2314 case Type::HalfTyID:
2315 num_significand_bits = 11;
2316 break;
2317 case Type::FloatTyID:
2318 num_significand_bits = 24;
2319 break;
2320 case Type::DoubleTyID:
2321 num_significand_bits = 53;
2322 break;
2323 default:
2324 llvm_unreachable("Unhandled float type when processing fract builtin");
2325 break;
2326 }
2327 // Beware that the disassembler displays this value as
2328 // OpConstant %float 1
2329 // which is not quite right.
2330 const double kJustUnderOneScalar =
2331 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2332
2333 Constant *just_under_one =
2334 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2335 if (result_ty->isVectorTy()) {
2336 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002337 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002338 }
2339
2340 IRBuilder<> Builder(CI);
2341
2342 auto arg = CI->getArgOperand(0);
2343 auto ptr = CI->getArgOperand(1);
2344
2345 // Compute floor result and store it.
2346 auto floor = Builder.CreateCall(floor_fn, {arg});
2347 Builder.CreateStore(floor, ptr);
2348
2349 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2350 auto fract_result =
2351 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2352
2353 return fract_result;
2354 });
David Neto62653202017-10-16 19:05:18 -04002355}