blob: b321f40b3ed05d81d8441d28092b1a20c5769966 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Neto62653202017-10-16 19:05:18 -040015#include <math.h>
16#include <string>
17#include <tuple>
18
Kévin Petit9d1a9d12019-03-25 15:23:46 +000019#include "llvm/ADT/StringSwitch.h"
David Neto118188e2018-08-24 11:27:54 -040020#include "llvm/IR/Constants.h"
David Neto118188e2018-08-24 11:27:54 -040021#include "llvm/IR/IRBuilder.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040022#include "llvm/IR/Instructions.h"
David Neto118188e2018-08-24 11:27:54 -040023#include "llvm/IR/Module.h"
Kévin Petitf5b78a22018-10-25 14:32:17 +000024#include "llvm/IR/ValueSymbolTable.h"
David Neto118188e2018-08-24 11:27:54 -040025#include "llvm/Pass.h"
26#include "llvm/Support/CommandLine.h"
27#include "llvm/Support/raw_ostream.h"
28#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040029
alan-bakere0902602020-03-23 08:43:40 -040030#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040031
alan-baker931d18a2019-12-12 08:21:32 -050032#include "clspv/AddressSpace.h"
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040033#include "clspv/Option.h"
David Neto482550a2018-03-24 05:21:07 -070034
SJW2c317da2020-03-23 07:39:13 -050035#include "Builtins.h"
alan-baker931d18a2019-12-12 08:21:32 -050036#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040037#include "Passes.h"
38#include "SPIRVOp.h"
alan-bakerf906d2b2019-12-10 11:26:23 -050039#include "Types.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040040
SJW2c317da2020-03-23 07:39:13 -050041using namespace clspv;
David Neto22f144c2017-06-12 14:26:21 -040042using namespace llvm;
43
44#define DEBUG_TYPE "ReplaceOpenCLBuiltin"
45
46namespace {
Kévin Petit8a560882019-03-21 15:24:34 +000047
// Returns the zero-based index of the most significant set bit of |v|, i.e.
// floor(log2(v)); returns 0 when |v| is 0 or 1.
//
// Despite its name this is not a count of leading zeros. Callers in this file
// subtract two results to obtain the left-shift distance that moves one
// single-bit flag onto another.
uint32_t clz(uint32_t v) {
  uint32_t index = 0;

  // Binary search over halving widths: whenever the upper part is non-empty,
  // drop the lower |width| bits and record that many positions.
  for (uint32_t width = 16; width != 0; width >>= 1) {
    if ((v >> width) != 0) {
      v >>= width;
      index |= width;
    }
  }

  return index;
}
67
Kévin Petitfdfa92e2019-09-25 14:20:58 +010068Type *getIntOrIntVectorTyForCast(LLVMContext &C, Type *Ty) {
69 Type *IntTy = Type::getIntNTy(C, Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -040070 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040071 IntTy = FixedVectorType::get(IntTy,
72 vec_ty->getElementCount().getKnownMinValue());
Kévin Petitfdfa92e2019-09-25 14:20:58 +010073 }
74 return IntTy;
75}
76
SJW2c317da2020-03-23 07:39:13 -050077bool replaceCallsWithValue(Function &F,
78 std::function<Value *(CallInst *)> Replacer) {
79
80 bool Changed = false;
81
82 SmallVector<Instruction *, 4> ToRemoves;
83
84 // Walk the users of the function.
85 for (auto &U : F.uses()) {
86 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
87
88 auto NewValue = Replacer(CI);
89
90 if (NewValue != nullptr) {
91 CI->replaceAllUsesWith(NewValue);
92
93 // Lastly, remember to remove the user.
94 ToRemoves.push_back(CI);
95 }
96 }
97 }
98
99 Changed = !ToRemoves.empty();
100
101 // And cleanup the calls we don't use anymore.
102 for (auto V : ToRemoves) {
103 V->eraseFromParent();
104 }
105
106 return Changed;
107}
108
David Neto22f144c2017-06-12 14:26:21 -0400109struct ReplaceOpenCLBuiltinPass final : public ModulePass {
110 static char ID;
111 ReplaceOpenCLBuiltinPass() : ModulePass(ID) {}
112
113 bool runOnModule(Module &M) override;
SJW2c317da2020-03-23 07:39:13 -0500114 bool runOnFunction(Function &F);
115 bool replaceAbs(Function &F);
116 bool replaceAbsDiff(Function &F, bool is_signed);
117 bool replaceCopysign(Function &F);
118 bool replaceRecip(Function &F);
119 bool replaceDivide(Function &F);
120 bool replaceDot(Function &F);
121 bool replaceFmod(Function &F);
SJW61531372020-06-09 07:31:08 -0500122 bool replaceExp10(Function &F, const std::string &basename);
123 bool replaceLog10(Function &F, const std::string &basename);
gnl21636e7992020-09-09 16:08:16 +0100124 bool replaceLog1p(Function &F);
alan-baker12d2c182020-07-20 08:22:42 -0400125 bool replaceBarrier(Function &F, bool subgroup = false);
SJW2c317da2020-03-23 07:39:13 -0500126 bool replaceMemFence(Function &F, uint32_t semantics);
Kévin Petit1cb45112020-04-27 18:55:48 +0100127 bool replacePrefetch(Function &F);
SJW2c317da2020-03-23 07:39:13 -0500128 bool replaceRelational(Function &F, CmpInst::Predicate P, int32_t C);
129 bool replaceIsInfAndIsNan(Function &F, spv::Op SPIRVOp, int32_t isvec);
130 bool replaceIsFinite(Function &F);
131 bool replaceAllAndAny(Function &F, spv::Op SPIRVOp);
132 bool replaceUpsample(Function &F);
133 bool replaceRotate(Function &F);
134 bool replaceConvert(Function &F, bool SrcIsSigned, bool DstIsSigned);
135 bool replaceMulHi(Function &F, bool is_signed, bool is_mad = false);
136 bool replaceSelect(Function &F);
137 bool replaceBitSelect(Function &F);
SJW61531372020-06-09 07:31:08 -0500138 bool replaceStep(Function &F, bool is_smooth);
SJW2c317da2020-03-23 07:39:13 -0500139 bool replaceSignbit(Function &F, bool is_vec);
140 bool replaceMul(Function &F, bool is_float, bool is_mad);
141 bool replaceVloadHalf(Function &F, const std::string &name, int vec_size);
142 bool replaceVloadHalf(Function &F);
143 bool replaceVloadHalf2(Function &F);
144 bool replaceVloadHalf4(Function &F);
145 bool replaceClspvVloadaHalf2(Function &F);
146 bool replaceClspvVloadaHalf4(Function &F);
147 bool replaceVstoreHalf(Function &F, int vec_size);
148 bool replaceVstoreHalf(Function &F);
149 bool replaceVstoreHalf2(Function &F);
150 bool replaceVstoreHalf4(Function &F);
151 bool replaceHalfReadImage(Function &F);
152 bool replaceHalfWriteImage(Function &F);
153 bool replaceSampledReadImageWithIntCoords(Function &F);
154 bool replaceAtomics(Function &F, spv::Op Op);
155 bool replaceAtomics(Function &F, llvm::AtomicRMWInst::BinOp Op);
156 bool replaceCross(Function &F);
157 bool replaceFract(Function &F, int vec_size);
158 bool replaceVload(Function &F);
159 bool replaceVstore(Function &F);
alan-bakera52b7312020-10-26 08:58:51 -0400160 bool replaceAddSat(Function &F, bool is_signed);
David Neto22f144c2017-06-12 14:26:21 -0400161};
SJW2c317da2020-03-23 07:39:13 -0500162
Kévin Petit91bc72e2019-04-08 15:17:46 +0100163} // namespace
David Neto22f144c2017-06-12 14:26:21 -0400164
165char ReplaceOpenCLBuiltinPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -0400166INITIALIZE_PASS(ReplaceOpenCLBuiltinPass, "ReplaceOpenCLBuiltin",
167 "Replace OpenCL Builtins Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -0400168
169namespace clspv {
170ModulePass *createReplaceOpenCLBuiltinPass() {
171 return new ReplaceOpenCLBuiltinPass();
172}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400173} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -0400174
175bool ReplaceOpenCLBuiltinPass::runOnModule(Module &M) {
SJW2c317da2020-03-23 07:39:13 -0500176 std::list<Function *> func_list;
177 for (auto &F : M.getFunctionList()) {
178 // process only function declarations
179 if (F.isDeclaration() && runOnFunction(F)) {
180 func_list.push_front(&F);
Kévin Petit2444e9b2018-11-09 14:14:37 +0000181 }
182 }
SJW2c317da2020-03-23 07:39:13 -0500183 if (func_list.size() != 0) {
184 // recursively convert functions, but first remove dead
185 for (auto *F : func_list) {
186 if (F->use_empty()) {
187 F->eraseFromParent();
188 }
189 }
190 runOnModule(M);
191 return true;
192 }
193 return false;
Kévin Petit2444e9b2018-11-09 14:14:37 +0000194}
195
SJW2c317da2020-03-23 07:39:13 -0500196bool ReplaceOpenCLBuiltinPass::runOnFunction(Function &F) {
197 auto &FI = Builtins::Lookup(&F);
198 switch (FI.getType()) {
199 case Builtins::kAbs:
200 if (!FI.getParameter(0).is_signed) {
201 return replaceAbs(F);
202 }
203 break;
204 case Builtins::kAbsDiff:
205 return replaceAbsDiff(F, FI.getParameter(0).is_signed);
alan-bakera52b7312020-10-26 08:58:51 -0400206
207 case Builtins::kAddSat:
208 return replaceAddSat(F, FI.getParameter(0).is_signed);
209
SJW2c317da2020-03-23 07:39:13 -0500210 case Builtins::kCopysign:
211 return replaceCopysign(F);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100212
SJW2c317da2020-03-23 07:39:13 -0500213 case Builtins::kHalfRecip:
214 case Builtins::kNativeRecip:
215 return replaceRecip(F);
Kévin Petite8edce32019-04-10 14:23:32 +0100216
SJW2c317da2020-03-23 07:39:13 -0500217 case Builtins::kHalfDivide:
218 case Builtins::kNativeDivide:
219 return replaceDivide(F);
220
221 case Builtins::kDot:
222 return replaceDot(F);
223
224 case Builtins::kExp10:
225 case Builtins::kHalfExp10:
SJW61531372020-06-09 07:31:08 -0500226 case Builtins::kNativeExp10:
227 return replaceExp10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500228
229 case Builtins::kLog10:
230 case Builtins::kHalfLog10:
SJW61531372020-06-09 07:31:08 -0500231 case Builtins::kNativeLog10:
232 return replaceLog10(F, FI.getName());
SJW2c317da2020-03-23 07:39:13 -0500233
gnl21636e7992020-09-09 16:08:16 +0100234 case Builtins::kLog1p:
235 return replaceLog1p(F);
236
SJW2c317da2020-03-23 07:39:13 -0500237 case Builtins::kFmod:
238 return replaceFmod(F);
239
240 case Builtins::kBarrier:
241 case Builtins::kWorkGroupBarrier:
242 return replaceBarrier(F);
243
alan-baker12d2c182020-07-20 08:22:42 -0400244 case Builtins::kSubGroupBarrier:
245 return replaceBarrier(F, true);
246
SJW2c317da2020-03-23 07:39:13 -0500247 case Builtins::kMemFence:
alan-baker12d2c182020-07-20 08:22:42 -0400248 return replaceMemFence(F, spv::MemorySemanticsAcquireReleaseMask);
SJW2c317da2020-03-23 07:39:13 -0500249 case Builtins::kReadMemFence:
250 return replaceMemFence(F, spv::MemorySemanticsAcquireMask);
251 case Builtins::kWriteMemFence:
252 return replaceMemFence(F, spv::MemorySemanticsReleaseMask);
253
254 // Relational
255 case Builtins::kIsequal:
256 return replaceRelational(F, CmpInst::FCMP_OEQ,
257 FI.getParameter(0).vector_size ? -1 : 1);
258 case Builtins::kIsgreater:
259 return replaceRelational(F, CmpInst::FCMP_OGT,
260 FI.getParameter(0).vector_size ? -1 : 1);
261 case Builtins::kIsgreaterequal:
262 return replaceRelational(F, CmpInst::FCMP_OGE,
263 FI.getParameter(0).vector_size ? -1 : 1);
264 case Builtins::kIsless:
265 return replaceRelational(F, CmpInst::FCMP_OLT,
266 FI.getParameter(0).vector_size ? -1 : 1);
267 case Builtins::kIslessequal:
268 return replaceRelational(F, CmpInst::FCMP_OLE,
269 FI.getParameter(0).vector_size ? -1 : 1);
270 case Builtins::kIsnotequal:
271 return replaceRelational(F, CmpInst::FCMP_ONE,
272 FI.getParameter(0).vector_size ? -1 : 1);
273
274 case Builtins::kIsinf: {
275 bool is_vec = FI.getParameter(0).vector_size != 0;
276 return replaceIsInfAndIsNan(F, spv::OpIsInf, is_vec ? -1 : 1);
277 }
278 case Builtins::kIsnan: {
279 bool is_vec = FI.getParameter(0).vector_size != 0;
280 return replaceIsInfAndIsNan(F, spv::OpIsNan, is_vec ? -1 : 1);
281 }
282
283 case Builtins::kIsfinite:
284 return replaceIsFinite(F);
285
286 case Builtins::kAll: {
287 bool is_vec = FI.getParameter(0).vector_size != 0;
288 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAll);
289 }
290 case Builtins::kAny: {
291 bool is_vec = FI.getParameter(0).vector_size != 0;
292 return replaceAllAndAny(F, !is_vec ? spv::OpNop : spv::OpAny);
293 }
294
295 case Builtins::kUpsample:
296 return replaceUpsample(F);
297
298 case Builtins::kRotate:
299 return replaceRotate(F);
300
301 case Builtins::kConvert:
302 return replaceConvert(F, FI.getParameter(0).is_signed,
303 FI.getReturnType().is_signed);
304
305 case Builtins::kAtomicInc:
306 return replaceAtomics(F, spv::OpAtomicIIncrement);
307 case Builtins::kAtomicDec:
308 return replaceAtomics(F, spv::OpAtomicIDecrement);
309 case Builtins::kAtomicCmpxchg:
310 return replaceAtomics(F, spv::OpAtomicCompareExchange);
311 case Builtins::kAtomicAdd:
312 return replaceAtomics(F, llvm::AtomicRMWInst::Add);
313 case Builtins::kAtomicSub:
314 return replaceAtomics(F, llvm::AtomicRMWInst::Sub);
315 case Builtins::kAtomicXchg:
316 return replaceAtomics(F, llvm::AtomicRMWInst::Xchg);
317 case Builtins::kAtomicMin:
318 return replaceAtomics(F, FI.getParameter(0).is_signed
319 ? llvm::AtomicRMWInst::Min
320 : llvm::AtomicRMWInst::UMin);
321 case Builtins::kAtomicMax:
322 return replaceAtomics(F, FI.getParameter(0).is_signed
323 ? llvm::AtomicRMWInst::Max
324 : llvm::AtomicRMWInst::UMax);
325 case Builtins::kAtomicAnd:
326 return replaceAtomics(F, llvm::AtomicRMWInst::And);
327 case Builtins::kAtomicOr:
328 return replaceAtomics(F, llvm::AtomicRMWInst::Or);
329 case Builtins::kAtomicXor:
330 return replaceAtomics(F, llvm::AtomicRMWInst::Xor);
331
332 case Builtins::kCross:
333 if (FI.getParameter(0).vector_size == 4) {
334 return replaceCross(F);
335 }
336 break;
337
338 case Builtins::kFract:
339 if (FI.getParameterCount()) {
340 return replaceFract(F, FI.getParameter(0).vector_size);
341 }
342 break;
343
344 case Builtins::kMadHi:
345 return replaceMulHi(F, FI.getParameter(0).is_signed, true);
346 case Builtins::kMulHi:
347 return replaceMulHi(F, FI.getParameter(0).is_signed, false);
348
349 case Builtins::kMad:
350 case Builtins::kMad24:
351 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
352 true);
353 case Builtins::kMul24:
354 return replaceMul(F, FI.getParameter(0).type_id == llvm::Type::FloatTyID,
355 false);
356
357 case Builtins::kSelect:
358 return replaceSelect(F);
359
360 case Builtins::kBitselect:
361 return replaceBitSelect(F);
362
363 case Builtins::kVload:
364 return replaceVload(F);
365
366 case Builtins::kVloadaHalf:
367 case Builtins::kVloadHalf:
368 return replaceVloadHalf(F, FI.getName(), FI.getParameter(0).vector_size);
369
370 case Builtins::kVstore:
371 return replaceVstore(F);
372
373 case Builtins::kVstoreHalf:
374 case Builtins::kVstoreaHalf:
375 return replaceVstoreHalf(F, FI.getParameter(0).vector_size);
376
377 case Builtins::kSmoothstep: {
378 int vec_size = FI.getLastParameter().vector_size;
379 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500380 return replaceStep(F, true);
SJW2c317da2020-03-23 07:39:13 -0500381 }
382 break;
383 }
384 case Builtins::kStep: {
385 int vec_size = FI.getLastParameter().vector_size;
386 if (FI.getParameter(0).vector_size == 0 && vec_size != 0) {
SJW61531372020-06-09 07:31:08 -0500387 return replaceStep(F, false);
SJW2c317da2020-03-23 07:39:13 -0500388 }
389 break;
390 }
391
392 case Builtins::kSignbit:
393 return replaceSignbit(F, FI.getParameter(0).vector_size != 0);
394
395 case Builtins::kReadImageh:
396 return replaceHalfReadImage(F);
397 case Builtins::kReadImagef:
398 case Builtins::kReadImagei:
399 case Builtins::kReadImageui: {
400 if (FI.getParameter(1).isSampler() &&
401 FI.getParameter(2).type_id == llvm::Type::IntegerTyID) {
402 return replaceSampledReadImageWithIntCoords(F);
403 }
404 break;
405 }
406
407 case Builtins::kWriteImageh:
408 return replaceHalfWriteImage(F);
409
Kévin Petit1cb45112020-04-27 18:55:48 +0100410 case Builtins::kPrefetch:
411 return replacePrefetch(F);
412
SJW2c317da2020-03-23 07:39:13 -0500413 default:
414 break;
415 }
416
417 return false;
418}
419
420bool ReplaceOpenCLBuiltinPass::replaceAbs(Function &F) {
421 return replaceCallsWithValue(F,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400422 [](CallInst *CI) { return CI->getOperand(0); });
Kévin Petite8edce32019-04-10 14:23:32 +0100423}
424
SJW2c317da2020-03-23 07:39:13 -0500425bool ReplaceOpenCLBuiltinPass::replaceAbsDiff(Function &F, bool is_signed) {
426 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100427 auto XValue = CI->getOperand(0);
428 auto YValue = CI->getOperand(1);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100429
Kévin Petite8edce32019-04-10 14:23:32 +0100430 IRBuilder<> Builder(CI);
431 auto XmY = Builder.CreateSub(XValue, YValue);
432 auto YmX = Builder.CreateSub(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100433
SJW2c317da2020-03-23 07:39:13 -0500434 Value *Cmp = nullptr;
435 if (is_signed) {
Kévin Petite8edce32019-04-10 14:23:32 +0100436 Cmp = Builder.CreateICmpSGT(YValue, XValue);
437 } else {
438 Cmp = Builder.CreateICmpUGT(YValue, XValue);
Kévin Petit91bc72e2019-04-08 15:17:46 +0100439 }
Kévin Petit91bc72e2019-04-08 15:17:46 +0100440
Kévin Petite8edce32019-04-10 14:23:32 +0100441 return Builder.CreateSelect(Cmp, YmX, XmY);
442 });
Kévin Petit91bc72e2019-04-08 15:17:46 +0100443}
444
SJW2c317da2020-03-23 07:39:13 -0500445bool ReplaceOpenCLBuiltinPass::replaceCopysign(Function &F) {
446 return replaceCallsWithValue(F, [&F](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100447 auto XValue = CI->getOperand(0);
448 auto YValue = CI->getOperand(1);
Kévin Petit8c1be282019-04-02 19:34:25 +0100449
Kévin Petite8edce32019-04-10 14:23:32 +0100450 auto Ty = XValue->getType();
Kévin Petit8c1be282019-04-02 19:34:25 +0100451
SJW2c317da2020-03-23 07:39:13 -0500452 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
James Pricecf53df42020-04-20 14:41:24 -0400453 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400454 IntTy = FixedVectorType::get(
455 IntTy, vec_ty->getElementCount().getKnownMinValue());
Kévin Petit8c1be282019-04-02 19:34:25 +0100456 }
Kévin Petit8c1be282019-04-02 19:34:25 +0100457
Kévin Petite8edce32019-04-10 14:23:32 +0100458 // Return X with the sign of Y
459
460 // Sign bit masks
461 auto SignBit = IntTy->getScalarSizeInBits() - 1;
462 auto SignBitMask = 1 << SignBit;
463 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
464 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
465
466 IRBuilder<> Builder(CI);
467
468 // Extract sign of Y
469 auto YInt = Builder.CreateBitCast(YValue, IntTy);
470 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
471
472 // Clear sign bit in X
473 auto XInt = Builder.CreateBitCast(XValue, IntTy);
474 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
475
476 // Insert sign bit of Y into X
477 auto NewXInt = Builder.CreateOr(XInt, YSign);
478
479 // And cast back to floating-point
480 return Builder.CreateBitCast(NewXInt, Ty);
481 });
Kévin Petit8c1be282019-04-02 19:34:25 +0100482}
483
SJW2c317da2020-03-23 07:39:13 -0500484bool ReplaceOpenCLBuiltinPass::replaceRecip(Function &F) {
485 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100486 // Recip has one arg.
487 auto Arg = CI->getOperand(0);
488 auto Cst1 = ConstantFP::get(Arg->getType(), 1.0);
489 return BinaryOperator::Create(Instruction::FDiv, Cst1, Arg, "", CI);
490 });
David Neto22f144c2017-06-12 14:26:21 -0400491}
492
SJW2c317da2020-03-23 07:39:13 -0500493bool ReplaceOpenCLBuiltinPass::replaceDivide(Function &F) {
494 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +0100495 auto Op0 = CI->getOperand(0);
496 auto Op1 = CI->getOperand(1);
497 return BinaryOperator::Create(Instruction::FDiv, Op0, Op1, "", CI);
498 });
David Neto22f144c2017-06-12 14:26:21 -0400499}
500
SJW2c317da2020-03-23 07:39:13 -0500501bool ReplaceOpenCLBuiltinPass::replaceDot(Function &F) {
502 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit1329a002019-06-15 05:54:05 +0100503 auto Op0 = CI->getOperand(0);
504 auto Op1 = CI->getOperand(1);
505
SJW2c317da2020-03-23 07:39:13 -0500506 Value *V = nullptr;
Kévin Petit1329a002019-06-15 05:54:05 +0100507 if (Op0->getType()->isVectorTy()) {
508 V = clspv::InsertSPIRVOp(CI, spv::OpDot, {Attribute::ReadNone},
509 CI->getType(), {Op0, Op1});
510 } else {
511 V = BinaryOperator::Create(Instruction::FMul, Op0, Op1, "", CI);
512 }
513
514 return V;
515 });
516}
517
SJW2c317da2020-03-23 07:39:13 -0500518bool ReplaceOpenCLBuiltinPass::replaceExp10(Function &F,
SJW61531372020-06-09 07:31:08 -0500519 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500520 // convert to natural
521 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500522 std::string NewFName = basename.substr(0, slen);
523 NewFName =
524 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400525
SJW2c317da2020-03-23 07:39:13 -0500526 Module &M = *F.getParent();
527 return replaceCallsWithValue(F, [&](CallInst *CI) {
528 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
529
530 auto Arg = CI->getOperand(0);
531
532 // Constant of the natural log of 10 (ln(10)).
533 const double Ln10 =
534 2.302585092994045684017991454684364207601101488628772976033;
535
536 auto Mul = BinaryOperator::Create(
537 Instruction::FMul, ConstantFP::get(Arg->getType(), Ln10), Arg, "", CI);
538
539 return CallInst::Create(NewF, Mul, "", CI);
540 });
David Neto22f144c2017-06-12 14:26:21 -0400541}
542
SJW2c317da2020-03-23 07:39:13 -0500543bool ReplaceOpenCLBuiltinPass::replaceFmod(Function &F) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100544 // OpenCL fmod(x,y) is x - y * trunc(x/y)
545 // The sign for a non-zero result is taken from x.
546 // (Try an example.)
547 // So translate to FRem
SJW2c317da2020-03-23 07:39:13 -0500548 return replaceCallsWithValue(F, [](CallInst *CI) {
Kévin Petit0644a9c2019-06-20 21:08:46 +0100549 auto Op0 = CI->getOperand(0);
550 auto Op1 = CI->getOperand(1);
551 return BinaryOperator::Create(Instruction::FRem, Op0, Op1, "", CI);
552 });
553}
554
SJW2c317da2020-03-23 07:39:13 -0500555bool ReplaceOpenCLBuiltinPass::replaceLog10(Function &F,
SJW61531372020-06-09 07:31:08 -0500556 const std::string &basename) {
SJW2c317da2020-03-23 07:39:13 -0500557 // convert to natural
558 auto slen = basename.length() - 2;
SJW61531372020-06-09 07:31:08 -0500559 std::string NewFName = basename.substr(0, slen);
560 NewFName =
561 Builtins::GetMangledFunctionName(NewFName.c_str(), F.getFunctionType());
David Neto22f144c2017-06-12 14:26:21 -0400562
SJW2c317da2020-03-23 07:39:13 -0500563 Module &M = *F.getParent();
564 return replaceCallsWithValue(F, [&](CallInst *CI) {
565 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
566
567 auto Arg = CI->getOperand(0);
568
569 // Constant of the reciprocal of the natural log of 10 (ln(10)).
570 const double Ln10 =
571 0.434294481903251827651128918916605082294397005803666566114;
572
573 auto NewCI = CallInst::Create(NewF, Arg, "", CI);
574
575 return BinaryOperator::Create(Instruction::FMul,
576 ConstantFP::get(Arg->getType(), Ln10), NewCI,
577 "", CI);
578 });
David Neto22f144c2017-06-12 14:26:21 -0400579}
580
gnl21636e7992020-09-09 16:08:16 +0100581bool ReplaceOpenCLBuiltinPass::replaceLog1p(Function &F) {
582 // convert to natural
583 std::string NewFName =
584 Builtins::GetMangledFunctionName("log", F.getFunctionType());
585
586 Module &M = *F.getParent();
587 return replaceCallsWithValue(F, [&](CallInst *CI) {
588 auto NewF = M.getOrInsertFunction(NewFName, F.getFunctionType());
589
590 auto Arg = CI->getOperand(0);
591
592 auto ArgP1 = BinaryOperator::Create(
593 Instruction::FAdd, ConstantFP::get(Arg->getType(), 1.0), Arg, "", CI);
594
595 return CallInst::Create(NewF, ArgP1, "", CI);
596 });
597}
598
alan-baker12d2c182020-07-20 08:22:42 -0400599bool ReplaceOpenCLBuiltinPass::replaceBarrier(Function &F, bool subgroup) {
David Neto22f144c2017-06-12 14:26:21 -0400600
alan-bakerf6bc8252020-09-23 14:58:55 -0400601 enum {
602 CLK_LOCAL_MEM_FENCE = 0x01,
603 CLK_GLOBAL_MEM_FENCE = 0x02,
604 CLK_IMAGE_MEM_FENCE = 0x04
605 };
David Neto22f144c2017-06-12 14:26:21 -0400606
alan-baker12d2c182020-07-20 08:22:42 -0400607 return replaceCallsWithValue(F, [subgroup](CallInst *CI) {
Kévin Petitc4643922019-06-17 19:32:05 +0100608 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400609
Kévin Petitc4643922019-06-17 19:32:05 +0100610 // We need to map the OpenCL constants to the SPIR-V equivalents.
611 const auto LocalMemFence =
612 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
613 const auto GlobalMemFence =
614 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400615 const auto ImageMemFence =
616 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
alan-baker12d2c182020-07-20 08:22:42 -0400617 const auto ConstantAcquireRelease = ConstantInt::get(
618 Arg->getType(), spv::MemorySemanticsAcquireReleaseMask);
Kévin Petitc4643922019-06-17 19:32:05 +0100619 const auto ConstantScopeDevice =
620 ConstantInt::get(Arg->getType(), spv::ScopeDevice);
621 const auto ConstantScopeWorkgroup =
622 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
alan-baker12d2c182020-07-20 08:22:42 -0400623 const auto ConstantScopeSubgroup =
624 ConstantInt::get(Arg->getType(), spv::ScopeSubgroup);
David Neto22f144c2017-06-12 14:26:21 -0400625
Kévin Petitc4643922019-06-17 19:32:05 +0100626 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
627 const auto LocalMemFenceMask =
628 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
629 const auto WorkgroupShiftAmount =
630 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
631 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
632 Instruction::Shl, LocalMemFenceMask,
633 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400634
Kévin Petitc4643922019-06-17 19:32:05 +0100635 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
636 const auto GlobalMemFenceMask =
637 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
638 const auto UniformShiftAmount =
639 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
640 const auto MemorySemanticsUniform = BinaryOperator::Create(
641 Instruction::Shl, GlobalMemFenceMask,
642 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400643
alan-bakerf6bc8252020-09-23 14:58:55 -0400644 // OpenCL 2.0
645 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
646 const auto ImageMemFenceMask =
647 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
648 const auto ImageShiftAmount =
649 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
650 const auto MemorySemanticsImage = BinaryOperator::Create(
651 Instruction::Shl, ImageMemFenceMask,
652 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
653
Kévin Petitc4643922019-06-17 19:32:05 +0100654 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400655 // MemorySemanticsSequentiallyConsistentMask.
656 auto MemorySemantics1 =
Kévin Petitc4643922019-06-17 19:32:05 +0100657 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
alan-baker12d2c182020-07-20 08:22:42 -0400658 ConstantAcquireRelease, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400659 auto MemorySemantics2 = BinaryOperator::Create(
660 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
661 auto MemorySemantics = BinaryOperator::Create(
662 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400663
alan-baker12d2c182020-07-20 08:22:42 -0400664 // If the memory scope is not specified explicitly, it is either Subgroup
665 // or Workgroup depending on the type of barrier.
666 Value *MemoryScope =
667 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
668 if (CI->data_operands_size() > 1) {
669 enum {
670 CL_MEMORY_SCOPE_WORKGROUP = 0x1,
671 CL_MEMORY_SCOPE_DEVICE = 0x2,
672 CL_MEMORY_SCOPE_SUBGROUP = 0x4
673 };
674 // The call was given an explicit memory scope.
675 const auto MemoryScopeSubgroup =
676 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_SUBGROUP);
677 const auto MemoryScopeDevice =
678 ConstantInt::get(Arg->getType(), CL_MEMORY_SCOPE_DEVICE);
David Neto22f144c2017-06-12 14:26:21 -0400679
alan-baker12d2c182020-07-20 08:22:42 -0400680 auto Cmp =
681 CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
682 MemoryScopeSubgroup, CI->getOperand(1), "", CI);
683 MemoryScope = SelectInst::Create(Cmp, ConstantScopeSubgroup,
684 ConstantScopeWorkgroup, "", CI);
685 Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ,
686 MemoryScopeDevice, CI->getOperand(1), "", CI);
687 MemoryScope =
688 SelectInst::Create(Cmp, ConstantScopeDevice, MemoryScope, "", CI);
689 }
690
691 // Lastly, the Execution Scope is either Workgroup or Subgroup depending on
692 // the type of barrier;
693 const auto ExecutionScope =
694 subgroup ? ConstantScopeSubgroup : ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400695
Kévin Petitc4643922019-06-17 19:32:05 +0100696 return clspv::InsertSPIRVOp(CI, spv::OpControlBarrier,
697 {Attribute::NoDuplicate}, CI->getType(),
698 {ExecutionScope, MemoryScope, MemorySemantics});
699 });
David Neto22f144c2017-06-12 14:26:21 -0400700}
701
SJW2c317da2020-03-23 07:39:13 -0500702bool ReplaceOpenCLBuiltinPass::replaceMemFence(Function &F,
703 uint32_t semantics) {
David Neto22f144c2017-06-12 14:26:21 -0400704
SJW2c317da2020-03-23 07:39:13 -0500705 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerf6bc8252020-09-23 14:58:55 -0400706 enum {
707 CLK_LOCAL_MEM_FENCE = 0x01,
708 CLK_GLOBAL_MEM_FENCE = 0x02,
709 CLK_IMAGE_MEM_FENCE = 0x04,
710 };
David Neto22f144c2017-06-12 14:26:21 -0400711
SJW2c317da2020-03-23 07:39:13 -0500712 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400713
SJW2c317da2020-03-23 07:39:13 -0500714 // We need to map the OpenCL constants to the SPIR-V equivalents.
715 const auto LocalMemFence =
716 ConstantInt::get(Arg->getType(), CLK_LOCAL_MEM_FENCE);
717 const auto GlobalMemFence =
718 ConstantInt::get(Arg->getType(), CLK_GLOBAL_MEM_FENCE);
alan-bakerf6bc8252020-09-23 14:58:55 -0400719 const auto ImageMemFence =
720 ConstantInt::get(Arg->getType(), CLK_IMAGE_MEM_FENCE);
SJW2c317da2020-03-23 07:39:13 -0500721 const auto ConstantMemorySemantics =
722 ConstantInt::get(Arg->getType(), semantics);
alan-baker12d2c182020-07-20 08:22:42 -0400723 const auto ConstantScopeWorkgroup =
724 ConstantInt::get(Arg->getType(), spv::ScopeWorkgroup);
David Neto22f144c2017-06-12 14:26:21 -0400725
SJW2c317da2020-03-23 07:39:13 -0500726 // Map CLK_LOCAL_MEM_FENCE to MemorySemanticsWorkgroupMemoryMask.
727 const auto LocalMemFenceMask =
728 BinaryOperator::Create(Instruction::And, LocalMemFence, Arg, "", CI);
729 const auto WorkgroupShiftAmount =
730 clz(spv::MemorySemanticsWorkgroupMemoryMask) - clz(CLK_LOCAL_MEM_FENCE);
731 const auto MemorySemanticsWorkgroup = BinaryOperator::Create(
732 Instruction::Shl, LocalMemFenceMask,
733 ConstantInt::get(Arg->getType(), WorkgroupShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400734
SJW2c317da2020-03-23 07:39:13 -0500735 // Map CLK_GLOBAL_MEM_FENCE to MemorySemanticsUniformMemoryMask.
736 const auto GlobalMemFenceMask =
737 BinaryOperator::Create(Instruction::And, GlobalMemFence, Arg, "", CI);
738 const auto UniformShiftAmount =
739 clz(spv::MemorySemanticsUniformMemoryMask) - clz(CLK_GLOBAL_MEM_FENCE);
740 const auto MemorySemanticsUniform = BinaryOperator::Create(
741 Instruction::Shl, GlobalMemFenceMask,
742 ConstantInt::get(Arg->getType(), UniformShiftAmount), "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400743
alan-bakerf6bc8252020-09-23 14:58:55 -0400744 // OpenCL 2.0
745 // Map CLK_IMAGE_MEM_FENCE to MemorySemanticsImageMemoryMask.
746 const auto ImageMemFenceMask =
747 BinaryOperator::Create(Instruction::And, ImageMemFence, Arg, "", CI);
748 const auto ImageShiftAmount =
749 clz(spv::MemorySemanticsImageMemoryMask) - clz(CLK_IMAGE_MEM_FENCE);
750 const auto MemorySemanticsImage = BinaryOperator::Create(
751 Instruction::Shl, ImageMemFenceMask,
752 ConstantInt::get(Arg->getType(), ImageShiftAmount), "", CI);
753
SJW2c317da2020-03-23 07:39:13 -0500754 // And combine the above together, also adding in
alan-bakerf6bc8252020-09-23 14:58:55 -0400755 // |semantics|.
756 auto MemorySemantics1 =
SJW2c317da2020-03-23 07:39:13 -0500757 BinaryOperator::Create(Instruction::Or, MemorySemanticsWorkgroup,
758 ConstantMemorySemantics, "", CI);
alan-bakerf6bc8252020-09-23 14:58:55 -0400759 auto MemorySemantics2 = BinaryOperator::Create(
760 Instruction::Or, MemorySemanticsUniform, MemorySemanticsImage, "", CI);
761 auto MemorySemantics = BinaryOperator::Create(
762 Instruction::Or, MemorySemantics1, MemorySemantics2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400763
alan-baker12d2c182020-07-20 08:22:42 -0400764 // Memory Scope is always workgroup.
765 const auto MemoryScope = ConstantScopeWorkgroup;
David Neto22f144c2017-06-12 14:26:21 -0400766
SJW2c317da2020-03-23 07:39:13 -0500767 return clspv::InsertSPIRVOp(CI, spv::OpMemoryBarrier, {}, CI->getType(),
768 {MemoryScope, MemorySemantics});
769 });
David Neto22f144c2017-06-12 14:26:21 -0400770}
771
Kévin Petit1cb45112020-04-27 18:55:48 +0100772bool ReplaceOpenCLBuiltinPass::replacePrefetch(Function &F) {
773 bool Changed = false;
774
775 SmallVector<Instruction *, 4> ToRemoves;
776
777 // Find all calls to the function
778 for (auto &U : F.uses()) {
779 if (auto CI = dyn_cast<CallInst>(U.getUser())) {
780 ToRemoves.push_back(CI);
781 }
782 }
783
784 Changed = !ToRemoves.empty();
785
786 // Delete them
787 for (auto V : ToRemoves) {
788 V->eraseFromParent();
789 }
790
791 return Changed;
792}
793
SJW2c317da2020-03-23 07:39:13 -0500794bool ReplaceOpenCLBuiltinPass::replaceRelational(Function &F,
795 CmpInst::Predicate P,
796 int32_t C) {
797 return replaceCallsWithValue(F, [&](CallInst *CI) {
798 // The predicate to use in the CmpInst.
799 auto Predicate = P;
David Neto22f144c2017-06-12 14:26:21 -0400800
SJW2c317da2020-03-23 07:39:13 -0500801 // The value to return for true.
802 auto TrueValue = ConstantInt::getSigned(CI->getType(), C);
David Neto22f144c2017-06-12 14:26:21 -0400803
SJW2c317da2020-03-23 07:39:13 -0500804 // The value to return for false.
805 auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400806
SJW2c317da2020-03-23 07:39:13 -0500807 auto Arg1 = CI->getOperand(0);
808 auto Arg2 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -0400809
SJW2c317da2020-03-23 07:39:13 -0500810 const auto Cmp =
811 CmpInst::Create(Instruction::FCmp, Predicate, Arg1, Arg2, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400812
SJW2c317da2020-03-23 07:39:13 -0500813 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
814 });
David Neto22f144c2017-06-12 14:26:21 -0400815}
816
SJW2c317da2020-03-23 07:39:13 -0500817bool ReplaceOpenCLBuiltinPass::replaceIsInfAndIsNan(Function &F,
818 spv::Op SPIRVOp,
819 int32_t C) {
820 Module &M = *F.getParent();
821 return replaceCallsWithValue(F, [&](CallInst *CI) {
822 const auto CITy = CI->getType();
David Neto22f144c2017-06-12 14:26:21 -0400823
SJW2c317da2020-03-23 07:39:13 -0500824 // The value to return for true.
825 auto TrueValue = ConstantInt::getSigned(CITy, C);
David Neto22f144c2017-06-12 14:26:21 -0400826
SJW2c317da2020-03-23 07:39:13 -0500827 // The value to return for false.
828 auto FalseValue = Constant::getNullValue(CITy);
David Neto22f144c2017-06-12 14:26:21 -0400829
SJW2c317da2020-03-23 07:39:13 -0500830 Type *CorrespondingBoolTy = Type::getInt1Ty(M.getContext());
James Pricecf53df42020-04-20 14:41:24 -0400831 if (auto CIVecTy = dyn_cast<VectorType>(CITy)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400832 CorrespondingBoolTy =
833 FixedVectorType::get(Type::getInt1Ty(M.getContext()),
834 CIVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -0400835 }
David Neto22f144c2017-06-12 14:26:21 -0400836
SJW2c317da2020-03-23 07:39:13 -0500837 auto NewCI = clspv::InsertSPIRVOp(CI, SPIRVOp, {Attribute::ReadNone},
838 CorrespondingBoolTy, {CI->getOperand(0)});
839
840 return SelectInst::Create(NewCI, TrueValue, FalseValue, "", CI);
841 });
David Neto22f144c2017-06-12 14:26:21 -0400842}
843
SJW2c317da2020-03-23 07:39:13 -0500844bool ReplaceOpenCLBuiltinPass::replaceIsFinite(Function &F) {
845 Module &M = *F.getParent();
846 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100847 auto &C = M.getContext();
848 auto Val = CI->getOperand(0);
849 auto ValTy = Val->getType();
850 auto RetTy = CI->getType();
851
852 // Get a suitable integer type to represent the number
853 auto IntTy = getIntOrIntVectorTyForCast(C, ValTy);
854
855 // Create Mask
856 auto ScalarSize = ValTy->getScalarSizeInBits();
SJW2c317da2020-03-23 07:39:13 -0500857 Value *InfMask = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100858 switch (ScalarSize) {
859 case 16:
860 InfMask = ConstantInt::get(IntTy, 0x7C00U);
861 break;
862 case 32:
863 InfMask = ConstantInt::get(IntTy, 0x7F800000U);
864 break;
865 case 64:
866 InfMask = ConstantInt::get(IntTy, 0x7FF0000000000000ULL);
867 break;
868 default:
869 llvm_unreachable("Unsupported floating-point type");
870 }
871
872 IRBuilder<> Builder(CI);
873
874 // Bitcast to int
875 auto ValInt = Builder.CreateBitCast(Val, IntTy);
876
877 // Mask and compare
878 auto InfBits = Builder.CreateAnd(InfMask, ValInt);
879 auto Cmp = Builder.CreateICmp(CmpInst::ICMP_EQ, InfBits, InfMask);
880
881 auto RetFalse = ConstantInt::get(RetTy, 0);
SJW2c317da2020-03-23 07:39:13 -0500882 Value *RetTrue = nullptr;
Kévin Petitfdfa92e2019-09-25 14:20:58 +0100883 if (ValTy->isVectorTy()) {
884 RetTrue = ConstantInt::getSigned(RetTy, -1);
885 } else {
886 RetTrue = ConstantInt::get(RetTy, 1);
887 }
888 return Builder.CreateSelect(Cmp, RetFalse, RetTrue);
889 });
890}
891
SJW2c317da2020-03-23 07:39:13 -0500892bool ReplaceOpenCLBuiltinPass::replaceAllAndAny(Function &F, spv::Op SPIRVOp) {
893 Module &M = *F.getParent();
894 return replaceCallsWithValue(F, [&](CallInst *CI) {
895 auto Arg = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400896
SJW2c317da2020-03-23 07:39:13 -0500897 Value *V = nullptr;
Kévin Petitfd27cca2018-10-31 13:00:17 +0000898
SJW2c317da2020-03-23 07:39:13 -0500899 // If the argument is a 32-bit int, just use a shift
900 if (Arg->getType() == Type::getInt32Ty(M.getContext())) {
901 V = BinaryOperator::Create(Instruction::LShr, Arg,
902 ConstantInt::get(Arg->getType(), 31), "", CI);
903 } else {
904 // The value for zero to compare against.
905 const auto ZeroValue = Constant::getNullValue(Arg->getType());
David Neto22f144c2017-06-12 14:26:21 -0400906
SJW2c317da2020-03-23 07:39:13 -0500907 // The value to return for true.
908 const auto TrueValue = ConstantInt::get(CI->getType(), 1);
David Neto22f144c2017-06-12 14:26:21 -0400909
SJW2c317da2020-03-23 07:39:13 -0500910 // The value to return for false.
911 const auto FalseValue = Constant::getNullValue(CI->getType());
David Neto22f144c2017-06-12 14:26:21 -0400912
SJW2c317da2020-03-23 07:39:13 -0500913 const auto Cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
914 Arg, ZeroValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400915
SJW2c317da2020-03-23 07:39:13 -0500916 Value *SelectSource = nullptr;
David Neto22f144c2017-06-12 14:26:21 -0400917
SJW2c317da2020-03-23 07:39:13 -0500918 // If we have a function to call, call it!
919 if (SPIRVOp != spv::OpNop) {
David Neto22f144c2017-06-12 14:26:21 -0400920
SJW2c317da2020-03-23 07:39:13 -0500921 const auto BoolTy = Type::getInt1Ty(M.getContext());
David Neto22f144c2017-06-12 14:26:21 -0400922
SJW2c317da2020-03-23 07:39:13 -0500923 const auto NewCI = clspv::InsertSPIRVOp(
924 CI, SPIRVOp, {Attribute::ReadNone}, BoolTy, {Cmp});
925 SelectSource = NewCI;
David Neto22f144c2017-06-12 14:26:21 -0400926
SJW2c317da2020-03-23 07:39:13 -0500927 } else {
928 SelectSource = Cmp;
David Neto22f144c2017-06-12 14:26:21 -0400929 }
930
SJW2c317da2020-03-23 07:39:13 -0500931 V = SelectInst::Create(SelectSource, TrueValue, FalseValue, "", CI);
David Neto22f144c2017-06-12 14:26:21 -0400932 }
SJW2c317da2020-03-23 07:39:13 -0500933 return V;
934 });
David Neto22f144c2017-06-12 14:26:21 -0400935}
936
SJW2c317da2020-03-23 07:39:13 -0500937bool ReplaceOpenCLBuiltinPass::replaceUpsample(Function &F) {
938 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
939 // Get arguments
940 auto HiValue = CI->getOperand(0);
941 auto LoValue = CI->getOperand(1);
Kévin Petitbf0036c2019-03-06 13:57:10 +0000942
SJW2c317da2020-03-23 07:39:13 -0500943 // Don't touch overloads that aren't in OpenCL C
944 auto HiType = HiValue->getType();
945 auto LoType = LoValue->getType();
946
947 if (HiType != LoType) {
948 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000949 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000950
SJW2c317da2020-03-23 07:39:13 -0500951 if (!HiType->isIntOrIntVectorTy()) {
952 return nullptr;
Kévin Petitbf0036c2019-03-06 13:57:10 +0000953 }
Kévin Petitbf0036c2019-03-06 13:57:10 +0000954
SJW2c317da2020-03-23 07:39:13 -0500955 if (HiType->getScalarSizeInBits() * 2 !=
956 CI->getType()->getScalarSizeInBits()) {
957 return nullptr;
958 }
959
960 if ((HiType->getScalarSizeInBits() != 8) &&
961 (HiType->getScalarSizeInBits() != 16) &&
962 (HiType->getScalarSizeInBits() != 32)) {
963 return nullptr;
964 }
965
James Pricecf53df42020-04-20 14:41:24 -0400966 if (auto HiVecType = dyn_cast<VectorType>(HiType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400967 unsigned NumElements = HiVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -0400968 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
969 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -0500970 return nullptr;
971 }
972 }
973
974 // Convert both operands to the result type
975 auto HiCast = CastInst::CreateZExtOrBitCast(HiValue, CI->getType(), "", CI);
976 auto LoCast = CastInst::CreateZExtOrBitCast(LoValue, CI->getType(), "", CI);
977
978 // Shift high operand
979 auto ShiftAmount =
980 ConstantInt::get(CI->getType(), HiType->getScalarSizeInBits());
981 auto HiShifted =
982 BinaryOperator::Create(Instruction::Shl, HiCast, ShiftAmount, "", CI);
983
984 // OR both results
985 return BinaryOperator::Create(Instruction::Or, HiShifted, LoCast, "", CI);
986 });
Kévin Petitbf0036c2019-03-06 13:57:10 +0000987}
988
SJW2c317da2020-03-23 07:39:13 -0500989bool ReplaceOpenCLBuiltinPass::replaceRotate(Function &F) {
990 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
991 // Get arguments
992 auto SrcValue = CI->getOperand(0);
993 auto RotAmount = CI->getOperand(1);
Kévin Petitd44eef52019-03-08 13:22:14 +0000994
SJW2c317da2020-03-23 07:39:13 -0500995 // Don't touch overloads that aren't in OpenCL C
996 auto SrcType = SrcValue->getType();
997 auto RotType = RotAmount->getType();
998
999 if ((SrcType != RotType) || (CI->getType() != SrcType)) {
1000 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001001 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001002
SJW2c317da2020-03-23 07:39:13 -05001003 if (!SrcType->isIntOrIntVectorTy()) {
1004 return nullptr;
Kévin Petitd44eef52019-03-08 13:22:14 +00001005 }
Kévin Petitd44eef52019-03-08 13:22:14 +00001006
SJW2c317da2020-03-23 07:39:13 -05001007 if ((SrcType->getScalarSizeInBits() != 8) &&
1008 (SrcType->getScalarSizeInBits() != 16) &&
1009 (SrcType->getScalarSizeInBits() != 32) &&
1010 (SrcType->getScalarSizeInBits() != 64)) {
1011 return nullptr;
1012 }
1013
James Pricecf53df42020-04-20 14:41:24 -04001014 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001015 unsigned NumElements = SrcVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001016 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1017 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001018 return nullptr;
1019 }
1020 }
1021
1022 // The approach used is to shift the top bits down, the bottom bits up
1023 // and OR the two shifted values.
1024
1025 // The rotation amount is to be treated modulo the element size.
1026 // Since SPIR-V shift ops don't support this, let's apply the
1027 // modulo ahead of shifting. The element size is always a power of
1028 // two so we can just AND with a mask.
1029 auto ModMask =
1030 ConstantInt::get(SrcType, SrcType->getScalarSizeInBits() - 1);
1031 RotAmount =
1032 BinaryOperator::Create(Instruction::And, RotAmount, ModMask, "", CI);
1033
1034 // Let's calc the amount by which to shift top bits down
1035 auto ScalarSize = ConstantInt::get(SrcType, SrcType->getScalarSizeInBits());
1036 auto DownAmount =
1037 BinaryOperator::Create(Instruction::Sub, ScalarSize, RotAmount, "", CI);
1038
1039 // Now shift the bottom bits up and the top bits down
1040 auto LoRotated =
1041 BinaryOperator::Create(Instruction::Shl, SrcValue, RotAmount, "", CI);
1042 auto HiRotated =
1043 BinaryOperator::Create(Instruction::LShr, SrcValue, DownAmount, "", CI);
1044
1045 // Finally OR the two shifted values
1046 return BinaryOperator::Create(Instruction::Or, LoRotated, HiRotated, "",
1047 CI);
1048 });
Kévin Petitd44eef52019-03-08 13:22:14 +00001049}
1050
SJW2c317da2020-03-23 07:39:13 -05001051bool ReplaceOpenCLBuiltinPass::replaceConvert(Function &F, bool SrcIsSigned,
1052 bool DstIsSigned) {
1053 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1054 Value *V = nullptr;
1055 // Get arguments
1056 auto SrcValue = CI->getOperand(0);
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001057
SJW2c317da2020-03-23 07:39:13 -05001058 // Don't touch overloads that aren't in OpenCL C
1059 auto SrcType = SrcValue->getType();
1060 auto DstType = CI->getType();
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001061
SJW2c317da2020-03-23 07:39:13 -05001062 if ((SrcType->isVectorTy() && !DstType->isVectorTy()) ||
1063 (!SrcType->isVectorTy() && DstType->isVectorTy())) {
1064 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001065 }
1066
James Pricecf53df42020-04-20 14:41:24 -04001067 if (auto SrcVecType = dyn_cast<VectorType>(SrcType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001068 unsigned SrcNumElements =
1069 SrcVecType->getElementCount().getKnownMinValue();
1070 unsigned DstNumElements =
1071 cast<VectorType>(DstType)->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001072 if (SrcNumElements != DstNumElements) {
SJW2c317da2020-03-23 07:39:13 -05001073 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001074 }
1075
James Pricecf53df42020-04-20 14:41:24 -04001076 if ((SrcNumElements != 2) && (SrcNumElements != 3) &&
1077 (SrcNumElements != 4) && (SrcNumElements != 8) &&
1078 (SrcNumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001079 return V;
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001080 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001081 }
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001082
SJW2c317da2020-03-23 07:39:13 -05001083 bool SrcIsFloat = SrcType->getScalarType()->isFloatingPointTy();
1084 bool DstIsFloat = DstType->getScalarType()->isFloatingPointTy();
1085
1086 bool SrcIsInt = SrcType->isIntOrIntVectorTy();
1087 bool DstIsInt = DstType->isIntOrIntVectorTy();
1088
1089 if (SrcType == DstType && DstIsSigned == SrcIsSigned) {
1090 // Unnecessary cast operation.
1091 V = SrcValue;
1092 } else if (SrcIsFloat && DstIsFloat) {
1093 V = CastInst::CreateFPCast(SrcValue, DstType, "", CI);
1094 } else if (SrcIsFloat && DstIsInt) {
1095 if (DstIsSigned) {
1096 V = CastInst::Create(Instruction::FPToSI, SrcValue, DstType, "", CI);
1097 } else {
1098 V = CastInst::Create(Instruction::FPToUI, SrcValue, DstType, "", CI);
1099 }
1100 } else if (SrcIsInt && DstIsFloat) {
1101 if (SrcIsSigned) {
1102 V = CastInst::Create(Instruction::SIToFP, SrcValue, DstType, "", CI);
1103 } else {
1104 V = CastInst::Create(Instruction::UIToFP, SrcValue, DstType, "", CI);
1105 }
1106 } else if (SrcIsInt && DstIsInt) {
1107 V = CastInst::CreateIntegerCast(SrcValue, DstType, SrcIsSigned, "", CI);
1108 } else {
1109 // Not something we're supposed to handle, just move on
1110 }
1111
1112 return V;
1113 });
Kévin Petit9d1a9d12019-03-25 15:23:46 +00001114}
1115
SJW2c317da2020-03-23 07:39:13 -05001116bool ReplaceOpenCLBuiltinPass::replaceMulHi(Function &F, bool is_signed,
1117 bool is_mad) {
1118 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1119 Value *V = nullptr;
1120 // Get arguments
1121 auto AValue = CI->getOperand(0);
1122 auto BValue = CI->getOperand(1);
1123 auto CValue = CI->getOperand(2);
Kévin Petit8a560882019-03-21 15:24:34 +00001124
SJW2c317da2020-03-23 07:39:13 -05001125 // Don't touch overloads that aren't in OpenCL C
1126 auto AType = AValue->getType();
1127 auto BType = BValue->getType();
1128 auto CType = CValue->getType();
Kévin Petit8a560882019-03-21 15:24:34 +00001129
SJW2c317da2020-03-23 07:39:13 -05001130 if ((AType != BType) || (CI->getType() != AType) ||
1131 (is_mad && (AType != CType))) {
1132 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001133 }
1134
SJW2c317da2020-03-23 07:39:13 -05001135 if (!AType->isIntOrIntVectorTy()) {
1136 return V;
Kévin Petit8a560882019-03-21 15:24:34 +00001137 }
Kévin Petit8a560882019-03-21 15:24:34 +00001138
SJW2c317da2020-03-23 07:39:13 -05001139 if ((AType->getScalarSizeInBits() != 8) &&
1140 (AType->getScalarSizeInBits() != 16) &&
1141 (AType->getScalarSizeInBits() != 32) &&
1142 (AType->getScalarSizeInBits() != 64)) {
1143 return V;
1144 }
Kévin Petit617a76d2019-04-04 13:54:16 +01001145
James Pricecf53df42020-04-20 14:41:24 -04001146 if (auto AVecType = dyn_cast<VectorType>(AType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001147 unsigned NumElements = AVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001148 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1149 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001150 return V;
Kévin Petit617a76d2019-04-04 13:54:16 +01001151 }
1152 }
1153
SJW2c317da2020-03-23 07:39:13 -05001154 // Our SPIR-V op returns a struct, create a type for it
1155 SmallVector<Type *, 2> TwoValueType = {AType, AType};
1156 auto ExMulRetType = StructType::create(TwoValueType);
Kévin Petit617a76d2019-04-04 13:54:16 +01001157
SJW2c317da2020-03-23 07:39:13 -05001158 // Select the appropriate signed/unsigned SPIR-V op
1159 spv::Op opcode = is_signed ? spv::OpSMulExtended : spv::OpUMulExtended;
1160
1161 // Call the SPIR-V op
1162 auto Call = clspv::InsertSPIRVOp(CI, opcode, {Attribute::ReadNone},
1163 ExMulRetType, {AValue, BValue});
1164
1165 // Get the high part of the result
1166 unsigned Idxs[] = {1};
1167 V = ExtractValueInst::Create(Call, Idxs, "", CI);
1168
1169 // If we're handling a mad_hi, add the third argument to the result
1170 if (is_mad) {
1171 V = BinaryOperator::Create(Instruction::Add, V, CValue, "", CI);
Kévin Petit617a76d2019-04-04 13:54:16 +01001172 }
1173
SJW2c317da2020-03-23 07:39:13 -05001174 return V;
1175 });
Kévin Petit8a560882019-03-21 15:24:34 +00001176}
1177
SJW2c317da2020-03-23 07:39:13 -05001178bool ReplaceOpenCLBuiltinPass::replaceSelect(Function &F) {
1179 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1180 // Get arguments
1181 auto FalseValue = CI->getOperand(0);
1182 auto TrueValue = CI->getOperand(1);
1183 auto PredicateValue = CI->getOperand(2);
Kévin Petitf5b78a22018-10-25 14:32:17 +00001184
SJW2c317da2020-03-23 07:39:13 -05001185 // Don't touch overloads that aren't in OpenCL C
1186 auto FalseType = FalseValue->getType();
1187 auto TrueType = TrueValue->getType();
1188 auto PredicateType = PredicateValue->getType();
1189
1190 if (FalseType != TrueType) {
1191 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001192 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001193
SJW2c317da2020-03-23 07:39:13 -05001194 if (!PredicateType->isIntOrIntVectorTy()) {
1195 return nullptr;
1196 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001197
SJW2c317da2020-03-23 07:39:13 -05001198 if (!FalseType->isIntOrIntVectorTy() &&
1199 !FalseType->getScalarType()->isFloatingPointTy()) {
1200 return nullptr;
1201 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001202
SJW2c317da2020-03-23 07:39:13 -05001203 if (FalseType->isVectorTy() && !PredicateType->isVectorTy()) {
1204 return nullptr;
1205 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001206
SJW2c317da2020-03-23 07:39:13 -05001207 if (FalseType->getScalarSizeInBits() !=
1208 PredicateType->getScalarSizeInBits()) {
1209 return nullptr;
1210 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001211
James Pricecf53df42020-04-20 14:41:24 -04001212 if (auto FalseVecType = dyn_cast<VectorType>(FalseType)) {
alan-baker5a8c3be2020-09-09 13:44:26 -04001213 unsigned NumElements = FalseVecType->getElementCount().getKnownMinValue();
1214 if (NumElements != cast<VectorType>(PredicateType)
1215 ->getElementCount()
1216 .getKnownMinValue()) {
SJW2c317da2020-03-23 07:39:13 -05001217 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001218 }
1219
James Pricecf53df42020-04-20 14:41:24 -04001220 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1221 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001222 return nullptr;
Kévin Petitf5b78a22018-10-25 14:32:17 +00001223 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001224 }
Kévin Petitf5b78a22018-10-25 14:32:17 +00001225
SJW2c317da2020-03-23 07:39:13 -05001226 // Create constant
1227 const auto ZeroValue = Constant::getNullValue(PredicateType);
1228
1229 // Scalar and vector are to be treated differently
1230 CmpInst::Predicate Pred;
1231 if (PredicateType->isVectorTy()) {
1232 Pred = CmpInst::ICMP_SLT;
1233 } else {
1234 Pred = CmpInst::ICMP_NE;
1235 }
1236
1237 // Create comparison instruction
1238 auto Cmp = CmpInst::Create(Instruction::ICmp, Pred, PredicateValue,
1239 ZeroValue, "", CI);
1240
1241 // Create select
1242 return SelectInst::Create(Cmp, TrueValue, FalseValue, "", CI);
1243 });
Kévin Petitf5b78a22018-10-25 14:32:17 +00001244}
1245
SJW2c317da2020-03-23 07:39:13 -05001246bool ReplaceOpenCLBuiltinPass::replaceBitSelect(Function &F) {
1247 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1248 Value *V = nullptr;
1249 if (CI->getNumOperands() != 4) {
1250 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001251 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001252
SJW2c317da2020-03-23 07:39:13 -05001253 // Get arguments
1254 auto FalseValue = CI->getOperand(0);
1255 auto TrueValue = CI->getOperand(1);
1256 auto PredicateValue = CI->getOperand(2);
Kévin Petite7d0cce2018-10-31 12:38:56 +00001257
SJW2c317da2020-03-23 07:39:13 -05001258 // Don't touch overloads that aren't in OpenCL C
1259 auto FalseType = FalseValue->getType();
1260 auto TrueType = TrueValue->getType();
1261 auto PredicateType = PredicateValue->getType();
Kévin Petite7d0cce2018-10-31 12:38:56 +00001262
SJW2c317da2020-03-23 07:39:13 -05001263 if ((FalseType != TrueType) || (PredicateType != TrueType)) {
1264 return V;
Kévin Petite7d0cce2018-10-31 12:38:56 +00001265 }
Kévin Petite7d0cce2018-10-31 12:38:56 +00001266
James Pricecf53df42020-04-20 14:41:24 -04001267 if (auto TrueVecType = dyn_cast<VectorType>(TrueType)) {
SJW2c317da2020-03-23 07:39:13 -05001268 if (!TrueType->getScalarType()->isFloatingPointTy() &&
1269 !TrueType->getScalarType()->isIntegerTy()) {
1270 return V;
1271 }
alan-baker5a8c3be2020-09-09 13:44:26 -04001272 unsigned NumElements = TrueVecType->getElementCount().getKnownMinValue();
James Pricecf53df42020-04-20 14:41:24 -04001273 if ((NumElements != 2) && (NumElements != 3) && (NumElements != 4) &&
1274 (NumElements != 8) && (NumElements != 16)) {
SJW2c317da2020-03-23 07:39:13 -05001275 return V;
1276 }
1277 }
1278
1279 // Remember the type of the operands
1280 auto OpType = TrueType;
1281
1282 // The actual bit selection will always be done on an integer type,
1283 // declare it here
1284 Type *BitType;
1285
1286 // If the operands are float, then bitcast them to int
1287 if (OpType->getScalarType()->isFloatingPointTy()) {
1288
1289 // First create the new type
1290 BitType = getIntOrIntVectorTyForCast(F.getContext(), OpType);
1291
1292 // Then bitcast all operands
1293 PredicateValue =
1294 CastInst::CreateZExtOrBitCast(PredicateValue, BitType, "", CI);
1295 FalseValue = CastInst::CreateZExtOrBitCast(FalseValue, BitType, "", CI);
1296 TrueValue = CastInst::CreateZExtOrBitCast(TrueValue, BitType, "", CI);
1297
1298 } else {
1299 // The operands have an integer type, use it directly
1300 BitType = OpType;
1301 }
1302
1303 // All the operands are now always integers
1304 // implement as (c & b) | (~c & a)
1305
1306 // Create our negated predicate value
1307 auto AllOnes = Constant::getAllOnesValue(BitType);
1308 auto NotPredicateValue = BinaryOperator::Create(
1309 Instruction::Xor, PredicateValue, AllOnes, "", CI);
1310
1311 // Then put everything together
1312 auto BitsFalse = BinaryOperator::Create(Instruction::And, NotPredicateValue,
1313 FalseValue, "", CI);
1314 auto BitsTrue = BinaryOperator::Create(Instruction::And, PredicateValue,
1315 TrueValue, "", CI);
1316
1317 V = BinaryOperator::Create(Instruction::Or, BitsFalse, BitsTrue, "", CI);
1318
1319 // If we were dealing with a floating point type, we must bitcast
1320 // the result back to that
1321 if (OpType->getScalarType()->isFloatingPointTy()) {
1322 V = CastInst::CreateZExtOrBitCast(V, OpType, "", CI);
1323 }
1324
1325 return V;
1326 });
Kévin Petite7d0cce2018-10-31 12:38:56 +00001327}
1328
SJW61531372020-06-09 07:31:08 -05001329bool ReplaceOpenCLBuiltinPass::replaceStep(Function &F, bool is_smooth) {
SJW2c317da2020-03-23 07:39:13 -05001330 // convert to vector versions
1331 Module &M = *F.getParent();
1332 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1333 SmallVector<Value *, 2> ArgsToSplat = {CI->getOperand(0)};
1334 Value *VectorArg = nullptr;
Kévin Petit6b0a9532018-10-30 20:00:39 +00001335
SJW2c317da2020-03-23 07:39:13 -05001336 // First figure out which function we're dealing with
1337 if (is_smooth) {
1338 ArgsToSplat.push_back(CI->getOperand(1));
1339 VectorArg = CI->getOperand(2);
1340 } else {
1341 VectorArg = CI->getOperand(1);
1342 }
1343
1344 // Splat arguments that need to be
1345 SmallVector<Value *, 2> SplatArgs;
James Pricecf53df42020-04-20 14:41:24 -04001346 auto VecType = cast<VectorType>(VectorArg->getType());
SJW2c317da2020-03-23 07:39:13 -05001347
1348 for (auto arg : ArgsToSplat) {
1349 Value *NewVectorArg = UndefValue::get(VecType);
alan-baker5a8c3be2020-09-09 13:44:26 -04001350 for (auto i = 0; i < VecType->getElementCount().getKnownMinValue(); i++) {
SJW2c317da2020-03-23 07:39:13 -05001351 auto index = ConstantInt::get(Type::getInt32Ty(M.getContext()), i);
1352 NewVectorArg =
1353 InsertElementInst::Create(NewVectorArg, arg, index, "", CI);
1354 }
1355 SplatArgs.push_back(NewVectorArg);
1356 }
1357
1358 // Replace the call with the vector/vector flavour
1359 SmallVector<Type *, 3> NewArgTypes(ArgsToSplat.size() + 1, VecType);
1360 const auto NewFType = FunctionType::get(CI->getType(), NewArgTypes, false);
1361
SJW61531372020-06-09 07:31:08 -05001362 std::string NewFName = Builtins::GetMangledFunctionName(
1363 is_smooth ? "smoothstep" : "step", NewFType);
1364
SJW2c317da2020-03-23 07:39:13 -05001365 const auto NewF = M.getOrInsertFunction(NewFName, NewFType);
1366
1367 SmallVector<Value *, 3> NewArgs;
1368 for (auto arg : SplatArgs) {
1369 NewArgs.push_back(arg);
1370 }
1371 NewArgs.push_back(VectorArg);
1372
1373 return CallInst::Create(NewF, NewArgs, "", CI);
1374 });
Kévin Petit6b0a9532018-10-30 20:00:39 +00001375}
1376
SJW2c317da2020-03-23 07:39:13 -05001377bool ReplaceOpenCLBuiltinPass::replaceSignbit(Function &F, bool is_vec) {
SJW2c317da2020-03-23 07:39:13 -05001378 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1379 auto Arg = CI->getOperand(0);
1380 auto Op = is_vec ? Instruction::AShr : Instruction::LShr;
David Neto22f144c2017-06-12 14:26:21 -04001381
SJW2c317da2020-03-23 07:39:13 -05001382 auto Bitcast = CastInst::CreateZExtOrBitCast(Arg, CI->getType(), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001383
SJW2c317da2020-03-23 07:39:13 -05001384 return BinaryOperator::Create(Op, Bitcast,
1385 ConstantInt::get(CI->getType(), 31), "", CI);
1386 });
David Neto22f144c2017-06-12 14:26:21 -04001387}
1388
SJW2c317da2020-03-23 07:39:13 -05001389bool ReplaceOpenCLBuiltinPass::replaceMul(Function &F, bool is_float,
1390 bool is_mad) {
SJW2c317da2020-03-23 07:39:13 -05001391 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1392 // The multiply instruction to use.
1393 auto MulInst = is_float ? Instruction::FMul : Instruction::Mul;
David Neto22f144c2017-06-12 14:26:21 -04001394
SJW2c317da2020-03-23 07:39:13 -05001395 SmallVector<Value *, 8> Args(CI->arg_begin(), CI->arg_end());
David Neto22f144c2017-06-12 14:26:21 -04001396
SJW2c317da2020-03-23 07:39:13 -05001397 Value *V = BinaryOperator::Create(MulInst, CI->getArgOperand(0),
1398 CI->getArgOperand(1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001399
SJW2c317da2020-03-23 07:39:13 -05001400 if (is_mad) {
1401 // The add instruction to use.
1402 auto AddInst = is_float ? Instruction::FAdd : Instruction::Add;
David Neto22f144c2017-06-12 14:26:21 -04001403
SJW2c317da2020-03-23 07:39:13 -05001404 V = BinaryOperator::Create(AddInst, V, CI->getArgOperand(2), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001405 }
David Neto22f144c2017-06-12 14:26:21 -04001406
SJW2c317da2020-03-23 07:39:13 -05001407 return V;
1408 });
David Neto22f144c2017-06-12 14:26:21 -04001409}
1410
SJW2c317da2020-03-23 07:39:13 -05001411bool ReplaceOpenCLBuiltinPass::replaceVstore(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001412 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1413 Value *V = nullptr;
1414 auto data = CI->getOperand(0);
Derek Chowcfd368b2017-10-19 20:58:45 -07001415
SJW2c317da2020-03-23 07:39:13 -05001416 auto data_type = data->getType();
1417 if (!data_type->isVectorTy())
1418 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001419
James Pricecf53df42020-04-20 14:41:24 -04001420 auto vec_data_type = cast<VectorType>(data_type);
1421
alan-baker5a8c3be2020-09-09 13:44:26 -04001422 auto elems = vec_data_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001423 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1424 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001425
SJW2c317da2020-03-23 07:39:13 -05001426 auto offset = CI->getOperand(1);
1427 auto ptr = CI->getOperand(2);
1428 auto ptr_type = ptr->getType();
1429 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001430 if (pointee_type != vec_data_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001431 return V;
alan-bakerf795f392019-06-11 18:24:34 -04001432
SJW2c317da2020-03-23 07:39:13 -05001433 // Avoid pointer casts. Instead generate the correct number of stores
1434 // and rely on drivers to coalesce appropriately.
1435 IRBuilder<> builder(CI);
1436 auto elems_const = builder.getInt32(elems);
1437 auto adjust = builder.CreateMul(offset, elems_const);
1438 for (auto i = 0; i < elems; ++i) {
1439 auto idx = builder.getInt32(i);
1440 auto add = builder.CreateAdd(adjust, idx);
1441 auto gep = builder.CreateGEP(ptr, add);
1442 auto extract = builder.CreateExtractElement(data, i);
1443 V = builder.CreateStore(extract, gep);
Derek Chowcfd368b2017-10-19 20:58:45 -07001444 }
SJW2c317da2020-03-23 07:39:13 -05001445 return V;
1446 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001447}
1448
SJW2c317da2020-03-23 07:39:13 -05001449bool ReplaceOpenCLBuiltinPass::replaceVload(Function &F) {
SJW2c317da2020-03-23 07:39:13 -05001450 return replaceCallsWithValue(F, [&](CallInst *CI) -> llvm::Value * {
1451 Value *V = nullptr;
1452 auto ret_type = F.getReturnType();
1453 if (!ret_type->isVectorTy())
1454 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001455
James Pricecf53df42020-04-20 14:41:24 -04001456 auto vec_ret_type = cast<VectorType>(ret_type);
1457
alan-baker5a8c3be2020-09-09 13:44:26 -04001458 auto elems = vec_ret_type->getElementCount().getKnownMinValue();
SJW2c317da2020-03-23 07:39:13 -05001459 if (elems != 2 && elems != 3 && elems != 4 && elems != 8 && elems != 16)
1460 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001461
SJW2c317da2020-03-23 07:39:13 -05001462 auto offset = CI->getOperand(0);
1463 auto ptr = CI->getOperand(1);
1464 auto ptr_type = ptr->getType();
1465 auto pointee_type = ptr_type->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -04001466 if (pointee_type != vec_ret_type->getElementType())
SJW2c317da2020-03-23 07:39:13 -05001467 return V;
Derek Chowcfd368b2017-10-19 20:58:45 -07001468
SJW2c317da2020-03-23 07:39:13 -05001469 // Avoid pointer casts. Instead generate the correct number of loads
1470 // and rely on drivers to coalesce appropriately.
1471 IRBuilder<> builder(CI);
1472 auto elems_const = builder.getInt32(elems);
1473 V = UndefValue::get(ret_type);
1474 auto adjust = builder.CreateMul(offset, elems_const);
1475 for (auto i = 0; i < elems; ++i) {
1476 auto idx = builder.getInt32(i);
1477 auto add = builder.CreateAdd(adjust, idx);
1478 auto gep = builder.CreateGEP(ptr, add);
1479 auto load = builder.CreateLoad(gep);
1480 V = builder.CreateInsertElement(V, load, i);
Derek Chowcfd368b2017-10-19 20:58:45 -07001481 }
SJW2c317da2020-03-23 07:39:13 -05001482 return V;
1483 });
Derek Chowcfd368b2017-10-19 20:58:45 -07001484}
1485
SJW2c317da2020-03-23 07:39:13 -05001486bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F,
1487 const std::string &name,
1488 int vec_size) {
1489 bool is_clspv_version = !name.compare(0, 8, "__clspv_");
1490 if (!vec_size) {
1491 // deduce vec_size from last character of name (e.g. vload_half4)
1492 vec_size = std::atoi(&name.back());
David Neto22f144c2017-06-12 14:26:21 -04001493 }
SJW2c317da2020-03-23 07:39:13 -05001494 switch (vec_size) {
1495 case 2:
1496 return is_clspv_version ? replaceClspvVloadaHalf2(F) : replaceVloadHalf2(F);
1497 case 4:
1498 return is_clspv_version ? replaceClspvVloadaHalf4(F) : replaceVloadHalf4(F);
1499 case 0:
1500 if (!is_clspv_version) {
1501 return replaceVloadHalf(F);
1502 }
1503 default:
1504 llvm_unreachable("Unsupported vload_half vector size");
1505 break;
1506 }
1507 return false;
David Neto22f144c2017-06-12 14:26:21 -04001508}
1509
SJW2c317da2020-03-23 07:39:13 -05001510bool ReplaceOpenCLBuiltinPass::replaceVloadHalf(Function &F) {
1511 Module &M = *F.getParent();
1512 return replaceCallsWithValue(F, [&](CallInst *CI) {
1513 // The index argument from vload_half.
1514 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001515
SJW2c317da2020-03-23 07:39:13 -05001516 // The pointer argument from vload_half.
1517 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001518
SJW2c317da2020-03-23 07:39:13 -05001519 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001520 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
SJW2c317da2020-03-23 07:39:13 -05001521 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
1522
1523 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001524 auto SPIRVIntrinsic = clspv::UnpackFunction();
SJW2c317da2020-03-23 07:39:13 -05001525
1526 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
1527
1528 Value *V = nullptr;
1529
alan-baker7efcaaa2020-05-06 19:33:27 -04001530 bool supports_16bit_storage = true;
1531 switch (Arg1->getType()->getPointerAddressSpace()) {
1532 case clspv::AddressSpace::Global:
1533 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1534 clspv::Option::StorageClass::kSSBO);
1535 break;
1536 case clspv::AddressSpace::Constant:
1537 if (clspv::Option::ConstantArgsInUniformBuffer())
1538 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1539 clspv::Option::StorageClass::kUBO);
1540 else
1541 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1542 clspv::Option::StorageClass::kSSBO);
1543 break;
1544 default:
1545 // Clspv will emit the Float16 capability if the half type is
1546 // encountered. That capability covers private and local addressspaces.
1547 break;
1548 }
1549
1550 if (supports_16bit_storage) {
SJW2c317da2020-03-23 07:39:13 -05001551 auto ShortTy = Type::getInt16Ty(M.getContext());
1552 auto ShortPointerTy =
1553 PointerType::get(ShortTy, Arg1->getType()->getPointerAddressSpace());
1554
1555 // Cast the half* pointer to short*.
1556 auto Cast = CastInst::CreatePointerCast(Arg1, ShortPointerTy, "", CI);
1557
1558 // Index into the correct address of the casted pointer.
1559 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg0, "", CI);
1560
1561 // Load from the short* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001562 auto Load = new LoadInst(ShortTy, Index, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001563
1564 // ZExt the short -> int.
1565 auto ZExt = CastInst::CreateZExtOrBitCast(Load, IntTy, "", CI);
1566
1567 // Get our float2.
1568 auto Call = CallInst::Create(NewF, ZExt, "", CI);
1569
1570 // Extract out the bottom element which is our float result.
1571 V = ExtractElementInst::Create(Call, ConstantInt::get(IntTy, 0), "", CI);
1572 } else {
1573 // Assume the pointer argument points to storage aligned to 32bits
1574 // or more.
1575 // TODO(dneto): Do more analysis to make sure this is true?
1576 //
1577 // Replace call vstore_half(i32 %index, half addrspace(1) %base)
1578 // with:
1579 //
1580 // %base_i32_ptr = bitcast half addrspace(1)* %base to i32
1581 // addrspace(1)* %index_is_odd32 = and i32 %index, 1 %index_i32 =
1582 // lshr i32 %index, 1 %in_ptr = getlementptr i32, i32
1583 // addrspace(1)* %base_i32_ptr, %index_i32 %value_i32 = load i32,
1584 // i32 addrspace(1)* %in_ptr %converted = call <2 x float>
1585 // @spirv.unpack.v2f16(i32 %value_i32) %value = extractelement <2
1586 // x float> %converted, %index_is_odd32
1587
1588 auto IntPointerTy =
1589 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
1590
1591 // Cast the base pointer to int*.
1592 // In a valid call (according to assumptions), this should get
1593 // optimized away in the simplify GEP pass.
1594 auto Cast = CastInst::CreatePointerCast(Arg1, IntPointerTy, "", CI);
1595
1596 auto One = ConstantInt::get(IntTy, 1);
1597 auto IndexIsOdd = BinaryOperator::CreateAnd(Arg0, One, "", CI);
1598 auto IndexIntoI32 = BinaryOperator::CreateLShr(Arg0, One, "", CI);
1599
1600 // Index into the correct address of the casted pointer.
1601 auto Ptr = GetElementPtrInst::Create(IntTy, Cast, IndexIntoI32, "", CI);
1602
1603 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001604 auto Load = new LoadInst(IntTy, Ptr, "", CI);
SJW2c317da2020-03-23 07:39:13 -05001605
1606 // Get our float2.
1607 auto Call = CallInst::Create(NewF, Load, "", CI);
1608
1609 // Extract out the float result, where the element number is
1610 // determined by whether the original index was even or odd.
1611 V = ExtractElementInst::Create(Call, IndexIsOdd, "", CI);
1612 }
1613 return V;
1614 });
1615}
1616
1617bool ReplaceOpenCLBuiltinPass::replaceVloadHalf2(Function &F) {
1618 Module &M = *F.getParent();
1619 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001620 // The index argument from vload_half.
1621 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001622
Kévin Petite8edce32019-04-10 14:23:32 +01001623 // The pointer argument from vload_half.
1624 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001625
Kévin Petite8edce32019-04-10 14:23:32 +01001626 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001627 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001628 auto NewPointerTy =
1629 PointerType::get(IntTy, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001630 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001631
Kévin Petite8edce32019-04-10 14:23:32 +01001632 // Cast the half* pointer to int*.
1633 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001634
Kévin Petite8edce32019-04-10 14:23:32 +01001635 // Index into the correct address of the casted pointer.
1636 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001637
Kévin Petite8edce32019-04-10 14:23:32 +01001638 // Load from the int* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001639 auto Load = new LoadInst(IntTy, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001640
Kévin Petite8edce32019-04-10 14:23:32 +01001641 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001642 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001643
Kévin Petite8edce32019-04-10 14:23:32 +01001644 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001645
Kévin Petite8edce32019-04-10 14:23:32 +01001646 // Get our float2.
1647 return CallInst::Create(NewF, Load, "", CI);
1648 });
David Neto22f144c2017-06-12 14:26:21 -04001649}
1650
SJW2c317da2020-03-23 07:39:13 -05001651bool ReplaceOpenCLBuiltinPass::replaceVloadHalf4(Function &F) {
1652 Module &M = *F.getParent();
1653 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001654 // The index argument from vload_half.
1655 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001656
Kévin Petite8edce32019-04-10 14:23:32 +01001657 // The pointer argument from vload_half.
1658 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001659
Kévin Petite8edce32019-04-10 14:23:32 +01001660 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001661 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1662 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001663 auto NewPointerTy =
1664 PointerType::get(Int2Ty, Arg1->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001665 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto22f144c2017-06-12 14:26:21 -04001666
Kévin Petite8edce32019-04-10 14:23:32 +01001667 // Cast the half* pointer to int2*.
1668 auto Cast = CastInst::CreatePointerCast(Arg1, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001669
Kévin Petite8edce32019-04-10 14:23:32 +01001670 // Index into the correct address of the casted pointer.
1671 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001672
Kévin Petite8edce32019-04-10 14:23:32 +01001673 // Load from the int2* we casted to.
alan-baker741fd1f2020-04-14 17:38:15 -04001674 auto Load = new LoadInst(Int2Ty, Index, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001675
Kévin Petite8edce32019-04-10 14:23:32 +01001676 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001677 auto X =
1678 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1679 auto Y =
1680 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001681
Kévin Petite8edce32019-04-10 14:23:32 +01001682 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001683 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001684
Kévin Petite8edce32019-04-10 14:23:32 +01001685 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001686
Kévin Petite8edce32019-04-10 14:23:32 +01001687 // Get the lower (x & y) components of our final float4.
1688 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001689
Kévin Petite8edce32019-04-10 14:23:32 +01001690 // Get the higher (z & w) components of our final float4.
1691 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001692
Kévin Petite8edce32019-04-10 14:23:32 +01001693 Constant *ShuffleMask[4] = {
1694 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1695 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04001696
Kévin Petite8edce32019-04-10 14:23:32 +01001697 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001698 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1699 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001700 });
David Neto22f144c2017-06-12 14:26:21 -04001701}
1702
SJW2c317da2020-03-23 07:39:13 -05001703bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf2(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001704
1705 // Replace __clspv_vloada_half2(uint Index, global uint* Ptr) with:
1706 //
1707 // %u = load i32 %ptr
1708 // %fxy = call <2 x float> Unpack2xHalf(u)
1709 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001710 Module &M = *F.getParent();
1711 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001712 auto Index = CI->getOperand(0);
1713 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001714
Kévin Petite8edce32019-04-10 14:23:32 +01001715 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001716 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001717 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001718
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001719 auto IndexedPtr = GetElementPtrInst::Create(IntTy, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001720 auto Load = new LoadInst(IntTy, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001721
Kévin Petite8edce32019-04-10 14:23:32 +01001722 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001723 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001724
Kévin Petite8edce32019-04-10 14:23:32 +01001725 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001726
Kévin Petite8edce32019-04-10 14:23:32 +01001727 // Get our final float2.
1728 return CallInst::Create(NewF, Load, "", CI);
1729 });
David Neto6ad93232018-06-07 15:42:58 -07001730}
1731
SJW2c317da2020-03-23 07:39:13 -05001732bool ReplaceOpenCLBuiltinPass::replaceClspvVloadaHalf4(Function &F) {
David Neto6ad93232018-06-07 15:42:58 -07001733
1734 // Replace __clspv_vloada_half4(uint Index, global uint2* Ptr) with:
1735 //
1736 // %u2 = load <2 x i32> %ptr
1737 // %u2xy = extractelement %u2, 0
1738 // %u2zw = extractelement %u2, 1
1739 // %fxy = call <2 x float> Unpack2xHalf(uint)
1740 // %fzw = call <2 x float> Unpack2xHalf(uint)
1741 // %result = shufflevector %fxy %fzw <4 x i32> <0, 1, 2, 3>
SJW2c317da2020-03-23 07:39:13 -05001742 Module &M = *F.getParent();
1743 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001744 auto Index = CI->getOperand(0);
1745 auto Ptr = CI->getOperand(1);
David Neto6ad93232018-06-07 15:42:58 -07001746
Kévin Petite8edce32019-04-10 14:23:32 +01001747 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001748 auto Int2Ty = FixedVectorType::get(IntTy, 2);
1749 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001750 auto NewFType = FunctionType::get(Float2Ty, IntTy, false);
David Neto6ad93232018-06-07 15:42:58 -07001751
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001752 auto IndexedPtr = GetElementPtrInst::Create(Int2Ty, Ptr, Index, "", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001753 auto Load = new LoadInst(Int2Ty, IndexedPtr, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001754
Kévin Petite8edce32019-04-10 14:23:32 +01001755 // Extract each element from the loaded int2.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001756 auto X =
1757 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 0), "", CI);
1758 auto Y =
1759 ExtractElementInst::Create(Load, ConstantInt::get(IntTy, 1), "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001760
Kévin Petite8edce32019-04-10 14:23:32 +01001761 // Our intrinsic to unpack a float2 from an int.
SJW61531372020-06-09 07:31:08 -05001762 auto SPIRVIntrinsic = clspv::UnpackFunction();
David Neto6ad93232018-06-07 15:42:58 -07001763
Kévin Petite8edce32019-04-10 14:23:32 +01001764 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto6ad93232018-06-07 15:42:58 -07001765
Kévin Petite8edce32019-04-10 14:23:32 +01001766 // Get the lower (x & y) components of our final float4.
1767 auto Lo = CallInst::Create(NewF, X, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001768
Kévin Petite8edce32019-04-10 14:23:32 +01001769 // Get the higher (z & w) components of our final float4.
1770 auto Hi = CallInst::Create(NewF, Y, "", CI);
David Neto6ad93232018-06-07 15:42:58 -07001771
Kévin Petite8edce32019-04-10 14:23:32 +01001772 Constant *ShuffleMask[4] = {
1773 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
1774 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
David Neto6ad93232018-06-07 15:42:58 -07001775
Kévin Petite8edce32019-04-10 14:23:32 +01001776 // Combine our two float2's into one float4.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001777 return new ShuffleVectorInst(Lo, Hi, ConstantVector::get(ShuffleMask), "",
1778 CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001779 });
David Neto6ad93232018-06-07 15:42:58 -07001780}
1781
SJW2c317da2020-03-23 07:39:13 -05001782bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F, int vec_size) {
1783 switch (vec_size) {
1784 case 0:
1785 return replaceVstoreHalf(F);
1786 case 2:
1787 return replaceVstoreHalf2(F);
1788 case 4:
1789 return replaceVstoreHalf4(F);
1790 default:
1791 llvm_unreachable("Unsupported vstore_half vector size");
1792 break;
1793 }
1794 return false;
1795}
David Neto22f144c2017-06-12 14:26:21 -04001796
SJW2c317da2020-03-23 07:39:13 -05001797bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf(Function &F) {
1798 Module &M = *F.getParent();
1799 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001800 // The value to store.
1801 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001802
Kévin Petite8edce32019-04-10 14:23:32 +01001803 // The index argument from vstore_half.
1804 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001805
Kévin Petite8edce32019-04-10 14:23:32 +01001806 // The pointer argument from vstore_half.
1807 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001808
Kévin Petite8edce32019-04-10 14:23:32 +01001809 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001810 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Kévin Petite8edce32019-04-10 14:23:32 +01001811 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
1812 auto One = ConstantInt::get(IntTy, 1);
David Neto22f144c2017-06-12 14:26:21 -04001813
Kévin Petite8edce32019-04-10 14:23:32 +01001814 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001815 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001816
Kévin Petite8edce32019-04-10 14:23:32 +01001817 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001818
Kévin Petite8edce32019-04-10 14:23:32 +01001819 // Insert our value into a float2 so that we can pack it.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001820 auto TempVec = InsertElementInst::Create(
1821 UndefValue::get(Float2Ty), Arg0, ConstantInt::get(IntTy, 0), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001822
Kévin Petite8edce32019-04-10 14:23:32 +01001823 // Pack the float2 -> half2 (in an int).
1824 auto X = CallInst::Create(NewF, TempVec, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001825
alan-baker7efcaaa2020-05-06 19:33:27 -04001826 bool supports_16bit_storage = true;
1827 switch (Arg2->getType()->getPointerAddressSpace()) {
1828 case clspv::AddressSpace::Global:
1829 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1830 clspv::Option::StorageClass::kSSBO);
1831 break;
1832 case clspv::AddressSpace::Constant:
1833 if (clspv::Option::ConstantArgsInUniformBuffer())
1834 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1835 clspv::Option::StorageClass::kUBO);
1836 else
1837 supports_16bit_storage = clspv::Option::Supports16BitStorageClass(
1838 clspv::Option::StorageClass::kSSBO);
1839 break;
1840 default:
1841 // Clspv will emit the Float16 capability if the half type is
1842 // encountered. That capability covers private and local addressspaces.
1843 break;
1844 }
1845
SJW2c317da2020-03-23 07:39:13 -05001846 Value *V = nullptr;
alan-baker7efcaaa2020-05-06 19:33:27 -04001847 if (supports_16bit_storage) {
Kévin Petite8edce32019-04-10 14:23:32 +01001848 auto ShortTy = Type::getInt16Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001849 auto ShortPointerTy =
1850 PointerType::get(ShortTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001851
Kévin Petite8edce32019-04-10 14:23:32 +01001852 // Truncate our i32 to an i16.
1853 auto Trunc = CastInst::CreateTruncOrBitCast(X, ShortTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001854
Kévin Petite8edce32019-04-10 14:23:32 +01001855 // Cast the half* pointer to short*.
1856 auto Cast = CastInst::CreatePointerCast(Arg2, ShortPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001857
Kévin Petite8edce32019-04-10 14:23:32 +01001858 // Index into the correct address of the casted pointer.
1859 auto Index = GetElementPtrInst::Create(ShortTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001860
Kévin Petite8edce32019-04-10 14:23:32 +01001861 // Store to the int* we casted to.
SJW2c317da2020-03-23 07:39:13 -05001862 V = new StoreInst(Trunc, Index, CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001863 } else {
1864 // We can only write to 32-bit aligned words.
1865 //
1866 // Assuming base is aligned to 32-bits, replace the equivalent of
1867 // vstore_half(value, index, base)
1868 // with:
1869 // uint32_t* target_ptr = (uint32_t*)(base) + index / 2;
1870 // uint32_t write_to_upper_half = index & 1u;
1871 // uint32_t shift = write_to_upper_half << 4;
1872 //
1873 // // Pack the float value as a half number in bottom 16 bits
1874 // // of an i32.
1875 // uint32_t packed = spirv.pack.v2f16((float2)(value, undef));
1876 //
1877 // uint32_t xor_value = (*target_ptr & (0xffff << shift))
1878 // ^ ((packed & 0xffff) << shift)
1879 // // We only need relaxed consistency, but OpenCL 1.2 only has
1880 // // sequentially consistent atomics.
1881 // // TODO(dneto): Use relaxed consistency.
1882 // atomic_xor(target_ptr, xor_value)
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001883 auto IntPointerTy =
1884 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
David Neto22f144c2017-06-12 14:26:21 -04001885
Kévin Petite8edce32019-04-10 14:23:32 +01001886 auto Four = ConstantInt::get(IntTy, 4);
1887 auto FFFF = ConstantInt::get(IntTy, 0xffff);
David Neto17852de2017-05-29 17:29:31 -04001888
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001889 auto IndexIsOdd =
1890 BinaryOperator::CreateAnd(Arg1, One, "index_is_odd_i32", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001891 // Compute index / 2
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001892 auto IndexIntoI32 =
1893 BinaryOperator::CreateLShr(Arg1, One, "index_into_i32", CI);
1894 auto BaseI32Ptr =
1895 CastInst::CreatePointerCast(Arg2, IntPointerTy, "base_i32_ptr", CI);
1896 auto OutPtr = GetElementPtrInst::Create(IntTy, BaseI32Ptr, IndexIntoI32,
1897 "base_i32_ptr", CI);
alan-baker741fd1f2020-04-14 17:38:15 -04001898 auto CurrentValue = new LoadInst(IntTy, OutPtr, "current_value", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01001899 auto Shift = BinaryOperator::CreateShl(IndexIsOdd, Four, "shift", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001900 auto MaskBitsToWrite =
1901 BinaryOperator::CreateShl(FFFF, Shift, "mask_bits_to_write", CI);
1902 auto MaskedCurrent = BinaryOperator::CreateAnd(
1903 MaskBitsToWrite, CurrentValue, "masked_current", CI);
David Neto17852de2017-05-29 17:29:31 -04001904
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001905 auto XLowerBits =
1906 BinaryOperator::CreateAnd(X, FFFF, "lower_bits_of_packed", CI);
1907 auto NewBitsToWrite =
1908 BinaryOperator::CreateShl(XLowerBits, Shift, "new_bits_to_write", CI);
1909 auto ValueToXor = BinaryOperator::CreateXor(MaskedCurrent, NewBitsToWrite,
1910 "value_to_xor", CI);
David Neto17852de2017-05-29 17:29:31 -04001911
Kévin Petite8edce32019-04-10 14:23:32 +01001912 // Generate the call to atomi_xor.
1913 SmallVector<Type *, 5> ParamTypes;
1914 // The pointer type.
1915 ParamTypes.push_back(IntPointerTy);
1916 // The Types for memory scope, semantics, and value.
1917 ParamTypes.push_back(IntTy);
1918 ParamTypes.push_back(IntTy);
1919 ParamTypes.push_back(IntTy);
1920 auto NewFType = FunctionType::get(IntTy, ParamTypes, false);
1921 auto NewF = M.getOrInsertFunction("spirv.atomic_xor", NewFType);
David Neto17852de2017-05-29 17:29:31 -04001922
Kévin Petite8edce32019-04-10 14:23:32 +01001923 const auto ConstantScopeDevice =
1924 ConstantInt::get(IntTy, spv::ScopeDevice);
1925 // Assume the pointee is in OpenCL global (SPIR-V Uniform) or local
1926 // (SPIR-V Workgroup).
1927 const auto AddrSpaceSemanticsBits =
1928 IntPointerTy->getPointerAddressSpace() == 1
1929 ? spv::MemorySemanticsUniformMemoryMask
1930 : spv::MemorySemanticsWorkgroupMemoryMask;
David Neto17852de2017-05-29 17:29:31 -04001931
Kévin Petite8edce32019-04-10 14:23:32 +01001932 // We're using relaxed consistency here.
1933 const auto ConstantMemorySemantics =
1934 ConstantInt::get(IntTy, spv::MemorySemanticsUniformMemoryMask |
1935 AddrSpaceSemanticsBits);
David Neto17852de2017-05-29 17:29:31 -04001936
Kévin Petite8edce32019-04-10 14:23:32 +01001937 SmallVector<Value *, 5> Params{OutPtr, ConstantScopeDevice,
1938 ConstantMemorySemantics, ValueToXor};
1939 CallInst::Create(NewF, Params, "store_halfword_xor_trick", CI);
SJW2c317da2020-03-23 07:39:13 -05001940
1941 // Return a Nop so the old Call is removed
1942 Function *donothing = Intrinsic::getDeclaration(&M, Intrinsic::donothing);
1943 V = CallInst::Create(donothing, {}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001944 }
David Neto22f144c2017-06-12 14:26:21 -04001945
SJW2c317da2020-03-23 07:39:13 -05001946 return V;
Kévin Petite8edce32019-04-10 14:23:32 +01001947 });
David Neto22f144c2017-06-12 14:26:21 -04001948}
1949
SJW2c317da2020-03-23 07:39:13 -05001950bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf2(Function &F) {
1951 Module &M = *F.getParent();
1952 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001953 // The value to store.
1954 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001955
Kévin Petite8edce32019-04-10 14:23:32 +01001956 // The index argument from vstore_half.
1957 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001958
Kévin Petite8edce32019-04-10 14:23:32 +01001959 // The pointer argument from vstore_half.
1960 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001961
Kévin Petite8edce32019-04-10 14:23:32 +01001962 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04001963 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001964 auto NewPointerTy =
1965 PointerType::get(IntTy, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01001966 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04001967
Kévin Petite8edce32019-04-10 14:23:32 +01001968 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05001969 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04001970
Kévin Petite8edce32019-04-10 14:23:32 +01001971 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04001972
Kévin Petite8edce32019-04-10 14:23:32 +01001973 // Turn the packed x & y into the final packing.
1974 auto X = CallInst::Create(NewF, Arg0, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001975
Kévin Petite8edce32019-04-10 14:23:32 +01001976 // Cast the half* pointer to int*.
1977 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001978
Kévin Petite8edce32019-04-10 14:23:32 +01001979 // Index into the correct address of the casted pointer.
1980 auto Index = GetElementPtrInst::Create(IntTy, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04001981
Kévin Petite8edce32019-04-10 14:23:32 +01001982 // Store to the int* we casted to.
1983 return new StoreInst(X, Index, CI);
1984 });
David Neto22f144c2017-06-12 14:26:21 -04001985}
1986
SJW2c317da2020-03-23 07:39:13 -05001987bool ReplaceOpenCLBuiltinPass::replaceVstoreHalf4(Function &F) {
1988 Module &M = *F.getParent();
1989 return replaceCallsWithValue(F, [&](CallInst *CI) {
Kévin Petite8edce32019-04-10 14:23:32 +01001990 // The value to store.
1991 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04001992
Kévin Petite8edce32019-04-10 14:23:32 +01001993 // The index argument from vstore_half.
1994 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04001995
Kévin Petite8edce32019-04-10 14:23:32 +01001996 // The pointer argument from vstore_half.
1997 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04001998
Kévin Petite8edce32019-04-10 14:23:32 +01001999 auto IntTy = Type::getInt32Ty(M.getContext());
alan-bakerb3e2b6d2020-06-24 23:59:57 -04002000 auto Int2Ty = FixedVectorType::get(IntTy, 2);
2001 auto Float2Ty = FixedVectorType::get(Type::getFloatTy(M.getContext()), 2);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002002 auto NewPointerTy =
2003 PointerType::get(Int2Ty, Arg2->getType()->getPointerAddressSpace());
Kévin Petite8edce32019-04-10 14:23:32 +01002004 auto NewFType = FunctionType::get(IntTy, Float2Ty, false);
David Neto22f144c2017-06-12 14:26:21 -04002005
Kévin Petite8edce32019-04-10 14:23:32 +01002006 Constant *LoShuffleMask[2] = {ConstantInt::get(IntTy, 0),
2007 ConstantInt::get(IntTy, 1)};
David Neto22f144c2017-06-12 14:26:21 -04002008
Kévin Petite8edce32019-04-10 14:23:32 +01002009 // Extract out the x & y components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002010 auto Lo = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2011 ConstantVector::get(LoShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002012
Kévin Petite8edce32019-04-10 14:23:32 +01002013 Constant *HiShuffleMask[2] = {ConstantInt::get(IntTy, 2),
2014 ConstantInt::get(IntTy, 3)};
David Neto22f144c2017-06-12 14:26:21 -04002015
Kévin Petite8edce32019-04-10 14:23:32 +01002016 // Extract out the z & w components of our to store value.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002017 auto Hi = new ShuffleVectorInst(Arg0, UndefValue::get(Arg0->getType()),
2018 ConstantVector::get(HiShuffleMask), "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002019
Kévin Petite8edce32019-04-10 14:23:32 +01002020 // Our intrinsic to pack a float2 to an int.
SJW61531372020-06-09 07:31:08 -05002021 auto SPIRVIntrinsic = clspv::PackFunction();
David Neto22f144c2017-06-12 14:26:21 -04002022
Kévin Petite8edce32019-04-10 14:23:32 +01002023 auto NewF = M.getOrInsertFunction(SPIRVIntrinsic, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002024
Kévin Petite8edce32019-04-10 14:23:32 +01002025 // Turn the packed x & y into the final component of our int2.
2026 auto X = CallInst::Create(NewF, Lo, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002027
Kévin Petite8edce32019-04-10 14:23:32 +01002028 // Turn the packed z & w into the final component of our int2.
2029 auto Y = CallInst::Create(NewF, Hi, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002030
Kévin Petite8edce32019-04-10 14:23:32 +01002031 auto Combine = InsertElementInst::Create(
2032 UndefValue::get(Int2Ty), X, ConstantInt::get(IntTy, 0), "", CI);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002033 Combine = InsertElementInst::Create(Combine, Y, ConstantInt::get(IntTy, 1),
2034 "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002035
Kévin Petite8edce32019-04-10 14:23:32 +01002036 // Cast the half* pointer to int2*.
2037 auto Cast = CastInst::CreatePointerCast(Arg2, NewPointerTy, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002038
Kévin Petite8edce32019-04-10 14:23:32 +01002039 // Index into the correct address of the casted pointer.
2040 auto Index = GetElementPtrInst::Create(Int2Ty, Cast, Arg1, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002041
Kévin Petite8edce32019-04-10 14:23:32 +01002042 // Store to the int2* we casted to.
2043 return new StoreInst(Combine, Index, CI);
2044 });
David Neto22f144c2017-06-12 14:26:21 -04002045}
2046
SJW2c317da2020-03-23 07:39:13 -05002047bool ReplaceOpenCLBuiltinPass::replaceHalfReadImage(Function &F) {
2048 // convert half to float
2049 Module &M = *F.getParent();
2050 return replaceCallsWithValue(F, [&](CallInst *CI) {
2051 SmallVector<Type *, 3> types;
2052 SmallVector<Value *, 3> args;
2053 for (auto i = 0; i < CI->getNumArgOperands(); ++i) {
2054 types.push_back(CI->getArgOperand(i)->getType());
2055 args.push_back(CI->getArgOperand(i));
alan-bakerf7e17cb2020-01-02 07:29:59 -05002056 }
alan-bakerf7e17cb2020-01-02 07:29:59 -05002057
alan-baker5a8c3be2020-09-09 13:44:26 -04002058 auto NewFType =
2059 FunctionType::get(FixedVectorType::get(Type::getFloatTy(M.getContext()),
2060 cast<VectorType>(CI->getType())
2061 ->getElementCount()
2062 .getKnownMinValue()),
2063 types, false);
SJW2c317da2020-03-23 07:39:13 -05002064
SJW61531372020-06-09 07:31:08 -05002065 std::string NewFName =
2066 Builtins::GetMangledFunctionName("read_imagef", NewFType);
SJW2c317da2020-03-23 07:39:13 -05002067
2068 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2069
2070 auto NewCI = CallInst::Create(NewF, args, "", CI);
2071
2072 // Convert to the half type.
2073 return CastInst::CreateFPCast(NewCI, CI->getType(), "", CI);
2074 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002075}
2076
SJW2c317da2020-03-23 07:39:13 -05002077bool ReplaceOpenCLBuiltinPass::replaceHalfWriteImage(Function &F) {
2078 // convert half to float
2079 Module &M = *F.getParent();
2080 return replaceCallsWithValue(F, [&](CallInst *CI) {
2081 SmallVector<Type *, 3> types(3);
2082 SmallVector<Value *, 3> args(3);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002083
SJW2c317da2020-03-23 07:39:13 -05002084 // Image
2085 types[0] = CI->getArgOperand(0)->getType();
2086 args[0] = CI->getArgOperand(0);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002087
SJW2c317da2020-03-23 07:39:13 -05002088 // Coord
2089 types[1] = CI->getArgOperand(1)->getType();
2090 args[1] = CI->getArgOperand(1);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002091
SJW2c317da2020-03-23 07:39:13 -05002092 // Data
alan-baker5a8c3be2020-09-09 13:44:26 -04002093 types[2] =
2094 FixedVectorType::get(Type::getFloatTy(M.getContext()),
2095 cast<VectorType>(CI->getArgOperand(2)->getType())
2096 ->getElementCount()
2097 .getKnownMinValue());
alan-bakerf7e17cb2020-01-02 07:29:59 -05002098
SJW2c317da2020-03-23 07:39:13 -05002099 auto NewFType =
2100 FunctionType::get(Type::getVoidTy(M.getContext()), types, false);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002101
SJW61531372020-06-09 07:31:08 -05002102 std::string NewFName =
2103 Builtins::GetMangledFunctionName("write_imagef", NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002104
SJW2c317da2020-03-23 07:39:13 -05002105 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
alan-bakerf7e17cb2020-01-02 07:29:59 -05002106
SJW2c317da2020-03-23 07:39:13 -05002107 // Convert data to the float type.
2108 auto Cast = CastInst::CreateFPCast(CI->getArgOperand(2), types[2], "", CI);
2109 args[2] = Cast;
alan-bakerf7e17cb2020-01-02 07:29:59 -05002110
SJW2c317da2020-03-23 07:39:13 -05002111 return CallInst::Create(NewF, args, "", CI);
2112 });
alan-bakerf7e17cb2020-01-02 07:29:59 -05002113}
2114
SJW2c317da2020-03-23 07:39:13 -05002115bool ReplaceOpenCLBuiltinPass::replaceSampledReadImageWithIntCoords(
2116 Function &F) {
2117 // convert read_image with int coords to float coords
2118 Module &M = *F.getParent();
2119 return replaceCallsWithValue(F, [&](CallInst *CI) {
2120 // The image.
2121 auto Arg0 = CI->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -04002122
SJW2c317da2020-03-23 07:39:13 -05002123 // The sampler.
2124 auto Arg1 = CI->getOperand(1);
David Neto22f144c2017-06-12 14:26:21 -04002125
SJW2c317da2020-03-23 07:39:13 -05002126 // The coordinate (integer type that we can't handle).
2127 auto Arg2 = CI->getOperand(2);
David Neto22f144c2017-06-12 14:26:21 -04002128
SJW2c317da2020-03-23 07:39:13 -05002129 uint32_t dim = clspv::ImageDimensionality(Arg0->getType());
2130 uint32_t components =
2131 dim + (clspv::IsArrayImageType(Arg0->getType()) ? 1 : 0);
2132 Type *float_ty = nullptr;
2133 if (components == 1) {
2134 float_ty = Type::getFloatTy(M.getContext());
2135 } else {
alan-baker5a8c3be2020-09-09 13:44:26 -04002136 float_ty = FixedVectorType::get(Type::getFloatTy(M.getContext()),
2137 cast<VectorType>(Arg2->getType())
2138 ->getElementCount()
2139 .getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -04002140 }
David Neto22f144c2017-06-12 14:26:21 -04002141
SJW2c317da2020-03-23 07:39:13 -05002142 auto NewFType = FunctionType::get(
2143 CI->getType(), {Arg0->getType(), Arg1->getType(), float_ty}, false);
2144
2145 std::string NewFName = F.getName().str();
2146 NewFName[NewFName.length() - 1] = 'f';
2147
2148 auto NewF = M.getOrInsertFunction(NewFName, NewFType);
2149
2150 auto Cast = CastInst::Create(Instruction::SIToFP, Arg2, float_ty, "", CI);
2151
2152 return CallInst::Create(NewF, {Arg0, Arg1, Cast}, "", CI);
2153 });
David Neto22f144c2017-06-12 14:26:21 -04002154}
2155
SJW2c317da2020-03-23 07:39:13 -05002156bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F, spv::Op Op) {
2157 return replaceCallsWithValue(F, [&](CallInst *CI) {
2158 auto IntTy = Type::getInt32Ty(F.getContext());
David Neto22f144c2017-06-12 14:26:21 -04002159
SJW2c317da2020-03-23 07:39:13 -05002160 // We need to map the OpenCL constants to the SPIR-V equivalents.
2161 const auto ConstantScopeDevice = ConstantInt::get(IntTy, spv::ScopeDevice);
2162 const auto ConstantMemorySemantics = ConstantInt::get(
2163 IntTy, spv::MemorySemanticsUniformMemoryMask |
2164 spv::MemorySemanticsSequentiallyConsistentMask);
David Neto22f144c2017-06-12 14:26:21 -04002165
SJW2c317da2020-03-23 07:39:13 -05002166 SmallVector<Value *, 5> Params;
David Neto22f144c2017-06-12 14:26:21 -04002167
SJW2c317da2020-03-23 07:39:13 -05002168 // The pointer.
2169 Params.push_back(CI->getArgOperand(0));
David Neto22f144c2017-06-12 14:26:21 -04002170
SJW2c317da2020-03-23 07:39:13 -05002171 // The memory scope.
2172 Params.push_back(ConstantScopeDevice);
David Neto22f144c2017-06-12 14:26:21 -04002173
SJW2c317da2020-03-23 07:39:13 -05002174 // The memory semantics.
2175 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002176
SJW2c317da2020-03-23 07:39:13 -05002177 if (2 < CI->getNumArgOperands()) {
2178 // The unequal memory semantics.
2179 Params.push_back(ConstantMemorySemantics);
David Neto22f144c2017-06-12 14:26:21 -04002180
SJW2c317da2020-03-23 07:39:13 -05002181 // The value.
2182 Params.push_back(CI->getArgOperand(2));
David Neto22f144c2017-06-12 14:26:21 -04002183
SJW2c317da2020-03-23 07:39:13 -05002184 // The comparator.
2185 Params.push_back(CI->getArgOperand(1));
2186 } else if (1 < CI->getNumArgOperands()) {
2187 // The value.
2188 Params.push_back(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -04002189 }
David Neto22f144c2017-06-12 14:26:21 -04002190
SJW2c317da2020-03-23 07:39:13 -05002191 return clspv::InsertSPIRVOp(CI, Op, {}, CI->getType(), Params);
2192 });
David Neto22f144c2017-06-12 14:26:21 -04002193}
2194
SJW2c317da2020-03-23 07:39:13 -05002195bool ReplaceOpenCLBuiltinPass::replaceAtomics(Function &F,
2196 llvm::AtomicRMWInst::BinOp Op) {
2197 return replaceCallsWithValue(F, [&](CallInst *CI) {
alan-bakerd0eb9052020-07-07 13:12:01 -04002198 auto align = F.getParent()->getDataLayout().getABITypeAlign(
2199 CI->getArgOperand(1)->getType());
SJW2c317da2020-03-23 07:39:13 -05002200 return new AtomicRMWInst(Op, CI->getArgOperand(0), CI->getArgOperand(1),
alan-bakerd0eb9052020-07-07 13:12:01 -04002201 align, AtomicOrdering::SequentiallyConsistent,
SJW2c317da2020-03-23 07:39:13 -05002202 SyncScope::System, CI);
2203 });
2204}
David Neto22f144c2017-06-12 14:26:21 -04002205
SJW2c317da2020-03-23 07:39:13 -05002206bool ReplaceOpenCLBuiltinPass::replaceCross(Function &F) {
2207 Module &M = *F.getParent();
2208 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto22f144c2017-06-12 14:26:21 -04002209 auto IntTy = Type::getInt32Ty(M.getContext());
2210 auto FloatTy = Type::getFloatTy(M.getContext());
2211
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002212 Constant *DownShuffleMask[3] = {ConstantInt::get(IntTy, 0),
2213 ConstantInt::get(IntTy, 1),
2214 ConstantInt::get(IntTy, 2)};
David Neto22f144c2017-06-12 14:26:21 -04002215
2216 Constant *UpShuffleMask[4] = {
2217 ConstantInt::get(IntTy, 0), ConstantInt::get(IntTy, 1),
2218 ConstantInt::get(IntTy, 2), ConstantInt::get(IntTy, 3)};
2219
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002220 Constant *FloatVec[3] = {ConstantFP::get(FloatTy, 0.0f),
2221 UndefValue::get(FloatTy),
2222 UndefValue::get(FloatTy)};
David Neto22f144c2017-06-12 14:26:21 -04002223
Kévin Petite8edce32019-04-10 14:23:32 +01002224 auto Vec4Ty = CI->getArgOperand(0)->getType();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002225 auto Arg0 =
2226 new ShuffleVectorInst(CI->getArgOperand(0), UndefValue::get(Vec4Ty),
2227 ConstantVector::get(DownShuffleMask), "", CI);
2228 auto Arg1 =
2229 new ShuffleVectorInst(CI->getArgOperand(1), UndefValue::get(Vec4Ty),
2230 ConstantVector::get(DownShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002231 auto Vec3Ty = Arg0->getType();
David Neto22f144c2017-06-12 14:26:21 -04002232
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002233 auto NewFType = FunctionType::get(Vec3Ty, {Vec3Ty, Vec3Ty}, false);
SJW61531372020-06-09 07:31:08 -05002234 auto NewFName = Builtins::GetMangledFunctionName("cross", NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002235
SJW61531372020-06-09 07:31:08 -05002236 auto Cross3Func = M.getOrInsertFunction(NewFName, NewFType);
David Neto22f144c2017-06-12 14:26:21 -04002237
Kévin Petite8edce32019-04-10 14:23:32 +01002238 auto DownResult = CallInst::Create(Cross3Func, {Arg0, Arg1}, "", CI);
David Neto22f144c2017-06-12 14:26:21 -04002239
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04002240 return new ShuffleVectorInst(DownResult, ConstantVector::get(FloatVec),
2241 ConstantVector::get(UpShuffleMask), "", CI);
Kévin Petite8edce32019-04-10 14:23:32 +01002242 });
David Neto22f144c2017-06-12 14:26:21 -04002243}
David Neto62653202017-10-16 19:05:18 -04002244
SJW2c317da2020-03-23 07:39:13 -05002245bool ReplaceOpenCLBuiltinPass::replaceFract(Function &F, int vec_size) {
David Neto62653202017-10-16 19:05:18 -04002246 // OpenCL's float result = fract(float x, float* ptr)
2247 //
2248 // In the LLVM domain:
2249 //
2250 // %floor_result = call spir_func float @floor(float %x)
2251 // store float %floor_result, float * %ptr
2252 // %fract_intermediate = call spir_func float @clspv.fract(float %x)
2253 // %result = call spir_func float
2254 // @fmin(float %fract_intermediate, float 0x1.fffffep-1f)
2255 //
2256 // Becomes in the SPIR-V domain, where translations of floor, fmin,
2257 // and clspv.fract occur in the SPIR-V generator pass:
2258 //
2259 // %glsl_ext = OpExtInstImport "GLSL.std.450"
2260 // %just_under_1 = OpConstant %float 0x1.fffffep-1f
2261 // ...
2262 // %floor_result = OpExtInst %float %glsl_ext Floor %x
2263 // OpStore %ptr %floor_result
2264 // %fract_intermediate = OpExtInst %float %glsl_ext Fract %x
2265 // %fract_result = OpExtInst %float
Marco Antognini55d51862020-07-21 17:50:07 +01002266 // %glsl_ext Nmin %fract_intermediate %just_under_1
David Neto62653202017-10-16 19:05:18 -04002267
David Neto62653202017-10-16 19:05:18 -04002268 using std::string;
2269
2270 // Mapping from the fract builtin to the floor, fmin, and clspv.fract builtins
2271 // we need. The clspv.fract builtin is the same as GLSL.std.450 Fract.
David Neto62653202017-10-16 19:05:18 -04002272
SJW2c317da2020-03-23 07:39:13 -05002273 Module &M = *F.getParent();
2274 return replaceCallsWithValue(F, [&](CallInst *CI) {
David Neto62653202017-10-16 19:05:18 -04002275
SJW2c317da2020-03-23 07:39:13 -05002276 // This is either float or a float vector. All the float-like
2277 // types are this type.
2278 auto result_ty = F.getReturnType();
2279
SJW61531372020-06-09 07:31:08 -05002280 std::string fmin_name = Builtins::GetMangledFunctionName("fmin", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002281 Function *fmin_fn = M.getFunction(fmin_name);
2282 if (!fmin_fn) {
2283 // Make the fmin function.
2284 FunctionType *fn_ty =
2285 FunctionType::get(result_ty, {result_ty, result_ty}, false);
2286 fmin_fn =
2287 cast<Function>(M.getOrInsertFunction(fmin_name, fn_ty).getCallee());
2288 fmin_fn->addFnAttr(Attribute::ReadNone);
2289 fmin_fn->setCallingConv(CallingConv::SPIR_FUNC);
2290 }
2291
SJW61531372020-06-09 07:31:08 -05002292 std::string floor_name =
2293 Builtins::GetMangledFunctionName("floor", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002294 Function *floor_fn = M.getFunction(floor_name);
2295 if (!floor_fn) {
2296 // Make the floor function.
2297 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2298 floor_fn =
2299 cast<Function>(M.getOrInsertFunction(floor_name, fn_ty).getCallee());
2300 floor_fn->addFnAttr(Attribute::ReadNone);
2301 floor_fn->setCallingConv(CallingConv::SPIR_FUNC);
2302 }
2303
SJW61531372020-06-09 07:31:08 -05002304 std::string clspv_fract_name =
2305 Builtins::GetMangledFunctionName("clspv.fract", result_ty);
SJW2c317da2020-03-23 07:39:13 -05002306 Function *clspv_fract_fn = M.getFunction(clspv_fract_name);
2307 if (!clspv_fract_fn) {
2308 // Make the clspv_fract function.
2309 FunctionType *fn_ty = FunctionType::get(result_ty, {result_ty}, false);
2310 clspv_fract_fn = cast<Function>(
2311 M.getOrInsertFunction(clspv_fract_name, fn_ty).getCallee());
2312 clspv_fract_fn->addFnAttr(Attribute::ReadNone);
2313 clspv_fract_fn->setCallingConv(CallingConv::SPIR_FUNC);
2314 }
2315
2316 // Number of significant significand bits, whether represented or not.
2317 unsigned num_significand_bits;
2318 switch (result_ty->getScalarType()->getTypeID()) {
2319 case Type::HalfTyID:
2320 num_significand_bits = 11;
2321 break;
2322 case Type::FloatTyID:
2323 num_significand_bits = 24;
2324 break;
2325 case Type::DoubleTyID:
2326 num_significand_bits = 53;
2327 break;
2328 default:
2329 llvm_unreachable("Unhandled float type when processing fract builtin");
2330 break;
2331 }
2332 // Beware that the disassembler displays this value as
2333 // OpConstant %float 1
2334 // which is not quite right.
2335 const double kJustUnderOneScalar =
2336 ldexp(double((1 << num_significand_bits) - 1), -num_significand_bits);
2337
2338 Constant *just_under_one =
2339 ConstantFP::get(result_ty->getScalarType(), kJustUnderOneScalar);
2340 if (result_ty->isVectorTy()) {
2341 just_under_one = ConstantVector::getSplat(
alan-baker931253b2020-08-20 17:15:38 -04002342 cast<VectorType>(result_ty)->getElementCount(), just_under_one);
SJW2c317da2020-03-23 07:39:13 -05002343 }
2344
2345 IRBuilder<> Builder(CI);
2346
2347 auto arg = CI->getArgOperand(0);
2348 auto ptr = CI->getArgOperand(1);
2349
2350 // Compute floor result and store it.
2351 auto floor = Builder.CreateCall(floor_fn, {arg});
2352 Builder.CreateStore(floor, ptr);
2353
2354 auto fract_intermediate = Builder.CreateCall(clspv_fract_fn, arg);
2355 auto fract_result =
2356 Builder.CreateCall(fmin_fn, {fract_intermediate, just_under_one});
2357
2358 return fract_result;
2359 });
David Neto62653202017-10-16 19:05:18 -04002360}
alan-bakera52b7312020-10-26 08:58:51 -04002361
2362bool ReplaceOpenCLBuiltinPass::replaceAddSat(Function &F, bool is_signed) {
2363 Module *module = F.getParent();
2364 return replaceCallsWithValue(F, [&module, is_signed](CallInst *Call) {
2365 // SPIR-V OpIAddCarry interprets inputs as unsigned. We use that
2366 // instruction for unsigned additions. For signed addition, it is more
2367 // complicated. For values with bit widths less than 32 bits, we extend
2368 // to the next power of two and perform the addition. For 32- and
2369 // 64-bit values we test the signedness of op1 to determine how to clamp
2370 // the addition.
2371 Type *ty = Call->getType();
2372 Value *op0 = Call->getArgOperand(0);
2373 Value *op1 = Call->getArgOperand(1);
2374 Value *result = nullptr;
2375 if (is_signed) {
2376 unsigned bitwidth = ty->getScalarSizeInBits();
2377 if (bitwidth < 32) {
2378 // sext_op0 = sext op0
2379 // sext_op1 = sext op1
2380 // add = add sext_op0 sext_op1
2381 // clamp = clamp(add, min, max)
2382 // result = trunc clamp
2383 unsigned extended_bits = static_cast<unsigned>(bitwidth << 1);
2384 // The clamp values are the signed min and max of the original bitwidth
2385 // sign extended to the extended bitwidth.
2386 Constant *scalar_min = ConstantInt::get(
2387 Call->getContext(),
2388 APInt::getSignedMinValue(bitwidth).sext(extended_bits));
2389 Constant *scalar_max = ConstantInt::get(
2390 Call->getContext(),
2391 APInt::getSignedMaxValue(bitwidth).sext(extended_bits));
2392 Constant *min = scalar_min;
2393 Constant *max = scalar_max;
2394 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2395 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2396 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2397 }
2398 Type *extended_scalar_ty =
2399 IntegerType::get(Call->getContext(), extended_bits);
2400 Type *extended_ty = extended_scalar_ty;
2401 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2402 extended_ty =
2403 VectorType::get(extended_scalar_ty, vec_ty->getElementCount());
2404 }
2405 auto sext_op0 =
2406 CastInst::Create(Instruction::SExt, op0, extended_ty, "", Call);
2407 auto sext_op1 =
2408 CastInst::Create(Instruction::SExt, op1, extended_ty, "", Call);
2409 // Add the nsw flag since we know no overflow can occur.
2410 auto add = BinaryOperator::CreateNSW(Instruction::Add, sext_op0,
2411 sext_op1, "", Call);
2412 FunctionType *func_ty = FunctionType::get(
2413 extended_ty, {extended_ty, extended_ty, extended_ty}, false);
2414
2415 // Don't use the type in GetMangledFunctionName to ensure we get
2416 // signed parameters.
2417 std::string sclamp_name = Builtins::GetMangledFunctionName("clamp");
2418 uint32_t vec_width = 1;
2419 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2420 vec_width = vec_ty->getElementCount().getKnownMinValue();
2421 }
2422 if (extended_bits == 32) {
2423 if (vec_width == 1) {
2424 sclamp_name += "iii";
2425 } else {
2426 sclamp_name += "Dv" + std::to_string(vec_width) + "_iS_S_";
2427 }
2428 } else {
2429 if (vec_width == 1) {
2430 sclamp_name += "sss";
2431 } else {
2432 sclamp_name += "Dv" + std::to_string(vec_width) + "_sS_S_";
2433 }
2434 }
2435 auto sclamp_callee = module->getOrInsertFunction(sclamp_name, func_ty);
2436 auto clamp = CallInst::Create(sclamp_callee, {add, min, max}, "", Call);
2437 result = CastInst::Create(Instruction::Trunc, clamp, ty, "", Call);
2438 } else {
2439 // Pseudo-code:
2440 // c = a + b;
2441 // if (b < 0)
2442 // c = c > a ? min : c;
2443 // else
2444 // c = c < a ? max : c;
2445 //
2446 unsigned bitwidth = ty->getScalarSizeInBits();
2447 Constant *scalar_min = ConstantInt::get(
2448 Call->getContext(), APInt::getSignedMinValue(bitwidth));
2449 Constant *scalar_max = ConstantInt::get(
2450 Call->getContext(), APInt::getSignedMaxValue(bitwidth));
2451 Constant *min = scalar_min;
2452 Constant *max = scalar_max;
2453 if (auto vec_ty = dyn_cast<VectorType>(ty)) {
2454 min = ConstantVector::getSplat(vec_ty->getElementCount(), min);
2455 max = ConstantVector::getSplat(vec_ty->getElementCount(), max);
2456 }
2457 auto zero = Constant::getNullValue(ty);
2458 // Cannot add the nsw flag.
2459 auto add = BinaryOperator::Create(Instruction::Add, op0, op1, "", Call);
2460 auto add_gt_op0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SGT,
2461 add, op0, "", Call);
2462 auto min_clamp = SelectInst::Create(add_gt_op0, min, add, "", Call);
2463 auto add_lt_op0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
2464 add, op0, "", Call);
2465 auto max_clamp = SelectInst::Create(add_lt_op0, max, add, "", Call);
2466 auto op1_lt_0 = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_SLT,
2467 op1, zero, "", Call);
2468 result = SelectInst::Create(op1_lt_0, min_clamp, max_clamp, "", Call);
2469 }
2470 } else {
2471 // Just use OpIAddCarry and use the carry to clamp the result.
2472 auto ret_ty = StructType::get(Call->getContext(), {ty, ty});
2473 auto add = clspv::InsertSPIRVOp(
2474 Call, spv::OpIAddCarry, {Attribute::ReadNone}, ret_ty, {op0, op1});
2475 auto ex0 = ExtractValueInst::Create(add, {0}, "", Call);
2476 auto ex1 = ExtractValueInst::Create(add, {1}, "", Call);
2477 auto cmp = CmpInst::Create(Instruction::ICmp, CmpInst::ICMP_EQ, ex1,
2478 Constant::getNullValue(ty), "", Call);
2479 result =
2480 SelectInst::Create(cmp, ex0, Constant::getAllOnesValue(ty), "", Call);
2481 }
2482
2483 return result;
2484 });
2485}