blob: db378d596c3e43274802de4662e16b25e537aab9 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Netoe345e0e2018-06-15 11:38:32 -040015#include "llvm/IR/Constants.h"
16#include "llvm/IR/IRBuilder.h"
17#include "llvm/IR/Instructions.h"
alan-bakerbccf62c2019-03-29 10:32:41 -040018#include "llvm/IR/IntrinsicInst.h"
David Netoe345e0e2018-06-15 11:38:32 -040019#include "llvm/IR/Module.h"
20#include "llvm/Pass.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040023
alan-bakere0902602020-03-23 08:43:40 -040024#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040025
SJW61531372020-06-09 07:31:08 -050026#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040027#include "Passes.h"
28
David Neto22f144c2017-06-12 14:26:21 -040029using namespace llvm;
30
31#define DEBUG_TYPE "ReplaceLLVMIntrinsics"
32
33namespace {
34struct ReplaceLLVMIntrinsicsPass final : public ModulePass {
35 static char ID;
36 ReplaceLLVMIntrinsicsPass() : ModulePass(ID) {}
37
38 bool runOnModule(Module &M) override;
alan-baker5f2e88e2020-12-07 15:24:04 -050039 // TODO: update module-based funtions to work like function-based ones.
40 // Except maybe lifetime intrinsics.
41 bool runOnFunction(Function &F);
David Neto22f144c2017-06-12 14:26:21 -040042 bool replaceMemset(Module &M);
43 bool replaceMemcpy(Module &M);
David Netoe345e0e2018-06-15 11:38:32 -040044 bool removeLifetimeDeclarations(Module &M);
alan-baker5f2e88e2020-12-07 15:24:04 -050045 bool replaceFshl(Function &F);
46 bool replaceCountZeroes(Function &F, bool leading);
47 bool replaceCopysign(Function &F);
48
49 bool replaceCallsWithValue(Function &F,
50 std::function<Value *(CallInst *)> Replacer);
51
52 SmallVector<Function *, 16> DeadFunctions;
David Neto22f144c2017-06-12 14:26:21 -040053};
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040054} // namespace
David Neto22f144c2017-06-12 14:26:21 -040055
56char ReplaceLLVMIntrinsicsPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -040057INITIALIZE_PASS(ReplaceLLVMIntrinsicsPass, "ReplaceLLVMIntrinsics",
58 "Replace LLVM intrinsics Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -040059
60namespace clspv {
61ModulePass *createReplaceLLVMIntrinsicsPass() {
62 return new ReplaceLLVMIntrinsicsPass();
63}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040064} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -040065
66bool ReplaceLLVMIntrinsicsPass::runOnModule(Module &M) {
67 bool Changed = false;
68
James Price3a116142020-10-16 06:52:18 -040069 // Remove lifetime annotations first. They could be using memset
David Netoe345e0e2018-06-15 11:38:32 -040070 // and memcpy calls.
71 Changed |= removeLifetimeDeclarations(M);
David Neto22f144c2017-06-12 14:26:21 -040072 Changed |= replaceMemset(M);
73 Changed |= replaceMemcpy(M);
74
alan-baker5f2e88e2020-12-07 15:24:04 -050075 for (auto &F : M) {
76 Changed |= runOnFunction(F);
77 }
78
79 for (auto F : DeadFunctions) {
80 F->eraseFromParent();
81 }
82
David Neto22f144c2017-06-12 14:26:21 -040083 return Changed;
84}
85
alan-baker5f2e88e2020-12-07 15:24:04 -050086bool ReplaceLLVMIntrinsicsPass::runOnFunction(Function &F) {
87 switch (F.getIntrinsicID()) {
88 case Intrinsic::fshl:
89 return replaceFshl(F);
90 case Intrinsic::copysign:
91 return replaceCopysign(F);
92 case Intrinsic::ctlz:
93 return replaceCountZeroes(F, true);
94 case Intrinsic::cttz:
95 return replaceCountZeroes(F, false);
James Price3a116142020-10-16 06:52:18 -040096
alan-baker5f2e88e2020-12-07 15:24:04 -050097 default:
98 break;
99 }
100
101 return false;
102}
103
104bool ReplaceLLVMIntrinsicsPass::replaceCallsWithValue(
105 Function &F, std::function<Value *(CallInst *)> Replacer) {
106 SmallVector<Instruction *, 8> ToRemove;
107 for (auto &U : F.uses()) {
108 if (auto Call = dyn_cast<CallInst>(U.getUser())) {
109 auto replacement = Replacer(Call);
110 if (replacement != nullptr) {
111 Call->replaceAllUsesWith(replacement);
112 ToRemove.push_back(Call);
113 }
James Price3a116142020-10-16 06:52:18 -0400114 }
115 }
116
alan-baker5f2e88e2020-12-07 15:24:04 -0500117 for (auto inst : ToRemove) {
118 inst->eraseFromParent();
James Price3a116142020-10-16 06:52:18 -0400119 }
120
alan-baker5f2e88e2020-12-07 15:24:04 -0500121 DeadFunctions.push_back(&F);
122
123 return !ToRemove.empty();
124}
125
126bool ReplaceLLVMIntrinsicsPass::replaceFshl(Function &F) {
127 return replaceCallsWithValue(F, [](CallInst *call) {
128 auto arg_hi = call->getArgOperand(0);
129 auto arg_lo = call->getArgOperand(1);
130 auto arg_shift = call->getArgOperand(2);
131
132 // Validate argument types.
133 auto type = arg_hi->getType();
134 if ((type->getScalarSizeInBits() != 8) &&
135 (type->getScalarSizeInBits() != 16) &&
136 (type->getScalarSizeInBits() != 32) &&
137 (type->getScalarSizeInBits() != 64)) {
138 return static_cast<Value *>(nullptr);
139 }
140
141 // We shift the bottom bits of the first argument up, the top bits of the
142 // second argument down, and then OR the two shifted values.
143 IRBuilder<> builder(call);
144
145 // The shift amount is treated modulo the element size.
146 auto mod_mask = ConstantInt::get(type, type->getScalarSizeInBits() - 1);
147 auto shift_amount = builder.CreateAnd(arg_shift, mod_mask);
148
149 // Calculate the amount by which to shift the second argument down.
150 auto scalar_size = ConstantInt::get(type, type->getScalarSizeInBits());
151 auto down_amount = builder.CreateSub(scalar_size, shift_amount);
152
153 // Shift the two arguments and OR the results together.
154 auto hi_bits = builder.CreateShl(arg_hi, shift_amount);
155 auto lo_bits = builder.CreateLShr(arg_lo, down_amount);
156 return builder.CreateOr(lo_bits, hi_bits);
157 });
James Price3a116142020-10-16 06:52:18 -0400158}
159
David Neto22f144c2017-06-12 14:26:21 -0400160bool ReplaceLLVMIntrinsicsPass::replaceMemset(Module &M) {
161 bool Changed = false;
David Netod3f59382017-10-18 18:30:30 -0400162 auto Layout = M.getDataLayout();
David Neto22f144c2017-06-12 14:26:21 -0400163
164 for (auto &F : M) {
165 if (F.getName().startswith("llvm.memset")) {
166 SmallVector<CallInst *, 8> CallsToReplace;
167
168 for (auto U : F.users()) {
169 if (auto CI = dyn_cast<CallInst>(U)) {
170 auto Initializer = dyn_cast<ConstantInt>(CI->getArgOperand(1));
171
172 // We only handle cases where the initializer is a constant int that
173 // is 0.
174 if (!Initializer || (0 != Initializer->getZExtValue())) {
175 Initializer->print(errs());
176 llvm_unreachable("Unhandled llvm.memset.* instruction that had a "
177 "non-0 initializer!");
178 }
179
180 CallsToReplace.push_back(CI);
181 }
182 }
183
184 for (auto CI : CallsToReplace) {
185 auto NewArg = CI->getArgOperand(0);
Kévin Petit70944912019-04-17 23:22:28 +0100186 auto Bitcast = dyn_cast<BitCastInst>(NewArg);
187 if (Bitcast != nullptr) {
David Neto22f144c2017-06-12 14:26:21 -0400188 NewArg = Bitcast->getOperand(0);
189 }
190
David Netod3f59382017-10-18 18:30:30 -0400191 auto NumBytes = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
David Neto22f144c2017-06-12 14:26:21 -0400192 auto Ty = NewArg->getType();
193 auto PointeeTy = Ty->getPointerElementType();
David Neto22f144c2017-06-12 14:26:21 -0400194 auto Zero = Constant::getNullValue(PointeeTy);
195
David Netod3f59382017-10-18 18:30:30 -0400196 const auto num_stores = NumBytes / Layout.getTypeAllocSize(PointeeTy);
197 assert((NumBytes == num_stores * Layout.getTypeAllocSize(PointeeTy)) &&
198 "Null memset can't be divided evenly across multiple stores.");
199 assert((num_stores & 0xFFFFFFFF) == num_stores);
David Neto22f144c2017-06-12 14:26:21 -0400200
David Netod3f59382017-10-18 18:30:30 -0400201 // Generate the first store.
Kévin Petit58c445c2019-06-18 18:09:46 +0100202 new StoreInst(Zero, NewArg, CI);
David Netod3f59382017-10-18 18:30:30 -0400203
204 // Generate subsequent stores, but only if needed.
205 if (num_stores) {
206 auto I32Ty = Type::getInt32Ty(M.getContext());
207 auto One = ConstantInt::get(I32Ty, 1);
208 auto Ptr = NewArg;
209 for (uint32_t i = 1; i < num_stores; i++) {
210 Ptr = GetElementPtrInst::Create(PointeeTy, Ptr, {One}, "", CI);
Kévin Petit58c445c2019-06-18 18:09:46 +0100211 new StoreInst(Zero, Ptr, CI);
David Netod3f59382017-10-18 18:30:30 -0400212 }
213 }
214
David Neto22f144c2017-06-12 14:26:21 -0400215 CI->eraseFromParent();
216
Kévin Petit70944912019-04-17 23:22:28 +0100217 if (Bitcast != nullptr) {
David Neto22f144c2017-06-12 14:26:21 -0400218 Bitcast->eraseFromParent();
219 }
220 }
221 }
222 }
223
224 return Changed;
225}
226
227bool ReplaceLLVMIntrinsicsPass::replaceMemcpy(Module &M) {
228 bool Changed = false;
David Netob84ba342017-06-19 17:55:37 -0400229 auto Layout = M.getDataLayout();
230
231 // Unpack source and destination types until we find a matching
232 // element type. Count the number of levels we unpack for the
233 // source and destination types. So far this only works for
234 // array types, but could be generalized to other regular types
235 // like vectors.
Alan Baker7dea8842018-10-22 10:15:41 -0400236 auto match_types = [&Layout](CallInst &CI, uint64_t Size, Type **DstElemTy,
237 Type **SrcElemTy, unsigned *NumDstUnpackings,
David Netob84ba342017-06-19 17:55:37 -0400238 unsigned *NumSrcUnpackings) {
Alan Baker7dea8842018-10-22 10:15:41 -0400239 auto descend_type = [](Type *InType) {
240 Type *OutType = InType;
241 if (OutType->isStructTy()) {
242 OutType = OutType->getStructElementType(0);
243 } else if (OutType->isArrayTy()) {
244 OutType = OutType->getArrayElementType();
James Pricecf53df42020-04-20 14:41:24 -0400245 } else if (auto vec_type = dyn_cast<VectorType>(OutType)) {
246 OutType = vec_type->getElementType();
Alan Baker7dea8842018-10-22 10:15:41 -0400247 } else {
248 assert(false && "Don't know how to descend into type");
249 }
250
251 return OutType;
252 };
253
David Netob84ba342017-06-19 17:55:37 -0400254 while (*SrcElemTy != *DstElemTy) {
255 auto SrcElemSize = Layout.getTypeSizeInBits(*SrcElemTy);
256 auto DstElemSize = Layout.getTypeSizeInBits(*DstElemTy);
257 if (SrcElemSize >= DstElemSize) {
Alan Baker7dea8842018-10-22 10:15:41 -0400258 *SrcElemTy = descend_type(*SrcElemTy);
David Netob84ba342017-06-19 17:55:37 -0400259 (*NumSrcUnpackings)++;
260 } else if (DstElemSize >= SrcElemSize) {
Alan Baker7dea8842018-10-22 10:15:41 -0400261 *DstElemTy = descend_type(*DstElemTy);
David Netob84ba342017-06-19 17:55:37 -0400262 (*NumDstUnpackings)++;
263 } else {
264 errs() << "Don't know how to unpack types for memcpy: " << CI
265 << "\ngot to: " << **DstElemTy << " vs " << **SrcElemTy << "\n";
266 assert(false && "Don't know how to unpack these types");
267 }
268 }
Alan Baker7dea8842018-10-22 10:15:41 -0400269
270 auto DstElemSize = Layout.getTypeSizeInBits(*DstElemTy) / 8;
271 while (Size < DstElemSize) {
272 *DstElemTy = descend_type(*DstElemTy);
273 *SrcElemTy = descend_type(*SrcElemTy);
274 (*NumDstUnpackings)++;
275 (*NumSrcUnpackings)++;
276 DstElemSize = Layout.getTypeSizeInBits(*DstElemTy) / 8;
277 }
David Netob84ba342017-06-19 17:55:37 -0400278 };
David Neto22f144c2017-06-12 14:26:21 -0400279
280 for (auto &F : M) {
281 if (F.getName().startswith("llvm.memcpy")) {
David Netob84ba342017-06-19 17:55:37 -0400282 SmallPtrSet<Instruction *, 8> BitCastsToForget;
283 SmallVector<CallInst *, 8> CallsToReplaceWithSpirvCopyMemory;
David Neto22f144c2017-06-12 14:26:21 -0400284
285 for (auto U : F.users()) {
286 if (auto CI = dyn_cast<CallInst>(U)) {
alan-bakered80f572019-02-11 17:28:26 -0500287 assert(isa<BitCastOperator>(CI->getArgOperand(0)));
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400288 auto Dst =
289 dyn_cast<BitCastOperator>(CI->getArgOperand(0))->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400290
alan-bakered80f572019-02-11 17:28:26 -0500291 assert(isa<BitCastOperator>(CI->getArgOperand(1)));
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400292 auto Src =
293 dyn_cast<BitCastOperator>(CI->getArgOperand(1))->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400294
295 // The original type of Dst we get from the argument to the bitcast
296 // instruction.
297 auto DstTy = Dst->getType();
298 assert(DstTy->isPointerTy());
299
300 // The original type of Src we get from the argument to the bitcast
301 // instruction.
302 auto SrcTy = Src->getType();
303 assert(SrcTy->isPointerTy());
304
David Neto22f144c2017-06-12 14:26:21 -0400305 // Check that the size is a constant integer.
306 assert(isa<ConstantInt>(CI->getArgOperand(2)));
307 auto Size =
308 dyn_cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
309
Alan Baker7dea8842018-10-22 10:15:41 -0400310 auto DstElemTy = DstTy->getPointerElementType();
311 auto SrcElemTy = SrcTy->getPointerElementType();
312 unsigned NumDstUnpackings = 0;
313 unsigned NumSrcUnpackings = 0;
314 match_types(*CI, Size, &DstElemTy, &SrcElemTy, &NumDstUnpackings,
315 &NumSrcUnpackings);
316
317 // Check that the pointee types match.
318 assert(DstElemTy == SrcElemTy);
319
David Netob84ba342017-06-19 17:55:37 -0400320 auto DstElemSize = Layout.getTypeSizeInBits(DstElemTy) / 8;
alan-baker4a757f62020-04-22 08:17:49 -0400321 (void)DstElemSize;
David Neto22f144c2017-06-12 14:26:21 -0400322
David Netob84ba342017-06-19 17:55:37 -0400323 // Check that the size is a multiple of the size of the pointee type.
324 assert(Size % DstElemSize == 0);
David Neto22f144c2017-06-12 14:26:21 -0400325
alan-bakerbccf62c2019-03-29 10:32:41 -0400326 auto Alignment = cast<MemIntrinsic>(CI)->getDestAlignment();
David Netob84ba342017-06-19 17:55:37 -0400327 auto TypeAlignment = Layout.getABITypeAlignment(DstElemTy);
alan-baker4a757f62020-04-22 08:17:49 -0400328 (void)Alignment;
329 (void)TypeAlignment;
David Neto22f144c2017-06-12 14:26:21 -0400330
331 // Check that the alignment is at least the alignment of the pointee
332 // type.
333 assert(Alignment >= TypeAlignment);
334
335 // Check that the alignment is a multiple of the alignment of the
336 // pointee type.
337 assert(0 == (Alignment % TypeAlignment));
338
339 // Check that volatile is a constant.
alan-bakerbccf62c2019-03-29 10:32:41 -0400340 assert(isa<ConstantInt>(CI->getArgOperand(3)));
David Neto22f144c2017-06-12 14:26:21 -0400341
David Netob84ba342017-06-19 17:55:37 -0400342 CallsToReplaceWithSpirvCopyMemory.push_back(CI);
David Neto22f144c2017-06-12 14:26:21 -0400343 }
344 }
345
David Netob84ba342017-06-19 17:55:37 -0400346 for (auto CI : CallsToReplaceWithSpirvCopyMemory) {
alan-bakered80f572019-02-11 17:28:26 -0500347 auto Arg0 = dyn_cast<BitCastOperator>(CI->getArgOperand(0));
348 auto Arg1 = dyn_cast<BitCastOperator>(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -0400349 auto Arg3 = dyn_cast<ConstantInt>(CI->getArgOperand(3));
David Neto22f144c2017-06-12 14:26:21 -0400350
351 auto I32Ty = Type::getInt32Ty(M.getContext());
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400352 auto Alignment =
353 ConstantInt::get(I32Ty, cast<MemIntrinsic>(CI)->getDestAlignment());
alan-bakerbccf62c2019-03-29 10:32:41 -0400354 auto Volatile = ConstantInt::get(I32Ty, Arg3->getZExtValue());
David Neto22f144c2017-06-12 14:26:21 -0400355
alan-bakered80f572019-02-11 17:28:26 -0500356 auto Dst = Arg0->getOperand(0);
357 auto Src = Arg1->getOperand(0);
David Netob84ba342017-06-19 17:55:37 -0400358
359 auto DstElemTy = Dst->getType()->getPointerElementType();
360 auto SrcElemTy = Src->getType()->getPointerElementType();
361 unsigned NumDstUnpackings = 0;
362 unsigned NumSrcUnpackings = 0;
David Netob84ba342017-06-19 17:55:37 -0400363 auto Size = dyn_cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Alan Baker7dea8842018-10-22 10:15:41 -0400364 match_types(*CI, Size, &DstElemTy, &SrcElemTy, &NumDstUnpackings,
365 &NumSrcUnpackings);
SJW61531372020-06-09 07:31:08 -0500366 auto SPIRVIntrinsic = clspv::CopyMemoryFunction();
David Neto22f144c2017-06-12 14:26:21 -0400367
David Netob84ba342017-06-19 17:55:37 -0400368 auto DstElemSize = Layout.getTypeSizeInBits(DstElemTy) / 8;
David Neto22f144c2017-06-12 14:26:21 -0400369
David Netob84ba342017-06-19 17:55:37 -0400370 IRBuilder<> Builder(CI);
371
372 if (NumSrcUnpackings == 0 && NumDstUnpackings == 0) {
373 auto NewFType = FunctionType::get(
374 F.getReturnType(), {Dst->getType(), Src->getType(), I32Ty, I32Ty},
375 false);
376 auto NewF =
377 Function::Create(NewFType, F.getLinkage(), SPIRVIntrinsic, &M);
378 Builder.CreateCall(NewF, {Dst, Src, Alignment, Volatile}, "");
379 } else {
380 auto Zero = ConstantInt::get(I32Ty, 0);
381 SmallVector<Value *, 3> SrcIndices;
382 SmallVector<Value *, 3> DstIndices;
383 // Make unpacking indices.
384 for (unsigned unpacking = 0; unpacking < NumSrcUnpackings;
385 ++unpacking) {
386 SrcIndices.push_back(Zero);
387 }
388 for (unsigned unpacking = 0; unpacking < NumDstUnpackings;
389 ++unpacking) {
390 DstIndices.push_back(Zero);
391 }
392 // Add a placeholder for the final index.
393 SrcIndices.push_back(Zero);
394 DstIndices.push_back(Zero);
395
396 // Build the function and function type only once.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400397 FunctionType *NewFType = nullptr;
398 Function *NewF = nullptr;
David Netob84ba342017-06-19 17:55:37 -0400399
400 IRBuilder<> Builder(CI);
401 for (unsigned i = 0; i < Size / DstElemSize; ++i) {
402 auto Index = ConstantInt::get(I32Ty, i);
403 SrcIndices.back() = Index;
404 DstIndices.back() = Index;
405
alan-bakered80f572019-02-11 17:28:26 -0500406 // Avoid the builder for Src in order to prevent the folder from
407 // creating constant expressions for constant memcpys.
408 auto SrcElemPtr =
409 GetElementPtrInst::CreateInBounds(Src, SrcIndices, "", CI);
David Netob84ba342017-06-19 17:55:37 -0400410 auto DstElemPtr = Builder.CreateGEP(Dst, DstIndices);
411 NewFType =
412 NewFType != nullptr
413 ? NewFType
414 : FunctionType::get(F.getReturnType(),
415 {DstElemPtr->getType(),
416 SrcElemPtr->getType(), I32Ty, I32Ty},
417 false);
418 NewF = NewF != nullptr ? NewF
419 : Function::Create(NewFType, F.getLinkage(),
420 SPIRVIntrinsic, &M);
421 Builder.CreateCall(
422 NewF, {DstElemPtr, SrcElemPtr, Alignment, Volatile}, "");
423 }
424 }
425
426 // Erase the call.
David Neto22f144c2017-06-12 14:26:21 -0400427 CI->eraseFromParent();
428
David Netob84ba342017-06-19 17:55:37 -0400429 // Erase the bitcasts. A particular bitcast might be used
430 // in more than one memcpy, so defer actual deleting until later.
alan-bakered80f572019-02-11 17:28:26 -0500431 if (isa<BitCastInst>(Arg0))
432 BitCastsToForget.insert(dyn_cast<BitCastInst>(Arg0));
433 if (isa<BitCastInst>(Arg1))
434 BitCastsToForget.insert(dyn_cast<BitCastInst>(Arg1));
David Netob84ba342017-06-19 17:55:37 -0400435 }
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400436 for (auto *Inst : BitCastsToForget) {
David Netob84ba342017-06-19 17:55:37 -0400437 Inst->eraseFromParent();
David Neto22f144c2017-06-12 14:26:21 -0400438 }
439 }
440 }
441
442 return Changed;
443}
David Netoe345e0e2018-06-15 11:38:32 -0400444
445bool ReplaceLLVMIntrinsicsPass::removeLifetimeDeclarations(Module &M) {
446 // SPIR-V OpLifetimeStart and OpLifetimeEnd require Kernel capability.
447 // Vulkan doesn't support that, so remove all lifteime bounds declarations.
448
449 bool Changed = false;
450
451 SmallVector<Function *, 2> WorkList;
452 for (auto &F : M) {
453 if (F.getName().startswith("llvm.lifetime.")) {
454 WorkList.push_back(&F);
455 }
456 }
457
458 for (auto *F : WorkList) {
459 Changed = true;
alan-bakera5ff28e2018-11-21 16:27:20 -0500460 // Copy users to avoid modifying the list in place.
461 SmallVector<User *, 8> users(F->users());
462 for (auto U : users) {
David Netoe345e0e2018-06-15 11:38:32 -0400463 if (auto *CI = dyn_cast<CallInst>(U)) {
464 CI->eraseFromParent();
465 }
466 }
467 F->eraseFromParent();
468 }
469
470 return Changed;
471}
alan-baker5f2e88e2020-12-07 15:24:04 -0500472
473bool ReplaceLLVMIntrinsicsPass::replaceCountZeroes(Function &F, bool leading) {
474 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
475 return false;
476
477 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
478 if (bitwidth == 32 || bitwidth > 64)
479 return false;
480
481 return replaceCallsWithValue(F, [&F, bitwidth, leading](CallInst *Call) {
482 auto c_false = ConstantInt::getFalse(Call->getContext());
483 auto in = Call->getArgOperand(0);
484 IRBuilder<> builder(Call);
485 auto ty = Call->getType()->getWithNewBitWidth(32);
486 auto c32 = ConstantInt::get(ty, 32);
487 auto func_32bit = Intrinsic::getDeclaration(
488 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz, ty);
489 if (bitwidth < 32) {
490 // Extend the input to 32-bits and perform a clz/ctz.
491 auto zext = builder.CreateZExt(in, ty);
492 Value *call_input = zext;
493 if (!leading) {
494 // Or the extended input value with a constant that caps the max to the
495 // right bitwidth (e.g. 256 for i8 and 65536 for i16).
496 auto mask = ConstantInt::get(ty, 1 << bitwidth);
497 call_input = builder.CreateOr(zext, mask);
498 }
499 auto call = builder.CreateCall(func_32bit->getFunctionType(), func_32bit,
500 {call_input, c_false});
501 Value *tmp = call;
502 if (leading) {
503 // Clz is implemented as 31 - FindUMsb(|zext|), so adjust the result
504 // the right bitwidth.
505 auto sub_const = ConstantInt::get(ty, 32 - bitwidth);
506 tmp = builder.CreateSub(call, sub_const);
507 }
508 // Truncate the intermediate result to the right size.
509 return builder.CreateTrunc(tmp, Call->getType());
510 } else {
511 // Perform a 32-bit version of clz/ctz on each half of the 64-bit input.
512 auto lshr = builder.CreateLShr(in, 32);
513 auto top_bits = builder.CreateTrunc(lshr, ty);
514 auto bot_bits = builder.CreateTrunc(in, ty);
515 auto top_func = builder.CreateCall(func_32bit->getFunctionType(),
516 func_32bit, {top_bits, c_false});
517 auto bot_func = builder.CreateCall(func_32bit->getFunctionType(),
518 func_32bit, {bot_bits, c_false});
519 Value *tmp = nullptr;
520 if (leading) {
521 // For clz, if clz(top) is 32, return 32 + clz(bot).
522 auto cmp = builder.CreateICmpEQ(top_func, c32);
523 auto adjust = builder.CreateAdd(bot_func, c32);
524 tmp = builder.CreateSelect(cmp, adjust, top_func);
525 } else {
526 // For ctz, if clz(bot) is 32, return 32 + ctz(top)
527 auto bot_cmp = builder.CreateICmpEQ(bot_func, c32);
528 auto adjust = builder.CreateAdd(top_func, c32);
529 tmp = builder.CreateSelect(bot_cmp, adjust, bot_func);
530 }
531 // Extend the intermediate result to the correct size.
532 return builder.CreateZExt(tmp, Call->getType());
533 }
534 });
535}
536
537bool ReplaceLLVMIntrinsicsPass::replaceCopysign(Function &F) {
538 return replaceCallsWithValue(F, [&F](CallInst *CI) {
539 auto XValue = CI->getOperand(0);
540 auto YValue = CI->getOperand(1);
541
542 auto Ty = XValue->getType();
543
544 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
545 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
546 IntTy = FixedVectorType::get(
547 IntTy, vec_ty->getElementCount().getKnownMinValue());
548 }
549
550 // Return X with the sign of Y
551
552 // Sign bit masks
553 auto SignBit = IntTy->getScalarSizeInBits() - 1;
554 auto SignBitMask = 1 << SignBit;
555 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
556 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
557
558 IRBuilder<> Builder(CI);
559
560 // Extract sign of Y
561 auto YInt = Builder.CreateBitCast(YValue, IntTy);
562 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
563
564 // Clear sign bit in X
565 auto XInt = Builder.CreateBitCast(XValue, IntTy);
566 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
567
568 // Insert sign bit of Y into X
569 auto NewXInt = Builder.CreateOr(XInt, YSign);
570
571 // And cast back to floating-point
572 return Builder.CreateBitCast(NewXInt, Ty);
573 });
574}