blob: 0515bf429dc10fe82a05c10f46d869eb26043b16 [file] [log] [blame]
David Neto22f144c2017-06-12 14:26:21 -04001// Copyright 2017 The Clspv Authors. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
David Netoe345e0e2018-06-15 11:38:32 -040015#include "llvm/IR/Constants.h"
16#include "llvm/IR/IRBuilder.h"
17#include "llvm/IR/Instructions.h"
alan-bakerbccf62c2019-03-29 10:32:41 -040018#include "llvm/IR/IntrinsicInst.h"
David Netoe345e0e2018-06-15 11:38:32 -040019#include "llvm/IR/Module.h"
20#include "llvm/Pass.h"
21#include "llvm/Support/raw_ostream.h"
22#include "llvm/Transforms/Utils/Cloning.h"
David Neto22f144c2017-06-12 14:26:21 -040023
alan-baker3f772c02021-06-15 22:18:11 -040024#include "clspv/Option.h"
alan-bakere0902602020-03-23 08:43:40 -040025#include "spirv/unified1/spirv.hpp"
David Neto22f144c2017-06-12 14:26:21 -040026
SJW61531372020-06-09 07:31:08 -050027#include "Constants.h"
Diego Novilloa4c44fa2019-04-11 10:56:15 -040028#include "Passes.h"
29
David Neto22f144c2017-06-12 14:26:21 -040030using namespace llvm;
31
32#define DEBUG_TYPE "ReplaceLLVMIntrinsics"
33
34namespace {
// Module pass that rewrites or removes a fixed set of LLVM intrinsics:
// memset, memcpy, lifetime markers, fshl, ctlz/cttz and copysign.
struct ReplaceLLVMIntrinsicsPass final : public ModulePass {
  static char ID;
  ReplaceLLVMIntrinsicsPass() : ModulePass(ID) {}

  // Pass entry point. Returns true if the module was modified.
  bool runOnModule(Module &M) override;
  // TODO: update module-based functions to work like function-based ones.
  // Except maybe lifetime intrinsics.
  // Dispatches on the intrinsic ID of |F| to one of the replace* helpers
  // below. Returns false for any function that is not handled.
  bool runOnFunction(Function &F);
  // Module-wide rewrites; each scans the module for functions whose name
  // starts with the corresponding "llvm." prefix.
  bool replaceMemset(Module &M);
  bool replaceMemcpy(Module &M);
  bool removeLifetimeDeclarations(Module &M);
  // Per-intrinsic rewrites; |F| is the intrinsic declaration whose calls are
  // replaced.
  bool replaceFshl(Function &F);
  // |leading| selects ctlz (true) or cttz (false).
  bool replaceCountZeroes(Function &F, bool leading);
  bool replaceCopysign(Function &F);

  // Calls |Replacer| on every call instruction that uses |F|. A call is
  // replaced (and erased) when |Replacer| returns a value that is neither
  // null nor the call itself.
  bool replaceCallsWithValue(Function &F,
                             std::function<Value *(CallInst *)> Replacer);

  // Intrinsic declarations queued by replaceCallsWithValue; runOnModule
  // erases them after all functions have been visited.
  SmallVector<Function *, 16> DeadFunctions;
};
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040055} // namespace
David Neto22f144c2017-06-12 14:26:21 -040056
// Definition of the static pass ID declared in the struct, followed by the
// registration of the pass under the argument name "ReplaceLLVMIntrinsics".
char ReplaceLLVMIntrinsicsPass::ID = 0;
INITIALIZE_PASS(ReplaceLLVMIntrinsicsPass, "ReplaceLLVMIntrinsics",
                "Replace LLVM intrinsics Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -040060
61namespace clspv {
62ModulePass *createReplaceLLVMIntrinsicsPass() {
63 return new ReplaceLLVMIntrinsicsPass();
64}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040065} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -040066
67bool ReplaceLLVMIntrinsicsPass::runOnModule(Module &M) {
68 bool Changed = false;
69
James Price3a116142020-10-16 06:52:18 -040070 // Remove lifetime annotations first. They could be using memset
David Netoe345e0e2018-06-15 11:38:32 -040071 // and memcpy calls.
72 Changed |= removeLifetimeDeclarations(M);
David Neto22f144c2017-06-12 14:26:21 -040073 Changed |= replaceMemset(M);
74 Changed |= replaceMemcpy(M);
75
alan-baker5f2e88e2020-12-07 15:24:04 -050076 for (auto &F : M) {
77 Changed |= runOnFunction(F);
78 }
79
80 for (auto F : DeadFunctions) {
81 F->eraseFromParent();
82 }
83
David Neto22f144c2017-06-12 14:26:21 -040084 return Changed;
85}
86
alan-baker5f2e88e2020-12-07 15:24:04 -050087bool ReplaceLLVMIntrinsicsPass::runOnFunction(Function &F) {
88 switch (F.getIntrinsicID()) {
89 case Intrinsic::fshl:
90 return replaceFshl(F);
91 case Intrinsic::copysign:
92 return replaceCopysign(F);
93 case Intrinsic::ctlz:
94 return replaceCountZeroes(F, true);
95 case Intrinsic::cttz:
96 return replaceCountZeroes(F, false);
James Price3a116142020-10-16 06:52:18 -040097
alan-baker5f2e88e2020-12-07 15:24:04 -050098 default:
99 break;
100 }
101
102 return false;
103}
104
105bool ReplaceLLVMIntrinsicsPass::replaceCallsWithValue(
106 Function &F, std::function<Value *(CallInst *)> Replacer) {
107 SmallVector<Instruction *, 8> ToRemove;
108 for (auto &U : F.uses()) {
109 if (auto Call = dyn_cast<CallInst>(U.getUser())) {
110 auto replacement = Replacer(Call);
alan-baker3e0de472020-12-08 15:57:17 -0500111 if (replacement != nullptr && replacement != Call) {
alan-baker5f2e88e2020-12-07 15:24:04 -0500112 Call->replaceAllUsesWith(replacement);
113 ToRemove.push_back(Call);
114 }
James Price3a116142020-10-16 06:52:18 -0400115 }
116 }
117
alan-baker5f2e88e2020-12-07 15:24:04 -0500118 for (auto inst : ToRemove) {
119 inst->eraseFromParent();
James Price3a116142020-10-16 06:52:18 -0400120 }
121
alan-baker5f2e88e2020-12-07 15:24:04 -0500122 DeadFunctions.push_back(&F);
123
124 return !ToRemove.empty();
125}
126
127bool ReplaceLLVMIntrinsicsPass::replaceFshl(Function &F) {
128 return replaceCallsWithValue(F, [](CallInst *call) {
129 auto arg_hi = call->getArgOperand(0);
130 auto arg_lo = call->getArgOperand(1);
131 auto arg_shift = call->getArgOperand(2);
132
133 // Validate argument types.
134 auto type = arg_hi->getType();
135 if ((type->getScalarSizeInBits() != 8) &&
136 (type->getScalarSizeInBits() != 16) &&
137 (type->getScalarSizeInBits() != 32) &&
138 (type->getScalarSizeInBits() != 64)) {
139 return static_cast<Value *>(nullptr);
140 }
141
142 // We shift the bottom bits of the first argument up, the top bits of the
143 // second argument down, and then OR the two shifted values.
144 IRBuilder<> builder(call);
145
146 // The shift amount is treated modulo the element size.
147 auto mod_mask = ConstantInt::get(type, type->getScalarSizeInBits() - 1);
148 auto shift_amount = builder.CreateAnd(arg_shift, mod_mask);
149
150 // Calculate the amount by which to shift the second argument down.
151 auto scalar_size = ConstantInt::get(type, type->getScalarSizeInBits());
152 auto down_amount = builder.CreateSub(scalar_size, shift_amount);
153
154 // Shift the two arguments and OR the results together.
155 auto hi_bits = builder.CreateShl(arg_hi, shift_amount);
156 auto lo_bits = builder.CreateLShr(arg_lo, down_amount);
157 return builder.CreateOr(lo_bits, hi_bits);
158 });
James Price3a116142020-10-16 06:52:18 -0400159}
160
David Neto22f144c2017-06-12 14:26:21 -0400161bool ReplaceLLVMIntrinsicsPass::replaceMemset(Module &M) {
162 bool Changed = false;
David Netod3f59382017-10-18 18:30:30 -0400163 auto Layout = M.getDataLayout();
David Neto22f144c2017-06-12 14:26:21 -0400164
165 for (auto &F : M) {
166 if (F.getName().startswith("llvm.memset")) {
167 SmallVector<CallInst *, 8> CallsToReplace;
168
169 for (auto U : F.users()) {
170 if (auto CI = dyn_cast<CallInst>(U)) {
171 auto Initializer = dyn_cast<ConstantInt>(CI->getArgOperand(1));
172
173 // We only handle cases where the initializer is a constant int that
174 // is 0.
175 if (!Initializer || (0 != Initializer->getZExtValue())) {
176 Initializer->print(errs());
177 llvm_unreachable("Unhandled llvm.memset.* instruction that had a "
178 "non-0 initializer!");
179 }
180
181 CallsToReplace.push_back(CI);
182 }
183 }
184
185 for (auto CI : CallsToReplace) {
186 auto NewArg = CI->getArgOperand(0);
Kévin Petit70944912019-04-17 23:22:28 +0100187 auto Bitcast = dyn_cast<BitCastInst>(NewArg);
188 if (Bitcast != nullptr) {
David Neto22f144c2017-06-12 14:26:21 -0400189 NewArg = Bitcast->getOperand(0);
190 }
191
David Netod3f59382017-10-18 18:30:30 -0400192 auto NumBytes = cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
David Neto22f144c2017-06-12 14:26:21 -0400193 auto Ty = NewArg->getType();
194 auto PointeeTy = Ty->getPointerElementType();
David Neto22f144c2017-06-12 14:26:21 -0400195 auto Zero = Constant::getNullValue(PointeeTy);
196
David Netod3f59382017-10-18 18:30:30 -0400197 const auto num_stores = NumBytes / Layout.getTypeAllocSize(PointeeTy);
198 assert((NumBytes == num_stores * Layout.getTypeAllocSize(PointeeTy)) &&
199 "Null memset can't be divided evenly across multiple stores.");
200 assert((num_stores & 0xFFFFFFFF) == num_stores);
David Neto22f144c2017-06-12 14:26:21 -0400201
David Netod3f59382017-10-18 18:30:30 -0400202 // Generate the first store.
Kévin Petit58c445c2019-06-18 18:09:46 +0100203 new StoreInst(Zero, NewArg, CI);
David Netod3f59382017-10-18 18:30:30 -0400204
205 // Generate subsequent stores, but only if needed.
206 if (num_stores) {
207 auto I32Ty = Type::getInt32Ty(M.getContext());
208 auto One = ConstantInt::get(I32Ty, 1);
209 auto Ptr = NewArg;
210 for (uint32_t i = 1; i < num_stores; i++) {
211 Ptr = GetElementPtrInst::Create(PointeeTy, Ptr, {One}, "", CI);
Kévin Petit58c445c2019-06-18 18:09:46 +0100212 new StoreInst(Zero, Ptr, CI);
David Netod3f59382017-10-18 18:30:30 -0400213 }
214 }
215
David Neto22f144c2017-06-12 14:26:21 -0400216 CI->eraseFromParent();
217
Kévin Petit70944912019-04-17 23:22:28 +0100218 if (Bitcast != nullptr) {
David Neto22f144c2017-06-12 14:26:21 -0400219 Bitcast->eraseFromParent();
220 }
221 }
222 }
223 }
224
225 return Changed;
226}
227
228bool ReplaceLLVMIntrinsicsPass::replaceMemcpy(Module &M) {
229 bool Changed = false;
David Netob84ba342017-06-19 17:55:37 -0400230 auto Layout = M.getDataLayout();
231
232 // Unpack source and destination types until we find a matching
233 // element type. Count the number of levels we unpack for the
234 // source and destination types. So far this only works for
235 // array types, but could be generalized to other regular types
236 // like vectors.
Alan Baker7dea8842018-10-22 10:15:41 -0400237 auto match_types = [&Layout](CallInst &CI, uint64_t Size, Type **DstElemTy,
238 Type **SrcElemTy, unsigned *NumDstUnpackings,
David Netob84ba342017-06-19 17:55:37 -0400239 unsigned *NumSrcUnpackings) {
Alan Baker7dea8842018-10-22 10:15:41 -0400240 auto descend_type = [](Type *InType) {
241 Type *OutType = InType;
242 if (OutType->isStructTy()) {
243 OutType = OutType->getStructElementType(0);
244 } else if (OutType->isArrayTy()) {
245 OutType = OutType->getArrayElementType();
James Pricecf53df42020-04-20 14:41:24 -0400246 } else if (auto vec_type = dyn_cast<VectorType>(OutType)) {
247 OutType = vec_type->getElementType();
Alan Baker7dea8842018-10-22 10:15:41 -0400248 } else {
249 assert(false && "Don't know how to descend into type");
250 }
251
252 return OutType;
253 };
254
David Netob84ba342017-06-19 17:55:37 -0400255 while (*SrcElemTy != *DstElemTy) {
256 auto SrcElemSize = Layout.getTypeSizeInBits(*SrcElemTy);
257 auto DstElemSize = Layout.getTypeSizeInBits(*DstElemTy);
258 if (SrcElemSize >= DstElemSize) {
Alan Baker7dea8842018-10-22 10:15:41 -0400259 *SrcElemTy = descend_type(*SrcElemTy);
David Netob84ba342017-06-19 17:55:37 -0400260 (*NumSrcUnpackings)++;
261 } else if (DstElemSize >= SrcElemSize) {
Alan Baker7dea8842018-10-22 10:15:41 -0400262 *DstElemTy = descend_type(*DstElemTy);
David Netob84ba342017-06-19 17:55:37 -0400263 (*NumDstUnpackings)++;
264 } else {
265 errs() << "Don't know how to unpack types for memcpy: " << CI
266 << "\ngot to: " << **DstElemTy << " vs " << **SrcElemTy << "\n";
267 assert(false && "Don't know how to unpack these types");
268 }
269 }
Alan Baker7dea8842018-10-22 10:15:41 -0400270
271 auto DstElemSize = Layout.getTypeSizeInBits(*DstElemTy) / 8;
272 while (Size < DstElemSize) {
273 *DstElemTy = descend_type(*DstElemTy);
274 *SrcElemTy = descend_type(*SrcElemTy);
275 (*NumDstUnpackings)++;
276 (*NumSrcUnpackings)++;
277 DstElemSize = Layout.getTypeSizeInBits(*DstElemTy) / 8;
278 }
David Netob84ba342017-06-19 17:55:37 -0400279 };
David Neto22f144c2017-06-12 14:26:21 -0400280
alan-baker3f772c02021-06-15 22:18:11 -0400281 SmallPtrSet<Instruction *, 8> BitCastsToForget;
David Neto22f144c2017-06-12 14:26:21 -0400282 for (auto &F : M) {
283 if (F.getName().startswith("llvm.memcpy")) {
David Netob84ba342017-06-19 17:55:37 -0400284 SmallVector<CallInst *, 8> CallsToReplaceWithSpirvCopyMemory;
David Neto22f144c2017-06-12 14:26:21 -0400285
286 for (auto U : F.users()) {
287 if (auto CI = dyn_cast<CallInst>(U)) {
alan-bakered80f572019-02-11 17:28:26 -0500288 assert(isa<BitCastOperator>(CI->getArgOperand(0)));
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400289 auto Dst =
290 dyn_cast<BitCastOperator>(CI->getArgOperand(0))->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400291
alan-bakered80f572019-02-11 17:28:26 -0500292 assert(isa<BitCastOperator>(CI->getArgOperand(1)));
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400293 auto Src =
294 dyn_cast<BitCastOperator>(CI->getArgOperand(1))->getOperand(0);
David Neto22f144c2017-06-12 14:26:21 -0400295
296 // The original type of Dst we get from the argument to the bitcast
297 // instruction.
298 auto DstTy = Dst->getType();
299 assert(DstTy->isPointerTy());
300
301 // The original type of Src we get from the argument to the bitcast
302 // instruction.
303 auto SrcTy = Src->getType();
304 assert(SrcTy->isPointerTy());
305
David Neto22f144c2017-06-12 14:26:21 -0400306 // Check that the size is a constant integer.
307 assert(isa<ConstantInt>(CI->getArgOperand(2)));
308 auto Size =
309 dyn_cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
310
Alan Baker7dea8842018-10-22 10:15:41 -0400311 auto DstElemTy = DstTy->getPointerElementType();
312 auto SrcElemTy = SrcTy->getPointerElementType();
313 unsigned NumDstUnpackings = 0;
314 unsigned NumSrcUnpackings = 0;
315 match_types(*CI, Size, &DstElemTy, &SrcElemTy, &NumDstUnpackings,
316 &NumSrcUnpackings);
317
318 // Check that the pointee types match.
319 assert(DstElemTy == SrcElemTy);
320
David Netob84ba342017-06-19 17:55:37 -0400321 auto DstElemSize = Layout.getTypeSizeInBits(DstElemTy) / 8;
alan-baker4a757f62020-04-22 08:17:49 -0400322 (void)DstElemSize;
David Neto22f144c2017-06-12 14:26:21 -0400323
David Netob84ba342017-06-19 17:55:37 -0400324 // Check that the size is a multiple of the size of the pointee type.
325 assert(Size % DstElemSize == 0);
David Neto22f144c2017-06-12 14:26:21 -0400326
alan-bakerbccf62c2019-03-29 10:32:41 -0400327 auto Alignment = cast<MemIntrinsic>(CI)->getDestAlignment();
David Netob84ba342017-06-19 17:55:37 -0400328 auto TypeAlignment = Layout.getABITypeAlignment(DstElemTy);
alan-baker4a757f62020-04-22 08:17:49 -0400329 (void)Alignment;
330 (void)TypeAlignment;
David Neto22f144c2017-06-12 14:26:21 -0400331
332 // Check that the alignment is at least the alignment of the pointee
333 // type.
334 assert(Alignment >= TypeAlignment);
335
336 // Check that the alignment is a multiple of the alignment of the
337 // pointee type.
338 assert(0 == (Alignment % TypeAlignment));
339
340 // Check that volatile is a constant.
alan-bakerbccf62c2019-03-29 10:32:41 -0400341 assert(isa<ConstantInt>(CI->getArgOperand(3)));
David Neto22f144c2017-06-12 14:26:21 -0400342
David Netob84ba342017-06-19 17:55:37 -0400343 CallsToReplaceWithSpirvCopyMemory.push_back(CI);
David Neto22f144c2017-06-12 14:26:21 -0400344 }
345 }
346
David Netob84ba342017-06-19 17:55:37 -0400347 for (auto CI : CallsToReplaceWithSpirvCopyMemory) {
alan-bakered80f572019-02-11 17:28:26 -0500348 auto Arg0 = dyn_cast<BitCastOperator>(CI->getArgOperand(0));
349 auto Arg1 = dyn_cast<BitCastOperator>(CI->getArgOperand(1));
David Neto22f144c2017-06-12 14:26:21 -0400350 auto Arg3 = dyn_cast<ConstantInt>(CI->getArgOperand(3));
David Neto22f144c2017-06-12 14:26:21 -0400351
352 auto I32Ty = Type::getInt32Ty(M.getContext());
alan-baker3f772c02021-06-15 22:18:11 -0400353 auto DstAlignment =
354 ConstantInt::get(I32Ty, cast<MemCpyInst>(CI)->getDestAlignment());
355 auto SrcAlignment =
356 ConstantInt::get(I32Ty, cast<MemCpyInst>(CI)->getSourceAlignment());
alan-bakerbccf62c2019-03-29 10:32:41 -0400357 auto Volatile = ConstantInt::get(I32Ty, Arg3->getZExtValue());
David Neto22f144c2017-06-12 14:26:21 -0400358
alan-bakered80f572019-02-11 17:28:26 -0500359 auto Dst = Arg0->getOperand(0);
360 auto Src = Arg1->getOperand(0);
David Netob84ba342017-06-19 17:55:37 -0400361
362 auto DstElemTy = Dst->getType()->getPointerElementType();
363 auto SrcElemTy = Src->getType()->getPointerElementType();
364 unsigned NumDstUnpackings = 0;
365 unsigned NumSrcUnpackings = 0;
David Netob84ba342017-06-19 17:55:37 -0400366 auto Size = dyn_cast<ConstantInt>(CI->getArgOperand(2))->getZExtValue();
Alan Baker7dea8842018-10-22 10:15:41 -0400367 match_types(*CI, Size, &DstElemTy, &SrcElemTy, &NumDstUnpackings,
368 &NumSrcUnpackings);
SJW61531372020-06-09 07:31:08 -0500369 auto SPIRVIntrinsic = clspv::CopyMemoryFunction();
David Neto22f144c2017-06-12 14:26:21 -0400370
David Netob84ba342017-06-19 17:55:37 -0400371 auto DstElemSize = Layout.getTypeSizeInBits(DstElemTy) / 8;
David Neto22f144c2017-06-12 14:26:21 -0400372
David Netob84ba342017-06-19 17:55:37 -0400373 IRBuilder<> Builder(CI);
374
375 if (NumSrcUnpackings == 0 && NumDstUnpackings == 0) {
alan-baker3f772c02021-06-15 22:18:11 -0400376 SmallVector<Type *, 5> param_tys = {Dst->getType(), Src->getType(),
377 I32Ty, I32Ty};
378 SmallVector<Value *, 5> param_values = {Dst, Src, DstAlignment};
379 if (clspv::Option::SpvVersion() >=
380 clspv::Option::SPIRVVersion::SPIRV_1_4) {
381 param_tys.push_back(I32Ty);
382 param_values.push_back(SrcAlignment);
383 }
384 param_values.push_back(Volatile);
385 auto NewFType =
386 FunctionType::get(F.getReturnType(), param_tys, false);
David Netob84ba342017-06-19 17:55:37 -0400387 auto NewF =
388 Function::Create(NewFType, F.getLinkage(), SPIRVIntrinsic, &M);
alan-baker3f772c02021-06-15 22:18:11 -0400389 Builder.CreateCall(NewF, param_values, "");
David Netob84ba342017-06-19 17:55:37 -0400390 } else {
391 auto Zero = ConstantInt::get(I32Ty, 0);
392 SmallVector<Value *, 3> SrcIndices;
393 SmallVector<Value *, 3> DstIndices;
394 // Make unpacking indices.
395 for (unsigned unpacking = 0; unpacking < NumSrcUnpackings;
396 ++unpacking) {
397 SrcIndices.push_back(Zero);
398 }
399 for (unsigned unpacking = 0; unpacking < NumDstUnpackings;
400 ++unpacking) {
401 DstIndices.push_back(Zero);
402 }
403 // Add a placeholder for the final index.
404 SrcIndices.push_back(Zero);
405 DstIndices.push_back(Zero);
406
407 // Build the function and function type only once.
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400408 FunctionType *NewFType = nullptr;
409 Function *NewF = nullptr;
David Netob84ba342017-06-19 17:55:37 -0400410
411 IRBuilder<> Builder(CI);
412 for (unsigned i = 0; i < Size / DstElemSize; ++i) {
413 auto Index = ConstantInt::get(I32Ty, i);
414 SrcIndices.back() = Index;
415 DstIndices.back() = Index;
416
alan-bakered80f572019-02-11 17:28:26 -0500417 // Avoid the builder for Src in order to prevent the folder from
418 // creating constant expressions for constant memcpys.
419 auto SrcElemPtr =
420 GetElementPtrInst::CreateInBounds(Src, SrcIndices, "", CI);
David Netob84ba342017-06-19 17:55:37 -0400421 auto DstElemPtr = Builder.CreateGEP(Dst, DstIndices);
alan-baker3f772c02021-06-15 22:18:11 -0400422 SmallVector<Type *, 5> param_tys = {
423 DstElemPtr->getType(), SrcElemPtr->getType(), I32Ty, I32Ty};
424 SmallVector<Value *, 5> param_values = {DstElemPtr, SrcElemPtr,
425 DstAlignment};
426 if (clspv::Option::SpvVersion() >=
427 clspv::Option::SPIRVVersion::SPIRV_1_4) {
428 param_tys.push_back(I32Ty);
429 param_values.push_back(SrcAlignment);
430 }
431 param_values.push_back(Volatile);
David Netob84ba342017-06-19 17:55:37 -0400432 NewFType =
433 NewFType != nullptr
434 ? NewFType
alan-baker3f772c02021-06-15 22:18:11 -0400435 : FunctionType::get(F.getReturnType(), param_tys, false);
David Netob84ba342017-06-19 17:55:37 -0400436 NewF = NewF != nullptr ? NewF
437 : Function::Create(NewFType, F.getLinkage(),
438 SPIRVIntrinsic, &M);
alan-baker3f772c02021-06-15 22:18:11 -0400439 Builder.CreateCall(NewF, param_values, "");
David Netob84ba342017-06-19 17:55:37 -0400440 }
441 }
442
443 // Erase the call.
David Neto22f144c2017-06-12 14:26:21 -0400444 CI->eraseFromParent();
445
David Netob84ba342017-06-19 17:55:37 -0400446 // Erase the bitcasts. A particular bitcast might be used
447 // in more than one memcpy, so defer actual deleting until later.
alan-bakered80f572019-02-11 17:28:26 -0500448 if (isa<BitCastInst>(Arg0))
449 BitCastsToForget.insert(dyn_cast<BitCastInst>(Arg0));
450 if (isa<BitCastInst>(Arg1))
451 BitCastsToForget.insert(dyn_cast<BitCastInst>(Arg1));
David Netob84ba342017-06-19 17:55:37 -0400452 }
David Neto22f144c2017-06-12 14:26:21 -0400453 }
454 }
alan-baker3f772c02021-06-15 22:18:11 -0400455 for (auto *Inst : BitCastsToForget) {
456 Inst->eraseFromParent();
457 }
David Neto22f144c2017-06-12 14:26:21 -0400458
459 return Changed;
460}
David Netoe345e0e2018-06-15 11:38:32 -0400461
462bool ReplaceLLVMIntrinsicsPass::removeLifetimeDeclarations(Module &M) {
463 // SPIR-V OpLifetimeStart and OpLifetimeEnd require Kernel capability.
464 // Vulkan doesn't support that, so remove all lifteime bounds declarations.
465
466 bool Changed = false;
467
468 SmallVector<Function *, 2> WorkList;
469 for (auto &F : M) {
470 if (F.getName().startswith("llvm.lifetime.")) {
471 WorkList.push_back(&F);
472 }
473 }
474
475 for (auto *F : WorkList) {
476 Changed = true;
alan-bakera5ff28e2018-11-21 16:27:20 -0500477 // Copy users to avoid modifying the list in place.
478 SmallVector<User *, 8> users(F->users());
479 for (auto U : users) {
David Netoe345e0e2018-06-15 11:38:32 -0400480 if (auto *CI = dyn_cast<CallInst>(U)) {
481 CI->eraseFromParent();
482 }
483 }
484 F->eraseFromParent();
485 }
486
487 return Changed;
488}
alan-baker5f2e88e2020-12-07 15:24:04 -0500489
490bool ReplaceLLVMIntrinsicsPass::replaceCountZeroes(Function &F, bool leading) {
491 if (!isa<IntegerType>(F.getReturnType()->getScalarType()))
492 return false;
493
494 auto bitwidth = F.getReturnType()->getScalarSizeInBits();
495 if (bitwidth == 32 || bitwidth > 64)
496 return false;
497
498 return replaceCallsWithValue(F, [&F, bitwidth, leading](CallInst *Call) {
499 auto c_false = ConstantInt::getFalse(Call->getContext());
500 auto in = Call->getArgOperand(0);
501 IRBuilder<> builder(Call);
502 auto ty = Call->getType()->getWithNewBitWidth(32);
503 auto c32 = ConstantInt::get(ty, 32);
504 auto func_32bit = Intrinsic::getDeclaration(
505 F.getParent(), leading ? Intrinsic::ctlz : Intrinsic::cttz, ty);
506 if (bitwidth < 32) {
507 // Extend the input to 32-bits and perform a clz/ctz.
508 auto zext = builder.CreateZExt(in, ty);
509 Value *call_input = zext;
510 if (!leading) {
511 // Or the extended input value with a constant that caps the max to the
512 // right bitwidth (e.g. 256 for i8 and 65536 for i16).
513 auto mask = ConstantInt::get(ty, 1 << bitwidth);
514 call_input = builder.CreateOr(zext, mask);
515 }
516 auto call = builder.CreateCall(func_32bit->getFunctionType(), func_32bit,
517 {call_input, c_false});
518 Value *tmp = call;
519 if (leading) {
520 // Clz is implemented as 31 - FindUMsb(|zext|), so adjust the result
521 // the right bitwidth.
522 auto sub_const = ConstantInt::get(ty, 32 - bitwidth);
523 tmp = builder.CreateSub(call, sub_const);
524 }
525 // Truncate the intermediate result to the right size.
526 return builder.CreateTrunc(tmp, Call->getType());
527 } else {
528 // Perform a 32-bit version of clz/ctz on each half of the 64-bit input.
529 auto lshr = builder.CreateLShr(in, 32);
530 auto top_bits = builder.CreateTrunc(lshr, ty);
531 auto bot_bits = builder.CreateTrunc(in, ty);
532 auto top_func = builder.CreateCall(func_32bit->getFunctionType(),
533 func_32bit, {top_bits, c_false});
534 auto bot_func = builder.CreateCall(func_32bit->getFunctionType(),
535 func_32bit, {bot_bits, c_false});
536 Value *tmp = nullptr;
537 if (leading) {
538 // For clz, if clz(top) is 32, return 32 + clz(bot).
539 auto cmp = builder.CreateICmpEQ(top_func, c32);
540 auto adjust = builder.CreateAdd(bot_func, c32);
541 tmp = builder.CreateSelect(cmp, adjust, top_func);
542 } else {
543 // For ctz, if clz(bot) is 32, return 32 + ctz(top)
544 auto bot_cmp = builder.CreateICmpEQ(bot_func, c32);
545 auto adjust = builder.CreateAdd(top_func, c32);
546 tmp = builder.CreateSelect(bot_cmp, adjust, bot_func);
547 }
548 // Extend the intermediate result to the correct size.
549 return builder.CreateZExt(tmp, Call->getType());
550 }
551 });
552}
553
554bool ReplaceLLVMIntrinsicsPass::replaceCopysign(Function &F) {
555 return replaceCallsWithValue(F, [&F](CallInst *CI) {
556 auto XValue = CI->getOperand(0);
557 auto YValue = CI->getOperand(1);
558
559 auto Ty = XValue->getType();
560
561 Type *IntTy = Type::getIntNTy(F.getContext(), Ty->getScalarSizeInBits());
562 if (auto vec_ty = dyn_cast<VectorType>(Ty)) {
563 IntTy = FixedVectorType::get(
564 IntTy, vec_ty->getElementCount().getKnownMinValue());
565 }
566
567 // Return X with the sign of Y
568
569 // Sign bit masks
570 auto SignBit = IntTy->getScalarSizeInBits() - 1;
571 auto SignBitMask = 1 << SignBit;
572 auto SignBitMaskValue = ConstantInt::get(IntTy, SignBitMask);
573 auto NotSignBitMaskValue = ConstantInt::get(IntTy, ~SignBitMask);
574
575 IRBuilder<> Builder(CI);
576
577 // Extract sign of Y
578 auto YInt = Builder.CreateBitCast(YValue, IntTy);
579 auto YSign = Builder.CreateAnd(YInt, SignBitMaskValue);
580
581 // Clear sign bit in X
582 auto XInt = Builder.CreateBitCast(XValue, IntTy);
583 XInt = Builder.CreateAnd(XInt, NotSignBitMaskValue);
584
585 // Insert sign bit of Y into X
586 auto NewXInt = Builder.CreateOr(XInt, YSign);
587
588 // And cast back to floating-point
589 return Builder.CreateBitCast(NewXInt, Ty);
590 });
591}