blob: e8b3378eb0a731a2619fe6af5409fbf33ca9021c [file] [log] [blame]
// Copyright 2017 The Clspv Authors. All rights reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
14
David Neto118188e2018-08-24 11:27:54 -040015#include "llvm/IR/DataLayout.h"
16#include "llvm/IR/IRBuilder.h"
17#include "llvm/IR/Instructions.h"
18#include "llvm/IR/Module.h"
19#include "llvm/Pass.h"
20#include "llvm/Support/raw_ostream.h"
Marco Antognini42b65222021-02-12 18:40:42 +000021#include "llvm/Transforms/Utils/Local.h"
David Neto22f144c2017-06-12 14:26:21 -040022
Diego Novilloa4c44fa2019-04-11 10:56:15 -040023#include "Passes.h"
24
David Neto22f144c2017-06-12 14:26:21 -040025using namespace llvm;
26
27#define DEBUG_TYPE "replacepointerbitcast"
28
29namespace {
30struct ReplacePointerBitcastPass : public ModulePass {
31 static char ID;
32 ReplacePointerBitcastPass() : ModulePass(ID) {}
33
David Neto30ae05e2017-09-06 19:58:36 -040034 // Returns the number of chunks of source data required to exactly
35 // cover the destination data, if the source and destination types are
36 // different sizes. Otherwise returns 0.
David Neto22f144c2017-06-12 14:26:21 -040037 unsigned CalculateNumIter(unsigned SrcTyBitWidth, unsigned DstTyBitWidth);
38 Value *CalculateNewGEPIdx(unsigned SrcTyBitWidth, unsigned DstTyBitWidth,
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040039 GetElementPtrInst *GEP);
David Neto22f144c2017-06-12 14:26:21 -040040
41 bool runOnModule(Module &M) override;
42};
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040043} // namespace
David Neto22f144c2017-06-12 14:26:21 -040044
45char ReplacePointerBitcastPass::ID = 0;
Diego Novilloa4c44fa2019-04-11 10:56:15 -040046INITIALIZE_PASS(ReplacePointerBitcastPass, "ReplacePointerBitcast",
47 "Replace Pointer Bitcast Pass", false, false)
David Neto22f144c2017-06-12 14:26:21 -040048
49namespace clspv {
50ModulePass *createReplacePointerBitcastPass() {
51 return new ReplacePointerBitcastPass();
52}
Diego Novillo3cc8d7a2019-04-10 13:30:34 -040053} // namespace clspv
David Neto22f144c2017-06-12 14:26:21 -040054
alan-baker32014272019-05-22 08:07:18 -040055namespace {
56
57// Gathers the scalar values of |v| into |elements|. Generates new instructions
58// to extract the values.
59void GatherBaseElements(Value *v, SmallVectorImpl<Value *> *elements,
60 IRBuilder<> &builder) {
61 auto *module = builder.GetInsertBlock()->getParent()->getParent();
62 auto &DL = module->getDataLayout();
63 auto *type = v->getType();
64 if (auto *vec_type = dyn_cast<VectorType>(type)) {
alan-baker5a8c3be2020-09-09 13:44:26 -040065 for (uint64_t i = 0; i != vec_type->getElementCount().getKnownMinValue();
66 ++i) {
alan-baker32014272019-05-22 08:07:18 -040067 elements->push_back(builder.CreateExtractElement(v, i));
68 }
69 } else if (auto *array_type = dyn_cast<ArrayType>(type)) {
70 for (uint64_t i = 0; i != array_type->getNumElements(); ++i) {
71 auto *extract = builder.CreateExtractValue(v, {static_cast<unsigned>(i)});
72 GatherBaseElements(extract, elements, builder);
73 }
74 } else if (auto *struct_type = dyn_cast<StructType>(type)) {
75 const auto *struct_layout = DL.getStructLayout(struct_type);
76 if (struct_layout->hasPadding()) {
77 llvm_unreachable("Unhandled conversion of padded struct");
78 }
79 for (unsigned i = 0; i != struct_type->getNumElements(); ++i) {
80 auto *extract = builder.CreateExtractValue(v, {i});
81 GatherBaseElements(extract, elements, builder);
82 }
83 } else {
84 elements->push_back(v);
85 }
86}
87
88// Returns a value of |dst_type| using the elemental members of |src_elements|.
89Value *BuildFromElements(Type *dst_type, const ArrayRef<Value *> &src_elements,
90 unsigned *used_bits, unsigned *index,
91 IRBuilder<> &builder) {
92 auto *module = builder.GetInsertBlock()->getParent()->getParent();
93 auto &DL = module->getDataLayout();
94 auto &context = dst_type->getContext();
95 Value *dst = nullptr;
96 // Arrays, vectors and structs are annoyingly just different enough to each
97 // require their own cases.
98 if (auto *dst_array_ty = dyn_cast<ArrayType>(dst_type)) {
99 auto *ele_ty = dst_array_ty->getElementType();
100 for (uint64_t i = 0; i != dst_array_ty->getNumElements(); ++i) {
101 auto *tmp_value =
102 BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
103 auto *prev = dst ? dst : UndefValue::get(dst_type);
104 dst = builder.CreateInsertValue(prev, tmp_value,
105 {static_cast<unsigned>(i)});
106 }
107 } else if (auto *dst_struct_ty = dyn_cast<StructType>(dst_type)) {
108 const auto *struct_layout = DL.getStructLayout(dst_struct_ty);
109 if (struct_layout->hasPadding()) {
110 llvm_unreachable("Unhandled padded struct conversion");
111 return nullptr;
112 }
113 for (unsigned i = 0; i != dst_struct_ty->getNumElements(); ++i) {
114 auto *ele_ty = dst_struct_ty->getElementType(i);
115 auto *tmp_value =
116 BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
117 auto *prev = dst ? dst : UndefValue::get(dst_type);
118 dst = builder.CreateInsertValue(prev, tmp_value, {i});
119 }
120 } else if (auto *dst_vec_ty = dyn_cast<VectorType>(dst_type)) {
121 auto *ele_ty = dst_vec_ty->getElementType();
alan-baker5a8c3be2020-09-09 13:44:26 -0400122 for (uint64_t i = 0; i != dst_vec_ty->getElementCount().getKnownMinValue();
123 ++i) {
alan-baker32014272019-05-22 08:07:18 -0400124 auto *tmp_value =
125 BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
126 auto *prev = dst ? dst : UndefValue::get(dst_type);
127 dst = builder.CreateInsertElement(prev, tmp_value, i);
128 }
129 } else {
130 // Scalar conversion eats up elements in src_elements.
131 auto dst_width = DL.getTypeStoreSizeInBits(dst_type);
132 uint64_t bits = 0;
133 Value *tmp_value = nullptr;
134 auto prev_bits = 0;
135 Value *ele_int_cast = nullptr;
136 while (bits < dst_width) {
137 prev_bits = bits;
138 auto *ele = src_elements[*index];
139 auto *ele_ty = ele->getType();
140 auto ele_width = DL.getTypeStoreSizeInBits(ele_ty);
141 auto remaining_bits = ele_width - *used_bits;
142 auto needed_bits = dst_width - bits;
143 // Create a reusable cast to an integer type for this element.
144 if (!ele_int_cast || cast<User>(ele_int_cast)->getOperand(0) != ele) {
145 ele_int_cast =
146 builder.CreateBitCast(ele, IntegerType::get(context, ele_width));
147 }
148 tmp_value = ele_int_cast;
149 // Some of the bits of this element were previously used, so shift the
150 // value that many bits.
151 if (*used_bits != 0) {
152 tmp_value = builder.CreateLShr(tmp_value, *used_bits);
153 }
154 if (needed_bits < remaining_bits) {
155 // Ensure only the needed bits are used.
156 uint64_t mask = (1ull << needed_bits) - 1;
157 tmp_value =
158 builder.CreateAnd(tmp_value, builder.getIntN(dst_width, mask));
159 }
160 // Cast to tbe destination bit width, but stay as a integer type.
161 if (ele_width != dst_width) {
162 tmp_value = builder.CreateIntCast(
163 tmp_value, IntegerType::get(context, dst_width), false);
164 }
165
166 if (remaining_bits <= needed_bits) {
167 // Used the rest of the element.
168 *used_bits = 0;
169 ++(*index);
170 bits += remaining_bits;
171 } else {
172 // Only need part of this element.
173 *used_bits += needed_bits;
174 bits += needed_bits;
175 }
176
177 if (dst) {
178 // Previous iteration generated an integer of the right size. That needs
179 // to be combined with the value generated this iteration.
180 tmp_value = builder.CreateShl(tmp_value, prev_bits);
181 dst = builder.CreateOr(dst, tmp_value);
182 } else {
183 dst = tmp_value;
184 }
185 }
186
187 assert(bits <= dst_width);
188 if (bits == dst_width && dst_type != dst->getType()) {
189 // Finally, cast away from the working integer type if necessary.
190 dst = builder.CreateBitCast(dst, dst_type);
191 }
192 }
193
194 return dst;
195}
196
197// Returns an equivalent value of |src| as |dst_type|.
198//
199// This function requires |src|'s and |dst_type|'s bit widths match. Does not
200// introduce new integer sizes, but generates multiple instructions to mimic a
201// generic bitcast (unless a bitcast is sufficient).
202Value *ConvertValue(Value *src, Type *dst_type, IRBuilder<> &builder) {
203 auto *src_type = src->getType();
204 auto *module = builder.GetInsertBlock()->getParent()->getParent();
205 auto &DL = module->getDataLayout();
206 if (!src_type->isFirstClassType() || !dst_type->isFirstClassType() ||
207 src_type->isAggregateType() || dst_type->isAggregateType()) {
208 SmallVector<Value *, 8> src_elements;
209 if (src_type->isAggregateType()) {
210 GatherBaseElements(src, &src_elements, builder);
211 } else {
212 src_elements.push_back(src);
213 }
214
215 // Check that overall sizes make sense.
216 uint64_t element_sum = 0;
217 // Can only successfully convert unpadded structs.
218 for (auto element : src_elements) {
219 element_sum += DL.getTypeStoreSizeInBits(element->getType());
220 }
221 if (DL.getTypeStoreSizeInBits(dst_type) != element_sum) {
222 llvm_unreachable("Elements do not sum to overall size");
223 return nullptr;
224 }
225
226 unsigned used_bits = 0;
227 unsigned index = 0;
228 return BuildFromElements(dst_type, src_elements, &used_bits, &index,
229 builder);
230 } else {
231 return builder.CreateBitCast(src, dst_type);
232 }
233
234 return nullptr;
235}
236
237} // namespace
238
David Neto22f144c2017-06-12 14:26:21 -0400239unsigned ReplacePointerBitcastPass::CalculateNumIter(unsigned SrcTyBitWidth,
240 unsigned DstTyBitWidth) {
241 unsigned NumIter = 0;
242 if (SrcTyBitWidth > DstTyBitWidth) {
243 if (SrcTyBitWidth % DstTyBitWidth) {
244 llvm_unreachable(
245 "Src type bitwidth should be multiple of Dest type bitwidth");
246 }
247 NumIter = 1;
248 } else if (SrcTyBitWidth < DstTyBitWidth) {
249 if (DstTyBitWidth % SrcTyBitWidth) {
250 llvm_unreachable(
251 "Dest type bitwidth should be multiple of Src type bitwidth");
252 }
253 NumIter = DstTyBitWidth / SrcTyBitWidth;
254 } else {
255 NumIter = 0;
256 }
257
258 return NumIter;
259}
260
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400261Value *ReplacePointerBitcastPass::CalculateNewGEPIdx(unsigned SrcTyBitWidth,
262 unsigned DstTyBitWidth,
263 GetElementPtrInst *GEP) {
David Neto22f144c2017-06-12 14:26:21 -0400264 Value *NewGEPIdx = GEP->getOperand(1);
265 IRBuilder<> Builder(GEP);
266
267 if (SrcTyBitWidth > DstTyBitWidth) {
268 if (GEP->getNumOperands() > 2) {
269 GEP->print(errs());
270 llvm_unreachable("Support above GEP on PointerBitcastPass");
271 }
272
273 NewGEPIdx = Builder.CreateLShr(
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400274 NewGEPIdx, Builder.getInt32(std::log2(SrcTyBitWidth / DstTyBitWidth)));
David Neto22f144c2017-06-12 14:26:21 -0400275 } else if (DstTyBitWidth > SrcTyBitWidth) {
276 if (GEP->getNumOperands() > 2) {
277 GEP->print(errs());
278 llvm_unreachable("Support above GEP on PointerBitcastPass");
279 }
280
281 NewGEPIdx = Builder.CreateShl(
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400282 NewGEPIdx, Builder.getInt32(std::log2(DstTyBitWidth / SrcTyBitWidth)));
David Neto22f144c2017-06-12 14:26:21 -0400283 }
284
285 return NewGEPIdx;
286}
287
288bool ReplacePointerBitcastPass::runOnModule(Module &M) {
289 bool Changed = false;
290
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400291 const DataLayout &DL = M.getDataLayout();
David Neto8e138142018-05-29 10:19:21 -0400292
Marco Antognini42b65222021-02-12 18:40:42 +0000293 using WeakInstructions = SmallVector<WeakTrackingVH, 16>;
294 WeakInstructions ToBeDeleted;
David Neto22f144c2017-06-12 14:26:21 -0400295 SmallVector<Instruction *, 16> VectorWorkList;
296 SmallVector<Instruction *, 16> ScalarWorkList;
alan-baker1b13e8f2019-08-08 17:56:51 -0400297 SmallVector<User *, 16> UserWorkList;
David Neto22f144c2017-06-12 14:26:21 -0400298 for (Function &F : M) {
299 for (BasicBlock &BB : F) {
300 for (Instruction &I : BB) {
301 // Find pointer bitcast instruction.
302 if (isa<BitCastInst>(&I) && isa<PointerType>(I.getType())) {
303 Value *Src = I.getOperand(0);
304 if (isa<PointerType>(Src->getType())) {
alan-baker1b13e8f2019-08-08 17:56:51 -0400305 // Check if this bitcast is one that can be handled during this run
306 // of the pass. If not, just skip it and don't make changes to the
307 // module. These checks are coarse level checks that only the right
308 // instructions appear. Rejected bitcasts might be able to be
309 // handled later in the flow after further optimization.
310 UserWorkList.clear();
311 for (auto User : I.users()) {
312 UserWorkList.push_back(User);
313 }
314 bool ok = true;
315 while (!UserWorkList.empty()) {
316 auto User = UserWorkList.back();
317 UserWorkList.pop_back();
318
319 if (isa<GetElementPtrInst>(User)) {
320 for (auto GEPUser : User->users()) {
321 UserWorkList.push_back(GEPUser);
322 }
323 } else if (!isa<StoreInst>(User) && !isa<LoadInst>(User)) {
324 // Cannot handle this bitcast.
325 ok = false;
326 break;
327 }
328 }
329 if (!ok) {
330 continue;
331 }
332
alan-bakerad1a12f2020-08-25 09:18:38 -0400333 auto inst = &I;
David Neto22f144c2017-06-12 14:26:21 -0400334 Type *SrcEleTy =
alan-bakerad1a12f2020-08-25 09:18:38 -0400335 inst->getOperand(0)->getType()->getPointerElementType();
336
337 // De-"canonicalize" the input pointer.
338 // If Src is an array, LLVM has likely canonicalized all GEPs to
339 // the first element away as the following addresses are all
340 // equivalent:
341 // * %in = alloca [4 x [4 x float]]
342 // * %gep0 = getelementptr [4 x [4 x float]]*, [4 x [4 x [float]]*
343 // %in
344 // * %gep1 = getelementptr [4 x [4 x float]]*, [4 x [4 x [float]]*
345 // %in, i32 0
346 // * %gep2 = getelementptr [4 x [4 x float]]*, [4 x [4 x [float]]*
347 // %in, i32 0, i32 0
348 // * %gep3 = getelementptr [4 x [4 x float]]*, [4 x [4 x [float]]*
349 // %in, i32 0, i32 0, i32 0
350 //
351 // Note: count initialized to 1 to account for the first gep index.
352 uint32_t count = 1;
353 while (auto ArrayTy = dyn_cast<ArrayType>(SrcEleTy)) {
354 ++count;
355 SrcEleTy = ArrayTy->getElementType();
356 }
357
358 if (count > 1) {
359 // Create a cast of the pointer. Replace the original cast with
360 // it and mark the original cast for deletion.
361 SmallVector<Value *, 4> indices(
362 count,
363 ConstantInt::get(IntegerType::get(M.getContext(), 32), 0));
364 auto gep = GetElementPtrInst::CreateInBounds(inst->getOperand(0),
365 indices, "", inst);
366 ToBeDeleted.push_back(&I);
367 auto cast = new BitCastInst(gep, inst->getType(), "", inst);
368 inst->replaceAllUsesWith(cast);
369 inst = cast;
370 }
371
372 Type *DstEleTy = inst->getType()->getPointerElementType();
David Neto22f144c2017-06-12 14:26:21 -0400373 if (SrcEleTy->isVectorTy() || DstEleTy->isVectorTy()) {
374 // Handle case either operand is vector type like char4* -> int4*.
alan-bakerad1a12f2020-08-25 09:18:38 -0400375 VectorWorkList.push_back(inst);
David Neto22f144c2017-06-12 14:26:21 -0400376 } else {
377 // Handle case all operands are scalar type like char* -> int*.
alan-bakerad1a12f2020-08-25 09:18:38 -0400378 ScalarWorkList.push_back(inst);
David Neto22f144c2017-06-12 14:26:21 -0400379 }
380
381 Changed = true;
382 } else {
383 llvm_unreachable("Unsupported bitcast");
384 }
385 }
386 }
387 }
388 }
389
David Neto22f144c2017-06-12 14:26:21 -0400390 for (Instruction *Inst : VectorWorkList) {
391 Value *Src = Inst->getOperand(0);
392 Type *SrcTy = Src->getType()->getPointerElementType();
393 Type *DstTy = Inst->getType()->getPointerElementType();
James Pricecf53df42020-04-20 14:41:24 -0400394 VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy);
395 VectorType *DstVecTy = dyn_cast<VectorType>(DstTy);
396 Type *SrcEleTy = SrcTy->isVectorTy() ? SrcVecTy->getElementType() : SrcTy;
397 Type *DstEleTy = DstTy->isVectorTy() ? DstVecTy->getElementType() : DstTy;
David Neto30ae05e2017-09-06 19:58:36 -0400398 // These are bit widths of the source and destination types, even
alan-bakerad1a12f2020-08-25 09:18:38 -0400399 // if they are vector types. E.g. bit width of float4 is 128.
David Neto8e138142018-05-29 10:19:21 -0400400 unsigned SrcTyBitWidth = DL.getTypeStoreSizeInBits(SrcTy);
401 unsigned DstTyBitWidth = DL.getTypeStoreSizeInBits(DstTy);
402 unsigned SrcEleTyBitWidth = DL.getTypeStoreSizeInBits(SrcEleTy);
403 unsigned DstEleTyBitWidth = DL.getTypeStoreSizeInBits(DstEleTy);
David Neto22f144c2017-06-12 14:26:21 -0400404 unsigned NumIter = CalculateNumIter(SrcTyBitWidth, DstTyBitWidth);
405
406 // Investigate pointer bitcast's users.
407 for (User *BitCastUser : Inst->users()) {
408 Value *BitCastSrc = Inst->getOperand(0);
409 Value *NewAddrIdx = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
410
411 // It consist of User* and bool whether user is gep or not.
412 SmallVector<std::pair<User *, bool>, 32> Users;
413
414 GetElementPtrInst *GEP = nullptr;
415 Value *OrgGEPIdx = nullptr;
Jason Gavrise44af072018-08-14 20:44:50 -0400416 if ((GEP = dyn_cast<GetElementPtrInst>(BitCastUser))) {
David Neto22f144c2017-06-12 14:26:21 -0400417 OrgGEPIdx = GEP->getOperand(1);
418
419 // Build new src/dst address index.
420 NewAddrIdx = CalculateNewGEPIdx(SrcTyBitWidth, DstTyBitWidth, GEP);
421
422 // Record gep's users.
423 for (User *GEPUser : GEP->users()) {
424 Users.push_back(std::make_pair(GEPUser, true));
425 }
426 } else {
427 // Record bitcast's users.
428 Users.push_back(std::make_pair(BitCastUser, false));
429 }
430
431 // Handle users.
432 bool IsGEPUser = false;
433 for (auto UserIter : Users) {
434 User *U = UserIter.first;
435 IsGEPUser = UserIter.second;
436
437 IRBuilder<> Builder(cast<Instruction>(U));
438
439 if (StoreInst *ST = dyn_cast<StoreInst>(U)) {
440 if (SrcTyBitWidth < DstTyBitWidth) {
441 //
442 // Consider below case.
443 //
444 // Original IR (float2* --> float4*)
445 // 1. val = load (float4*) src_addr
446 // 2. dst_addr = bitcast float2*, float4*
447 // 3. dst_addr = gep (float4*) dst_addr, idx
448 // 4. store (float4*) dst_addr
449 //
450 // Transformed IR
451 // 1. val(float4) = load (float4*) src_addr
452 // 2. val1(float2) = shufflevector (float4)val, (float4)undef,
453 // (float2)<0, 1>
454 // 3. val2(float2) = shufflevector (float4)val, (float4)undef,
455 // (float2)<2, 3>
456 // 4. dst_addr1(float2*) = gep (float2*)dst_addr, idx * 2
457 // 5. dst_addr2(float2*) = gep (float2*)dst_addr, idx * 2 + 1
458 // 6. store (float2)val1, (float2*)dst_addr1
459 // 7. store (float2)val2, (float2*)dst_addr2
460 //
461
462 unsigned NumElement = DstTyBitWidth / SrcTyBitWidth;
463 unsigned NumVector = 1;
464 // Vulkan SPIR-V does not support over 4 components for
465 // TypeVector.
466 if (NumElement > 4) {
467 NumVector = NumElement >> 2;
468 NumElement = 4;
469 }
470
471 // Create store values.
472 Type *TmpValTy = SrcTy;
473 if (DstTy->isVectorTy()) {
474 if (SrcEleTyBitWidth == DstEleTyBitWidth) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400475 TmpValTy = FixedVectorType::get(
476 SrcEleTy, DstVecTy->getElementCount().getKnownMinValue());
David Neto22f144c2017-06-12 14:26:21 -0400477 } else {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400478 TmpValTy = FixedVectorType::get(SrcEleTy, NumElement);
David Neto22f144c2017-06-12 14:26:21 -0400479 }
480 }
481
482 Value *STVal = ST->getValueOperand();
483 for (unsigned VIdx = 0; VIdx < NumVector; VIdx++) {
484 Value *TmpSTVal = nullptr;
485 if (NumVector == 1) {
486 TmpSTVal = Builder.CreateBitCast(STVal, TmpValTy);
487 } else {
488 unsigned DstVecTyNumElement =
alan-baker5a8c3be2020-09-09 13:44:26 -0400489 DstVecTy->getElementCount().getKnownMinValue() / NumVector;
alan-baker4a757f62020-04-22 08:17:49 -0400490 SmallVector<int32_t, 4> Idxs;
Marco Antognini7e338402021-03-15 12:48:37 +0000491 for (unsigned i = 0; i < DstVecTyNumElement; i++) {
David Neto22f144c2017-06-12 14:26:21 -0400492 Idxs.push_back(i + (DstVecTyNumElement * VIdx));
493 }
494 Value *UndefVal = UndefValue::get(DstTy);
495 TmpSTVal = Builder.CreateShuffleVector(STVal, UndefVal, Idxs);
496 TmpSTVal = Builder.CreateBitCast(TmpSTVal, TmpValTy);
497 }
498
499 SmallVector<Value *, 8> STValues;
500 if (!SrcTy->isVectorTy()) {
501 // Handle scalar type.
502 for (unsigned i = 0; i < NumElement; i++) {
503 Value *TmpVal = Builder.CreateExtractElement(
504 TmpSTVal, Builder.getInt32(i));
505 STValues.push_back(TmpVal);
506 }
507 } else {
508 // Handle vector type.
alan-baker5a8c3be2020-09-09 13:44:26 -0400509 unsigned SrcNumElement =
510 SrcVecTy->getElementCount().getKnownMinValue();
511 unsigned DstNumElement =
512 DstVecTy->getElementCount().getKnownMinValue();
David Neto22f144c2017-06-12 14:26:21 -0400513 for (unsigned i = 0; i < NumElement; i++) {
alan-baker4a757f62020-04-22 08:17:49 -0400514 SmallVector<int32_t, 4> Idxs;
Marco Antognini7e338402021-03-15 12:48:37 +0000515 for (unsigned j = 0; j < SrcNumElement; j++) {
David Neto22f144c2017-06-12 14:26:21 -0400516 Idxs.push_back(i * SrcNumElement + j);
517 }
518
519 VectorType *TmpVecTy =
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400520 FixedVectorType::get(SrcEleTy, DstNumElement);
David Neto22f144c2017-06-12 14:26:21 -0400521 Value *UndefVal = UndefValue::get(TmpVecTy);
522 Value *TmpVal =
523 Builder.CreateShuffleVector(TmpSTVal, UndefVal, Idxs);
524 STValues.push_back(TmpVal);
525 }
526 }
527
528 // Generate stores.
529 Value *SrcAddrIdx = NewAddrIdx;
530 Value *BaseAddr = BitCastSrc;
531 for (unsigned i = 0; i < NumElement; i++) {
532 // Calculate store address.
533 Value *DstAddr = Builder.CreateGEP(BaseAddr, SrcAddrIdx);
534 Builder.CreateStore(STValues[i], DstAddr);
535
536 if (i + 1 < NumElement) {
537 // Calculate next store address
538 SrcAddrIdx =
539 Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
540 }
541 }
542 }
543 } else if (SrcTyBitWidth > DstTyBitWidth) {
544 //
545 // Consider below case.
546 //
547 // Original IR (float4* --> float2*)
548 // 1. val = load (float2*) src_addr
549 // 2. dst_addr = bitcast float4*, float2*
550 // 3. dst_addr = gep (float2*) dst_addr, idx
551 // 4. store (float2) val, (float2*) dst_addr
552 //
Diego Novillo3cc8d7a2019-04-10 13:30:34 -0400553 // Transformed IR: Decompose the source vector into elements, then
554 // write them one at a time.
David Neto22f144c2017-06-12 14:26:21 -0400555 // 1. val = load (float2*) src_addr
556 // 2. val1 = (float)extract_element val, 0
557 // 3. val2 = (float)extract_element val, 1
David Neto30ae05e2017-09-06 19:58:36 -0400558 // // Source component k maps to destination component k * idxscale
559 // 3a. idxscale = sizeof(float4)/sizeof(float2)
560 // 3b. idxbase = idx / idxscale
561 // 3c. newarrayidx = idxbase * idxscale
562 // 4. dst_addr1 = gep (float4*) dst, newarrayidx
563 // 5. dst_addr2 = gep (float4*) dst, newarrayidx + 1
David Neto22f144c2017-06-12 14:26:21 -0400564 // 6. store (float)val1, (float*) dst_addr1
565 // 7. store (float)val2, (float*) dst_addr2
566 //
567
568 if (SrcTyBitWidth <= DstEleTyBitWidth) {
569 SrcTy->print(errs());
570 DstTy->print(errs());
571 llvm_unreachable("Handle above src/dst type.");
572 }
573
574 // Create store values.
575 Value *STVal = ST->getValueOperand();
576
577 if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400578 VectorType *TmpVecTy = FixedVectorType::get(
579 SrcEleTy, DstTyBitWidth / SrcEleTyBitWidth);
David Neto22f144c2017-06-12 14:26:21 -0400580 STVal = Builder.CreateBitCast(STVal, TmpVecTy);
581 }
582
583 SmallVector<Value *, 8> STValues;
David Neto30ae05e2017-09-06 19:58:36 -0400584 // How many destination writes are required?
David Neto22f144c2017-06-12 14:26:21 -0400585 unsigned DstNumElement = 1;
586 if (!DstTy->isVectorTy() || SrcEleTyBitWidth == DstTyBitWidth) {
587 // Handle scalar type.
588 STValues.push_back(STVal);
589 } else {
590 // Handle vector type.
alan-baker5a8c3be2020-09-09 13:44:26 -0400591 DstNumElement = DstVecTy->getElementCount().getKnownMinValue();
David Neto22f144c2017-06-12 14:26:21 -0400592 for (unsigned i = 0; i < DstNumElement; i++) {
593 Value *Idx = Builder.getInt32(i);
594 Value *TmpVal = Builder.CreateExtractElement(STVal, Idx);
595 STValues.push_back(TmpVal);
596 }
597 }
598
599 // Generate stores.
600 Value *BaseAddr = BitCastSrc;
601 Value *SubEleIdx = Builder.getInt32(0);
602 if (IsGEPUser) {
David Neto30ae05e2017-09-06 19:58:36 -0400603 // Compute SubNumElement = idxscale
alan-baker5a8c3be2020-09-09 13:44:26 -0400604 unsigned SubNumElement =
605 SrcVecTy->getElementCount().getKnownMinValue();
David Neto22f144c2017-06-12 14:26:21 -0400606 if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
David Neto30ae05e2017-09-06 19:58:36 -0400607 // Same condition under which DstNumElements > 1
alan-baker5a8c3be2020-09-09 13:44:26 -0400608 SubNumElement = SrcVecTy->getElementCount().getKnownMinValue() /
609 DstVecTy->getElementCount().getKnownMinValue();
David Neto22f144c2017-06-12 14:26:21 -0400610 }
611
David Neto30ae05e2017-09-06 19:58:36 -0400612 // Compute SubEleIdx = idxbase * idxscale
David Neto22f144c2017-06-12 14:26:21 -0400613 SubEleIdx = Builder.CreateAnd(
614 OrgGEPIdx, Builder.getInt32(SubNumElement - 1));
David Neto30ae05e2017-09-06 19:58:36 -0400615 if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
616 SubEleIdx = Builder.CreateShl(
617 SubEleIdx, Builder.getInt32(std::log2(SubNumElement)));
618 }
David Neto22f144c2017-06-12 14:26:21 -0400619 }
620
621 for (unsigned i = 0; i < DstNumElement; i++) {
622 // Calculate address.
623 if (i > 0) {
624 SubEleIdx = Builder.CreateAdd(SubEleIdx, Builder.getInt32(i));
625 }
626
627 Value *Idxs[] = {NewAddrIdx, SubEleIdx};
628 Value *DstAddr = Builder.CreateGEP(BaseAddr, Idxs);
629 Type *TmpSrcTy = SrcEleTy;
James Pricecf53df42020-04-20 14:41:24 -0400630 if (auto TmpSrcVecTy = dyn_cast<VectorType>(TmpSrcTy)) {
631 TmpSrcTy = TmpSrcVecTy->getElementType();
David Neto22f144c2017-06-12 14:26:21 -0400632 }
633 Value *TmpVal = Builder.CreateBitCast(STValues[i], TmpSrcTy);
634
635 Builder.CreateStore(TmpVal, DstAddr);
636 }
637 } else {
638 // if SrcTyBitWidth == DstTyBitWidth
639 Type *TmpSrcTy = SrcTy;
640 Value *DstAddr = Src;
641
642 if (IsGEPUser) {
643 SmallVector<Value *, 4> Idxs;
644 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
645 Idxs.push_back(GEP->getOperand(i));
646 }
647 DstAddr = Builder.CreateGEP(BitCastSrc, Idxs);
648
649 if (GEP->getNumOperands() > 2) {
650 TmpSrcTy = SrcEleTy;
651 }
652 }
653
654 Value *TmpVal =
655 Builder.CreateBitCast(ST->getValueOperand(), TmpSrcTy);
656 Builder.CreateStore(TmpVal, DstAddr);
657 }
658 } else if (LoadInst *LD = dyn_cast<LoadInst>(U)) {
659 Value *SrcAddrIdx = Builder.getInt32(0);
660 if (IsGEPUser) {
661 SrcAddrIdx = NewAddrIdx;
662 }
663
664 // Load value from src.
665 SmallVector<Value *, 8> LDValues;
666
667 for (unsigned i = 1; i <= NumIter; i++) {
668 Value *SrcAddr = Builder.CreateGEP(Src, SrcAddrIdx);
669 LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
670 LDValues.push_back(SrcVal);
671
672 if (i + 1 <= NumIter) {
673 // Calculate next SrcAddrIdx.
674 SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
675 }
676 }
677
678 Value *DstVal = nullptr;
679 if (SrcTyBitWidth > DstTyBitWidth) {
680 unsigned NumElement = SrcTyBitWidth / DstTyBitWidth;
681
682 if (SrcEleTyBitWidth == DstTyBitWidth) {
683 //
684 // Consider below case.
685 //
686 // Original IR (int4* --> char4*)
687 // 1. src_addr = bitcast int4*, char4*
688 // 2. element_addr = gep (char4*) src_addr, idx
689 // 3. load (char4*) element_addr
690 //
691 // Transformed IR
692 // 1. src_addr = gep (int4*) src, idx / 4
693 // 2. src_val(int4) = load (int4*) src_addr
694 // 3. tmp_val(int4) = extractelement src_val, idx % 4
695 // 4. dst_val(char4) = bitcast tmp_val, (char4)
696 //
697 Value *EleIdx = Builder.getInt32(0);
698 if (IsGEPUser) {
699 EleIdx = Builder.CreateAnd(OrgGEPIdx,
700 Builder.getInt32(NumElement - 1));
701 }
702 Value *TmpVal =
703 Builder.CreateExtractElement(LDValues[0], EleIdx, "tmp_val");
704 DstVal = Builder.CreateBitCast(TmpVal, DstTy);
705 } else if (SrcEleTyBitWidth < DstTyBitWidth) {
706 if (IsGEPUser) {
707 //
708 // Consider below case.
709 //
710 // Original IR (float4* --> float2*)
711 // 1. src_addr = bitcast float4*, float2*
712 // 2. element_addr = gep (float2*) src_addr, idx
713 // 3. load (float2*) element_addr
714 //
715 // Transformed IR
716 // 1. src_addr = gep (float4*) src, idx / 2
717 // 2. src_val(float4) = load (float4*) src_addr
718 // 3. tmp_val1(float) = extractelement (idx % 2) * 2
719 // 4. tmp_val2(float) = extractelement (idx % 2) * 2 + 1
720 // 5. dst_val(float2) = insertelement undef(float2), tmp_val1, 0
721 // 6. dst_val(float2) = insertelement undef(float2), tmp_val2, 1
722 // 7. dst_val(float2) = bitcast dst_val, (float2)
723 // ==> if types are same between src and dst, it will be
724 // igonored
725 //
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400726 VectorType *TmpVecTy = FixedVectorType::get(
727 SrcEleTy, DstTyBitWidth / SrcEleTyBitWidth);
David Neto22f144c2017-06-12 14:26:21 -0400728 DstVal = UndefValue::get(TmpVecTy);
729 Value *EleIdx = Builder.CreateAnd(
730 OrgGEPIdx, Builder.getInt32(NumElement - 1));
731 EleIdx = Builder.CreateShl(
732 EleIdx, Builder.getInt32(
733 std::log2(DstTyBitWidth / SrcEleTyBitWidth)));
734 Value *TmpOrgGEPIdx = EleIdx;
735 for (unsigned i = 0; i < NumElement; i++) {
736 Value *TmpVal = Builder.CreateExtractElement(
737 LDValues[0], TmpOrgGEPIdx, "tmp_val");
738 DstVal = Builder.CreateInsertElement(DstVal, TmpVal,
739 Builder.getInt32(i));
740
741 if (i + 1 < NumElement) {
742 TmpOrgGEPIdx =
743 Builder.CreateAdd(TmpOrgGEPIdx, Builder.getInt32(1));
744 }
745 }
746 } else {
747 //
748 // Consider below case.
749 //
750 // Original IR (float4* --> int2*)
751 // 1. src_addr = bitcast float4*, int2*
752 // 2. load (int2*) src_addr
753 //
754 // Transformed IR
755 // 1. src_val(float4) = load (float4*) src_addr
756 // 2. tmp_val(float2) = shufflevector (float4)src_val,
757 // (float4)undef,
758 // (float2)<0, 1>
759 // 3. dst_val(int2) = bitcast (float2)tmp_val, (int2)
760 //
761 unsigned NumElement = DstTyBitWidth / SrcEleTyBitWidth;
762 Value *Undef = UndefValue::get(SrcTy);
763
alan-baker4a757f62020-04-22 08:17:49 -0400764 SmallVector<int32_t, 4> Idxs;
Marco Antognini7e338402021-03-15 12:48:37 +0000765 for (unsigned i = 0; i < NumElement; i++) {
David Neto22f144c2017-06-12 14:26:21 -0400766 Idxs.push_back(i);
767 }
768 DstVal = Builder.CreateShuffleVector(LDValues[0], Undef, Idxs);
769
770 DstVal = Builder.CreateBitCast(DstVal, DstTy);
771 }
772
773 DstVal = Builder.CreateBitCast(DstVal, DstTy);
774 } else {
775 if (IsGEPUser) {
776 //
777 // Consider below case.
778 //
779 // Original IR (int4* --> char2*)
780 // 1. src_addr = bitcast int4*, char2*
781 // 2. element_addr = gep (char2*) src_addr, idx
782 // 3. load (char2*) element_addr
783 //
784 // Transformed IR
785 // 1. src_addr = gep (int4*) src, idx / 8
786 // 2. src_val(int4) = load (int4*) src_addr
787 // 3. tmp_val(int) = extractelement idx / 2
788 // 4. tmp_val(<i16 x 2>) = bitcast tmp_val(int), (<i16 x 2>)
789 // 5. tmp_val(i16) = extractelement idx % 2
790 // 6. dst_val(char2) = bitcast tmp_val, (char2)
791 // ==> if types are same between src and dst, it will be
792 // igonored
793 //
David Neto22f144c2017-06-12 14:26:21 -0400794 unsigned SubNumElement = SrcEleTyBitWidth / DstTyBitWidth;
795 if (SubNumElement != 2 && SubNumElement != 4) {
796 llvm_unreachable("Unsupported SubNumElement");
797 }
798
799 Value *TmpOrgGEPIdx = Builder.CreateLShr(
800 OrgGEPIdx, Builder.getInt32(std::log2(SubNumElement)));
801 Value *TmpVal = Builder.CreateExtractElement(
802 LDValues[0], TmpOrgGEPIdx, "tmp_val");
803 TmpVal = Builder.CreateBitCast(
804 TmpVal,
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400805 FixedVectorType::get(
David Neto22f144c2017-06-12 14:26:21 -0400806 IntegerType::get(DstTy->getContext(), DstTyBitWidth),
807 SubNumElement));
808 TmpOrgGEPIdx = Builder.CreateAnd(
809 OrgGEPIdx, Builder.getInt32(SubNumElement - 1));
810 TmpVal = Builder.CreateExtractElement(TmpVal, TmpOrgGEPIdx,
811 "tmp_val");
812 DstVal = Builder.CreateBitCast(TmpVal, DstTy);
813 } else {
814 Inst->print(errs());
815 llvm_unreachable("Handle this bitcast");
816 }
817 }
818 } else if (SrcTyBitWidth < DstTyBitWidth) {
819 //
820 // Consider below case.
821 //
822 // Original IR (float2* --> float4*)
823 // 1. src_addr = bitcast float2*, float4*
824 // 2. element_addr = gep (float4*) src_addr, idx
825 // 3. load (float4*) element_addr
826 //
827 // Transformed IR
828 // 1. src_addr = gep (float2*) src, idx * 2
829 // 2. src_val1(float2) = load (float2*) src_addr
830 // 3. src_addr2 = gep (float2*) src_addr, 1
831 // 4. src_val2(float2) = load (float2*) src_addr2
832 // 5. dst_val(float4) = shufflevector src_val1, src_val2, <0, 1, 2, 3>
833 // 6. dst_val(float4) = bitcast dst_val, (float4)
834 // ==> if types are same between src and dst, it will be ignored
835 //
836 unsigned NumElement = 1;
837 if (SrcTy->isVectorTy()) {
alan-baker5a8c3be2020-09-09 13:44:26 -0400838 NumElement = SrcVecTy->getElementCount().getKnownMinValue() * 2;
David Neto22f144c2017-06-12 14:26:21 -0400839 }
840
841 // Handle scalar type.
842 if (NumElement == 1) {
843 if (SrcTyBitWidth * 4 <= DstTyBitWidth) {
844 unsigned NumVecElement = DstTyBitWidth / SrcTyBitWidth;
845 unsigned NumVector = 1;
846 if (NumVecElement > 4) {
847 NumVector = NumVecElement >> 2;
848 NumVecElement = 4;
849 }
850
851 SmallVector<Value *, 4> Values;
852 for (unsigned VIdx = 0; VIdx < NumVector; VIdx++) {
853 // In this case, generate only insert element. It generates
854 // less instructions than using shuffle vector.
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400855 VectorType *TmpVecTy =
856 FixedVectorType::get(SrcTy, NumVecElement);
David Neto22f144c2017-06-12 14:26:21 -0400857 Value *TmpVal = UndefValue::get(TmpVecTy);
858 for (unsigned i = 0; i < NumVecElement; i++) {
859 TmpVal = Builder.CreateInsertElement(
860 TmpVal, LDValues[i + (VIdx * 4)], Builder.getInt32(i));
861 }
862 Values.push_back(TmpVal);
863 }
864
865 if (Values.size() > 2) {
866 Inst->print(errs());
867 llvm_unreachable("Support above bitcast");
868 }
869
870 if (Values.size() > 1) {
871 Type *TmpEleTy =
872 Type::getIntNTy(M.getContext(), SrcEleTyBitWidth * 2);
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400873 VectorType *TmpVecTy =
874 FixedVectorType::get(TmpEleTy, NumVector);
David Neto22f144c2017-06-12 14:26:21 -0400875 for (unsigned i = 0; i < Values.size(); i++) {
876 Values[i] = Builder.CreateBitCast(Values[i], TmpVecTy);
877 }
alan-baker4a757f62020-04-22 08:17:49 -0400878 SmallVector<int32_t, 4> Idxs;
Marco Antognini7e338402021-03-15 12:48:37 +0000879 for (unsigned i = 0; i < (NumVector * 2); i++) {
David Neto22f144c2017-06-12 14:26:21 -0400880 Idxs.push_back(i);
881 }
882 for (unsigned i = 0; i < Values.size(); i = i + 2) {
883 Values[i] = Builder.CreateShuffleVector(
884 Values[i], Values[i + 1], Idxs);
885 }
886 }
887
888 LDValues.clear();
889 LDValues.push_back(Values[0]);
890 } else {
891 SmallVector<Value *, 4> TmpLDValues;
892 for (unsigned i = 0; i < LDValues.size(); i = i + 2) {
alan-bakerb3e2b6d2020-06-24 23:59:57 -0400893 VectorType *TmpVecTy = FixedVectorType::get(SrcTy, 2);
David Neto22f144c2017-06-12 14:26:21 -0400894 Value *TmpVal = UndefValue::get(TmpVecTy);
895 TmpVal = Builder.CreateInsertElement(TmpVal, LDValues[i],
896 Builder.getInt32(0));
897 TmpVal = Builder.CreateInsertElement(TmpVal, LDValues[i + 1],
898 Builder.getInt32(1));
899 TmpLDValues.push_back(TmpVal);
900 }
901 LDValues.clear();
902 LDValues = std::move(TmpLDValues);
903 NumElement = 4;
904 }
905 }
906
907 // Handle vector type.
908 while (LDValues.size() != 1) {
909 SmallVector<Value *, 4> TmpLDValues;
910 for (unsigned i = 0; i < LDValues.size(); i = i + 2) {
alan-baker4a757f62020-04-22 08:17:49 -0400911 SmallVector<int32_t, 4> Idxs;
Marco Antognini7e338402021-03-15 12:48:37 +0000912 for (unsigned j = 0; j < NumElement; j++) {
David Neto22f144c2017-06-12 14:26:21 -0400913 Idxs.push_back(j);
914 }
915 Value *TmpVal = Builder.CreateShuffleVector(
916 LDValues[i], LDValues[i + 1], Idxs);
917 TmpLDValues.push_back(TmpVal);
918 }
919 LDValues.clear();
920 LDValues = std::move(TmpLDValues);
921 NumElement *= 2;
922 }
923
924 DstVal = Builder.CreateBitCast(LDValues[0], DstTy);
925 } else {
926 //
927 // Consider below case.
928 //
929 // Original IR (float4* --> int4*)
930 // 1. src_addr = bitcast float4*, int4*
931 // 2. element_addr = gep (int4*) src_addr, idx, 0
932 // 3. load (int) element_addr
933 //
934 // Transformed IR
935 // 1. element_addr = gep (float4*) src_addr, idx, 0
936 // 2. src_val = load (float*) element_addr
937 // 3. val = bitcast (float) src_val to (int)
938 //
939 Value *SrcAddr = Src;
940 if (IsGEPUser) {
941 SmallVector<Value *, 4> Idxs;
942 for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
943 Idxs.push_back(GEP->getOperand(i));
944 }
945 SrcAddr = Builder.CreateGEP(Src, Idxs);
946 }
947 LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
948
949 Type *TmpDstTy = DstTy;
950 if (IsGEPUser) {
951 if (GEP->getNumOperands() > 2) {
952 TmpDstTy = DstEleTy;
953 }
954 }
955 DstVal = Builder.CreateBitCast(SrcVal, TmpDstTy);
956 }
957
958 // Update LD's users with DstVal.
959 LD->replaceAllUsesWith(DstVal);
960 } else {
961 U->print(errs());
962 llvm_unreachable(
963 "Handle above user of gep on ReplacePointerBitcastPass");
964 }
965
966 ToBeDeleted.push_back(cast<Instruction>(U));
967 }
968
969 if (IsGEPUser) {
970 ToBeDeleted.push_back(GEP);
971 }
972 }
973
Marco Antognini42b65222021-02-12 18:40:42 +0000974 // Schedule for removal only if Inst has no users. If all its users are
975 // later also replaced in the module, Inst will be removed by transitivity.
976 if (Inst->user_empty()) {
977 ToBeDeleted.push_back(Inst);
978 }
David Neto22f144c2017-06-12 14:26:21 -0400979 }
980
981 for (Instruction *Inst : ScalarWorkList) {
David Neto8e138142018-05-29 10:19:21 -0400982 // Some tests have a stray bitcast from pointer-to-array to
983 // pointer to i8*, but the bitcast has no uses. Exit early
984 // but be sure to delete it later.
985 //
986 // Example:
987 // %1 = bitcast [25 x float]* %dst to i8*
988
989 // errs () << " Scalar bitcast is " << *Inst << "\n";
990
Marco Antognini42b65222021-02-12 18:40:42 +0000991 if (Inst->use_empty()) {
David Neto8e138142018-05-29 10:19:21 -0400992 ToBeDeleted.push_back(Inst);
993 continue;
994 }
995
David Neto22f144c2017-06-12 14:26:21 -0400996 Value *Src = Inst->getOperand(0);
David Neto8e138142018-05-29 10:19:21 -0400997 Type *SrcTy; // Original type
998 Type *DstTy; // Type that SrcTy is cast to.
999 unsigned SrcTyBitWidth;
1000 unsigned DstTyBitWidth;
1001
alan-baker1b13e8f2019-08-08 17:56:51 -04001002 bool BailOut = false;
David Neto8e138142018-05-29 10:19:21 -04001003 SrcTy = Src->getType()->getPointerElementType();
1004 DstTy = Inst->getType()->getPointerElementType();
1005 int iter_count = 0;
1006 while (++iter_count) {
1007 SrcTyBitWidth = unsigned(DL.getTypeStoreSizeInBits(SrcTy));
1008 DstTyBitWidth = unsigned(DL.getTypeStoreSizeInBits(DstTy));
1009#if 0
1010 errs() << " Try Src " << *Src << "\n";
1011 errs() << " SrcTy elem " << *SrcTy << " bit width " << SrcTyBitWidth
1012 << "\n";
1013 errs() << " DstTy elem " << *DstTy << " bit width " << DstTyBitWidth
1014 << "\n";
1015#endif
1016
1017 // The normal case that we can handle is when the source type is no
1018 // larger than the dest type.
1019 if (SrcTyBitWidth <= DstTyBitWidth)
1020 break;
1021
1022 // The Source type is bigger than the destination type.
1023 // Walk into the source type to break it down.
1024 if (SrcTy->isArrayTy()) {
1025 // If it's an array, consider only the first element.
1026 Value *Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001027 Instruction *NewSrc =
1028 GetElementPtrInst::CreateInBounds(Src, {Zero, Zero});
alan-baker1b13e8f2019-08-08 17:56:51 -04001029 Changed = true;
David Neto8e138142018-05-29 10:19:21 -04001030 // errs() << "NewSrc is " << *NewSrc << "\n";
1031 if (auto *SrcInst = dyn_cast<Instruction>(Src)) {
1032 // errs() << " instruction case\n";
1033 NewSrc->insertAfter(SrcInst);
1034 } else {
1035 // Could be a parameter.
1036 auto where = Inst->getParent()
1037 ->getParent()
1038 ->getEntryBlock()
1039 .getFirstInsertionPt();
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001040 Instruction &whereInst = *where;
David Neto8e138142018-05-29 10:19:21 -04001041 // errs() << "insert " << *NewSrc << " before " << whereInst << "\n";
1042 NewSrc->insertBefore(&whereInst);
1043 }
1044 Src = NewSrc;
1045 SrcTy = Src->getType()->getPointerElementType();
1046 } else {
alan-baker1b13e8f2019-08-08 17:56:51 -04001047 BailOut = true;
1048 break;
David Neto8e138142018-05-29 10:19:21 -04001049 }
1050 if (iter_count > 1000) {
1051 llvm_unreachable("ReplacePointerBitcastPass: Too many iterations!");
1052 }
Marco Antognini42b65222021-02-12 18:40:42 +00001053 }
David Neto8e138142018-05-29 10:19:21 -04001054#if 0
1055 errs() << " Src is " << *Src << "\n";
1056 errs() << " Dst is " << *Inst << "\n";
1057 errs() << " SrcTy elem " << *SrcTy << " bit width " << SrcTyBitWidth
1058 << "\n";
1059 errs() << " DstTy elem " << *DstTy << " bit width " << DstTyBitWidth
1060 << "\n";
1061#endif
David Neto22f144c2017-06-12 14:26:21 -04001062
alan-baker1b13e8f2019-08-08 17:56:51 -04001063 // Only dead code has been generated up to this point so it is safe to bail
1064 // out.
1065 if (BailOut) {
1066 continue;
1067 }
1068
David Neto22f144c2017-06-12 14:26:21 -04001069 for (User *BitCastUser : Inst->users()) {
1070 Value *NewAddrIdx = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
1071 // Each entry pairs a User* with a bool: true if it was reached through a gep.
1072 SmallVector<std::pair<User *, bool>, 32> Users;
1073
1074 GetElementPtrInst *GEP = nullptr;
Jason Gavrise44af072018-08-14 20:44:50 -04001075 if ((GEP = dyn_cast<GetElementPtrInst>(BitCastUser))) {
David Neto22f144c2017-06-12 14:26:21 -04001076 IRBuilder<> Builder(GEP);
1077
1078 // Build new src/dst address.
David Neto22f144c2017-06-12 14:26:21 -04001079 NewAddrIdx = CalculateNewGEPIdx(SrcTyBitWidth, DstTyBitWidth, GEP);
1080
1081 // If bitcast's user is gep, investigate gep's users too.
1082 for (User *GEPUser : GEP->users()) {
1083 Users.push_back(std::make_pair(GEPUser, true));
1084 }
1085 } else {
1086 Users.push_back(std::make_pair(BitCastUser, false));
1087 }
1088
1089 // Handle users.
1090 bool IsGEPUser = false;
1091 for (auto UserIter : Users) {
1092 User *U = UserIter.first;
1093 IsGEPUser = UserIter.second;
1094
1095 IRBuilder<> Builder(cast<Instruction>(U));
1096
1097 // Handle store instruction with gep.
1098 if (StoreInst *ST = dyn_cast<StoreInst>(U)) {
Diego Novillo3cc8d7a2019-04-10 13:30:34 -04001099 // errs() << " store is " << *ST << "\n";
David Neto22f144c2017-06-12 14:26:21 -04001100 if (SrcTyBitWidth == DstTyBitWidth) {
alan-baker32014272019-05-22 08:07:18 -04001101 auto STVal = ConvertValue(ST->getValueOperand(), SrcTy, Builder);
David Neto22f144c2017-06-12 14:26:21 -04001102 Value *DstAddr = Builder.CreateGEP(Src, NewAddrIdx);
1103 Builder.CreateStore(STVal, DstAddr);
1104 } else if (SrcTyBitWidth < DstTyBitWidth) {
1105 unsigned NumElement = DstTyBitWidth / SrcTyBitWidth;
1106
David Neto22f144c2017-06-12 14:26:21 -04001107 // Create store values.
1108 Value *STVal = ST->getValueOperand();
1109 SmallVector<Value *, 8> STValues;
1110 for (unsigned i = 0; i < NumElement; i++) {
1111 Type *TmpTy = Type::getIntNTy(M.getContext(), DstTyBitWidth);
1112 Value *TmpVal = Builder.CreateBitCast(STVal, TmpTy);
James Price51952282020-02-14 09:41:11 -05001113 TmpVal = Builder.CreateLShr(
1114 TmpVal, Builder.getIntN(DstTyBitWidth, i * SrcTyBitWidth));
David Neto22f144c2017-06-12 14:26:21 -04001115 TmpVal = Builder.CreateTrunc(TmpVal, SrcTy);
1116 STValues.push_back(TmpVal);
1117 }
1118
1119 // Generate stores.
1120 Value *SrcAddrIdx = NewAddrIdx;
1121 Value *BaseAddr = Src;
1122 for (unsigned i = 0; i < NumElement; i++) {
1123 // Calculate store address.
1124 Value *DstAddr = Builder.CreateGEP(BaseAddr, SrcAddrIdx);
1125 Builder.CreateStore(STValues[i], DstAddr);
1126
1127 if (i + 1 < NumElement) {
1128 // Calculate next store address
1129 SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
1130 }
1131 }
1132
1133 } else {
1134 Inst->print(errs());
1135 llvm_unreachable("Handle different size store with scalar "
1136 "bitcast on ReplacePointerBitcastPass");
1137 }
1138 } else if (LoadInst *LD = dyn_cast<LoadInst>(U)) {
1139 if (SrcTyBitWidth == DstTyBitWidth) {
1140 Value *SrcAddr = Builder.CreateGEP(Src, NewAddrIdx);
1141 LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
alan-baker32014272019-05-22 08:07:18 -04001142 LD->replaceAllUsesWith(ConvertValue(SrcVal, DstTy, Builder));
David Neto22f144c2017-06-12 14:26:21 -04001143 } else if (SrcTyBitWidth < DstTyBitWidth) {
1144 Value *SrcAddrIdx = NewAddrIdx;
1145
1146 // Load value from src.
1147 unsigned NumIter = CalculateNumIter(SrcTyBitWidth, DstTyBitWidth);
1148 SmallVector<Value *, 8> LDValues;
1149 for (unsigned i = 1; i <= NumIter; i++) {
1150 Value *SrcAddr = Builder.CreateGEP(Src, SrcAddrIdx);
1151 LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
1152 LDValues.push_back(SrcVal);
1153
1154 if (i + 1 <= NumIter) {
1155 // Calculate next SrcAddrIdx.
1156 SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
1157 }
1158 }
1159
1160 // Merge Load.
1161 Type *TmpSrcTy = Type::getIntNTy(M.getContext(), SrcTyBitWidth);
1162 Value *DstVal = Builder.CreateBitCast(LDValues[0], TmpSrcTy);
1163 Type *TmpDstTy = Type::getIntNTy(M.getContext(), DstTyBitWidth);
1164 DstVal = Builder.CreateZExt(DstVal, TmpDstTy);
1165 for (unsigned i = 1; i < LDValues.size(); i++) {
1166 Value *TmpVal = Builder.CreateBitCast(LDValues[i], TmpSrcTy);
1167 TmpVal = Builder.CreateZExt(TmpVal, TmpDstTy);
Kévin Petit0c9cd042020-04-06 17:32:41 +01001168 TmpVal = Builder.CreateShl(
1169 TmpVal, Builder.getIntN(DstTyBitWidth, i * SrcTyBitWidth));
David Neto22f144c2017-06-12 14:26:21 -04001170 DstVal = Builder.CreateOr(DstVal, TmpVal);
1171 }
1172
1173 DstVal = Builder.CreateBitCast(DstVal, DstTy);
1174 LD->replaceAllUsesWith(DstVal);
1175
1176 } else {
1177 Inst->print(errs());
1178 llvm_unreachable("Handle different size load with scalar "
1179 "bitcast on ReplacePointerBitcastPass");
1180 }
1181 } else {
David Neto22f144c2017-06-12 14:26:21 -04001182 Inst->print(errs());
1183 llvm_unreachable("Handle above user of scalar bitcast with gep on "
1184 "ReplacePointerBitcastPass");
1185 }
1186
1187 ToBeDeleted.push_back(cast<Instruction>(U));
1188 }
1189
1190 if (IsGEPUser) {
1191 ToBeDeleted.push_back(GEP);
1192 }
1193 }
1194
Marco Antognini42b65222021-02-12 18:40:42 +00001195 // Schedule for removal only if Inst has no users. If all its users are
1196 // later also replaced in the module, Inst will be removed by transitivity.
1197 if (Inst->user_empty()) {
1198 ToBeDeleted.push_back(Inst);
1199 }
David Neto22f144c2017-06-12 14:26:21 -04001200 }
1201
Marco Antognini42b65222021-02-12 18:40:42 +00001202 // Remove all dead instructions, including their dead operands. Proceed with a
1203 // fixed-point algorithm to handle dependencies.
1204 for (bool Progress = true; Progress;) {
1205 std::size_t PreviousSize = ToBeDeleted.size();
1206
1207 WeakInstructions Deads;
1208 WeakInstructions NextBatch;
1209 for (WeakTrackingVH Handle : ToBeDeleted) {
1210 if (!Handle.pointsToAliveValue())
1211 continue;
1212
1213 auto *Inst = cast<Instruction>(Handle);
1214
1215 // We need to remove stores manually given they are never trivially dead.
1216 if (auto *Store = dyn_cast<StoreInst>(Inst)) {
1217 Store->eraseFromParent();
1218 continue;
1219 }
1220
1221 if (isInstructionTriviallyDead(Inst)) {
1222 Deads.push_back(Handle);
1223 } else {
1224 NextBatch.push_back(Handle);
1225 }
1226 }
1227
1228 RecursivelyDeleteTriviallyDeadInstructions(Deads);
1229
1230 ToBeDeleted = std::move(NextBatch);
1231 Progress = (ToBeDeleted.size() < PreviousSize);
David Neto22f144c2017-06-12 14:26:21 -04001232 }
1233
Marco Antognini42b65222021-02-12 18:40:42 +00001234 assert(ToBeDeleted.empty() && "Some instructions were not deleted.");
1235
David Neto22f144c2017-06-12 14:26:21 -04001236 return Changed;
1237}