Blame - lib/ReplacePointerBitcastPass.cpp - chromium.googlesource.com/chromiumos/third_party/clspv

blob: e8b3378eb0a731a2619fe6af5409fbf33ca9021c [file] [log] [blame]

David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1	// Copyright 2017 The Clspv Authors. All rights reserved.
				2	//
				3	// Licensed under the Apache License, Version 2.0 (the "License");
				4	// you may not use this file except in compliance with the License.
				5	// You may obtain a copy of the License at
				6	//
				7	// http://www.apache.org/licenses/LICENSE-2.0
				8	//
				9	// Unless required by applicable law or agreed to in writing, software
				10	// distributed under the License is distributed on an "AS IS" BASIS,
				11	// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
				12	// See the License for the specific language governing permissions and
				13	// limitations under the License.
				14
David Neto	118188e	2018-08-24 11:27:54 -0400	[diff] [blame]	15	#include "llvm/IR/DataLayout.h"
				16	#include "llvm/IR/IRBuilder.h"
				17	#include "llvm/IR/Instructions.h"
				18	#include "llvm/IR/Module.h"
				19	#include "llvm/Pass.h"
				20	#include "llvm/Support/raw_ostream.h"
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	21	#include "llvm/Transforms/Utils/Local.h"
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	22
Diego Novillo	a4c44fa	2019-04-11 10:56:15 -0400	[diff] [blame]	23	#include "Passes.h"
				24
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	25	using namespace llvm;
				26
				27	#define DEBUG_TYPE "replacepointerbitcast"
				28
				29	namespace {
				30	struct ReplacePointerBitcastPass : public ModulePass {
				31	static char ID;
				32	ReplacePointerBitcastPass() : ModulePass(ID) {}
				33
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	34	// Returns the number of chunks of source data required to exactly
				35	// cover the destination data, if the source and destination types are
				36	// different sizes. Otherwise returns 0.
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	37	unsigned CalculateNumIter(unsigned SrcTyBitWidth, unsigned DstTyBitWidth);
				38	Value *CalculateNewGEPIdx(unsigned SrcTyBitWidth, unsigned DstTyBitWidth,
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	39	GetElementPtrInst *GEP);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	40
				41	bool runOnModule(Module &M) override;
				42	};
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	43	} // namespace
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	44
				45	char ReplacePointerBitcastPass::ID = 0;
Diego Novillo	a4c44fa	2019-04-11 10:56:15 -0400	[diff] [blame]	46	INITIALIZE_PASS(ReplacePointerBitcastPass, "ReplacePointerBitcast",
				47	"Replace Pointer Bitcast Pass", false, false)
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	48
				49	namespace clspv {
				50	ModulePass *createReplacePointerBitcastPass() {
				51	return new ReplacePointerBitcastPass();
				52	}
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	53	} // namespace clspv
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	54
alan-baker	3201427	2019-05-22 08:07:18 -0400	[diff] [blame]	55	namespace {
				56
				57	// Gathers the scalar values of \|v\| into \|elements\|. Generates new instructions
				58	// to extract the values.
				59	void GatherBaseElements(Value v, SmallVectorImpl<Value > *elements,
				60	IRBuilder<> &builder) {
				61	auto *module = builder.GetInsertBlock()->getParent()->getParent();
				62	auto &DL = module->getDataLayout();
				63	auto *type = v->getType();
				64	if (auto *vec_type = dyn_cast<VectorType>(type)) {
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	65	for (uint64_t i = 0; i != vec_type->getElementCount().getKnownMinValue();
				66	++i) {
alan-baker	3201427	2019-05-22 08:07:18 -0400	[diff] [blame]	67	elements->push_back(builder.CreateExtractElement(v, i));
				68	}
				69	} else if (auto *array_type = dyn_cast<ArrayType>(type)) {
				70	for (uint64_t i = 0; i != array_type->getNumElements(); ++i) {
				71	auto *extract = builder.CreateExtractValue(v, {static_cast<unsigned>(i)});
				72	GatherBaseElements(extract, elements, builder);
				73	}
				74	} else if (auto *struct_type = dyn_cast<StructType>(type)) {
				75	const auto *struct_layout = DL.getStructLayout(struct_type);
				76	if (struct_layout->hasPadding()) {
				77	llvm_unreachable("Unhandled conversion of padded struct");
				78	}
				79	for (unsigned i = 0; i != struct_type->getNumElements(); ++i) {
				80	auto *extract = builder.CreateExtractValue(v, {i});
				81	GatherBaseElements(extract, elements, builder);
				82	}
				83	} else {
				84	elements->push_back(v);
				85	}
				86	}
				87
				88	// Returns a value of \|dst_type\| using the elemental members of \|src_elements\|.
				89	Value BuildFromElements(Type dst_type, const ArrayRef<Value *> &src_elements,
				90	unsigned used_bits, unsigned index,
				91	IRBuilder<> &builder) {
				92	auto *module = builder.GetInsertBlock()->getParent()->getParent();
				93	auto &DL = module->getDataLayout();
				94	auto &context = dst_type->getContext();
				95	Value *dst = nullptr;
				96	// Arrays, vectors and structs are annoyingly just different enough to each
				97	// require their own cases.
				98	if (auto *dst_array_ty = dyn_cast<ArrayType>(dst_type)) {
				99	auto *ele_ty = dst_array_ty->getElementType();
				100	for (uint64_t i = 0; i != dst_array_ty->getNumElements(); ++i) {
				101	auto *tmp_value =
				102	BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
				103	auto *prev = dst ? dst : UndefValue::get(dst_type);
				104	dst = builder.CreateInsertValue(prev, tmp_value,
				105	{static_cast<unsigned>(i)});
				106	}
				107	} else if (auto *dst_struct_ty = dyn_cast<StructType>(dst_type)) {
				108	const auto *struct_layout = DL.getStructLayout(dst_struct_ty);
				109	if (struct_layout->hasPadding()) {
				110	llvm_unreachable("Unhandled padded struct conversion");
				111	return nullptr;
				112	}
				113	for (unsigned i = 0; i != dst_struct_ty->getNumElements(); ++i) {
				114	auto *ele_ty = dst_struct_ty->getElementType(i);
				115	auto *tmp_value =
				116	BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
				117	auto *prev = dst ? dst : UndefValue::get(dst_type);
				118	dst = builder.CreateInsertValue(prev, tmp_value, {i});
				119	}
				120	} else if (auto *dst_vec_ty = dyn_cast<VectorType>(dst_type)) {
				121	auto *ele_ty = dst_vec_ty->getElementType();
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	122	for (uint64_t i = 0; i != dst_vec_ty->getElementCount().getKnownMinValue();
				123	++i) {
alan-baker	3201427	2019-05-22 08:07:18 -0400	[diff] [blame]	124	auto *tmp_value =
				125	BuildFromElements(ele_ty, src_elements, used_bits, index, builder);
				126	auto *prev = dst ? dst : UndefValue::get(dst_type);
				127	dst = builder.CreateInsertElement(prev, tmp_value, i);
				128	}
				129	} else {
				130	// Scalar conversion eats up elements in src_elements.
				131	auto dst_width = DL.getTypeStoreSizeInBits(dst_type);
				132	uint64_t bits = 0;
				133	Value *tmp_value = nullptr;
				134	auto prev_bits = 0;
				135	Value *ele_int_cast = nullptr;
				136	while (bits < dst_width) {
				137	prev_bits = bits;
				138	auto ele = src_elements[index];
				139	auto *ele_ty = ele->getType();
				140	auto ele_width = DL.getTypeStoreSizeInBits(ele_ty);
				141	auto remaining_bits = ele_width - *used_bits;
				142	auto needed_bits = dst_width - bits;
				143	// Create a reusable cast to an integer type for this element.
				144	if (!ele_int_cast \|\| cast<User>(ele_int_cast)->getOperand(0) != ele) {
				145	ele_int_cast =
				146	builder.CreateBitCast(ele, IntegerType::get(context, ele_width));
				147	}
				148	tmp_value = ele_int_cast;
				149	// Some of the bits of this element were previously used, so shift the
				150	// value that many bits.
				151	if (*used_bits != 0) {
				152	tmp_value = builder.CreateLShr(tmp_value, *used_bits);
				153	}
				154	if (needed_bits < remaining_bits) {
				155	// Ensure only the needed bits are used.
				156	uint64_t mask = (1ull << needed_bits) - 1;
				157	tmp_value =
				158	builder.CreateAnd(tmp_value, builder.getIntN(dst_width, mask));
				159	}
				160	// Cast to tbe destination bit width, but stay as a integer type.
				161	if (ele_width != dst_width) {
				162	tmp_value = builder.CreateIntCast(
				163	tmp_value, IntegerType::get(context, dst_width), false);
				164	}
				165
				166	if (remaining_bits <= needed_bits) {
				167	// Used the rest of the element.
				168	*used_bits = 0;
				169	++(*index);
				170	bits += remaining_bits;
				171	} else {
				172	// Only need part of this element.
				173	*used_bits += needed_bits;
				174	bits += needed_bits;
				175	}
				176
				177	if (dst) {
				178	// Previous iteration generated an integer of the right size. That needs
				179	// to be combined with the value generated this iteration.
				180	tmp_value = builder.CreateShl(tmp_value, prev_bits);
				181	dst = builder.CreateOr(dst, tmp_value);
				182	} else {
				183	dst = tmp_value;
				184	}
				185	}
				186
				187	assert(bits <= dst_width);
				188	if (bits == dst_width && dst_type != dst->getType()) {
				189	// Finally, cast away from the working integer type if necessary.
				190	dst = builder.CreateBitCast(dst, dst_type);
				191	}
				192	}
				193
				194	return dst;
				195	}
				196
				197	// Returns an equivalent value of \|src\| as \|dst_type\|.
				198	//
				199	// This function requires \|src\|'s and \|dst_type\|'s bit widths match. Does not
				200	// introduce new integer sizes, but generates multiple instructions to mimic a
				201	// generic bitcast (unless a bitcast is sufficient).
				202	Value ConvertValue(Value src, Type *dst_type, IRBuilder<> &builder) {
				203	auto *src_type = src->getType();
				204	auto *module = builder.GetInsertBlock()->getParent()->getParent();
				205	auto &DL = module->getDataLayout();
				206	if (!src_type->isFirstClassType() \|\| !dst_type->isFirstClassType() \|\|
				207	src_type->isAggregateType() \|\| dst_type->isAggregateType()) {
				208	SmallVector<Value *, 8> src_elements;
				209	if (src_type->isAggregateType()) {
				210	GatherBaseElements(src, &src_elements, builder);
				211	} else {
				212	src_elements.push_back(src);
				213	}
				214
				215	// Check that overall sizes make sense.
				216	uint64_t element_sum = 0;
				217	// Can only successfully convert unpadded structs.
				218	for (auto element : src_elements) {
				219	element_sum += DL.getTypeStoreSizeInBits(element->getType());
				220	}
				221	if (DL.getTypeStoreSizeInBits(dst_type) != element_sum) {
				222	llvm_unreachable("Elements do not sum to overall size");
				223	return nullptr;
				224	}
				225
				226	unsigned used_bits = 0;
				227	unsigned index = 0;
				228	return BuildFromElements(dst_type, src_elements, &used_bits, &index,
				229	builder);
				230	} else {
				231	return builder.CreateBitCast(src, dst_type);
				232	}
				233
				234	return nullptr;
				235	}
				236
				237	} // namespace
				238
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	239	unsigned ReplacePointerBitcastPass::CalculateNumIter(unsigned SrcTyBitWidth,
				240	unsigned DstTyBitWidth) {
				241	unsigned NumIter = 0;
				242	if (SrcTyBitWidth > DstTyBitWidth) {
				243	if (SrcTyBitWidth % DstTyBitWidth) {
				244	llvm_unreachable(
				245	"Src type bitwidth should be multiple of Dest type bitwidth");
				246	}
				247	NumIter = 1;
				248	} else if (SrcTyBitWidth < DstTyBitWidth) {
				249	if (DstTyBitWidth % SrcTyBitWidth) {
				250	llvm_unreachable(
				251	"Dest type bitwidth should be multiple of Src type bitwidth");
				252	}
				253	NumIter = DstTyBitWidth / SrcTyBitWidth;
				254	} else {
				255	NumIter = 0;
				256	}
				257
				258	return NumIter;
				259	}
				260
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	261	Value *ReplacePointerBitcastPass::CalculateNewGEPIdx(unsigned SrcTyBitWidth,
				262	unsigned DstTyBitWidth,
				263	GetElementPtrInst *GEP) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	264	Value *NewGEPIdx = GEP->getOperand(1);
				265	IRBuilder<> Builder(GEP);
				266
				267	if (SrcTyBitWidth > DstTyBitWidth) {
				268	if (GEP->getNumOperands() > 2) {
				269	GEP->print(errs());
				270	llvm_unreachable("Support above GEP on PointerBitcastPass");
				271	}
				272
				273	NewGEPIdx = Builder.CreateLShr(
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	274	NewGEPIdx, Builder.getInt32(std::log2(SrcTyBitWidth / DstTyBitWidth)));
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	275	} else if (DstTyBitWidth > SrcTyBitWidth) {
				276	if (GEP->getNumOperands() > 2) {
				277	GEP->print(errs());
				278	llvm_unreachable("Support above GEP on PointerBitcastPass");
				279	}
				280
				281	NewGEPIdx = Builder.CreateShl(
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	282	NewGEPIdx, Builder.getInt32(std::log2(DstTyBitWidth / SrcTyBitWidth)));
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	283	}
				284
				285	return NewGEPIdx;
				286	}
				287
				288	bool ReplacePointerBitcastPass::runOnModule(Module &M) {
				289	bool Changed = false;
				290
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	291	const DataLayout &DL = M.getDataLayout();
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	292
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	293	using WeakInstructions = SmallVector<WeakTrackingVH, 16>;
				294	WeakInstructions ToBeDeleted;
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	295	SmallVector<Instruction *, 16> VectorWorkList;
				296	SmallVector<Instruction *, 16> ScalarWorkList;
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	297	SmallVector<User *, 16> UserWorkList;
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	298	for (Function &F : M) {
				299	for (BasicBlock &BB : F) {
				300	for (Instruction &I : BB) {
				301	// Find pointer bitcast instruction.
				302	if (isa<BitCastInst>(&I) && isa<PointerType>(I.getType())) {
				303	Value *Src = I.getOperand(0);
				304	if (isa<PointerType>(Src->getType())) {
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	305	// Check if this bitcast is one that can be handled during this run
				306	// of the pass. If not, just skip it and don't make changes to the
				307	// module. These checks are coarse level checks that only the right
				308	// instructions appear. Rejected bitcasts might be able to be
				309	// handled later in the flow after further optimization.
				310	UserWorkList.clear();
				311	for (auto User : I.users()) {
				312	UserWorkList.push_back(User);
				313	}
				314	bool ok = true;
				315	while (!UserWorkList.empty()) {
				316	auto User = UserWorkList.back();
				317	UserWorkList.pop_back();
				318
				319	if (isa<GetElementPtrInst>(User)) {
				320	for (auto GEPUser : User->users()) {
				321	UserWorkList.push_back(GEPUser);
				322	}
				323	} else if (!isa<StoreInst>(User) && !isa<LoadInst>(User)) {
				324	// Cannot handle this bitcast.
				325	ok = false;
				326	break;
				327	}
				328	}
				329	if (!ok) {
				330	continue;
				331	}
				332
alan-baker	ad1a12f	2020-08-25 09:18:38 -0400	[diff] [blame]	333	auto inst = &I;
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	334	Type *SrcEleTy =
alan-baker	ad1a12f	2020-08-25 09:18:38 -0400	[diff] [blame]	335	inst->getOperand(0)->getType()->getPointerElementType();
				336
				337	// De-"canonicalize" the input pointer.
				338	// If Src is an array, LLVM has likely canonicalized all GEPs to
				339	// the first element away as the following addresses are all
				340	// equivalent:
				341	// * %in = alloca [4 x [4 x float]]
				342	// * %gep0 = getelementptr [4 x [4 x float]], [4 x [4 x [float]]
				343	// %in
				344	// * %gep1 = getelementptr [4 x [4 x float]], [4 x [4 x [float]]
				345	// %in, i32 0
				346	// * %gep2 = getelementptr [4 x [4 x float]], [4 x [4 x [float]]
				347	// %in, i32 0, i32 0
				348	// * %gep3 = getelementptr [4 x [4 x float]], [4 x [4 x [float]]
				349	// %in, i32 0, i32 0, i32 0
				350	//
				351	// Note: count initialized to 1 to account for the first gep index.
				352	uint32_t count = 1;
				353	while (auto ArrayTy = dyn_cast<ArrayType>(SrcEleTy)) {
				354	++count;
				355	SrcEleTy = ArrayTy->getElementType();
				356	}
				357
				358	if (count > 1) {
				359	// Create a cast of the pointer. Replace the original cast with
				360	// it and mark the original cast for deletion.
				361	SmallVector<Value *, 4> indices(
				362	count,
				363	ConstantInt::get(IntegerType::get(M.getContext(), 32), 0));
				364	auto gep = GetElementPtrInst::CreateInBounds(inst->getOperand(0),
				365	indices, "", inst);
				366	ToBeDeleted.push_back(&I);
				367	auto cast = new BitCastInst(gep, inst->getType(), "", inst);
				368	inst->replaceAllUsesWith(cast);
				369	inst = cast;
				370	}
				371
				372	Type *DstEleTy = inst->getType()->getPointerElementType();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	373	if (SrcEleTy->isVectorTy() \|\| DstEleTy->isVectorTy()) {
				374	// Handle case either operand is vector type like char4* -> int4*.
alan-baker	ad1a12f	2020-08-25 09:18:38 -0400	[diff] [blame]	375	VectorWorkList.push_back(inst);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	376	} else {
				377	// Handle case all operands are scalar type like char* -> int*.
alan-baker	ad1a12f	2020-08-25 09:18:38 -0400	[diff] [blame]	378	ScalarWorkList.push_back(inst);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	379	}
				380
				381	Changed = true;
				382	} else {
				383	llvm_unreachable("Unsupported bitcast");
				384	}
				385	}
				386	}
				387	}
				388	}
				389
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	390	for (Instruction *Inst : VectorWorkList) {
				391	Value *Src = Inst->getOperand(0);
				392	Type *SrcTy = Src->getType()->getPointerElementType();
				393	Type *DstTy = Inst->getType()->getPointerElementType();
James Price	cf53df4	2020-04-20 14:41:24 -0400	[diff] [blame]	394	VectorType *SrcVecTy = dyn_cast<VectorType>(SrcTy);
				395	VectorType *DstVecTy = dyn_cast<VectorType>(DstTy);
				396	Type *SrcEleTy = SrcTy->isVectorTy() ? SrcVecTy->getElementType() : SrcTy;
				397	Type *DstEleTy = DstTy->isVectorTy() ? DstVecTy->getElementType() : DstTy;
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	398	// These are bit widths of the source and destination types, even
alan-baker	ad1a12f	2020-08-25 09:18:38 -0400	[diff] [blame]	399	// if they are vector types. E.g. bit width of float4 is 128.
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	400	unsigned SrcTyBitWidth = DL.getTypeStoreSizeInBits(SrcTy);
				401	unsigned DstTyBitWidth = DL.getTypeStoreSizeInBits(DstTy);
				402	unsigned SrcEleTyBitWidth = DL.getTypeStoreSizeInBits(SrcEleTy);
				403	unsigned DstEleTyBitWidth = DL.getTypeStoreSizeInBits(DstEleTy);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	404	unsigned NumIter = CalculateNumIter(SrcTyBitWidth, DstTyBitWidth);
				405
				406	// Investigate pointer bitcast's users.
				407	for (User *BitCastUser : Inst->users()) {
				408	Value *BitCastSrc = Inst->getOperand(0);
				409	Value *NewAddrIdx = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
				410
				411	// It consist of User* and bool whether user is gep or not.
				412	SmallVector<std::pair<User *, bool>, 32> Users;
				413
				414	GetElementPtrInst *GEP = nullptr;
				415	Value *OrgGEPIdx = nullptr;
Jason Gavris	e44af07	2018-08-14 20:44:50 -0400	[diff] [blame]	416	if ((GEP = dyn_cast<GetElementPtrInst>(BitCastUser))) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	417	OrgGEPIdx = GEP->getOperand(1);
				418
				419	// Build new src/dst address index.
				420	NewAddrIdx = CalculateNewGEPIdx(SrcTyBitWidth, DstTyBitWidth, GEP);
				421
				422	// Record gep's users.
				423	for (User *GEPUser : GEP->users()) {
				424	Users.push_back(std::make_pair(GEPUser, true));
				425	}
				426	} else {
				427	// Record bitcast's users.
				428	Users.push_back(std::make_pair(BitCastUser, false));
				429	}
				430
				431	// Handle users.
				432	bool IsGEPUser = false;
				433	for (auto UserIter : Users) {
				434	User *U = UserIter.first;
				435	IsGEPUser = UserIter.second;
				436
				437	IRBuilder<> Builder(cast<Instruction>(U));
				438
				439	if (StoreInst *ST = dyn_cast<StoreInst>(U)) {
				440	if (SrcTyBitWidth < DstTyBitWidth) {
				441	//
				442	// Consider below case.
				443	//
				444	// Original IR (float2* --> float4*)
				445	// 1. val = load (float4*) src_addr
				446	// 2. dst_addr = bitcast float2, float4
				447	// 3. dst_addr = gep (float4*) dst_addr, idx
				448	// 4. store (float4*) dst_addr
				449	//
				450	// Transformed IR
				451	// 1. val(float4) = load (float4*) src_addr
				452	// 2. val1(float2) = shufflevector (float4)val, (float4)undef,
				453	// (float2)<0, 1>
				454	// 3. val2(float2) = shufflevector (float4)val, (float4)undef,
				455	// (float2)<2, 3>
				456	// 4. dst_addr1(float2) = gep (float2)dst_addr, idx * 2
				457	// 5. dst_addr2(float2) = gep (float2)dst_addr, idx * 2 + 1
				458	// 6. store (float2)val1, (float2*)dst_addr1
				459	// 7. store (float2)val2, (float2*)dst_addr2
				460	//
				461
				462	unsigned NumElement = DstTyBitWidth / SrcTyBitWidth;
				463	unsigned NumVector = 1;
				464	// Vulkan SPIR-V does not support over 4 components for
				465	// TypeVector.
				466	if (NumElement > 4) {
				467	NumVector = NumElement >> 2;
				468	NumElement = 4;
				469	}
				470
				471	// Create store values.
				472	Type *TmpValTy = SrcTy;
				473	if (DstTy->isVectorTy()) {
				474	if (SrcEleTyBitWidth == DstEleTyBitWidth) {
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	475	TmpValTy = FixedVectorType::get(
				476	SrcEleTy, DstVecTy->getElementCount().getKnownMinValue());
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	477	} else {
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	478	TmpValTy = FixedVectorType::get(SrcEleTy, NumElement);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	479	}
				480	}
				481
				482	Value *STVal = ST->getValueOperand();
				483	for (unsigned VIdx = 0; VIdx < NumVector; VIdx++) {
				484	Value *TmpSTVal = nullptr;
				485	if (NumVector == 1) {
				486	TmpSTVal = Builder.CreateBitCast(STVal, TmpValTy);
				487	} else {
				488	unsigned DstVecTyNumElement =
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	489	DstVecTy->getElementCount().getKnownMinValue() / NumVector;
alan-baker	4a757f6	2020-04-22 08:17:49 -0400	[diff] [blame]	490	SmallVector<int32_t, 4> Idxs;
Marco Antognini	7e33840	2021-03-15 12:48:37 +0000	[diff] [blame]	491	for (unsigned i = 0; i < DstVecTyNumElement; i++) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	492	Idxs.push_back(i + (DstVecTyNumElement * VIdx));
				493	}
				494	Value *UndefVal = UndefValue::get(DstTy);
				495	TmpSTVal = Builder.CreateShuffleVector(STVal, UndefVal, Idxs);
				496	TmpSTVal = Builder.CreateBitCast(TmpSTVal, TmpValTy);
				497	}
				498
				499	SmallVector<Value *, 8> STValues;
				500	if (!SrcTy->isVectorTy()) {
				501	// Handle scalar type.
				502	for (unsigned i = 0; i < NumElement; i++) {
				503	Value *TmpVal = Builder.CreateExtractElement(
				504	TmpSTVal, Builder.getInt32(i));
				505	STValues.push_back(TmpVal);
				506	}
				507	} else {
				508	// Handle vector type.
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	509	unsigned SrcNumElement =
				510	SrcVecTy->getElementCount().getKnownMinValue();
				511	unsigned DstNumElement =
				512	DstVecTy->getElementCount().getKnownMinValue();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	513	for (unsigned i = 0; i < NumElement; i++) {
alan-baker	4a757f6	2020-04-22 08:17:49 -0400	[diff] [blame]	514	SmallVector<int32_t, 4> Idxs;
Marco Antognini	7e33840	2021-03-15 12:48:37 +0000	[diff] [blame]	515	for (unsigned j = 0; j < SrcNumElement; j++) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	516	Idxs.push_back(i * SrcNumElement + j);
				517	}
				518
				519	VectorType *TmpVecTy =
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	520	FixedVectorType::get(SrcEleTy, DstNumElement);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	521	Value *UndefVal = UndefValue::get(TmpVecTy);
				522	Value *TmpVal =
				523	Builder.CreateShuffleVector(TmpSTVal, UndefVal, Idxs);
				524	STValues.push_back(TmpVal);
				525	}
				526	}
				527
				528	// Generate stores.
				529	Value *SrcAddrIdx = NewAddrIdx;
				530	Value *BaseAddr = BitCastSrc;
				531	for (unsigned i = 0; i < NumElement; i++) {
				532	// Calculate store address.
				533	Value *DstAddr = Builder.CreateGEP(BaseAddr, SrcAddrIdx);
				534	Builder.CreateStore(STValues[i], DstAddr);
				535
				536	if (i + 1 < NumElement) {
				537	// Calculate next store address
				538	SrcAddrIdx =
				539	Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
				540	}
				541	}
				542	}
				543	} else if (SrcTyBitWidth > DstTyBitWidth) {
				544	//
				545	// Consider below case.
				546	//
				547	// Original IR (float4* --> float2*)
				548	// 1. val = load (float2*) src_addr
				549	// 2. dst_addr = bitcast float4, float2
				550	// 3. dst_addr = gep (float2*) dst_addr, idx
				551	// 4. store (float2) val, (float2*) dst_addr
				552	//
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	553	// Transformed IR: Decompose the source vector into elements, then
				554	// write them one at a time.
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	555	// 1. val = load (float2*) src_addr
				556	// 2. val1 = (float)extract_element val, 0
				557	// 3. val2 = (float)extract_element val, 1
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	558	// // Source component k maps to destination component k * idxscale
				559	// 3a. idxscale = sizeof(float4)/sizeof(float2)
				560	// 3b. idxbase = idx / idxscale
				561	// 3c. newarrayidx = idxbase * idxscale
				562	// 4. dst_addr1 = gep (float4*) dst, newarrayidx
				563	// 5. dst_addr2 = gep (float4*) dst, newarrayidx + 1
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	564	// 6. store (float)val1, (float*) dst_addr1
				565	// 7. store (float)val2, (float*) dst_addr2
				566	//
				567
				568	if (SrcTyBitWidth <= DstEleTyBitWidth) {
				569	SrcTy->print(errs());
				570	DstTy->print(errs());
				571	llvm_unreachable("Handle above src/dst type.");
				572	}
				573
				574	// Create store values.
				575	Value *STVal = ST->getValueOperand();
				576
				577	if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	578	VectorType *TmpVecTy = FixedVectorType::get(
				579	SrcEleTy, DstTyBitWidth / SrcEleTyBitWidth);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	580	STVal = Builder.CreateBitCast(STVal, TmpVecTy);
				581	}
				582
				583	SmallVector<Value *, 8> STValues;
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	584	// How many destination writes are required?
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	585	unsigned DstNumElement = 1;
				586	if (!DstTy->isVectorTy() \|\| SrcEleTyBitWidth == DstTyBitWidth) {
				587	// Handle scalar type.
				588	STValues.push_back(STVal);
				589	} else {
				590	// Handle vector type.
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	591	DstNumElement = DstVecTy->getElementCount().getKnownMinValue();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	592	for (unsigned i = 0; i < DstNumElement; i++) {
				593	Value *Idx = Builder.getInt32(i);
				594	Value *TmpVal = Builder.CreateExtractElement(STVal, Idx);
				595	STValues.push_back(TmpVal);
				596	}
				597	}
				598
				599	// Generate stores.
				600	Value *BaseAddr = BitCastSrc;
				601	Value *SubEleIdx = Builder.getInt32(0);
				602	if (IsGEPUser) {
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	603	// Compute SubNumElement = idxscale
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	604	unsigned SubNumElement =
				605	SrcVecTy->getElementCount().getKnownMinValue();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	606	if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	607	// Same condition under which DstNumElements > 1
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	608	SubNumElement = SrcVecTy->getElementCount().getKnownMinValue() /
				609	DstVecTy->getElementCount().getKnownMinValue();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	610	}
				611
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	612	// Compute SubEleIdx = idxbase * idxscale
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	613	SubEleIdx = Builder.CreateAnd(
				614	OrgGEPIdx, Builder.getInt32(SubNumElement - 1));
David Neto	30ae05e	2017-09-06 19:58:36 -0400	[diff] [blame]	615	if (DstTy->isVectorTy() && (SrcEleTyBitWidth != DstTyBitWidth)) {
				616	SubEleIdx = Builder.CreateShl(
				617	SubEleIdx, Builder.getInt32(std::log2(SubNumElement)));
				618	}
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	619	}
				620
				621	for (unsigned i = 0; i < DstNumElement; i++) {
				622	// Calculate address.
				623	if (i > 0) {
				624	SubEleIdx = Builder.CreateAdd(SubEleIdx, Builder.getInt32(i));
				625	}
				626
				627	Value *Idxs[] = {NewAddrIdx, SubEleIdx};
				628	Value *DstAddr = Builder.CreateGEP(BaseAddr, Idxs);
				629	Type *TmpSrcTy = SrcEleTy;
James Price	cf53df4	2020-04-20 14:41:24 -0400	[diff] [blame]	630	if (auto TmpSrcVecTy = dyn_cast<VectorType>(TmpSrcTy)) {
				631	TmpSrcTy = TmpSrcVecTy->getElementType();
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	632	}
				633	Value *TmpVal = Builder.CreateBitCast(STValues[i], TmpSrcTy);
				634
				635	Builder.CreateStore(TmpVal, DstAddr);
				636	}
				637	} else {
				638	// if SrcTyBitWidth == DstTyBitWidth
				639	Type *TmpSrcTy = SrcTy;
				640	Value *DstAddr = Src;
				641
				642	if (IsGEPUser) {
				643	SmallVector<Value *, 4> Idxs;
				644	for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
				645	Idxs.push_back(GEP->getOperand(i));
				646	}
				647	DstAddr = Builder.CreateGEP(BitCastSrc, Idxs);
				648
				649	if (GEP->getNumOperands() > 2) {
				650	TmpSrcTy = SrcEleTy;
				651	}
				652	}
				653
				654	Value *TmpVal =
				655	Builder.CreateBitCast(ST->getValueOperand(), TmpSrcTy);
				656	Builder.CreateStore(TmpVal, DstAddr);
				657	}
				658	} else if (LoadInst *LD = dyn_cast<LoadInst>(U)) {
				659	Value *SrcAddrIdx = Builder.getInt32(0);
				660	if (IsGEPUser) {
				661	SrcAddrIdx = NewAddrIdx;
				662	}
				663
				664	// Load value from src.
				665	SmallVector<Value *, 8> LDValues;
				666
				667	for (unsigned i = 1; i <= NumIter; i++) {
				668	Value *SrcAddr = Builder.CreateGEP(Src, SrcAddrIdx);
				669	LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
				670	LDValues.push_back(SrcVal);
				671
				672	if (i + 1 <= NumIter) {
				673	// Calculate next SrcAddrIdx.
				674	SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
				675	}
				676	}
				677
				678	Value *DstVal = nullptr;
				679	if (SrcTyBitWidth > DstTyBitWidth) {
				680	unsigned NumElement = SrcTyBitWidth / DstTyBitWidth;
				681
				682	if (SrcEleTyBitWidth == DstTyBitWidth) {
				683	//
				684	// Consider below case.
				685	//
				686	// Original IR (int4* --> char4*)
				687	// 1. src_addr = bitcast int4, char4
				688	// 2. element_addr = gep (char4*) src_addr, idx
				689	// 3. load (char4*) element_addr
				690	//
				691	// Transformed IR
				692	// 1. src_addr = gep (int4*) src, idx / 4
				693	// 2. src_val(int4) = load (int4*) src_addr
				694	// 3. tmp_val(int4) = extractelement src_val, idx % 4
				695	// 4. dst_val(char4) = bitcast tmp_val, (char4)
				696	//
				697	Value *EleIdx = Builder.getInt32(0);
				698	if (IsGEPUser) {
				699	EleIdx = Builder.CreateAnd(OrgGEPIdx,
				700	Builder.getInt32(NumElement - 1));
				701	}
				702	Value *TmpVal =
				703	Builder.CreateExtractElement(LDValues[0], EleIdx, "tmp_val");
				704	DstVal = Builder.CreateBitCast(TmpVal, DstTy);
				705	} else if (SrcEleTyBitWidth < DstTyBitWidth) {
				706	if (IsGEPUser) {
				707	//
				708	// Consider below case.
				709	//
				710	// Original IR (float4* --> float2*)
				711	// 1. src_addr = bitcast float4, float2
				712	// 2. element_addr = gep (float2*) src_addr, idx
				713	// 3. load (float2*) element_addr
				714	//
				715	// Transformed IR
				716	// 1. src_addr = gep (float4*) src, idx / 2
				717	// 2. src_val(float4) = load (float4*) src_addr
				718	// 3. tmp_val1(float) = extractelement (idx % 2) * 2
				719	// 4. tmp_val2(float) = extractelement (idx % 2) * 2 + 1
				720	// 5. dst_val(float2) = insertelement undef(float2), tmp_val1, 0
				721	// 6. dst_val(float2) = insertelement undef(float2), tmp_val2, 1
				722	// 7. dst_val(float2) = bitcast dst_val, (float2)
				723	// ==> if types are same between src and dst, it will be
				724	// ignored
				725	//
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	726	VectorType *TmpVecTy = FixedVectorType::get(
				727	SrcEleTy, DstTyBitWidth / SrcEleTyBitWidth);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	728	DstVal = UndefValue::get(TmpVecTy);
				729	Value *EleIdx = Builder.CreateAnd(
				730	OrgGEPIdx, Builder.getInt32(NumElement - 1));
				731	EleIdx = Builder.CreateShl(
				732	EleIdx, Builder.getInt32(
				733	std::log2(DstTyBitWidth / SrcEleTyBitWidth)));
				734	Value *TmpOrgGEPIdx = EleIdx;
				735	for (unsigned i = 0; i < NumElement; i++) {
				736	Value *TmpVal = Builder.CreateExtractElement(
				737	LDValues[0], TmpOrgGEPIdx, "tmp_val");
				738	DstVal = Builder.CreateInsertElement(DstVal, TmpVal,
				739	Builder.getInt32(i));
				740
				741	if (i + 1 < NumElement) {
				742	TmpOrgGEPIdx =
				743	Builder.CreateAdd(TmpOrgGEPIdx, Builder.getInt32(1));
				744	}
				745	}
				746	} else {
				747	//
				748	// Consider below case.
				749	//
				750	// Original IR (float4* --> int2*)
				751	// 1. src_addr = bitcast float4, int2
				752	// 2. load (int2*) src_addr
				753	//
				754	// Transformed IR
				755	// 1. src_val(float4) = load (float4*) src_addr
				756	// 2. tmp_val(float2) = shufflevector (float4)src_val,
				757	// (float4)undef,
				758	// (float2)<0, 1>
				759	// 3. dst_val(int2) = bitcast (float2)tmp_val, (int2)
				760	//
				761	unsigned NumElement = DstTyBitWidth / SrcEleTyBitWidth;
				762	Value *Undef = UndefValue::get(SrcTy);
				763
alan-baker	4a757f6	2020-04-22 08:17:49 -0400	[diff] [blame]	764	SmallVector<int32_t, 4> Idxs;
Marco Antognini	7e33840	2021-03-15 12:48:37 +0000	[diff] [blame]	765	for (unsigned i = 0; i < NumElement; i++) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	766	Idxs.push_back(i);
				767	}
				768	DstVal = Builder.CreateShuffleVector(LDValues[0], Undef, Idxs);
				769
				770	DstVal = Builder.CreateBitCast(DstVal, DstTy);
				771	}
				772
				773	DstVal = Builder.CreateBitCast(DstVal, DstTy);
				774	} else {
				775	if (IsGEPUser) {
				776	//
				777	// Consider below case.
				778	//
				779	// Original IR (int4* --> char2*)
				780	// 1. src_addr = bitcast int4, char2
				781	// 2. element_addr = gep (char2*) src_addr, idx
				782	// 3. load (char2*) element_addr
				783	//
				784	// Transformed IR
				785	// 1. src_addr = gep (int4*) src, idx / 8
				786	// 2. src_val(int4) = load (int4*) src_addr
				787	// 3. tmp_val(int) = extractelement idx / 2
				788	// 4. tmp_val(<i16 x 2>) = bitcast tmp_val(int), (<i16 x 2>)
				789	// 5. tmp_val(i16) = extractelement idx % 2
				790	// 6. dst_val(char2) = bitcast tmp_val, (char2)
				791	// ==> if types are same between src and dst, it will be
				792	// ignored
				793	//
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	794	unsigned SubNumElement = SrcEleTyBitWidth / DstTyBitWidth;
				795	if (SubNumElement != 2 && SubNumElement != 4) {
				796	llvm_unreachable("Unsupported SubNumElement");
				797	}
				798
				799	Value *TmpOrgGEPIdx = Builder.CreateLShr(
				800	OrgGEPIdx, Builder.getInt32(std::log2(SubNumElement)));
				801	Value *TmpVal = Builder.CreateExtractElement(
				802	LDValues[0], TmpOrgGEPIdx, "tmp_val");
				803	TmpVal = Builder.CreateBitCast(
				804	TmpVal,
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	805	FixedVectorType::get(
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	806	IntegerType::get(DstTy->getContext(), DstTyBitWidth),
				807	SubNumElement));
				808	TmpOrgGEPIdx = Builder.CreateAnd(
				809	OrgGEPIdx, Builder.getInt32(SubNumElement - 1));
				810	TmpVal = Builder.CreateExtractElement(TmpVal, TmpOrgGEPIdx,
				811	"tmp_val");
				812	DstVal = Builder.CreateBitCast(TmpVal, DstTy);
				813	} else {
				814	Inst->print(errs());
				815	llvm_unreachable("Handle this bitcast");
				816	}
				817	}
				818	} else if (SrcTyBitWidth < DstTyBitWidth) {
				819	//
				820	// Consider below case.
				821	//
				822	// Original IR (float2* --> float4*)
				823	// 1. src_addr = bitcast float2, float4
				824	// 2. element_addr = gep (float4*) src_addr, idx
				825	// 3. load (float4*) element_addr
				826	//
				827	// Transformed IR
				828	// 1. src_addr = gep (float2) src, idx 2
				829	// 2. src_val1(float2) = load (float2*) src_addr
				830	// 3. src_addr2 = gep (float2*) src_addr, 1
				831	// 4. src_val2(float2) = load (float2*) src_addr2
				832	// 5. dst_val(float4) = shufflevector src_val1, src_val2, <0, 1>
				833	// 6. dst_val(float4) = bitcast dst_val, (float4)
				834	// ==> if types are same between src and dst, it will be ignored
				835	//
				836	unsigned NumElement = 1;
				837	if (SrcTy->isVectorTy()) {
alan-baker	5a8c3be	2020-09-09 13:44:26 -0400	[diff] [blame]	838	NumElement = SrcVecTy->getElementCount().getKnownMinValue() * 2;
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	839	}
				840
				841	// Handle scalar type.
				842	if (NumElement == 1) {
				843	if (SrcTyBitWidth * 4 <= DstTyBitWidth) {
				844	unsigned NumVecElement = DstTyBitWidth / SrcTyBitWidth;
				845	unsigned NumVector = 1;
				846	if (NumVecElement > 4) {
				847	NumVector = NumVecElement >> 2;
				848	NumVecElement = 4;
				849	}
				850
				851	SmallVector<Value *, 4> Values;
				852	for (unsigned VIdx = 0; VIdx < NumVector; VIdx++) {
				853	// In this case, generate only insert element. It generates
				854	// less instructions than using shuffle vector.
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	855	VectorType *TmpVecTy =
				856	FixedVectorType::get(SrcTy, NumVecElement);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	857	Value *TmpVal = UndefValue::get(TmpVecTy);
				858	for (unsigned i = 0; i < NumVecElement; i++) {
				859	TmpVal = Builder.CreateInsertElement(
				860	TmpVal, LDValues[i + (VIdx * 4)], Builder.getInt32(i));
				861	}
				862	Values.push_back(TmpVal);
				863	}
				864
				865	if (Values.size() > 2) {
				866	Inst->print(errs());
				867	llvm_unreachable("Support above bitcast");
				868	}
				869
				870	if (Values.size() > 1) {
				871	Type *TmpEleTy =
				872	Type::getIntNTy(M.getContext(), SrcEleTyBitWidth * 2);
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	873	VectorType *TmpVecTy =
				874	FixedVectorType::get(TmpEleTy, NumVector);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	875	for (unsigned i = 0; i < Values.size(); i++) {
				876	Values[i] = Builder.CreateBitCast(Values[i], TmpVecTy);
				877	}
alan-baker	4a757f6	2020-04-22 08:17:49 -0400	[diff] [blame]	878	SmallVector<int32_t, 4> Idxs;
Marco Antognini	7e33840	2021-03-15 12:48:37 +0000	[diff] [blame]	879	for (unsigned i = 0; i < (NumVector * 2); i++) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	880	Idxs.push_back(i);
				881	}
				882	for (unsigned i = 0; i < Values.size(); i = i + 2) {
				883	Values[i] = Builder.CreateShuffleVector(
				884	Values[i], Values[i + 1], Idxs);
				885	}
				886	}
				887
				888	LDValues.clear();
				889	LDValues.push_back(Values[0]);
				890	} else {
				891	SmallVector<Value *, 4> TmpLDValues;
				892	for (unsigned i = 0; i < LDValues.size(); i = i + 2) {
alan-baker	b3e2b6d	2020-06-24 23:59:57 -0400	[diff] [blame]	893	VectorType *TmpVecTy = FixedVectorType::get(SrcTy, 2);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	894	Value *TmpVal = UndefValue::get(TmpVecTy);
				895	TmpVal = Builder.CreateInsertElement(TmpVal, LDValues[i],
				896	Builder.getInt32(0));
				897	TmpVal = Builder.CreateInsertElement(TmpVal, LDValues[i + 1],
				898	Builder.getInt32(1));
				899	TmpLDValues.push_back(TmpVal);
				900	}
				901	LDValues.clear();
				902	LDValues = std::move(TmpLDValues);
				903	NumElement = 4;
				904	}
				905	}
				906
				907	// Handle vector type.
				908	while (LDValues.size() != 1) {
				909	SmallVector<Value *, 4> TmpLDValues;
				910	for (unsigned i = 0; i < LDValues.size(); i = i + 2) {
alan-baker	4a757f6	2020-04-22 08:17:49 -0400	[diff] [blame]	911	SmallVector<int32_t, 4> Idxs;
Marco Antognini	7e33840	2021-03-15 12:48:37 +0000	[diff] [blame]	912	for (unsigned j = 0; j < NumElement; j++) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	913	Idxs.push_back(j);
				914	}
				915	Value *TmpVal = Builder.CreateShuffleVector(
				916	LDValues[i], LDValues[i + 1], Idxs);
				917	TmpLDValues.push_back(TmpVal);
				918	}
				919	LDValues.clear();
				920	LDValues = std::move(TmpLDValues);
				921	NumElement *= 2;
				922	}
				923
				924	DstVal = Builder.CreateBitCast(LDValues[0], DstTy);
				925	} else {
				926	//
				927	// Consider below case.
				928	//
				929	// Original IR (float4* --> int4*)
				930	// 1. src_addr = bitcast float4, int4
				931	// 2. element_addr = gep (int4*) src_addr, idx, 0
				932	// 3. load (int) element_addr
				933	//
				934	// Transformed IR
				935	// 1. element_addr = gep (float4*) src_addr, idx, 0
				936	// 2. src_val = load (float*) element_addr
				937	// 3. val = bitcast (float) src_val to (int)
				938	//
				939	Value *SrcAddr = Src;
				940	if (IsGEPUser) {
				941	SmallVector<Value *, 4> Idxs;
				942	for (unsigned i = 1; i < GEP->getNumOperands(); i++) {
				943	Idxs.push_back(GEP->getOperand(i));
				944	}
				945	SrcAddr = Builder.CreateGEP(Src, Idxs);
				946	}
				947	LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
				948
				949	Type *TmpDstTy = DstTy;
				950	if (IsGEPUser) {
				951	if (GEP->getNumOperands() > 2) {
				952	TmpDstTy = DstEleTy;
				953	}
				954	}
				955	DstVal = Builder.CreateBitCast(SrcVal, TmpDstTy);
				956	}
				957
				958	// Update LD's users with DstVal.
				959	LD->replaceAllUsesWith(DstVal);
				960	} else {
				961	U->print(errs());
				962	llvm_unreachable(
				963	"Handle above user of gep on ReplacePointerBitcastPass");
				964	}
				965
				966	ToBeDeleted.push_back(cast<Instruction>(U));
				967	}
				968
				969	if (IsGEPUser) {
				970	ToBeDeleted.push_back(GEP);
				971	}
				972	}
				973
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	974	// Schedule for removal only if Inst has no users. If all its users are
				975	// later also replaced in the module, Inst will be removed by transitivity.
				976	if (Inst->user_empty()) {
				977	ToBeDeleted.push_back(Inst);
				978	}
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	979	}
				980
				981	for (Instruction *Inst : ScalarWorkList) {
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	982	// Some tests have a stray bitcast from pointer-to-array to
				983	// pointer to i8*, but the bitcast has no uses. Exit early
				984	// but be sure to delete it later.
				985	//
				986	// Example:
				987	// %1 = bitcast [25 x float]* %dst to i8*
				988
				989	// errs () << " Scalar bitcast is " << *Inst << "\n";
				990
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	991	if (Inst->use_empty()) {
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	992	ToBeDeleted.push_back(Inst);
				993	continue;
				994	}
				995
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	996	Value *Src = Inst->getOperand(0);
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	997	Type *SrcTy; // Original type
				998	Type *DstTy; // Type that SrcTy is cast to.
				999	unsigned SrcTyBitWidth;
				1000	unsigned DstTyBitWidth;
				1001
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	1002	bool BailOut = false;
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	1003	SrcTy = Src->getType()->getPointerElementType();
				1004	DstTy = Inst->getType()->getPointerElementType();
				1005	int iter_count = 0;
				1006	while (++iter_count) {
				1007	SrcTyBitWidth = unsigned(DL.getTypeStoreSizeInBits(SrcTy));
				1008	DstTyBitWidth = unsigned(DL.getTypeStoreSizeInBits(DstTy));
				1009	#if 0
				1010	errs() << " Try Src " << *Src << "\n";
				1011	errs() << " SrcTy elem " << *SrcTy << " bit width " << SrcTyBitWidth
				1012	<< "\n";
				1013	errs() << " DstTy elem " << *DstTy << " bit width " << DstTyBitWidth
				1014	<< "\n";
				1015	#endif
				1016
				1017	// The normal case that we can handle is when the source type is no
				1018	// larger than the dest type.
				1019	if (SrcTyBitWidth <= DstTyBitWidth)
				1020	break;
				1021
				1022	// The Source type is bigger than the destination type.
				1023	// Walk into the source type to break it down.
				1024	if (SrcTy->isArrayTy()) {
				1025	// If it's an array, consider only the first element.
				1026	Value *Zero = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	1027	Instruction *NewSrc =
				1028	GetElementPtrInst::CreateInBounds(Src, {Zero, Zero});
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	1029	Changed = true;
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	1030	// errs() << "NewSrc is " << *NewSrc << "\n";
				1031	if (auto *SrcInst = dyn_cast<Instruction>(Src)) {
				1032	// errs() << " instruction case\n";
				1033	NewSrc->insertAfter(SrcInst);
				1034	} else {
				1035	// Could be a parameter.
				1036	auto where = Inst->getParent()
				1037	->getParent()
				1038	->getEntryBlock()
				1039	.getFirstInsertionPt();
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	1040	Instruction &whereInst = *where;
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	1041	// errs() << "insert " << *NewSrc << " before " << whereInst << "\n";
				1042	NewSrc->insertBefore(&whereInst);
				1043	}
				1044	Src = NewSrc;
				1045	SrcTy = Src->getType()->getPointerElementType();
				1046	} else {
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	1047	BailOut = true;
				1048	break;
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	1049	}
				1050	if (iter_count > 1000) {
				1051	llvm_unreachable("ReplacePointerBitcastPass: Too many iterations!");
				1052	}
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	1053	}
David Neto	8e13814	2018-05-29 10:19:21 -0400	[diff] [blame]	1054	#if 0
				1055	errs() << " Src is " << *Src << "\n";
				1056	errs() << " Dst is " << *Inst << "\n";
				1057	errs() << " SrcTy elem " << *SrcTy << " bit width " << SrcTyBitWidth
				1058	<< "\n";
				1059	errs() << " DstTy elem " << *DstTy << " bit width " << DstTyBitWidth
				1060	<< "\n";
				1061	#endif
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1062
alan-baker	1b13e8f	2019-08-08 17:56:51 -0400	[diff] [blame]	1063	// Only dead code has been generated up to this point so it is safe to bail
				1064	// out.
				1065	if (BailOut) {
				1066	continue;
				1067	}
				1068
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1069	for (User *BitCastUser : Inst->users()) {
				1070	Value *NewAddrIdx = ConstantInt::get(Type::getInt32Ty(M.getContext()), 0);
				1071	// Each entry pairs a User* with a bool that is true when the user is a user of a gep on the bitcast.
				1072	SmallVector<std::pair<User *, bool>, 32> Users;
				1073
				1074	GetElementPtrInst *GEP = nullptr;
Jason Gavris	e44af07	2018-08-14 20:44:50 -0400	[diff] [blame]	1075	if ((GEP = dyn_cast<GetElementPtrInst>(BitCastUser))) {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1076	IRBuilder<> Builder(GEP);
				1077
				1078	// Build new src/dst address.
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1079	NewAddrIdx = CalculateNewGEPIdx(SrcTyBitWidth, DstTyBitWidth, GEP);
				1080
				1081	// If bitcast's user is gep, investigate gep's users too.
				1082	for (User *GEPUser : GEP->users()) {
				1083	Users.push_back(std::make_pair(GEPUser, true));
				1084	}
				1085	} else {
				1086	Users.push_back(std::make_pair(BitCastUser, false));
				1087	}
				1088
				1089	// Handle users.
				1090	bool IsGEPUser = false;
				1091	for (auto UserIter : Users) {
				1092	User *U = UserIter.first;
				1093	IsGEPUser = UserIter.second;
				1094
				1095	IRBuilder<> Builder(cast<Instruction>(U));
				1096
				1097	// Handle store instruction with gep.
				1098	if (StoreInst *ST = dyn_cast<StoreInst>(U)) {
Diego Novillo	3cc8d7a	2019-04-10 13:30:34 -0400	[diff] [blame]	1099	// errs() << " store is " << *ST << "\n";
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1100	if (SrcTyBitWidth == DstTyBitWidth) {
alan-baker	3201427	2019-05-22 08:07:18 -0400	[diff] [blame]	1101	auto STVal = ConvertValue(ST->getValueOperand(), SrcTy, Builder);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1102	Value *DstAddr = Builder.CreateGEP(Src, NewAddrIdx);
				1103	Builder.CreateStore(STVal, DstAddr);
				1104	} else if (SrcTyBitWidth < DstTyBitWidth) {
				1105	unsigned NumElement = DstTyBitWidth / SrcTyBitWidth;
				1106
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1107	// Create store values.
				1108	Value *STVal = ST->getValueOperand();
				1109	SmallVector<Value *, 8> STValues;
				1110	for (unsigned i = 0; i < NumElement; i++) {
				1111	Type *TmpTy = Type::getIntNTy(M.getContext(), DstTyBitWidth);
				1112	Value *TmpVal = Builder.CreateBitCast(STVal, TmpTy);
James Price	5195228	2020-02-14 09:41:11 -0500	[diff] [blame]	1113	TmpVal = Builder.CreateLShr(
				1114	TmpVal, Builder.getIntN(DstTyBitWidth, i * SrcTyBitWidth));
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1115	TmpVal = Builder.CreateTrunc(TmpVal, SrcTy);
				1116	STValues.push_back(TmpVal);
				1117	}
				1118
				1119	// Generate stores.
				1120	Value *SrcAddrIdx = NewAddrIdx;
				1121	Value *BaseAddr = Src;
				1122	for (unsigned i = 0; i < NumElement; i++) {
				1123	// Calculate store address.
				1124	Value *DstAddr = Builder.CreateGEP(BaseAddr, SrcAddrIdx);
				1125	Builder.CreateStore(STValues[i], DstAddr);
				1126
				1127	if (i + 1 < NumElement) {
				1128	// Calculate next store address
				1129	SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
				1130	}
				1131	}
				1132
				1133	} else {
				1134	Inst->print(errs());
				1135	llvm_unreachable("Handle different size store with scalar "
				1136	"bitcast on ReplacePointerBitcastPass");
				1137	}
				1138	} else if (LoadInst *LD = dyn_cast<LoadInst>(U)) {
				1139	if (SrcTyBitWidth == DstTyBitWidth) {
				1140	Value *SrcAddr = Builder.CreateGEP(Src, NewAddrIdx);
				1141	LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
alan-baker	3201427	2019-05-22 08:07:18 -0400	[diff] [blame]	1142	LD->replaceAllUsesWith(ConvertValue(SrcVal, DstTy, Builder));
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1143	} else if (SrcTyBitWidth < DstTyBitWidth) {
				1144	Value *SrcAddrIdx = NewAddrIdx;
				1145
				1146	// Load value from src.
				1147	unsigned NumIter = CalculateNumIter(SrcTyBitWidth, DstTyBitWidth);
				1148	SmallVector<Value *, 8> LDValues;
				1149	for (unsigned i = 1; i <= NumIter; i++) {
				1150	Value *SrcAddr = Builder.CreateGEP(Src, SrcAddrIdx);
				1151	LoadInst *SrcVal = Builder.CreateLoad(SrcAddr, "src_val");
				1152	LDValues.push_back(SrcVal);
				1153
				1154	if (i + 1 <= NumIter) {
				1155	// Calculate next SrcAddrIdx.
				1156	SrcAddrIdx = Builder.CreateAdd(SrcAddrIdx, Builder.getInt32(1));
				1157	}
				1158	}
				1159
				1160	// Merge Load.
				1161	Type *TmpSrcTy = Type::getIntNTy(M.getContext(), SrcTyBitWidth);
				1162	Value *DstVal = Builder.CreateBitCast(LDValues[0], TmpSrcTy);
				1163	Type *TmpDstTy = Type::getIntNTy(M.getContext(), DstTyBitWidth);
				1164	DstVal = Builder.CreateZExt(DstVal, TmpDstTy);
				1165	for (unsigned i = 1; i < LDValues.size(); i++) {
				1166	Value *TmpVal = Builder.CreateBitCast(LDValues[i], TmpSrcTy);
				1167	TmpVal = Builder.CreateZExt(TmpVal, TmpDstTy);
Kévin Petit	0c9cd04	2020-04-06 17:32:41 +0100	[diff] [blame]	1168	TmpVal = Builder.CreateShl(
				1169	TmpVal, Builder.getIntN(DstTyBitWidth, i * SrcTyBitWidth));
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1170	DstVal = Builder.CreateOr(DstVal, TmpVal);
				1171	}
				1172
				1173	DstVal = Builder.CreateBitCast(DstVal, DstTy);
				1174	LD->replaceAllUsesWith(DstVal);
				1175
				1176	} else {
				1177	Inst->print(errs());
				1178	llvm_unreachable("Handle different size load with scalar "
				1179	"bitcast on ReplacePointerBitcastPass");
				1180	}
				1181	} else {
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1182	Inst->print(errs());
				1183	llvm_unreachable("Handle above user of scalar bitcast with gep on "
				1184	"ReplacePointerBitcastPass");
				1185	}
				1186
				1187	ToBeDeleted.push_back(cast<Instruction>(U));
				1188	}
				1189
				1190	if (IsGEPUser) {
				1191	ToBeDeleted.push_back(GEP);
				1192	}
				1193	}
				1194
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	1195	// Schedule for removal only if Inst has no users. If all its users are
				1196	// later also replaced in the module, Inst will be removed by transitivity.
				1197	if (Inst->user_empty()) {
				1198	ToBeDeleted.push_back(Inst);
				1199	}
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1200	}
				1201
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	1202	// Remove all dead instructions, including their dead operands. Proceed with a
				1203	// fixed-point algorithm to handle dependencies.
				1204	for (bool Progress = true; Progress;) {
				1205	std::size_t PreviousSize = ToBeDeleted.size();
				1206
				1207	WeakInstructions Deads;
				1208	WeakInstructions NextBatch;
				1209	for (WeakTrackingVH Handle : ToBeDeleted) {
				1210	if (!Handle.pointsToAliveValue())
				1211	continue;
				1212
				1213	auto *Inst = cast<Instruction>(Handle);
				1214
				1215	// We need to remove stores manually given they are never trivially dead.
				1216	if (auto *Store = dyn_cast<StoreInst>(Inst)) {
				1217	Store->eraseFromParent();
				1218	continue;
				1219	}
				1220
				1221	if (isInstructionTriviallyDead(Inst)) {
				1222	Deads.push_back(Handle);
				1223	} else {
				1224	NextBatch.push_back(Handle);
				1225	}
				1226	}
				1227
				1228	RecursivelyDeleteTriviallyDeadInstructions(Deads);
				1229
				1230	ToBeDeleted = std::move(NextBatch);
				1231	Progress = (ToBeDeleted.size() < PreviousSize);
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1232	}
				1233
Marco Antognini	42b6522	2021-02-12 18:40:42 +0000	[diff] [blame]	1234	assert(ToBeDeleted.empty() && "Some instructions were not deleted.");
				1235
David Neto	22f144c	2017-06-12 14:26:21 -0400	[diff] [blame]	1236	return Changed;
				1237	}