[llvm] [Attributor] Reordering bins of an allocation based on access patterns (PR #95319)
Vidush Singhal via llvm-commits
llvm-commits at lists.llvm.org
Tue Jun 18 09:50:48 PDT 2024
https://github.com/vidsinghal updated https://github.com/llvm/llvm-project/pull/95319
>From deaed480035cac22fbafb91cbc9aacebb37aded1 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby964.llnl.gov>
Date: Tue, 4 Jun 2024 16:51:06 -0700
Subject: [PATCH 1/3] [Attributor]: AApointerinfo, allow more than one offset
for a pass through user
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 79 ++++
.../Transforms/IPO/AttributorAttributes.cpp | 360 ++++++++++++++----
.../Attributor/ArgumentPromotion/crash.ll | 6 +-
.../live_called_from_dead.ll | 3 +-
.../live_called_from_dead_2.ll | 3 +-
.../nonzero-address-spaces.ll | 3 +-
.../Attributor/IPConstantProp/pthreads.ll | 4 +-
llvm/test/Transforms/Attributor/allocator.ll | 195 ++++++++--
.../Attributor/call-simplify-pointer-info.ll | 42 +-
.../Transforms/Attributor/heap_to_stack.ll | 3 +-
.../Attributor/heap_to_stack_gpu.ll | 3 +-
llvm/test/Transforms/Attributor/liveness.ll | 8 +-
llvm/test/Transforms/Attributor/nodelete.ll | 5 +-
.../Transforms/Attributor/pointer-info.ll | 6 +-
.../Attributor/value-simplify-pointer-info.ll | 17 +-
15 files changed, 569 insertions(+), 168 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 6ba04dbc31db3..afa96fbfb99fb 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6106,6 +6106,56 @@ struct AAPointerInfo : public AbstractAttribute {
Type *Ty;
};
+ /// A helper containing a list of offsets computed for a Use. Ideally this
+ /// list should be strictly ascending, but we ensure that only when we
+ /// actually translate the list of offsets to a RangeList.
+ struct OffsetInfo {
+ using VecTy = SmallVector<int64_t>;
+ using const_iterator = VecTy::const_iterator;
+ VecTy Offsets;
+
+ const_iterator begin() const { return Offsets.begin(); }
+ const_iterator end() const { return Offsets.end(); }
+
+ bool operator==(const OffsetInfo &RHS) const {
+ return Offsets == RHS.Offsets;
+ }
+
+ bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
+
+ void insert(int64_t Offset) { Offsets.push_back(Offset); }
+ bool isUnassigned() const { return Offsets.empty(); }
+
+ bool isUnknown() const {
+ if (isUnassigned())
+ return false;
+ if (Offsets.size() == 1)
+ return Offsets.front() == AA::RangeTy::Unknown;
+ return false;
+ }
+
+ void setUnknown() {
+ Offsets.clear();
+ Offsets.push_back(AA::RangeTy::Unknown);
+ }
+
+ void addToAll(int64_t Inc) {
+ for (auto &Offset : Offsets)
+ Offset += Inc;
+ }
+
+ /// Copy offsets from \p R into the current list.
+ ///
+ /// After appending, the list is sorted and duplicates are removed, so the
+ /// merged list is strictly ascending and each offset appears only once.
+ void merge(const OffsetInfo &R) {
+ Offsets.append(R.Offsets);
+ // ensure elements are unique.
+ sort(Offsets.begin(), Offsets.end());
+ Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end());
+ }
+ };
+
/// Create an abstract attribute view for the position \p IRP.
static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
@@ -6120,6 +6170,9 @@ struct AAPointerInfo : public AbstractAttribute {
virtual const_bin_iterator begin() const = 0;
virtual const_bin_iterator end() const = 0;
virtual int64_t numOffsetBins() const = 0;
+ virtual void dumpState(raw_ostream &O) const = 0;
+ virtual const Access &getBinAccess(unsigned Index) const = 0;
+ virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0;
/// Call \p CB on all accesses that might interfere with \p Range and return
/// true if all such accesses were known and the callback returned true for
@@ -6149,6 +6202,9 @@ struct AAPointerInfo : public AbstractAttribute {
return (AA->getIdAddr() == &ID);
}
+ /// Map from a value to the offset information (OffsetInfo) computed for it.
+ DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+
/// Unique ID (due to the unique address)
static const char ID;
};
@@ -6285,12 +6341,35 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
return AbstractAttribute::isValidIRPositionForInit(A, IRP);
}
+ // A helper function to check if simplified values exist for the current
+ // instruction.
+ bool checkIfSimplifiedValuesExists(Attributor &A, Instruction *LocalInst) {
+
+ // If there are potential values that replace the accessed instruction, we
+ // should use those instead
+ bool UsedAssumedInformation = false;
+ SmallVector<AA::ValueAndContext> Values;
+ if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this,
+ Values, AA::AnyScope,
+ UsedAssumedInformation))
+
+ for (auto &ValAndContext : Values)
+ // don't modify instruction if any simplified value exists
+ if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst)
+ return true;
+
+ return false;
+ }
+
/// Create an abstract attribute view for the position \p IRP.
static AAAllocationInfo &createForPosition(const IRPosition &IRP,
Attributor &A);
virtual std::optional<TypeSize> getAllocatedSize() const = 0;
+ using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
+ virtual const NewOffsetsTy &getNewOffsets() const = 0;
+
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAllocationInfo"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 57579bbca00ee..6990885b58e0d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1002,54 +1002,9 @@ ChangeStatus AA::PointerInfo::State::addAccess(
namespace {
-/// A helper containing a list of offsets computed for a Use. Ideally this
-/// list should be strictly ascending, but we ensure that only when we
-/// actually translate the list of offsets to a RangeList.
-struct OffsetInfo {
- using VecTy = SmallVector<int64_t>;
- using const_iterator = VecTy::const_iterator;
- VecTy Offsets;
-
- const_iterator begin() const { return Offsets.begin(); }
- const_iterator end() const { return Offsets.end(); }
-
- bool operator==(const OffsetInfo &RHS) const {
- return Offsets == RHS.Offsets;
- }
-
- bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
-
- void insert(int64_t Offset) { Offsets.push_back(Offset); }
- bool isUnassigned() const { return Offsets.size() == 0; }
-
- bool isUnknown() const {
- if (isUnassigned())
- return false;
- if (Offsets.size() == 1)
- return Offsets.front() == AA::RangeTy::Unknown;
- return false;
- }
-
- void setUnknown() {
- Offsets.clear();
- Offsets.push_back(AA::RangeTy::Unknown);
- }
-
- void addToAll(int64_t Inc) {
- for (auto &Offset : Offsets) {
- Offset += Inc;
- }
- }
-
- /// Copy offsets from \p R into the current list.
- ///
- /// Ideally all lists should be strictly ascending, but we defer that to the
- /// actual use of the list. So we just blindly append here.
- void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
-};
-
#ifndef NDEBUG
-static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+static raw_ostream &operator<<(raw_ostream &OS,
+ const AAPointerInfo::OffsetInfo &OI) {
ListSeparator LS;
OS << "[";
for (auto Offset : OI) {
@@ -1084,6 +1039,15 @@ struct AAPointerInfoImpl
return State::numOffsetBins();
}
+ virtual const Access &getBinAccess(unsigned Index) const override {
+ return getAccess(Index);
+ }
+
+ virtual const DenseMap<Value *, OffsetInfo> &
+ getOffsetInfoMap() const override {
+ return OffsetInfoMap;
+ }
+
bool forallInterferingAccesses(
AA::RangeTy Range,
function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1430,7 +1394,7 @@ struct AAPointerInfoImpl
void trackPointerInfoStatistics(const IRPosition &IRP) const {}
/// Dump the state into \p O.
- void dumpState(raw_ostream &O) {
+ virtual void dumpState(raw_ostream &O) const override {
for (auto &It : OffsetBins) {
O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
<< "] : " << It.getSecond().size() << "\n";
@@ -1464,6 +1428,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
std::optional<Value *> Content, AccessKind Kind,
SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed,
Type &Ty) {
+
using namespace AA::PointerInfo;
auto Size = AA::RangeTy::Unknown;
const DataLayout &DL = A.getDataLayout();
@@ -1596,7 +1561,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
const DataLayout &DL = A.getDataLayout();
Value &AssociatedValue = getAssociatedValue();
- DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+ OffsetInfoMap.clear();
OffsetInfoMap[&AssociatedValue].insert(0);
auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) {
@@ -12663,6 +12628,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
return AssumedAllocatedSize;
}
+ const NewOffsetsTy &getNewOffsets() const override {
+ assert(isValidState() && "the AA is invalid");
+ return NewComputedOffsets;
+ }
+
std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
const DataLayout &DL) {
@@ -12703,46 +12673,58 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
const DataLayout &DL = A.getDataLayout();
const auto AllocationSize = findInitialAllocationSize(I, DL);
- // If allocation size is nullopt, we give up.
+ // If allocation size is nullopt, we give up
if (!AllocationSize)
return indicatePessimisticFixpoint();
- // For zero sized allocations, we give up.
+ // For zero sized allocations, we give up
// Since we can't reduce further
if (*AllocationSize == 0)
return indicatePessimisticFixpoint();
- int64_t BinSize = PI->numOffsetBins();
+ int64_t NumBins = PI->numOffsetBins();
- // TODO: implement for multiple bins
- if (BinSize > 1)
- return indicatePessimisticFixpoint();
-
- if (BinSize == 0) {
+ if (NumBins == 0) {
auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
if (!changeAllocationSize(NewAllocationSize))
return ChangeStatus::UNCHANGED;
return ChangeStatus::CHANGED;
}
- // TODO: refactor this to be part of multiple bin case
- const auto &It = PI->begin();
+ // For each access bin, compute its new start offset by packing the bins
+ // back-to-back from offset 0. Bins whose offset changes are recorded in
+ // the map of new offsets (NewComputedOffsets).
+ unsigned long PrevBinEndOffset = 0;
+ bool ChangedOffsets = false;
- // TODO: handle if Offset is not zero
- if (It->first.Offset != 0)
- return indicatePessimisticFixpoint();
+ for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+ It != PI->end(); It++) {
+ const AA::RangeTy &OldRange = It->getFirst();
- uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+ // If any range has an unknown offset or size, we should leave the
+ // allocation unmodified
+ if (OldRange.offsetOrSizeAreUnknown())
+ return indicatePessimisticFixpoint();
- if (SizeOfBin >= *AllocationSize)
- return indicatePessimisticFixpoint();
+ unsigned long NewStartOffset = PrevBinEndOffset;
+ unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
+ PrevBinEndOffset = NewEndOffset;
+
+ ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
+ OldRange.Size);
+ }
+ // Set the new size of the allocation: the end offset of the last bin
+ // (PrevBinEndOffset), multiplied by 8 to convert it to bits.
auto NewAllocationSize =
- std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+ std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
if (!changeAllocationSize(NewAllocationSize))
return ChangeStatus::UNCHANGED;
+ if (!ChangedOffsets)
+ return ChangeStatus::UNCHANGED;
+
return ChangeStatus::CHANGED;
}
@@ -12752,9 +12734,95 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
assert(isValidState() &&
"Manifest should only be called if the state is valid.");
- Instruction *I = getIRPosition().getCtxI();
+ bool Changed = false;
+ const IRPosition &IRP = getIRPosition();
+ Instruction *I = IRP.getCtxI();
- auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+ // check if simplified values exist
+ if (checkIfSimplifiedValuesExists(A, I))
+ return ChangeStatus::UNCHANGED;
+
+ if (getAllocatedSize() == HasNoAllocationSize)
+ return ChangeStatus::UNCHANGED;
+
+ const AAPointerInfo *PI =
+ A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+ if (!PI)
+ return ChangeStatus::UNCHANGED;
+
+ if (!PI->getState().isValidState())
+ return ChangeStatus::UNCHANGED;
+
+ // Store a map where each instruction maps to a set of bins accessed by that
+ // instruction
+ DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>>
+ AccessedInstructionsToBinsMap;
+
+ const auto &NewOffsetsMap = getNewOffsets();
+ const auto &OffsetInfoMap = PI->getOffsetInfoMap();
+
+ // Map Instructions to accessed bins.
+ for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+ It != PI->end(); It++) {
+
+ const auto &OldOffsetRange = It->getFirst();
+
+ // If the OldOffsetRange is not in the map, offsets for that bin did not
+ // change. We should just continue and skip changing the offsets in that
+ // case
+ if (!NewOffsetsMap.contains(OldOffsetRange))
+ continue;
+
+ const auto &NewOffsetRange = NewOffsetsMap.lookup(OldOffsetRange);
+
+ for (const auto AccIndex : It->getSecond()) {
+ const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+ Instruction *LocalInst = AccessInstruction.getLocalInst();
+
+ // TODO: handle the case of a simplified value.
+ // Right now we don't change the value and give up
+ // on modifying the size and offsets of the allocation;
+ // this may be sub-optimal.
+ if (checkIfSimplifiedValuesExists(A, LocalInst))
+ return ChangeStatus::UNCHANGED;
+
+ // Backtrack through the operands of the access instruction and check
+ // whether any instruction in the chain is associated with multiple
+ // offsets (bins).
+ std::vector<Instruction *> ReadyList;
+ DenseMap<Instruction *, bool> Visited;
+ ReadyList.push_back(LocalInst);
+ while (!ReadyList.empty()) {
+ Instruction *GetBack = ReadyList.back();
+ ReadyList.pop_back();
+ // If a non-call instruction has multiple offsets, give up; for calls
+ // it is okay to have multiple offsets.
+ // TODO: handle when one instruction has multiple bins
+ auto OffsetsVecArg = OffsetInfoMap.lookup(GetBack).Offsets;
+ if (GetBack->getOpcode() != Instruction::Call &&
+ OffsetsVecArg.size() > 1)
+ return ChangeStatus::UNCHANGED;
+
+ for (auto *It = GetBack->op_begin(); It != GetBack->op_end(); It++) {
+ if (Instruction *Ins = dyn_cast<Instruction>(*It)) {
+ if (!Visited[Ins])
+ ReadyList.push_back(Ins);
+ }
+ }
+ Visited[GetBack] = true;
+ }
+
+ DenseMap<AA::RangeTy, AA::RangeTy> &NewBinsForInstruction =
+ AccessedInstructionsToBinsMap.getOrInsertDefault(LocalInst);
+
+ NewBinsForInstruction.insert(
+ std::make_pair(OldOffsetRange, NewOffsetRange));
+ }
+ }
+
+ unsigned long FixedAllocatedSizeInBits =
+ getAllocatedSize()->getFixedValue();
unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
@@ -12762,21 +12830,25 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
// TODO: add case for malloc like calls
case Instruction::Alloca: {
- AllocaInst *AI = cast<AllocaInst>(I);
+ AllocaInst *OldAllocaInst = cast<AllocaInst>(I);
+ const DataLayout &DL = A.getDataLayout();
+ auto OriginalAllocationSize = OldAllocaInst->getAllocationSizeInBits(DL);
- Type *CharType = Type::getInt8Ty(I->getContext());
+ if (*OriginalAllocationSize <= FixedAllocatedSizeInBits)
+ return ChangeStatus::UNCHANGED;
- auto *NumBytesToValue =
- ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
+ Type *CharType = Type::getInt8Ty(I->getContext());
+ Type *CharArrayType = ArrayType::get(CharType, NumBytesToAllocate);
- BasicBlock::iterator insertPt = AI->getIterator();
- insertPt = std::next(insertPt);
+ BasicBlock::iterator InsertPt = OldAllocaInst->getIterator();
+ InsertPt = std::next(InsertPt);
AllocaInst *NewAllocaInst =
- new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
- AI->getAlign(), AI->getName(), insertPt);
+ new AllocaInst(CharArrayType, OldAllocaInst->getAddressSpace(),
+ OldAllocaInst->getName(), InsertPt);
- if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
- return ChangeStatus::CHANGED;
+ Changed |= A.changeAfterManifest(IRPosition::inst(*OldAllocaInst),
+ *NewAllocaInst);
+ A.deleteAfterManifest(*OldAllocaInst);
break;
}
@@ -12784,7 +12856,102 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
break;
}
- return ChangeStatus::UNCHANGED;
+ for (auto &It : AccessedInstructionsToBinsMap) {
+
+ Instruction *LocalInst = It.first;
+
+ // Get a hold of a map, mapping old to new bins
+ DenseMap<AA::RangeTy, AA::RangeTy> &OldToNewBins = It.second;
+ IntegerType *Int64TyInteger =
+ IntegerType::get(LocalInst->getContext(), 64);
+
+ switch (LocalInst->getOpcode()) {
+ case Instruction::Load: {
+ // The number of bytes to shift the load/store by
+ int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+ int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+ int64_t ShiftValue = OffsetNew - OffsetOld;
+ LoadInst *OldLoadInst = cast<LoadInst>(LocalInst);
+ Value *PointerOperand = OldLoadInst->getPointerOperand();
+ Type *PointeeTy = OldLoadInst->getPointerOperandType();
+
+ Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+ Value *GepToNewAddress = GetElementPtrInst::Create(
+ PointeeTy, PointerOperand, IndexList, "NewGep", OldLoadInst);
+
+ LoadInst *NewLoadInst = new LoadInst(
+ OldLoadInst->getType(), GepToNewAddress, OldLoadInst->getName(),
+ false, OldLoadInst->getAlign(), OldLoadInst);
+
+ Changed |=
+ A.changeAfterManifest(IRPosition::inst(*OldLoadInst), *NewLoadInst);
+
+ A.deleteAfterManifest(*OldLoadInst);
+ break;
+ }
+ case Instruction::Store: {
+ // The number of bytes to shift the load/store by
+ int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+ int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+ int64_t ShiftValue = OffsetNew - OffsetOld;
+ StoreInst *OldStoreInst = cast<StoreInst>(LocalInst);
+ Value *PointerOperand = OldStoreInst->getPointerOperand();
+ Type *PointeeTy = OldStoreInst->getPointerOperandType();
+
+ Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+ Value *GepToNewAddress = GetElementPtrInst::Create(
+ PointeeTy, PointerOperand, IndexList, "NewGep", OldStoreInst);
+
+ StoreInst *NewStoreInst =
+ new StoreInst(OldStoreInst->getValueOperand(), GepToNewAddress,
+ false, OldStoreInst->getAlign(), OldStoreInst);
+
+ Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
+ *NewStoreInst);
+
+ A.deleteAfterManifest(*OldStoreInst);
+ break;
+ }
+ case Instruction::Call: {
+ CallInst *Call = cast<CallInst>(LocalInst);
+ int ArgPosition = 0;
+ for (const auto &CallArg : Call->args()) {
+ if (OffsetInfoMap.contains(CallArg)) {
+
+ auto OffsetsVecArg = OffsetInfoMap.lookup(CallArg).Offsets;
+ int OldOffsetArg = OffsetsVecArg.front();
+
+ int NewOffsetArg = 0;
+ for (auto OldToNewRange : NewOffsetsMap) {
+ auto Old = OldToNewRange.getFirst();
+ if (Old.Offset == OldOffsetArg)
+ NewOffsetArg = OldToNewRange.getSecond().Offset;
+ }
+
+ // If the offsets did not change, no need to change the offsets.
+ if (NewOffsetArg == OldOffsetArg) {
+ ArgPosition++;
+ continue;
+ }
+
+ int64_t ShiftValue = NewOffsetArg - OldOffsetArg;
+ Value *IndexList[1] = {
+ ConstantInt::get(Int64TyInteger, ShiftValue)};
+ Type *ArgTy = CallArg->getType();
+ Instruction *ArgInstruction = cast<Instruction>(CallArg);
+ Value *GepToNewAddress = GetElementPtrInst::Create(
+ ArgTy, ArgInstruction, IndexList, "NewGep", Call);
+ Call->setArgOperand(ArgPosition, GepToNewAddress);
+ }
+ ArgPosition++;
+ }
+ } break;
+ }
+ }
+
+ if (!Changed)
+ return ChangeStatus::UNCHANGED;
+ return ChangeStatus::CHANGED;
}
/// See AbstractAttribute::getAsStr().
@@ -12798,8 +12965,28 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
")";
}
+ void dumpNewOffsetBins(raw_ostream &O) {
+
+ O << "Printing Map from [OldOffsetsRange] : [NewOffsetsRange] if the "
+ "offsets changed."
+ << "\n";
+ const auto &NewOffsetsMap = getNewOffsets();
+ for (auto It = NewOffsetsMap.begin(); It != NewOffsetsMap.end(); It++) {
+
+ const auto &OldRange = It->getFirst();
+ const auto &NewRange = It->getSecond();
+
+ O << "[" << OldRange.Offset << "," << OldRange.Offset + OldRange.Size
+ << "] : ";
+ O << "[" << NewRange.Offset << "," << NewRange.Offset + NewRange.Size
+ << "]";
+ O << "\n";
+ }
+ }
+
private:
std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+ NewOffsetsTy NewComputedOffsets;
// Maintain the computed allocation size of the object.
// Returns (bool) weather the size of the allocation was modified or not.
@@ -12811,6 +12998,21 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
}
return false;
}
+
+ // Maps an old byte range to its new Offset range in the new allocation.
+ // Returns (bool) whether the old byte range's offsets changed or not.
+ bool setNewOffsets(const AA::RangeTy &OldRange, int64_t OldOffset,
+ int64_t NewComputedOffset, int64_t Size) {
+
+ if (OldOffset == NewComputedOffset)
+ return false;
+
+ AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange);
+ NewRange.Offset = NewComputedOffset;
+ NewRange.Size = Size;
+
+ return true;
+ }
};
struct AAAllocationInfoFloating : AAAllocationInfoImpl {
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index 595cb37c6c93e..f0efa2a0ae3c1 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -106,10 +106,8 @@ define i32 @test_inf_promote_caller(i32 %arg) {
; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller
; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
; CGSCC-NEXT: bb:
-; CGSCC-NEXT: [[TMP:%.*]] = alloca [[S:%.*]], align 8
-; CGSCC-NEXT: [[TMP3:%.*]] = alloca i8, i32 0, align 8
-; CGSCC-NEXT: [[TMP1:%.*]] = alloca [[S]], align 8
-; CGSCC-NEXT: [[TMP14:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT: [[TMP3:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT: [[TMP14:%.*]] = alloca [0 x i8], align 1
; CGSCC-NEXT: ret i32 0
;
bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index 1c34fff8dd755..63dbc4da7da37 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -36,8 +36,7 @@ define internal i32 @caller(ptr %B) {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@caller
; CGSCC-SAME: () #[[ATTR0]] {
-; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT: [[A1:%.*]] = alloca [0 x i8], align 1
; CGSCC-NEXT: ret i32 0
;
%A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index b42647840f7cf..956fa0e88b028 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -53,8 +53,7 @@ define internal i32 @caller(ptr %B) {
; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
; CGSCC-LABEL: define {{[^@]+}}@caller
; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
-; CGSCC-NEXT: [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT: [[A1:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT: [[A1:%.*]] = alloca [0 x i8], align 1
; CGSCC-NEXT: [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
; CGSCC-NEXT: ret i32 0
;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index b588a399e5bd9..7b5e1276ac212 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -29,8 +29,7 @@ define internal i32 @foo(ptr) {
; CHECK-LABEL: define {{[^@]+}}@foo
; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RETVAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[RETVAL1:%.*]] = alloca i8, i32 0, align 4
+; CHECK-NEXT: [[RETVAL1:%.*]] = alloca [0 x i8], align 1
; CHECK-NEXT: call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
; CHECK-NEXT: unreachable
;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 490894d129023..af2d1ef1eabba 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,8 +34,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
define dso_local i32 @main() {
; TUNIT-LABEL: define {{[^@]+}}@main() {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca i8, i32 0, align 8
-; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT: [[ALLOC11:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT: [[ALLOC22:%.*]] = alloca [0 x i8], align 1
; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef)
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index f2d9ecd1d8fa4..ad5665d25f517 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
; CHECK-LABEL: define dso_local void @positive_alloca_1
; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
-; CHECK-NEXT: [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT: [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT: [[F2:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
; CHECK-NEXT: store i32 10, ptr [[F2]], align 4
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
@@ -164,37 +164,54 @@ entry:
;TODO: The allocation can be reduced here.
;However, the offsets (load/store etc.) Need to be changed.
; Function Attrs: noinline nounwind uwtable
-define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
-; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
-; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
; CHECK-NEXT: [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[F1:%.*]] = alloca [5 x i8], align 1
; CHECK-NEXT: store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; CHECK-NEXT: store i32 2, ptr [[RETVAL]], align 8
-; CHECK-NEXT: [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT: store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
-; CHECK-NEXT: [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
-; CHECK-NEXT: ret { i64, ptr } [[TMP0]]
+; CHECK-NEXT: [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; CHECK-NEXT: store i32 [[MUL]], ptr [[F1]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
+; CHECK-NEXT: store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; CHECK-NEXT: ret void
;
entry:
- %retval = alloca %struct.Foo, align 8
%val.addr = alloca i32, align 4
+ %f = alloca %struct.Foo, align 4
store i32 %val, ptr %val.addr, align 4
- %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
- store i32 2, ptr %field1, align 8
- %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
- store ptr %val.addr, ptr %field3, align 8
- %0 = load { i64, ptr }, ptr %retval, align 8
- ret { i64, ptr } %0
+ %0 = load i32, ptr %val.addr, align 4
+ %mul = mul nsw i32 2, %0
+ %a = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+ store i32 %mul, ptr %a, align 4
+ %a1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+ %1 = load i32, ptr %a1, align 4
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %1)
+ %c = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+ %conv1 = trunc i32 %1 to i8
+ store i8 %conv1, ptr %c, align 4
+ %c2 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+ %2 = load i8, ptr %c2, align 4
+ %conv = sext i8 %2 to i32
+ %call3 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %conv)
+ ret void
}
; Function Attrs: noinline nounwind uwtable
define dso_local void @positive_test_reduce_array_allocation_1() {
; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT: [[ARRAY1:%.*]] = alloca [4 x i8], align 1
; CHECK-NEXT: store i32 0, ptr [[ARRAY1]], align 8
; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
; CHECK-NEXT: [[TMP1:%.*]] = add i32 [[TMP0]], 2
@@ -275,37 +292,37 @@ entry:
define dso_local void @positive_test_reduce_array_allocation_2() #0 {
; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[ARRAY:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: [[ARRAY1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT: [[I2:%.*]] = alloca i32, align 4
; CHECK-NEXT: [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
-; CHECK-NEXT: store ptr [[CALL]], ptr [[ARRAY]], align 8
-; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: store ptr [[CALL]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT: store i32 0, ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND:%.*]]
; CHECK: for.cond:
-; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
; CHECK: for.body:
-; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
-; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I2]], align 4
+; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
; CHECK-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
; CHECK-NEXT: store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
; CHECK-NEXT: br label [[FOR_INC:%.*]]
; CHECK: for.inc:
-; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP3:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
-; CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT: store i32 [[ADD]], ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND]]
; CHECK: for.end:
-; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: store i32 0, ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND1:%.*]]
; CHECK: for.cond1:
-; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP4:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
; CHECK-NEXT: br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
; CHECK: for.body3:
-; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
; CHECK-NEXT: [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
@@ -313,28 +330,28 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 {
; CHECK-NEXT: store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
; CHECK-NEXT: br label [[FOR_INC7:%.*]]
; CHECK: for.inc7:
-; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP7:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
-; CHECK-NEXT: store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT: store i32 [[ADD8]], ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND1]]
; CHECK: for.end9:
-; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: store i32 0, ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND10:%.*]]
; CHECK: for.cond10:
-; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP8:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
; CHECK-NEXT: br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
; CHECK: for.body12:
-; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP9:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
; CHECK-NEXT: [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
; CHECK-NEXT: [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
; CHECK-NEXT: [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
; CHECK-NEXT: br label [[FOR_INC16:%.*]]
; CHECK: for.inc16:
-; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[TMP11:%.*]] = load i32, ptr [[I2]], align 4
; CHECK-NEXT: [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
-; CHECK-NEXT: store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT: store i32 [[ADD17]], ptr [[I2]], align 4
; CHECK-NEXT: br label [[FOR_COND10]]
; CHECK: for.end18:
; CHECK-NEXT: ret void
@@ -426,7 +443,7 @@ define dso_local void @pthread_test(){
; TUNIT-NEXT: [[ARG1:%.*]] = alloca i8, align 8
; TUNIT-NEXT: [[THREAD:%.*]] = alloca i64, align 8
; TUNIT-NEXT: [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
-; TUNIT-NEXT: [[F1:%.*]] = alloca i8, i32 4, align 4
+; TUNIT-NEXT: [[F1:%.*]] = alloca [4 x i8], align 1
; TUNIT-NEXT: [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
; TUNIT-NEXT: [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
; TUNIT-NEXT: [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
@@ -452,6 +469,46 @@ define dso_local void @pthread_test(){
ret void
}
+
+; Test: a single i8 store goes through a select between two constant-offset
+; GEPs (byte 3 and byte 1) of one [100 x i8] alloca. The expected output
+; below keeps the alloca, both GEPs, the select and the store as written.
+define dso_local void @select_case(i1 %cond){
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[A:%.*]] = alloca [100 x i8], align 1
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT: store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT: ret void
+;
+ %a = alloca [100 x i8], align 1
+ %b = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 3
+ %c = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 1
+ %sel = select i1 %cond, ptr %b, ptr %c
+ store i8 100, ptr %sel, align 1
+ ret void
+}
+
+; Test: an i8 store goes through a zero-index GEP of a select between two
+; GEPs (elements 3 and 1) of one [100 x i32] alloca. The expected output
+; below stores directly through the select (the zero-index GEP is gone) and
+; keeps the alloca and both element GEPs as written.
+define dso_local void @select_case_2(i1 %cond){
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case_2
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT: [[A:%.*]] = alloca [100 x i32], align 1
+; CHECK-NEXT: [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT: [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT: store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT: ret void
+;
+ %a = alloca [100 x i32], align 1
+ %b = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 3
+ %c = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 1
+ %sel = select i1 %cond, ptr %b, ptr %c
+ %sel2 = getelementptr inbounds i32, ptr %sel, i64 0
+ store i8 100, ptr %sel2, align 1
+ ret void
+}
+
define internal ptr @pthread_allocation_should_remain_same(ptr %arg) {
; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same
; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) {
@@ -499,6 +556,58 @@ entry:
ret void
}
+; Test: a loop (i = 0; i < 10; i += 2) loads a pointer from a 10-byte alloca
+; and stores i through that pointer at a variable i32 index. The expected
+; output below drops the %arr alloca and the whole loop body (for.body only
+; branches to for.inc) and marks the function memory(none).
+; NOTE(review): %arr is never stored to before `load ptr` reads it, so the
+; body stores through an undefined pointer - presumably that is why the body
+; folds away; confirm this is the intended input for a "multi offset" test.
+define dso_local void @alloca_array_multi_offset(){
+; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none)
+; CHECK-LABEL: define dso_local void @alloca_array_multi_offset
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT: entry:
+; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT: store i32 0, ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND:%.*]]
+; CHECK: for.cond:
+; CHECK-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
+; CHECK-NEXT: br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK: for.body:
+; CHECK-NEXT: br label [[FOR_INC:%.*]]
+; CHECK: for.inc:
+; CHECK-NEXT: [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP1]], 2
+; CHECK-NEXT: store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT: br label [[FOR_COND]]
+; CHECK: for.end:
+; CHECK-NEXT: ret void
+;
+entry:
+ %arr = alloca i8, i32 10, align 4
+ %i = alloca i32, align 4
+ store i32 0, ptr %i, align 4
+ br label %for.cond
+
+for.cond:
+ %0 = load i32, ptr %i, align 4
+ %cmp = icmp slt i32 %0, 10
+ br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+ %1 = load i32, ptr %i, align 4
+ %2 = load ptr, ptr %arr, align 8
+ %3 = load i32, ptr %i, align 4
+ %arrayidx = getelementptr inbounds i32, ptr %2, i32 %3
+ store i32 %1, ptr %arrayidx, align 4
+ br label %for.inc
+
+for.inc:
+ %4 = load i32, ptr %i, align 4
+ %add = add nsw i32 %4, 2
+ store i32 %add, ptr %i, align 4
+ br label %for.cond
+
+for.end:
+ ret void
+
+}
+
declare external void @external_call(ptr)
@@ -511,9 +620,11 @@ declare i32 @printf(ptr noundef, ...) #1
; Function Attrs: nounwind allocsize(0)
declare noalias ptr @malloc(i64 noundef) #1
;.
-; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
;.
; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
; TUNIT: [[META1]] = !{i64 2, i64 3, i1 false}
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
index 5bb795911ce40..0dc18cc8340d6 100644
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -36,8 +36,10 @@ define i8 @call_simplifiable_1() {
; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1
; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2
+; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2
; TUNIT-NEXT: ret i8 2
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -93,9 +95,13 @@ define i8 @call_simplifiable_2() {
; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2
; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [2 x i8], align 1
+; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2
+; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2
+; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
+; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I1]], i64 -2
+; TUNIT-NEXT: store i8 3, ptr [[NEWGEP3]], align 1
; TUNIT-NEXT: ret i8 4
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -125,8 +131,10 @@ define i8 @call_simplifiable_3() {
; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3
; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2
; TUNIT-NEXT: ret i8 2
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -198,13 +206,19 @@ define i8 @call_partially_simplifiable_1() {
; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
; TUNIT-SAME: () #[[ATTR0]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT: store i8 2, ptr [[I2]], align 2
-; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
-; TUNIT-NEXT: store i8 3, ptr [[I3]], align 1
-; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 4
-; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]]
+; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [3 x i8], align 1
+; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT: store i8 2, ptr [[NEWGEP4]], align 2
+; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
+; TUNIT-NEXT: [[NEWGEP5:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
+; TUNIT-NEXT: store i8 3, ptr [[NEWGEP5]], align 1
+; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 4
+; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I4]], i64 -3
+; TUNIT-NEXT: store i8 4, ptr [[NEWGEP3]], align 4
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
+; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP2]]) #[[ATTR3]]
; TUNIT-NEXT: ret i8 [[R]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 33ac066e43d09..846373e05be1a 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -502,8 +502,7 @@ define i32 @malloc_in_loop(i32 %arg) {
; CHECK-SAME: (i32 [[ARG:%.*]]) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca [0 x i8], align 1
; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 2a5b3e94291a2..70aace8100abd 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -452,8 +452,7 @@ define i32 @malloc_in_loop(i32 %arg) {
; CHECK-SAME: (i32 [[ARG:%.*]]) {
; CHECK-NEXT: bb:
; CHECK-NEXT: [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT: [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT: [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT: [[I11:%.*]] = alloca [0 x i8], align 1
; CHECK-NEXT: store i32 [[ARG]], ptr [[I]], align 4
; CHECK-NEXT: br label [[BB2:%.*]]
; CHECK: bb2:
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index f17bd5795a174..9eb79f8a46723 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,8 +2587,8 @@ define void @bad_gep() {
; TUNIT-LABEL: define {{[^@]+}}@bad_gep
; TUNIT-SAME: () #[[ATTR13]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
-; TUNIT-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT: [[N1:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT: [[M2:%.*]] = alloca [0 x i8], align 1
; TUNIT-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
; TUNIT-NEXT: br label [[EXIT:%.*]]
; TUNIT: while.body:
@@ -2605,8 +2605,8 @@ define void @bad_gep() {
; CGSCC-LABEL: define {{[^@]+}}@bad_gep
; CGSCC-SAME: () #[[ATTR6]] {
; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[N1:%.*]] = alloca i8, i32 0, align 1
-; CGSCC-NEXT: [[M2:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT: [[N1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT: [[M2:%.*]] = alloca [0 x i8], align 1
; CGSCC-NEXT: call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
; CGSCC-NEXT: br label [[EXIT:%.*]]
; CGSCC: while.body:
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index c28cb28379348..6357bf742bbf1 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,15 +10,14 @@ define hidden i64 @f1() align 2 {
; TUNIT-LABEL: define {{[^@]+}}@f1
; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT: [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
; TUNIT-NEXT: ret i64 undef
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
; CGSCC-LABEL: define {{[^@]+}}@f1
; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
; CGSCC-NEXT: entry:
-; CGSCC-NEXT: [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
-; CGSCC-NEXT: [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT: [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
; CGSCC-NEXT: [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
; CGSCC-NEXT: ret i64 [[CALL2]]
;
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index 6afdbdaee317c..c8fec4f1de7b4 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -10,10 +10,12 @@ define void @foo(ptr %ptr) {
; TUNIT-LABEL: define {{[^@]+}}@foo
; TUNIT-SAME: (ptr nocapture nofree readnone [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
; TUNIT-NEXT: entry:
-; TUNIT-NEXT: [[TMP0:%.*]] = alloca [[STRUCT_TEST_A:%.*]], align 8
+; TUNIT-NEXT: [[TMP0:%.*]] = alloca [8 x i8], align 1
; TUNIT-NEXT: br label [[CALL_BR:%.*]]
; TUNIT: call.br:
-; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT: [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A:%.*]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[TMP1]], i64 -16
+; TUNIT-NEXT: store ptr [[PTR]], ptr [[NEWGEP]], align 8
; TUNIT-NEXT: tail call void @bar(ptr noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
; TUNIT-NEXT: ret void
;
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 7a35b5c856097..f584f30dcf396 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2666,18 +2666,19 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
; TUNIT-SAME: (ptr nocapture nofree writeonly [[DST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]]) {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT: [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
-; TUNIT-NEXT: store ptr @global, ptr [[INNER]], align 8
+; TUNIT-NEXT: [[LOCAL1:%.*]] = alloca [8 x i8], align 1
+; TUNIT-NEXT: [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY:%.*]], ptr [[LOCAL1]], i64 0, i32 2
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INNER]], i64 -16
+; TUNIT-NEXT: store ptr @global, ptr [[NEWGEP]], align 8
; TUNIT-NEXT: store ptr [[DST]], ptr [[CALL_H2S]], align 8
; TUNIT-NEXT: [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
; TUNIT-NEXT: store ptr [[SRC]], ptr [[SRC2]], align 8
; TUNIT-NEXT: store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
-; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LOCAL]], align 8
-; TUNIT-NEXT: [[LOCAL_B8:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 8
-; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LOCAL_B8]], align 8
-; TUNIT-NEXT: [[LOCAL_B16:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 16
-; TUNIT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOCAL_B16]], align 8
+; TUNIT-NEXT: [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
+; TUNIT-NEXT: [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
+; TUNIT-NEXT: [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8
+; TUNIT-NEXT: [[LOCAL1_B16:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 16
+; TUNIT-NEXT: [[TMP2:%.*]] = load ptr, ptr [[LOCAL1_B16]], align 8
; TUNIT-NEXT: call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
; TUNIT-NEXT: ret void
;
>From b63df15add74dc9422462c7da66174f8a8dcec56 Mon Sep 17 00:00:00 2001
From: vidsinghal <vidush.sl at gmail.com>
Date: Sun, 5 Nov 2023 22:30:42 -0500
Subject: [PATCH 2/3] [Attributor] Fix Load/Store Offsets if multiple bins are
present for a pointer allocation.
---
.../Attributor/multiple-offsets-pointer-info.ll | 4 ++--
.../Attributor/value-simplify-pointer-info-vec.ll | 14 ++++++++++++--
.../Attributor/value-simplify-pointer-info.ll | 12 ++++++++----
3 files changed, 22 insertions(+), 8 deletions(-)
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index c6945d65acb29..ca8706b2bc331 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
%struct.T = type { i32, [10 x [20 x i8]] }
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 70793ec5c7f83..958a0590766b5 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -83,6 +83,12 @@ define i32 @vec_write_4() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@vec_write_4
; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT: [[A1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
+; CHECK-NEXT: store i32 3, ptr [[NEWGEP]], align 16
+; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
+; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
+; CHECK-NEXT: store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
; CHECK-NEXT: ret i32 13
;
%a = alloca <4 x i32>
@@ -101,8 +107,12 @@ define i32 @vec_write_5(i32 %arg) {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@vec_write_5
; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT: [[A:%.*]] = alloca <4 x i32>, align 16
-; CHECK-NEXT: store i32 [[ARG]], ptr [[A]], align 16
+; CHECK-NEXT: [[A1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
+; CHECK-NEXT: store i32 [[ARG]], ptr [[NEWGEP]], align 16
+; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
+; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
+; CHECK-NEXT: store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ARG]], 5
; CHECK-NEXT: [[ADD2:%.*]] = add i32 5, [[ADD1]]
; CHECK-NEXT: ret i32 [[ADD2]]
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index f584f30dcf396..82169f5031050 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -3018,8 +3018,10 @@ define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT: [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_FIXED]], i64 -12
+; CHECK-NEXT: store i8 100, ptr [[NEWGEP]], align 4
; CHECK-NEXT: ret i8 100
;
entry:
@@ -3037,8 +3039,10 @@ define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
; CHECK-NEXT: entry:
-; CHECK-NEXT: [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT: [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT: [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_LOADED]], i64 -12
+; CHECK-NEXT: store i8 100, ptr [[NEWGEP]], align 4
; CHECK-NEXT: ret i8 100
;
entry:
>From bd80cc408948e91ed9411d7fa2958efc7fb450de Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby967.llnl.gov>
Date: Tue, 11 Jun 2024 10:49:33 -0700
Subject: [PATCH 3/3] [Attributor]: Reorder bins of an allocation based on
access patterns in the code
---
llvm/include/llvm/Transforms/IPO/Attributor.h | 106 +++++++
.../Transforms/IPO/AttributorAttributes.cpp | 290 +++++++++++++++++-
llvm/test/Transforms/Attributor/allocator.ll | 68 +++-
.../Attributor/call-simplify-pointer-info.ll | 25 +-
.../multiple-offsets-pointer-info.ll | 4 +-
.../value-simplify-pointer-info-vec.ll | 12 +-
6 files changed, 462 insertions(+), 43 deletions(-)
diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index afa96fbfb99fb..6e82c684c6323 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -98,16 +98,20 @@
#define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DirectedGraph.h"
#include "llvm/ADT/GraphTraits.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SetVector.h"
#include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/iterator.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/CFG.h"
#include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/DDG.h"
#include "llvm/Analysis/LazyCallGraph.h"
#include "llvm/Analysis/LoopInfo.h"
#include "llvm/Analysis/MemoryLocation.h"
@@ -140,6 +144,7 @@
#include <limits>
#include <map>
#include <optional>
+#include <tuple>
namespace llvm {
@@ -6369,6 +6374,107 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
virtual const NewOffsetsTy &getNewOffsets() const = 0;
+ struct BinAccessGraphEdge;
+ struct BinAccessGraphNode;
+
+ /// Pairs a bin access graph node with an integer priority (presumably for
+ /// use as a priority-queue element). A larger Priority value compares as
+ /// "greater".
+ struct PriorityQueueGraphNode {
+   PriorityQueueGraphNode(int Priority, BinAccessGraphNode *Node)
+       : Priority(Priority), Node(Node) {}
+
+ public:
+   int Priority;
+   BinAccessGraphNode *Node;
+
+   int getPriority() const { return Priority; }
+   BinAccessGraphNode *getNode() const { return Node; }
+
+   // The comparisons take their right-hand side by pointer; that signature is
+   // kept as-is so existing callers continue to compile. \p A must not be
+   // null.
+
+   /// True if this entry's priority is strictly lower than that of \p A.
+   bool operator<(const PriorityQueueGraphNode *A) const {
+     return Priority < A->Priority;
+   }
+
+   /// True if both entries carry the same priority (nodes are not compared).
+   bool operator==(const PriorityQueueGraphNode *A) const {
+     return Priority == A->Priority;
+   }
+
+   /// True if this entry's priority is strictly higher than that of \p A.
+   /// This used to have the same body as operator<, so "greater than"
+   /// answered "less than".
+   bool operator>(const PriorityQueueGraphNode *A) const {
+     return Priority > A->Priority;
+   }
+ };
+
+ /// An edge type for the field access graph. On top of the target node that
+ /// is passed to the DGEdge base, an edge records an integer weight and,
+ /// once setSrcNode() has been called, the node it originates from.
+ struct BinAccessGraphEdge
+     : public DGEdge<BinAccessGraphNode, BinAccessGraphEdge> {
+   BinAccessGraphEdge(BinAccessGraphNode &TargetNode, int EdgeWeight)
+       : DGEdge<BinAccessGraphNode, BinAccessGraphEdge>(TargetNode),
+         EdgeWeight(EdgeWeight) {}
+
+ public:
+   /// Source node of this edge. The constructor does not receive it, so it
+   /// starts out null instead of indeterminate; set it with setSrcNode().
+   BinAccessGraphNode *SrcNode = nullptr;
+   int EdgeWeight;
+   int getEdgeWeight() const { return EdgeWeight; }
+   void setSrcNode(BinAccessGraphNode *SourceNode) { SrcNode = SourceNode; }
+   /// Returns the source node, or nullptr if setSrcNode() was never called.
+   BinAccessGraphNode *getSourceNode() const { return SrcNode; }
+ };
+
+ /// Node of the field access graph; every node wraps exactly one
+ /// AA::RangeTy bin.
+ struct BinAccessGraphNode
+     : public DGNode<BinAccessGraphNode, BinAccessGraphEdge> {
+   /// The bin this node stands for; immutable after construction.
+   const AA::RangeTy BinRange;
+
+   /// Build a node for \p Node on top of a default-constructed DGNode base.
+   BinAccessGraphNode(const AA::RangeTy &Node) : BinRange(Node) {}
+
+   /// Build a node for \p Node and forward \p Edge to the DGNode base.
+   BinAccessGraphNode(const AA::RangeTy &Node, BinAccessGraphEdge &Edge)
+       : DGNode<BinAccessGraphNode, BinAccessGraphEdge>(Edge),
+         BinRange(Node) {}
+
+   /// Read-only access to the wrapped bin.
+   const AA::RangeTy &getBinRange() const { return BinRange; }
+ };
+
+ /// Directed graph over BinAccessGraphNode / BinAccessGraphEdge. Nodes are
+ /// looked up by their bin range with a linear scan of the node list. All
+ /// queries are const so they can be used on the const reference returned
+ /// by getBinAccessGraph().
+ struct FieldAccessGraph
+     : public DirectedGraph<BinAccessGraphNode, BinAccessGraphEdge> {
+   FieldAccessGraph() {}
+
+ public:
+   /// Return the first node whose bin range equals \p Range, or nullptr if
+   /// no node matches.
+   BinAccessGraphNode *getNode(const AA::RangeTy &Range) const {
+     for (BinAccessGraphNode *N : Nodes)
+       if (N->getBinRange() == Range)
+         return N;
+     return nullptr;
+   }
+
+   /// Return true if some node has bin range \p Range.
+   bool findNode(const AA::RangeTy &Range) const {
+     return getNode(Range) != nullptr;
+   }
+
+   /// Return true if the first node with bin range \p HeadNode has an edge to
+   /// \p TargetNode. Returns false when no node matches \p HeadNode or when
+   /// \p TargetNode is null (getNode() may hand callers a null pointer).
+   bool edgeExists(const AA::RangeTy &HeadNode,
+                   BinAccessGraphNode *TargetNode) const {
+     if (!TargetNode)
+       return false;
+     const BinAccessGraphNode *Head = getNode(HeadNode);
+     return Head && Head->hasEdgeTo(*TargetNode);
+   }
+
+   /// Append to \p Roots every node for which findIncomingEdgesToNode()
+   /// reports no incoming edges. \p Roots must be empty on entry.
+   void getAllRoots(std::vector<BinAccessGraphNode *> &Roots) const {
+     assert(Roots.empty() && "Root set should be empty at the beginning!");
+     for (BinAccessGraphNode *N : Nodes) {
+       // The edge list is only needed to satisfy the query's signature; it
+       // must start empty for every node.
+       SmallVector<BinAccessGraphEdge *> EL;
+       if (!findIncomingEdgesToNode(*N, EL))
+         Roots.push_back(N);
+     }
+   }
+ };
+
+ virtual const FieldAccessGraph &getBinAccessGraph() const = 0;
/// See AbstractAttribute::getName()
const std::string getName() const override { return "AAAllocationInfo"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 6990885b58e0d..c8b22bb71f96b 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,12 +11,13 @@
//
//===----------------------------------------------------------------------===//
-#include "llvm/Transforms/IPO/Attributor.h"
-
#include "llvm/ADT/APInt.h"
#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/DenseMapInfo.h"
+#include "llvm/ADT/DirectedGraph.h"
#include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
#include "llvm/ADT/SCCIterator.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SetOperations.h"
@@ -28,6 +29,7 @@
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumeBundleQueries.h"
#include "llvm/Analysis/AssumptionCache.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
#include "llvm/Analysis/CaptureTracking.h"
#include "llvm/Analysis/CycleAnalysis.h"
#include "llvm/Analysis/InstructionSimplify.h"
@@ -57,9 +59,12 @@
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/MDBuilder.h"
#include "llvm/IR/NoFolder.h"
+#include "llvm/IR/PassManager.h"
#include "llvm/IR/Value.h"
#include "llvm/IR/ValueHandle.h"
#include "llvm/Support/Alignment.h"
+#include "llvm/Support/BlockFrequency.h"
+#include "llvm/Support/BranchProbability.h"
#include "llvm/Support/Casting.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/ErrorHandling.h"
@@ -67,14 +72,17 @@
#include "llvm/Support/MathExtras.h"
#include "llvm/Support/TypeSize.h"
#include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/Attributor.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/Transforms/Utils/ValueMapper.h"
#include <cassert>
+#include <climits>
#include <numeric>
#include <optional>
#include <string>
+#include <utility>
using namespace llvm;
@@ -12623,6 +12631,28 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
: AAAllocationInfo(IRP, A) {}
+ /// See AbstractAttribute::initialize(...).
+ void initialize(Attributor &A) override {
+   // Give every instruction of the enclosing module a running number, in
+   // the order the module is iterated (functions, then basic blocks, then
+   // instructions). Later steps compare these numbers to decide which of
+   // two accesses comes "first".
+   // NOTE(review): this is layout order, not execution order; the original
+   // comment justified it with "we are operating over a strict language".
+   const Module *M = getIRPosition().getCtxI()->getModule();
+   int NextPosition = 0;
+   for (const Function &Fn : *M)
+     for (const BasicBlock &Block : Fn)
+       for (const Instruction &Inst : Block)
+         // try_emplace never overwrites an existing entry.
+         InstructionPositionMap.try_emplace(&Inst, NextPosition++);
+ }
+
+
std::optional<TypeSize> getAllocatedSize() const override {
assert(isValidState() && "the AA is invalid");
return AssumedAllocatedSize;
@@ -12633,6 +12663,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
return NewComputedOffsets;
}
+ /// Return the bin access graph held by this attribute. Asserts that the
+ /// attribute is still in a valid state.
+ const FieldAccessGraph &getBinAccessGraph() const override {
+ assert(isValidState() && "the AA is invalid");
+ return BinAccessGraph;
+ }
+
std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
const DataLayout &DL) {
@@ -12691,31 +12726,237 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
return ChangeStatus::CHANGED;
}
- // For each access bin
- // Compute its new start Offset and store the results in a new map
- // (NewOffsetBins)
- unsigned long PrevBinEndOffset = 0;
- bool ChangedOffsets = false;
+ // Maintain a Map from a byte Range to the earliest instruction that
+ // accesses that byte range.
+ // For now the analysis is simple as we only care about the first access to
+ // that byte range.
+ DenseMap<AA::RangeTy, Instruction *> MapByteRangeToEarliestAccess;
+ auto &OffsetInfoMap = PI->getOffsetInfoMap();
for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
It != PI->end(); It++) {
- const AA::RangeTy &OldRange = It->getFirst();
+
+ const AA::RangeTy &Range = It->getFirst();
+ auto AccessedIndices = It->getSecond();
+ SmallVector<Instruction *> ReadyList;
+ for (auto AccIndex : AccessedIndices) {
+ const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+ Instruction *LocalInst = AccessInstruction.getLocalInst();
+ ReadyList.push_back(LocalInst);
+ }
+ // The local instruction should be backtracked to
+ // the operands that cause the actual access.
+ // It should be backtracked to the earliest load/store so as
+ // to optimize for the access patterns.
+ Instruction *EarlisetLoadStore = ReadyList.back();
+ while (!ReadyList.empty()) {
+ Instruction *Back = ReadyList.back();
+ ReadyList.pop_back();
+
+ // Make sure to populate the ready list beforehand.
+ for (auto *It = Back->op_begin(); It != Back->op_end(); It++) {
+ if (Instruction *ToInstruction = dyn_cast<Instruction>(It)) {
+ if (ToInstruction == I) {
+ ReadyList.clear();
+ break;
+ }
+ ReadyList.push_back(ToInstruction);
+ }
+ }
+
+ // Check if it is a load/store with an access to the same byte
+ // range.
+ if (Back->getOpcode() != Instruction::Load ||
+ Back->getOpcode() != Instruction::Store)
+ continue;
+
+ // No information about which byte range the instruction accesses
+ // exists.
+ if (!OffsetInfoMap.contains(Back))
+ continue;
+
+ const auto &OffsetInfo = OffsetInfoMap.lookup(Back);
+ const auto &OffsetsVec = OffsetInfo.Offsets;
+
+ // TODO: implement for multiple offsets per instruction.
+ // Right now we give up if an instruction accesses multiple byte ranges.
+ if (Back->getOpcode() != Instruction::Call && OffsetsVec.size() > 1)
+ return indicatePessimisticFixpoint();
+
+ // Load/store has the same offset as the Instruction we are
+ // backtracking.
+ // Update earliest load/store
+ if (Range.Offset == OffsetsVec.front())
+ EarlisetLoadStore = Back;
+ }
+
+ MapByteRangeToEarliestAccess.insert(
+ std::make_pair(Range, EarlisetLoadStore));
+ }
+
+ const Module *M = I->getModule();
+ const Function *F = I->getFunction();
+
+ for (auto &Key : MapByteRangeToEarliestAccess) {
+
+ const AA::RangeTy &OldRange = Key.getFirst();
// If any range has an unknown offset or size, we should leave the
// allocation unmodified
if (OldRange.offsetOrSizeAreUnknown())
return indicatePessimisticFixpoint();
+ // TODO: should unassigned ranges be completely removed?
+ if (OldRange.isUnassigned())
+ return indicatePessimisticFixpoint();
+
+ // Node for the current range
+ BinAccessGraphNode *FromNode;
+ if (!BinAccessGraph.findNode(OldRange)) {
+ FromNode = new BinAccessGraphNode(OldRange);
+ BinAccessGraph.addNode(*FromNode);
+ } else
+ FromNode = BinAccessGraph.getNode(OldRange);
+
+ // Find the earliest instruction that caused the access from the set
+ Instruction *Earliest = Key.getSecond();
+ int EarlistInstructionPos = InstructionPositionMap.lookup(Earliest);
+
+ int ClosestNextPosition = INT_MAX;
+ Instruction *ClosestNextInstruction;
+ AA::RangeTy CorrespondingBin = OldRange;
+ for (auto &Val : MapByteRangeToEarliestAccess) {
+ auto &Bin = Val.getFirst();
+ auto *Ins = Val.getSecond();
+
+ if (Bin.offsetOrSizeAreUnknown())
+ return indicatePessimisticFixpoint();
+
+ int InsPosition = InstructionPositionMap.lookup(Ins);
+ if (InsPosition > EarlistInstructionPos &&
+ InsPosition < ClosestNextPosition) {
+ ClosestNextPosition = InsPosition;
+ ClosestNextInstruction = Ins;
+ CorrespondingBin = Bin;
+ }
+ }
+
+ // No self loops are allowed in the graph
+ if (CorrespondingBin == OldRange)
+ continue;
+
+ // TODO: Fix when Profiling metadata is nullptr.
+ bool ProfilingEnabled =
+ M->getProfileSummary(false) == nullptr ? false : true;
+ int EdgeWeight = 0;
+ if (ProfilingEnabled) {
+ const BlockFrequencyInfo *BFI =
+ A.getInfoCache()
+ .getAnalysisResultForFunction<BlockFrequencyAnalysis>(*F);
+ const BranchProbabilityInfo *BPI = BFI->getBPI();
+ BlockFrequency BlockFrequency =
+ BFI->getBlockFreq(ClosestNextInstruction->getParent());
+ BranchProbability BP = BPI->getEdgeProbability(
+ Earliest->getParent(), ClosestNextInstruction->getParent());
+ // Assign edge weight as likelihood * frequency.
+ EdgeWeight = (BP.getNumerator() / BP.getDenominator()) *
+ BlockFrequency.getFrequency();
+ }
+
+ // Nodes are already present
+ if (BinAccessGraph.findNode(OldRange) &&
+ BinAccessGraph.findNode(CorrespondingBin)) {
+
+ // Check if the edge does not exist.
+ BinAccessGraphNode *ToNode = BinAccessGraph.getNode(CorrespondingBin);
+ if (!FromNode->hasEdgeTo(*ToNode)) {
+ BinAccessGraphEdge *AccessedEdge =
+ new BinAccessGraphEdge(*ToNode, EdgeWeight);
+ AccessedEdge->setSrcNode(FromNode);
+ BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+ }
+
+ continue;
+ }
+
+ if (BinAccessGraph.findNode(CorrespondingBin)) {
+ BinAccessGraphNode *ToNode = BinAccessGraph.getNode(CorrespondingBin);
+ BinAccessGraphEdge *AccessedEdge =
+ new BinAccessGraphEdge(*ToNode, EdgeWeight);
+ AccessedEdge->setSrcNode(FromNode);
+ BinAccessGraph.addNode(*FromNode);
+ BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+ continue;
+ }
+
+ BinAccessGraphNode *ToNode = new BinAccessGraphNode(CorrespondingBin);
+ BinAccessGraphEdge *AccessedEdge =
+ new BinAccessGraphEdge(*ToNode, EdgeWeight);
+ FromNode->addEdge(*AccessedEdge);
+ AccessedEdge->setSrcNode(FromNode);
+ BinAccessGraph.addNode(*ToNode);
+ BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+ }
+
+ // Traverse the graph in a greedy manner.
+ // Map old bins to new bins.
+ // Compute the size of the allocation as we traverse the graph.
+
+ // get all the root nodes
+ std::vector<BinAccessGraphNode *> RootsVector;
+ // A priority queue to establish greedy order
+ PriorityQueue<PriorityQueueGraphNode *> PriorityQueue;
+ // Map to mark which nodes have been visited so far
+ DenseMap<BinAccessGraphNode *, bool> VisitedMap;
+ BinAccessGraph.getAllRoots(RootsVector);
+
+ for (auto *Root : RootsVector) {
+ PriorityQueueGraphNode *Node = new PriorityQueueGraphNode(0, Root);
+ PriorityQueue.push(Node);
+ }
+
+ unsigned long PrevBinEndOffset = 0;
+ bool ChangedOffsets = false;
+
+ while (!PriorityQueue.empty()) {
+
+ // Pop an element from the priority queue
+ PriorityQueueGraphNode *Node = PriorityQueue.top();
+ PriorityQueue.pop();
+
+ // visit this current graph node
+ BinAccessGraphNode *GraphNode = Node->getNode();
+ VisitedMap[GraphNode] = true;
+
+ // For each access bin
+ // Compute its new start Offset and store the results in a new map
+ // (NewOffsetBins)
+
+ auto &NodeRange = GraphNode->getBinRange();
unsigned long NewStartOffset = PrevBinEndOffset;
- unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
+ unsigned long NewEndOffset = NewStartOffset + NodeRange.Size;
PrevBinEndOffset = NewEndOffset;
- ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
- OldRange.Size);
+ // set the new offsets in the map.
+ ChangedOffsets |= setNewOffsets(NodeRange, NodeRange.Offset,
+ NewStartOffset, NodeRange.Size);
+
+ auto &Edges = GraphNode->getEdges();
+
+ // push all successors onto the priority queue.
+ for (auto &Edge : Edges) {
+ int EdgeWeight = Edge->getEdgeWeight();
+ BinAccessGraphNode &TargetNode = Edge->getTargetNode();
+ if (!VisitedMap[&TargetNode]) {
+ PriorityQueueGraphNode *Node =
+ new PriorityQueueGraphNode(EdgeWeight, &TargetNode);
+ PriorityQueue.push(Node);
+ }
+ }
}
- // Set the new size of the allocation, the new size of the Allocation should
- // be the size of PrevBinEndOffset * 8, in bits
+ // Set the new size of the allocation, the new size of the Allocation
+ // should be the size of PrevBinEndOffset * 8, in bits
auto NewAllocationSize =
std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
@@ -12984,9 +13225,32 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
}
}
+ /// Print the bin access graph to \p O. For every node this writes its bin
+ /// range, followed by either all incoming edges (source range, target
+ /// range and edge weight) or a note that the node has none.
+ void dumpBinAccessGraph(raw_ostream &O) {
+   for (const BinAccessGraphNode *N : BinAccessGraph) {
+     const AA::RangeTy &Bin = N->getBinRange();
+     O << "Node: " << Bin << "\n";
+
+     SmallVector<BinAccessGraphEdge *> Incoming;
+     if (!BinAccessGraph.findIncomingEdgesToNode(*N, Incoming)) {
+       O << "No incoming edges found for node " << Bin << "\n";
+     } else {
+       O << "Print all incoming edges to node " << Bin << "\n";
+       for (BinAccessGraphEdge *E : Incoming) {
+         O << E->getSourceNode()->getBinRange();
+         O << " ---> " << E->getTargetNode().getBinRange()
+           << " , Edge weight: " << E->getEdgeWeight() << "\n";
+       }
+     }
+     // A blank line separates consecutive nodes.
+     O << "\n";
+   }
+ }
+
private:
std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
NewOffsetsTy NewComputedOffsets;
+ FieldAccessGraph BinAccessGraph;
+ DenseMap<const Instruction *, int> InstructionPositionMap;
// Maintain the computed allocation size of the object.
// Returns (bool) weather the size of the allocation was modified or not.
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index ad5665d25f517..76e41a0821f64 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -177,11 +177,11 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
; CHECK-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
; CHECK-NEXT: [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F1]], i32 0, i32 2
; CHECK-NEXT: [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
-; CHECK-NEXT: store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
+; CHECK-NEXT: store i8 [[CONV1]], ptr [[NEWGEP]], align 4
; CHECK-NEXT: [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F1]], i32 0, i32 2
-; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4
-; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4
+; CHECK-NEXT: [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
; CHECK-NEXT: [[CONV:%.*]] = sext i8 [[TMP1]] to i32
; CHECK-NEXT: [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
; CHECK-NEXT: ret void
@@ -240,6 +240,66 @@ entry:
}
+; Only three i32 elements of the [10 x i32] alloca are stored to, in the
+; order index 9, index 5, index 3. The checks expect the alloca to shrink to
+; [12 x i8] and every store to go through an extra GEP that rebases it
+; (-36 for index 9, -16 for index 5, -4 for index 3).
+define dso_local i32 @simple_reordering_alloca(ptr nocapture %val) {
+; TUNIT-LABEL: define dso_local noundef i32 @simple_reordering_alloca
+; TUNIT-SAME: (ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[VAL:%.*]]) {
+; TUNIT-NEXT: entry:
+; TUNIT-NEXT: [[ARRAY1:%.*]] = alloca [12 x i8], align 1
+; TUNIT-NEXT: [[VALUE:%.*]] = load i32, ptr [[VAL]], align 4
+; TUNIT-NEXT: [[INDEX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 9
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INDEX1]], i64 -36
+; TUNIT-NEXT: store i32 100, ptr [[NEWGEP]], align 4
+; TUNIT-NEXT: [[INDEX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 5
+; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[INDEX2]], i64 -16
+; TUNIT-NEXT: store i32 [[VALUE]], ptr [[NEWGEP3]], align 4
+; TUNIT-NEXT: [[INDEX3:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 3
+; TUNIT-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4
+; TUNIT-NEXT: store i32 [[VALUE]], ptr [[NEWGEP4]], align 4
+; TUNIT-NEXT: [[VALMUL:%.*]] = mul i32 [[VALUE]], [[VALUE]]
+; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4
+; TUNIT-NEXT: store i32 [[VALMUL]], ptr [[NEWGEP2]], align 4
+; TUNIT-NEXT: [[RETVAL:%.*]] = add i32 [[VALMUL]], [[VALUE]]
+; TUNIT-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[RETVAL]])
+; TUNIT-NEXT: ret i32 [[RETVAL]]
+;
+; CGSCC-LABEL: define dso_local noundef i32 @simple_reordering_alloca
+; CGSCC-SAME: (ptr nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[VAL:%.*]]) {
+; CGSCC-NEXT: entry:
+; CGSCC-NEXT: [[ARRAY1:%.*]] = alloca [12 x i8], align 1
+; CGSCC-NEXT: [[VALUE:%.*]] = load i32, ptr [[VAL]], align 4
+; CGSCC-NEXT: [[INDEX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 9
+; CGSCC-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[INDEX1]], i64 -36
+; CGSCC-NEXT: store i32 100, ptr [[NEWGEP2]], align 4
+; CGSCC-NEXT: [[INDEX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 5
+; CGSCC-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[INDEX2]], i64 -16
+; CGSCC-NEXT: store i32 [[VALUE]], ptr [[NEWGEP4]], align 4
+; CGSCC-NEXT: [[INDEX3:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 3
+; CGSCC-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4
+; CGSCC-NEXT: store i32 [[VALUE]], ptr [[NEWGEP]], align 4
+; CGSCC-NEXT: [[VALMUL:%.*]] = mul i32 [[VALUE]], [[VALUE]]
+; CGSCC-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4
+; CGSCC-NEXT: store i32 [[VALMUL]], ptr [[NEWGEP3]], align 4
+; CGSCC-NEXT: [[RETVAL:%.*]] = add i32 [[VALMUL]], [[VALUE]]
+; CGSCC-NEXT: [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[RETVAL]])
+; CGSCC-NEXT: ret i32 [[RETVAL]]
+;
+entry:
+ %array = alloca [10 x i32]
+ %value = load i32, ptr %val
+ %index1 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 9
+ store i32 100, ptr %index1
+ %index2 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 5
+ store i32 %value, ptr %index2
+ %index3 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 3
+ store i32 %value, ptr %index3
+ %valmul = mul i32 %value, %value
+ store i32 %valmul, ptr %index3
+ %retval = add i32 %valmul, %value
+ %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %retval)
+ ret i32 %retval
+}
+
+
; Function Attrs: noinline nounwind uwtable
; TODO: Here the array size is not known at compile time.
; However the array does not escape and is only partially used.
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
index 0dc18cc8340d6..d1d2d112355a3 100644
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -97,11 +97,11 @@ define i8 @call_simplifiable_2() {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [2 x i8], align 1
; TUNIT-NEXT: [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
-; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2
-; TUNIT-NEXT: store i8 2, ptr [[NEWGEP]], align 2
+; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I0]], i64 -1
+; TUNIT-NEXT: store i8 2, ptr [[NEWGEP2]], align 2
; TUNIT-NEXT: [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
-; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I1]], i64 -2
-; TUNIT-NEXT: store i8 3, ptr [[NEWGEP3]], align 1
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I1]], i64 -3
+; TUNIT-NEXT: store i8 3, ptr [[NEWGEP]], align 1
; TUNIT-NEXT: ret i8 4
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -208,17 +208,16 @@ define i8 @call_partially_simplifiable_1() {
; TUNIT-NEXT: entry:
; TUNIT-NEXT: [[BYTES1:%.*]] = alloca [3 x i8], align 1
; TUNIT-NEXT: [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
-; TUNIT-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
-; TUNIT-NEXT: store i8 2, ptr [[NEWGEP4]], align 2
+; TUNIT-NEXT: store i8 2, ptr [[I2]], align 2
; TUNIT-NEXT: [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
-; TUNIT-NEXT: [[NEWGEP5:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
-; TUNIT-NEXT: store i8 3, ptr [[NEWGEP5]], align 1
+; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I3]], i64 -3
+; TUNIT-NEXT: store i8 3, ptr [[NEWGEP]], align 1
; TUNIT-NEXT: [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 4
-; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I4]], i64 -3
-; TUNIT-NEXT: store i8 4, ptr [[NEWGEP3]], align 4
-; TUNIT-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
-; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
-; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP2]]) #[[ATTR3]]
+; TUNIT-NEXT: [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[I4]], i64 -3
+; TUNIT-NEXT: store i8 4, ptr [[NEWGEP4]], align 4
+; TUNIT-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT: [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I3]], i64 -3
+; TUNIT-NEXT: [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP2]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP3]]) #[[ATTR3]]
; TUNIT-NEXT: ret i8 [[R]]
;
; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index ca8706b2bc331..c6945d65acb29 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -1,6 +1,6 @@
; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
%struct.T = type { i32, [10 x [20 x i8]] }
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 958a0590766b5..2e702f4576660 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -83,12 +83,6 @@ define i32 @vec_write_4() {
; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
; CHECK-LABEL: define {{[^@]+}}@vec_write_4
; CHECK-SAME: () #[[ATTR0]] {
-; CHECK-NEXT: [[A1:%.*]] = alloca [12 x i8], align 1
-; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
-; CHECK-NEXT: store i32 3, ptr [[NEWGEP]], align 16
-; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
-; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
-; CHECK-NEXT: store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
; CHECK-NEXT: ret i32 13
;
%a = alloca <4 x i32>
@@ -108,11 +102,7 @@ define i32 @vec_write_5(i32 %arg) {
; CHECK-LABEL: define {{[^@]+}}@vec_write_5
; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
; CHECK-NEXT: [[A1:%.*]] = alloca [12 x i8], align 1
-; CHECK-NEXT: [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
-; CHECK-NEXT: store i32 [[ARG]], ptr [[NEWGEP]], align 16
-; CHECK-NEXT: [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
-; CHECK-NEXT: [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
-; CHECK-NEXT: store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
+; CHECK-NEXT: store i32 [[ARG]], ptr [[A1]], align 16
; CHECK-NEXT: [[ADD1:%.*]] = add i32 [[ARG]], 5
; CHECK-NEXT: [[ADD2:%.*]] = add i32 5, [[ADD1]]
; CHECK-NEXT: ret i32 [[ADD2]]
More information about the llvm-commits
mailing list