[llvm] [Attributor] Reordering bins on an allocation based on access patterns (PR #95319)

Vidush Singhal via llvm-commits llvm-commits at lists.llvm.org
Wed Jun 12 15:05:57 PDT 2024


https://github.com/vidsinghal updated https://github.com/llvm/llvm-project/pull/95319

From e1e27af86611206ff745d044b54f0269a53cce44 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby964.llnl.gov>
Date: Tue, 4 Jun 2024 16:51:06 -0700
Subject: [PATCH 1/4] [Attributor]: AAPointerInfo, allow more than one offset
 for a pass-through user

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 9a5732dca5b79..471cf4190cbd9 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1607,11 +1607,20 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
     //
     // The RHS is a reference that may be invalidated by an insertion caused by
     // the LHS. So we ensure that the side-effect of the LHS happens first.
+
+    if (!OffsetInfoMap.contains(Usr)) {
+      auto &UsrOI = OffsetInfoMap[Usr];
+      auto &PtrOI = OffsetInfoMap[CurPtr];
+      UsrOI = PtrOI;
+      Follow = true;
+      return true;
+    }
+
     auto &UsrOI = OffsetInfoMap[Usr];
     auto &PtrOI = OffsetInfoMap[CurPtr];
     assert(!PtrOI.isUnassigned() &&
            "Cannot pass through if the input Ptr was not visited!");
-    UsrOI = PtrOI;
+    UsrOI.merge(PtrOI);
     Follow = true;
     return true;
   };

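The functional change in this first patch: when a pass-through user has
already been visited, its offsets are now merged with the incoming pointer's
offsets instead of being overwritten, so a value reachable through several
operands (for example both arms of a select) keeps every offset. Patch 3
below extends merge() to sort and deduplicate the combined list. A minimal
standalone sketch of that merge, with OffsetList as a hypothetical stand-in
for AAPointerInfo::OffsetInfo:

#include <algorithm>
#include <cstdint>
#include <vector>

struct OffsetList {
  std::vector<int64_t> Offsets;

  // Append the other list, then sort and deduplicate, mirroring the
  // sort/unique added to OffsetInfo::merge in patch 3 below.
  void merge(const OffsetList &R) {
    Offsets.insert(Offsets.end(), R.Offsets.begin(), R.Offsets.end());
    std::sort(Offsets.begin(), Offsets.end());
    Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end());
  }
};

Merging {0, 4} with {4, 12} yields {0, 4, 12} rather than a list containing 4
twice.
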
From 417bccf1ddc60808757ae7c3054c23b80df3e211 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby964.llnl.gov>
Date: Wed, 5 Jun 2024 08:12:21 -0700
Subject: [PATCH 2/4] [NFC]: Run clang-format on Attributor.h and
 AttributorAttributes.cpp

---
 llvm/include/llvm/Transforms/IPO/Attributor.h   | 12 +++---------
 .../lib/Transforms/IPO/AttributorAttributes.cpp | 17 +++++++++--------
 2 files changed, 12 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index d3d3a9c43c84f..6ba04dbc31db3 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5143,9 +5143,7 @@ struct DenormalFPMathState : public AbstractState {
       return Mode != Other.Mode || ModeF32 != Other.ModeF32;
     }
 
-    bool isValid() const {
-      return Mode.isValid() && ModeF32.isValid();
-    }
+    bool isValid() const { return Mode.isValid() && ModeF32.isValid(); }
 
     static DenormalMode::DenormalModeKind
     unionDenormalKind(DenormalMode::DenormalModeKind Callee,
@@ -5185,9 +5183,7 @@ struct DenormalFPMathState : public AbstractState {
   // state.
   DenormalState getAssumed() const { return Known; }
 
-  bool isValidState() const override {
-    return Known.isValid();
-  }
+  bool isValidState() const override { return Known.isValid(); }
 
   /// Return true if there are no dynamic components to the denormal mode worth
   /// specializing.
@@ -5198,9 +5194,7 @@ struct DenormalFPMathState : public AbstractState {
            Known.ModeF32.Output != DenormalMode::Dynamic;
   }
 
-  bool isAtFixpoint() const override {
-    return IsAtFixedpoint;
-  }
+  bool isAtFixpoint() const override { return IsAtFixedpoint; }
 
   ChangeStatus indicateFixpoint() {
     bool Changed = !IsAtFixedpoint;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 471cf4190cbd9..9b47e2379fcfa 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -419,7 +419,8 @@ struct AAReturnedFromReturnedValues : public BaseType {
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
     StateType S(StateType::getBestState(this->getState()));
-    clampReturnedValueStates<AAType, StateType, IRAttributeKind, RecurseForSelectAndPHI>(
+    clampReturnedValueStates<AAType, StateType, IRAttributeKind,
+                             RecurseForSelectAndPHI>(
         A, *this, S,
         PropagateCallBaseContext ? this->getCallBaseContext() : nullptr);
     // TODO: If we know we visited all returned values, thus no are assumed
@@ -6982,10 +6983,9 @@ ChangeStatus AAHeapToStackFunction::updateImpl(Attributor &A) {
     if (AI.LibraryFunctionId != LibFunc___kmpc_alloc_shared) {
       Instruction *CtxI = isa<InvokeInst>(AI.CB) ? AI.CB : AI.CB->getNextNode();
       if (!Explorer || !Explorer->findInContextOf(UniqueFree, CtxI)) {
-        LLVM_DEBUG(
-            dbgs()
-            << "[H2S] unique free call might not be executed with the allocation "
-            << *UniqueFree << "\n");
+        LLVM_DEBUG(dbgs() << "[H2S] unique free call might not be executed "
+                             "with the allocation "
+                          << *UniqueFree << "\n");
         return false;
       }
     }
@@ -10415,11 +10415,12 @@ struct AANoFPClassFloating : public AANoFPClassImpl {
 
 struct AANoFPClassReturned final
     : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
-                                   AANoFPClassImpl::StateType, false, Attribute::None, false> {
+                                   AANoFPClassImpl::StateType, false,
+                                   Attribute::None, false> {
   AANoFPClassReturned(const IRPosition &IRP, Attributor &A)
       : AAReturnedFromReturnedValues<AANoFPClass, AANoFPClassImpl,
-                                     AANoFPClassImpl::StateType, false, Attribute::None, false>(
-            IRP, A) {}
+                                     AANoFPClassImpl::StateType, false,
+                                     Attribute::None, false>(IRP, A) {}
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {

From add303536bb6f65d759063e2b377bc693c48217b Mon Sep 17 00:00:00 2001
From: vidsinghal <vidush.sl at gmail.com>
Date: Sun, 5 Nov 2023 22:30:42 -0500
Subject: [PATCH 3/4] [Attributor] Fix Load/Store Offsets if multiple bins are
 present for a pointer allocation.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  82 ++++
 .../Transforms/IPO/AttributorAttributes.cpp   | 362 ++++++++++++++----
 .../Attributor/ArgumentPromotion/crash.ll     |   6 +-
 .../live_called_from_dead.ll                  |   4 +-
 .../live_called_from_dead_2.ll                |   4 +-
 .../nonzero-address-spaces.ll                 |   3 +-
 .../Attributor/IPConstantProp/pthreads.ll     |   4 +-
 llvm/test/Transforms/Attributor/allocator.ll  | 195 ++++++++--
 .../Attributor/call-simplify-pointer-info.ll  |  42 +-
 .../Transforms/Attributor/heap_to_stack.ll    |   3 +-
 .../Attributor/heap_to_stack_gpu.ll           |   3 +-
 llvm/test/Transforms/Attributor/liveness.ll   |   8 +-
 .../multiple-offsets-pointer-info.ll          |   4 +-
 llvm/test/Transforms/Attributor/nodelete.ll   |   5 +-
 .../Transforms/Attributor/pointer-info.ll     |   6 +-
 .../value-simplify-pointer-info-vec.ll        |  14 +-
 .../Attributor/value-simplify-pointer-info.ll |  29 +-
 17 files changed, 596 insertions(+), 178 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 6ba04dbc31db3..85f2f76e833fc 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6106,6 +6106,56 @@ struct AAPointerInfo : public AbstractAttribute {
     Type *Ty;
   };
 
+  /// A helper containing a list of offsets computed for a Use. Ideally this
+  /// list should be strictly ascending, but we ensure that only when we
+  /// actually translate the list of offsets to a RangeList.
+  struct OffsetInfo {
+    using VecTy = SmallVector<int64_t>;
+    using const_iterator = VecTy::const_iterator;
+    VecTy Offsets;
+
+    const_iterator begin() const { return Offsets.begin(); }
+    const_iterator end() const { return Offsets.end(); }
+
+    bool operator==(const OffsetInfo &RHS) const {
+      return Offsets == RHS.Offsets;
+    }
+
+    bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
+
+    void insert(int64_t Offset) { Offsets.push_back(Offset); }
+    bool isUnassigned() const { return Offsets.empty(); }
+
+    bool isUnknown() const {
+      if (isUnassigned())
+        return false;
+      if (Offsets.size() == 1)
+        return Offsets.front() == AA::RangeTy::Unknown;
+      return false;
+    }
+
+    void setUnknown() {
+      Offsets.clear();
+      Offsets.push_back(AA::RangeTy::Unknown);
+    }
+
+    void addToAll(int64_t Inc) {
+      for (auto &Offset : Offsets)
+        Offset += Inc;
+    }
+
+    /// Copy offsets from \p R into the current list.
+    ///
+    /// Ideally all lists should be strictly ascending, but we defer that to the
+    /// actual use of the list. So we just blindly append here.
+    void merge(const OffsetInfo &R) {
+      Offsets.append(R.Offsets);
+      // Ensure the elements are unique.
+      sort(Offsets.begin(), Offsets.end());
+      Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end());
+    }
+  };
+
   /// Create an abstract attribute view for the position \p IRP.
   static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
 
@@ -6120,6 +6170,9 @@ struct AAPointerInfo : public AbstractAttribute {
   virtual const_bin_iterator begin() const = 0;
   virtual const_bin_iterator end() const = 0;
   virtual int64_t numOffsetBins() const = 0;
+  virtual void dumpState(raw_ostream &O) const = 0;
+  virtual const Access &getBinAccess(unsigned Index) const = 0;
+  virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0;
 
   /// Call \p CB on all accesses that might interfere with \p Range and return
   /// true if all such accesses were known and the callback returned true for
@@ -6149,6 +6202,9 @@ struct AAPointerInfo : public AbstractAttribute {
     return (AA->getIdAddr() == &ID);
   }
 
+  /// Offsets Info Map
+  DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+
   /// Unique ID (due to the unique address)
   static const char ID;
 };
@@ -6285,12 +6341,38 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
     return AbstractAttribute::isValidIRPositionForInit(A, IRP);
   }
 
+  // A helper function to check if simplified values exist for the current
+  // instruction.
+  bool simplifiedValuesExists(Attributor &A, Instruction *LocalInst) {
+
+    // If there are potential values that replace the accessed instruction, we
+    // should use those instead.
+    bool UsedAssumedInformation = false;
+    SmallVector<AA::ValueAndContext> Values;
+    if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this,
+                                     Values, AA::AnyScope,
+                                     UsedAssumedInformation)) {
+
+      for (auto &ValAndContext : Values) {
+        // Don't modify the instruction if any simplified value exists.
+        if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst) {
+          return true;
+        }
+      }
+    }
+
+    return false;
+  }
+
   /// Create an abstract attribute view for the position \p IRP.
   static AAAllocationInfo &createForPosition(const IRPosition &IRP,
                                              Attributor &A);
 
   virtual std::optional<TypeSize> getAllocatedSize() const = 0;
 
+  using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
+  virtual const NewOffsetsTy &getNewOffsets() const = 0;
+
   /// See AbstractAttribute::getName()
   const std::string getName() const override { return "AAAllocationInfo"; }
 
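The NewOffsetsTy map introduced here records, for every access bin of an
allocation, the range that bin moves to once all bins are packed back to
back. A standalone sketch of that packing, where Range and packBins are
hypothetical stand-ins for AA::RangeTy and the loop in updateImpl below:

#include <cstdint>
#include <utility>
#include <vector>

struct Range { int64_t Offset; int64_t Size; }; // stand-in for AA::RangeTy

// Walk the bins in order and give each one a new start equal to the end of
// the previous new bin; the final cursor is the reduced allocation size.
std::vector<std::pair<Range, Range>> packBins(const std::vector<Range> &Bins,
                                              int64_t &NewSizeBytes) {
  std::vector<std::pair<Range, Range>> OldToNew;
  int64_t Cursor = 0;
  for (const Range &Old : Bins) {
    OldToNew.push_back({Old, Range{Cursor, Old.Size}});
    Cursor += Old.Size;
  }
  NewSizeBytes = Cursor;
  return OldToNew;
}

For bins [4,8) and [12,13) this yields [0,4) and [4,5), so a 13-byte
allocation shrinks to 5 bytes; the pass then reports the new size as
PrevBinEndOffset * 8 bits.
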
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 9b47e2379fcfa..e462cbded0f30 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1002,54 +1002,9 @@ ChangeStatus AA::PointerInfo::State::addAccess(
 
 namespace {
 
-/// A helper containing a list of offsets computed for a Use. Ideally this
-/// list should be strictly ascending, but we ensure that only when we
-/// actually translate the list of offsets to a RangeList.
-struct OffsetInfo {
-  using VecTy = SmallVector<int64_t>;
-  using const_iterator = VecTy::const_iterator;
-  VecTy Offsets;
-
-  const_iterator begin() const { return Offsets.begin(); }
-  const_iterator end() const { return Offsets.end(); }
-
-  bool operator==(const OffsetInfo &RHS) const {
-    return Offsets == RHS.Offsets;
-  }
-
-  bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
-
-  void insert(int64_t Offset) { Offsets.push_back(Offset); }
-  bool isUnassigned() const { return Offsets.size() == 0; }
-
-  bool isUnknown() const {
-    if (isUnassigned())
-      return false;
-    if (Offsets.size() == 1)
-      return Offsets.front() == AA::RangeTy::Unknown;
-    return false;
-  }
-
-  void setUnknown() {
-    Offsets.clear();
-    Offsets.push_back(AA::RangeTy::Unknown);
-  }
-
-  void addToAll(int64_t Inc) {
-    for (auto &Offset : Offsets) {
-      Offset += Inc;
-    }
-  }
-
-  /// Copy offsets from \p R into the current list.
-  ///
-  /// Ideally all lists should be strictly ascending, but we defer that to the
-  /// actual use of the list. So we just blindly append here.
-  void merge(const OffsetInfo &R) { Offsets.append(R.Offsets); }
-};
-
 #ifndef NDEBUG
-static raw_ostream &operator<<(raw_ostream &OS, const OffsetInfo &OI) {
+static raw_ostream &operator<<(raw_ostream &OS,
+                               const AAPointerInfo::OffsetInfo &OI) {
   ListSeparator LS;
   OS << "[";
   for (auto Offset : OI) {
@@ -1084,6 +1039,15 @@ struct AAPointerInfoImpl
     return State::numOffsetBins();
   }
 
+  virtual const Access &getBinAccess(unsigned Index) const override {
+    return getAccess(Index);
+  }
+
+  virtual const DenseMap<Value *, OffsetInfo> &
+  getOffsetInfoMap() const override {
+    return OffsetInfoMap;
+  }
+
   bool forallInterferingAccesses(
       AA::RangeTy Range,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1430,7 +1394,7 @@ struct AAPointerInfoImpl
   void trackPointerInfoStatistics(const IRPosition &IRP) const {}
 
   /// Dump the state into \p O.
-  void dumpState(raw_ostream &O) {
+  virtual void dumpState(raw_ostream &O) const override {
     for (auto &It : OffsetBins) {
       O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
         << "] : " << It.getSecond().size() << "\n";
@@ -1464,6 +1428,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
                     std::optional<Value *> Content, AccessKind Kind,
                     SmallVectorImpl<int64_t> &Offsets, ChangeStatus &Changed,
                     Type &Ty) {
+
     using namespace AA::PointerInfo;
     auto Size = AA::RangeTy::Unknown;
     const DataLayout &DL = A.getDataLayout();
@@ -1596,7 +1561,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
   const DataLayout &DL = A.getDataLayout();
   Value &AssociatedValue = getAssociatedValue();
 
-  DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+  OffsetInfoMap.clear();
   OffsetInfoMap[&AssociatedValue].insert(0);
 
   auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) {
@@ -12668,6 +12633,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     return AssumedAllocatedSize;
   }
 
+  const NewOffsetsTy &getNewOffsets() const override {
+    assert(isValidState() && "the AA is invalid");
+    return NewComputedOffsets;
+  }
+
   std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
                                                     const DataLayout &DL) {
 
@@ -12708,46 +12678,59 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     const DataLayout &DL = A.getDataLayout();
     const auto AllocationSize = findInitialAllocationSize(I, DL);
 
-    // If allocation size is nullopt, we give up.
+    // If allocation size is nullopt, we give up
     if (!AllocationSize)
       return indicatePessimisticFixpoint();
 
-    // For zero sized allocations, we give up.
+    // For zero sized allocations, we give up
     // Since we can't reduce further
     if (*AllocationSize == 0)
       return indicatePessimisticFixpoint();
 
-    int64_t BinSize = PI->numOffsetBins();
-
-    // TODO: implement for multiple bins
-    if (BinSize > 1)
-      return indicatePessimisticFixpoint();
+    int64_t NumBins = PI->numOffsetBins();
 
-    if (BinSize == 0) {
+    if (NumBins == 0) {
       auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
       if (!changeAllocationSize(NewAllocationSize))
         return ChangeStatus::UNCHANGED;
       return ChangeStatus::CHANGED;
     }
 
-    // TODO: refactor this to be part of multiple bin case
-    const auto &It = PI->begin();
+    // For each access bin, compute its new start offset and store the
+    // results in a new map (NewComputedOffsets), keyed by the bin's old
+    // range.
+    unsigned long PrevBinEndOffset = 0;
+    bool ChangedOffsets = false;
 
-    // TODO: handle if Offset is not zero
-    if (It->first.Offset != 0)
-      return indicatePessimisticFixpoint();
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+      const AA::RangeTy &OldRange = It->getFirst();
 
-    uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+      // If any range has an unknown offset or size, we should leave the
+      // allocation unmodified
+      if (OldRange.offsetOrSizeAreUnknown()) {
+        return indicatePessimisticFixpoint();
+      }
 
-    if (SizeOfBin >= *AllocationSize)
-      return indicatePessimisticFixpoint();
+      unsigned long NewStartOffset = PrevBinEndOffset;
+      unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
+      PrevBinEndOffset = NewEndOffset;
+
+      ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
+                                      OldRange.Size);
+    }
 
+    // Set the new size of the allocation. The new size, in bits, is
+    // PrevBinEndOffset * 8.
     auto NewAllocationSize =
-        std::optional<TypeSize>(TypeSize(SizeOfBin * 8, false));
+        std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
 
     if (!changeAllocationSize(NewAllocationSize))
       return ChangeStatus::UNCHANGED;
 
+    if (!ChangedOffsets)
+      return ChangeStatus::UNCHANGED;
+
     return ChangeStatus::CHANGED;
   }
 
@@ -12757,9 +12740,94 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     assert(isValidState() &&
            "Manifest should only be called if the state is valid.");
 
-    Instruction *I = getIRPosition().getCtxI();
+    bool Changed = false;
+    const IRPosition &IRP = getIRPosition();
+    Instruction *I = IRP.getCtxI();
+
+    // Check whether simplified values exist for this instruction.
+    if (simplifiedValuesExists(A, I))
+      return ChangeStatus::UNCHANGED;
+
+    if (getAllocatedSize() == HasNoAllocationSize)
+      return ChangeStatus::UNCHANGED;
+
+    const AAPointerInfo *PI =
+        A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
+
+    if (!PI)
+      return ChangeStatus::UNCHANGED;
+
+    if (!PI->getState().isValidState())
+      return ChangeStatus::UNCHANGED;
+
+    // Build a map where each instruction maps to the set of bins accessed
+    // by that instruction.
+    DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>>
+        AccessedInstructionsToBinsMap;
+
+    const auto &NewOffsetsMap = getNewOffsets();
+    const auto &OffsetInfoMap = PI->getOffsetInfoMap();
 
-    auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+    // Map Instructions to accessed bins.
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+
+      const auto &OldOffsetRange = It->getFirst();
+
+      // If the OldOffsetRange is not in the map, the offsets for that bin
+      // did not change, so skip rewriting the accesses for that bin and
+      // continue.
+      if (!NewOffsetsMap.contains(OldOffsetRange))
+        continue;
+
+      const auto &NewOffsetRange = NewOffsetsMap.lookup(OldOffsetRange);
+
+      for (const auto AccIndex : It->getSecond()) {
+        const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+        Instruction *LocalInst = AccessInstruction.getLocalInst();
+
+        // TODO: handle the case of a simplified value.
+        // Right now we do not change the value, but this might cause bugs.
+        if (simplifiedValuesExists(A, LocalInst))
+          continue;
+
+        // Backtrack and check if there are multiple bins for instructions
+        // in the chain of operands leading to this access.
+        std::vector<Instruction *> ReadyList;
+        DenseMap<Instruction *, bool> Visited;
+        ReadyList.push_back(LocalInst);
+        while (!ReadyList.empty()) {
+          Instruction *GetBack = ReadyList.back();
+          ReadyList.pop_back();
+          // Check if the instruction has multiple bins; if so, give up.
+          // For calls it is okay to have multiple bins.
+          // TODO: handle the case where one instruction has multiple bins.
+          auto OffsetsVecArg = OffsetInfoMap.lookup(GetBack).Offsets;
+          if (GetBack->getOpcode() != Instruction::Call &&
+              OffsetsVecArg.size() > 1)
+            return ChangeStatus::UNCHANGED;
+
+          for (auto *It = GetBack->op_begin(); It != GetBack->op_end(); It++) {
+            if (Instruction *Ins = dyn_cast<Instruction>(*It)) {
+              if (!Visited[Ins]) {
+                ReadyList.push_back(Ins);
+              }
+            }
+          }
+          Visited[GetBack] = true;
+        }
+
+        DenseMap<AA::RangeTy, AA::RangeTy> &NewBinsForInstruction =
+            AccessedInstructionsToBinsMap.getOrInsertDefault(LocalInst);
+
+        NewBinsForInstruction.insert(
+            std::make_pair(OldOffsetRange, NewOffsetRange));
+      }
+    }
+
+    unsigned long FixedAllocatedSizeInBits =
+        getAllocatedSize()->getFixedValue();
 
     unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
 
@@ -12767,21 +12835,26 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     // TODO: add case for malloc like calls
     case Instruction::Alloca: {
 
-      AllocaInst *AI = cast<AllocaInst>(I);
+      AllocaInst *OldAllocaInst = cast<AllocaInst>(I);
+      const DataLayout &DL = A.getDataLayout();
+      auto OriginalAllocationSize = OldAllocaInst->getAllocationSizeInBits(DL);
 
-      Type *CharType = Type::getInt8Ty(I->getContext());
+      if (*OriginalAllocationSize <= FixedAllocatedSizeInBits) {
+        return ChangeStatus::UNCHANGED;
+      }
 
-      auto *NumBytesToValue =
-          ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
+      Type *CharType = Type::getInt8Ty(I->getContext());
+      Type *CharArrayType = ArrayType::get(CharType, NumBytesToAllocate);
 
-      BasicBlock::iterator insertPt = AI->getIterator();
-      insertPt = std::next(insertPt);
+      BasicBlock::iterator InsertPt = OldAllocaInst->getIterator();
+      InsertPt = std::next(InsertPt);
       AllocaInst *NewAllocaInst =
-          new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
-                         AI->getAlign(), AI->getName(), insertPt);
+          new AllocaInst(CharArrayType, OldAllocaInst->getAddressSpace(),
+                         OldAllocaInst->getName(), InsertPt);
 
-      if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
-        return ChangeStatus::CHANGED;
+      Changed |= A.changeAfterManifest(IRPosition::inst(*OldAllocaInst),
+                                       *NewAllocaInst);
+      A.deleteAfterManifest(*OldAllocaInst);
 
       break;
     }
@@ -12789,7 +12862,103 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       break;
     }
 
-    return ChangeStatus::UNCHANGED;
+    for (auto &It : AccessedInstructionsToBinsMap) {
+
+      Instruction *LocalInst = It.first;
+
+      // Get a hold of a map, mapping old to new bins
+      DenseMap<AA::RangeTy, AA::RangeTy> &OldToNewBins = It.second;
+      IntegerType *Int64TyInteger =
+          IntegerType::get(LocalInst->getContext(), 64);
+
+      switch (LocalInst->getOpcode()) {
+      case Instruction::Load: {
+        // The number of bytes to shift the load/store by
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        LoadInst *OldLoadInst = cast<LoadInst>(LocalInst);
+        Value *PointerOperand = OldLoadInst->getPointerOperand();
+        Type *PointeeTy = OldLoadInst->getPointerOperandType();
+
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldLoadInst);
+
+        LoadInst *NewLoadInst = new LoadInst(
+            OldLoadInst->getType(), GepToNewAddress, OldLoadInst->getName(),
+            false, OldLoadInst->getAlign(), OldLoadInst);
+
+        Changed |=
+            A.changeAfterManifest(IRPosition::inst(*OldLoadInst), *NewLoadInst);
+
+        A.deleteAfterManifest(*OldLoadInst);
+        break;
+      }
+      case Instruction::Store: {
+        // The number of bytes to shift the load/store by
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        StoreInst *OldStoreInst = cast<StoreInst>(LocalInst);
+        Value *PointerOperand = OldStoreInst->getPointerOperand();
+        Type *PointeeTy = OldStoreInst->getPointerOperandType();
+
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldStoreInst);
+
+        StoreInst *NewStoreInst =
+            new StoreInst(OldStoreInst->getValueOperand(), GepToNewAddress,
+                          false, OldStoreInst->getAlign(), OldStoreInst);
+
+        Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
+                                         *NewStoreInst);
+
+        A.deleteAfterManifest(*OldStoreInst);
+        break;
+      }
+      case Instruction::Call: {
+        CallInst *Call = cast<CallInst>(LocalInst);
+        int ArgPosition = 0;
+        for (const auto &CallArg : Call->args()) {
+          if (OffsetInfoMap.contains(CallArg)) {
+
+            auto OffsetsVecArg = OffsetInfoMap.lookup(CallArg).Offsets;
+            int OldOffsetArg = OffsetsVecArg.front();
+
+            int NewOffsetArg = 0;
+            for (auto OldToNewRange : NewOffsetsMap) {
+              auto Old = OldToNewRange.getFirst();
+              if (Old.Offset == OldOffsetArg) {
+                NewOffsetArg = OldToNewRange.getSecond().Offset;
+              }
+            }
+
+            // If the offset did not change, there is nothing to rewrite.
+            if (NewOffsetArg == OldOffsetArg) {
+              ArgPosition++;
+              continue;
+            }
+
+            int64_t ShiftValue = NewOffsetArg - OldOffsetArg;
+            Value *IndexList[1] = {
+                ConstantInt::get(Int64TyInteger, ShiftValue)};
+            Type *ArgTy = CallArg->getType();
+            Instruction *ArgInstruction = cast<Instruction>(CallArg);
+            Value *GepToNewAddress = GetElementPtrInst::Create(
+                ArgTy, ArgInstruction, IndexList, "NewGep", Call);
+            Call->setArgOperand(ArgPosition, GepToNewAddress);
+          }
+          ArgPosition++;
+        }
+      } break;
+      }
+    }
+
+    if (!Changed)
+      return ChangeStatus::UNCHANGED;
+    return ChangeStatus::CHANGED;
   }
 
   /// See AbstractAttribute::getAsStr().
@@ -12803,8 +12972,28 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
            ")";
   }
 
+  void dumpNewOffsetBins(raw_ostream &O) {
+
+    O << "Printing Map from [OldOffsetsRange] : [NewOffsetsRange] if the "
+         "offsets changed."
+      << "\n";
+    const auto &NewOffsetsMap = getNewOffsets();
+    for (auto It = NewOffsetsMap.begin(); It != NewOffsetsMap.end(); It++) {
+
+      const auto &OldRange = It->getFirst();
+      const auto &NewRange = It->getSecond();
+
+      O << "[" << OldRange.Offset << "," << OldRange.Offset + OldRange.Size
+        << "] : ";
+      O << "[" << NewRange.Offset << "," << NewRange.Offset + NewRange.Size
+        << "]";
+      O << "\n";
+    }
+  }
+
 private:
   std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+  NewOffsetsTy NewComputedOffsets;
 
   // Maintain the computed allocation size of the object.
  // Returns (bool) whether the size of the allocation was modified or not.
@@ -12816,6 +13005,21 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     }
     return false;
   }
+
+  // Maps an old byte range to its new offset range in the new allocation.
+  // Returns (bool) whether the old byte range's offsets changed or not.
+  bool setNewOffsets(const AA::RangeTy &OldRange, int64_t OldOffset,
+                     int64_t NewComputedOffset, int64_t Size) {
+
+    if (OldOffset == NewComputedOffset)
+      return false;
+
+    AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange);
+    NewRange.Offset = NewComputedOffset;
+    NewRange.Size = Size;
+
+    return true;
+  }
 };
 
 struct AAAllocationInfoFloating : AAAllocationInfoImpl {
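
To summarize the manifest logic above: the old alloca is replaced with a
packed [N x i8] array, and every affected load, store, and call argument is
rerouted through a new GEP whose index is ShiftValue = NewOffset - OldOffset
for the bin it accesses. A toy model of that arithmetic, with all concrete
values hypothetical:

#include <cassert>
#include <cstdint>

int main() {
  // A bin that lived at byte 12 of the old allocation moves to byte 4 of
  // the packed allocation, so accesses to it are shifted by -8.
  int64_t OldOffset = 12, NewOffset = 4;
  int64_t ShiftValue = NewOffset - OldOffset;
  assert(ShiftValue == -8);

  // An access that previously addressed OldBase + 12 now addresses
  // (OldBase + 12) + ShiftValue == NewBase + 4 once the base is replaced.
  assert(OldOffset + ShiftValue == NewOffset);
  return 0;
}
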
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index 595cb37c6c93e..f0efa2a0ae3c1 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -106,10 +106,8 @@ define i32 @test_inf_promote_caller(i32 %arg) {
 ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller
 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CGSCC-NEXT:  bb:
-; CGSCC-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]], align 8
-; CGSCC-NEXT:    [[TMP3:%.*]] = alloca i8, i32 0, align 8
-; CGSCC-NEXT:    [[TMP1:%.*]] = alloca [[S]], align 8
-; CGSCC-NEXT:    [[TMP14:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT:    [[TMP3:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[TMP14:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index 2df81d6cb1832..63dbc4da7da37 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -36,9 +36,7 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: () #[[ATTR0]] {
-; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[A2:%.*]] = alloca i8, i32 0, align 4
-; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 7c28de24beea2..956fa0e88b028 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -53,9 +53,7 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
-; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[A2:%.*]] = alloca i8, i32 0, align 4
-; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index b588a399e5bd9..7b5e1276ac212 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -29,8 +29,7 @@ define internal i32 @foo(ptr) {
 ; CHECK-LABEL: define {{[^@]+}}@foo
 ; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca i8, i32 0, align 4
+; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
 ; CHECK-NEXT:    unreachable
 ;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 490894d129023..af2d1ef1eabba 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,8 +34,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define dso_local i32 @main() {
 ; TUNIT-LABEL: define {{[^@]+}}@main() {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[ALLOC11:%.*]] = alloca i8, i32 0, align 8
-; TUNIT-NEXT:    [[ALLOC22:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT:    [[ALLOC11:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    [[ALLOC22:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @foo, ptr nofree readnone align 4294967296 undef)
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @bar, ptr noalias nocapture nofree nonnull readnone align 8 dereferenceable(8) undef)
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index f2d9ecd1d8fa4..ad5665d25f517 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_alloca_1
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
-; CHECK-NEXT:    [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [4 x i8], align 1
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
@@ -164,37 +164,54 @@ entry:
 ;TODO: The allocation can be reduced here.
 ;However, the offsets (load/store etc.) need to be changed.
 ; Function Attrs: noinline nounwind uwtable
-define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
-; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
-; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
 ; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; CHECK-NEXT:    store i32 2, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT:    store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    ret { i64, ptr } [[TMP0]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; CHECK-NEXT:    ret void
 ;
 entry:
-  %retval = alloca %struct.Foo, align 8
   %val.addr = alloca i32, align 4
+  %f = alloca %struct.Foo, align 4
   store i32 %val, ptr %val.addr, align 4
-  %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
-  store i32 2, ptr %field1, align 8
-  %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
-  store ptr %val.addr, ptr %field3, align 8
-  %0 = load { i64, ptr }, ptr %retval, align 8
-  ret { i64, ptr } %0
+  %0 = load i32, ptr %val.addr, align 4
+  %mul = mul nsw i32 2, %0
+  %a = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 %mul, ptr %a, align 4
+  %a1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %1 = load i32, ptr %a1, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %1)
+  %c = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, ptr %c, align 4
+  %c2 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+  %2 = load i8, ptr %c2, align 4
+  %conv = sext i8 %2 to i32
+  %call3 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %conv)
+  ret void
 }
 
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [4 x i8], align 1
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
@@ -275,37 +292,37 @@ entry:
 define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I2:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY]], align 8
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
 ; CHECK:       for.cond1:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
@@ -313,28 +330,28 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-NEXT:    store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC7:%.*]]
 ; CHECK:       for.inc7:
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
-; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND1]]
 ; CHECK:       for.end9:
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND10:%.*]]
 ; CHECK:       for.cond10:
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
 ; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
 ; CHECK:       for.body12:
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
 ; CHECK-NEXT:    br label [[FOR_INC16:%.*]]
 ; CHECK:       for.inc16:
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
-; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND10]]
 ; CHECK:       for.end18:
 ; CHECK-NEXT:    ret void
@@ -426,7 +443,7 @@ define dso_local void @pthread_test(){
 ; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
-; TUNIT-NEXT:    [[F1:%.*]] = alloca i8, i32 4, align 4
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 1
 ; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
 ; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
@@ -452,6 +469,46 @@ define dso_local void @pthread_test(){
   ret void
 }
 
+
+define dso_local void @select_case(i1 %cond){
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i8], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [100 x i8], align 1
+  %b = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 3
+  %c = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 1
+  %sel = select i1 %cond, ptr %b, ptr %c
+  store i8 100, ptr %sel, align 1
+  ret void
+}
+
+define dso_local void @select_case_2(i1 %cond){
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case_2
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i32], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [100 x i32], align 1
+  %b = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 3
+  %c = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 1
+  %sel = select i1 %cond, ptr %b, ptr %c
+  %sel2 = getelementptr inbounds i32, ptr %sel, i64 0
+  store i8 100, ptr %sel2, align 1
+  ret void
+}
+
 define internal ptr @pthread_allocation_should_remain_same(ptr %arg) {
 ; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same
 ; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) {
@@ -499,6 +556,58 @@ entry:
   ret void
 }
 
+define dso_local void @alloca_array_multi_offset(){
+; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none)
+; CHECK-LABEL: define dso_local void @alloca_array_multi_offset
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 2
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arr = alloca i8, i32 10, align 4
+  %i = alloca i32, align 4
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %1 = load i32, ptr %i, align 4
+  %2 = load ptr, ptr %arr, align 8
+  %3 = load i32, ptr %i, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %2, i32 %3
+  store i32 %1, ptr %arrayidx, align 4
+  br label %for.inc
+
+for.inc:
+  %4 = load i32, ptr %i, align 4
+  %add = add nsw i32 %4, 2
+  store i32 %add, ptr %i, align 4
+  br label %for.cond
+
+for.end:
+  ret void
+
+}
+
 
 declare external void @external_call(ptr)
 
@@ -511,9 +620,11 @@ declare i32 @printf(ptr noundef, ...) #1
 ; Function Attrs: nounwind allocsize(0)
 declare noalias ptr @malloc(i64 noundef) #1
 ;.
-; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
 ; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
 ; TUNIT: [[META1]] = !{i64 2, i64 3, i1 false}
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
index 5bb795911ce40..0dc18cc8340d6 100644
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -36,8 +36,10 @@ define i8 @call_simplifiable_1() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP]], align 2
 ; TUNIT-NEXT:    ret i8 2
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -93,9 +95,13 @@ define i8 @call_simplifiable_2() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [2 x i8], align 1
+; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I0]], i64 -2
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP]], align 2
+; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
+; TUNIT-NEXT:    [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I1]], i64 -2
+; TUNIT-NEXT:    store i8 3, ptr [[NEWGEP3]], align 1
 ; TUNIT-NEXT:    ret i8 4
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -125,8 +131,10 @@ define i8 @call_simplifiable_3() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP]], align 2
 ; TUNIT-NEXT:    ret i8 2
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -198,13 +206,19 @@ define i8 @call_partially_simplifiable_1() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT:    store i8 2, ptr [[I2]], align 2
-; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
-; TUNIT-NEXT:    store i8 3, ptr [[I3]], align 1
-; TUNIT-NEXT:    [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 4
-; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[I2]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[I3]]) #[[ATTR3]]
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [3 x i8], align 1
+; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 2
+; TUNIT-NEXT:    [[NEWGEP4:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP4]], align 2
+; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 3
+; TUNIT-NEXT:    [[NEWGEP5:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
+; TUNIT-NEXT:    store i8 3, ptr [[NEWGEP5]], align 1
+; TUNIT-NEXT:    [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 4
+; TUNIT-NEXT:    [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[I4]], i64 -3
+; TUNIT-NEXT:    store i8 4, ptr [[NEWGEP3]], align 4
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[I2]], i64 -2
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[I3]], i64 -1
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP2]]) #[[ATTR3]]
 ; TUNIT-NEXT:    ret i8 [[R]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 33ac066e43d09..846373e05be1a 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -502,8 +502,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 2a5b3e94291a2..70aace8100abd 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -452,8 +452,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index f17bd5795a174..9eb79f8a46723 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,8 +2587,8 @@ define void @bad_gep() {
 ; TUNIT-LABEL: define {{[^@]+}}@bad_gep
 ; TUNIT-SAME: () #[[ATTR13]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
-; TUNIT-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
+; TUNIT-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
 ; TUNIT-NEXT:    br label [[EXIT:%.*]]
 ; TUNIT:       while.body:
@@ -2605,8 +2605,8 @@ define void @bad_gep() {
 ; CGSCC-LABEL: define {{[^@]+}}@bad_gep
 ; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
-; CGSCC-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
+; CGSCC-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT:    br label [[EXIT:%.*]]
 ; CGSCC:       while.body:
diff --git a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
index c6945d65acb29..ca8706b2bc331 100644
--- a/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/multiple-offsets-pointer-info.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-annotate-decl-cs  -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -attributor-annotate-decl-cs  -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 
 %struct.T = type { i32, [10 x [20 x i8]] }
 
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index c28cb28379348..6357bf742bbf1 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,15 +10,14 @@ define hidden i64 @f1() align 2 {
 ; TUNIT-LABEL: define {{[^@]+}}@f1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    ret i64 undef
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@f1
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
-; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i64 [[CALL2]]
 ;
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index 6afdbdaee317c..c8fec4f1de7b4 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -10,10 +10,12 @@ define void @foo(ptr %ptr) {
 ; TUNIT-LABEL: define {{[^@]+}}@foo
 ; TUNIT-SAME: (ptr nocapture nofree readnone [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [[STRUCT_TEST_A:%.*]], align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 1
 ; TUNIT-NEXT:    br label [[CALL_BR:%.*]]
 ; TUNIT:       call.br:
-; TUNIT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A:%.*]], ptr [[TMP0]], i64 0, i32 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[TMP1]], i64 -16
+; TUNIT-NEXT:    store ptr [[PTR]], ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT:    tail call void @bar(ptr noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 70793ec5c7f83..958a0590766b5 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -83,6 +83,12 @@ define i32 @vec_write_4() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define {{[^@]+}}@vec_write_4
 ; CHECK-SAME: () #[[ATTR0]] {
+; CHECK-NEXT:    [[A1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
+; CHECK-NEXT:    store i32 3, ptr [[NEWGEP]], align 16
+; CHECK-NEXT:    [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
+; CHECK-NEXT:    store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
 ; CHECK-NEXT:    ret i32 13
 ;
   %a = alloca <4 x i32>
@@ -101,8 +107,12 @@ define i32 @vec_write_5(i32 %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define {{[^@]+}}@vec_write_5
 ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = alloca <4 x i32>, align 16
-; CHECK-NEXT:    store i32 [[ARG]], ptr [[A]], align 16
+; CHECK-NEXT:    [[A1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[A1]], i64 4
+; CHECK-NEXT:    store i32 [[ARG]], ptr [[NEWGEP]], align 16
+; CHECK-NEXT:    [[G:%.*]] = getelementptr i32, ptr [[A1]], i64 1
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[G]], i64 -4
+; CHECK-NEXT:    store <2 x i32> <i32 5, i32 5>, ptr [[NEWGEP2]], align 8
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ARG]], 5
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i32 5, [[ADD1]]
 ; CHECK-NEXT:    ret i32 [[ADD2]]
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 7a35b5c856097..82169f5031050 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2666,18 +2666,19 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
 ; TUNIT-SAME: (ptr nocapture nofree writeonly [[DST:%.*]], ptr nocapture nofree readonly [[SRC:%.*]]) {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT:    [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; TUNIT-NEXT:    [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
-; TUNIT-NEXT:    store ptr @global, ptr [[INNER]], align 8
+; TUNIT-NEXT:    [[LOCAL1:%.*]] = alloca [8 x i8], align 1
+; TUNIT-NEXT:    [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY:%.*]], ptr [[LOCAL1]], i64 0, i32 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INNER]], i64 -16
+; TUNIT-NEXT:    store ptr @global, ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT:    store ptr [[DST]], ptr [[CALL_H2S]], align 8
 ; TUNIT-NEXT:    [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
 ; TUNIT-NEXT:    store ptr [[SRC]], ptr [[SRC2]], align 8
 ; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
-; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL]], align 8
-; TUNIT-NEXT:    [[LOCAL_B8:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 8
-; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL_B8]], align 8
-; TUNIT-NEXT:    [[LOCAL_B16:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 16
-; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL_B16]], align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
+; TUNIT-NEXT:    [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
+; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8
+; TUNIT-NEXT:    [[LOCAL1_B16:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 16
+; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL1_B16]], align 8
 ; TUNIT-NEXT:    call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
@@ -3017,8 +3018,10 @@ define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT:    [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT:    [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_FIXED]], i64 -12
+; CHECK-NEXT:    store i8 100, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT:    ret i8 100
 ;
 entry:
@@ -3036,8 +3039,10 @@ define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
 ; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
 ; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; CHECK-NEXT:    [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
+; CHECK-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; CHECK-NEXT:    [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES1]], i64 0, i64 12
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[GEP_LOADED]], i64 -12
+; CHECK-NEXT:    store i8 100, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT:    ret i8 100
 ;
 entry:

>From 9a91eb09442bf2cd3d4646193d75281e3cac3948 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby967.llnl.gov>
Date: Tue, 11 Jun 2024 10:49:33 -0700
Subject: [PATCH 4/4] [Attributor]: Reorder bins of an allocation based on
 access patterns in the code

---
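A note on the approach: the patch builds a field-access graph whose
nodes are the offset bins of an allocation, with an edge from bin A to
bin B when B's earliest access is the one that most closely follows A's
earliest access. New offsets are then assigned by a greedy,
priority-queue-driven traversal of that graph, packing bins back to
back in visit order. A minimal self-contained sketch of just that
layout step (Bin, Succ and reorderBins are illustrative names only, not
the interfaces the patch actually uses):

  #include <cstdint>
  #include <map>
  #include <queue>
  #include <utility>
  #include <vector>

  struct Bin {
    int64_t Offset; // old start offset within the allocation
    int64_t Size;
  };

  // Lay bins out contiguously in greedy visit order: seed the queue
  // with the graph roots, then always expand the highest-weight edge.
  // Succ maps an old start offset to its (weight, successor bin) edges.
  // Returns a map from old start offset to new start offset.
  std::map<int64_t, int64_t>
  reorderBins(const std::vector<Bin> &Roots,
              const std::map<int64_t,
                             std::vector<std::pair<int, Bin>>> &Succ) {
    auto ByWeight = [](const std::pair<int, Bin> &A,
                       const std::pair<int, Bin> &B) {
      return A.first < B.first; // max-heap on edge weight
    };
    std::priority_queue<std::pair<int, Bin>,
                        std::vector<std::pair<int, Bin>>,
                        decltype(ByWeight)>
        Queue(ByWeight);
    for (const Bin &R : Roots)
      Queue.push({0, R});

    std::map<int64_t, int64_t> NewOffset;
    int64_t NextFree = 0;
    while (!Queue.empty()) {
      Bin B = Queue.top().second;
      Queue.pop();
      if (!NewOffset.insert({B.Offset, NextFree}).second)
        continue; // bin already placed
      NextFree += B.Size;
      auto It = Succ.find(B.Offset);
      if (It != Succ.end())
        for (const auto &Edge : It->second)
          Queue.push(Edge);
    }
    return NewOffset;
  }

In the patch itself the queue is seeded with the roots of the
field-access graph and all edge weights are currently hardcoded to 0;
deriving them from profile information is left as a TODO.
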
 llvm/include/llvm/Transforms/IPO/Attributor.h | 106 +++++++++
 .../Transforms/IPO/AttributorAttributes.cpp   | 219 +++++++++++++++++-
 llvm/test/Transforms/Attributor/allocator.ll  | 127 ++++++----
 3 files changed, 397 insertions(+), 55 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 85f2f76e833fc..990c5ea7c7ca1 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -98,16 +98,20 @@
 #define LLVM_TRANSFORMS_IPO_ATTRIBUTOR_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/ADT/DirectedGraph.h"
 #include "llvm/ADT/GraphTraits.h"
 #include "llvm/ADT/MapVector.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CGSCCPassManager.h"
+#include "llvm/Analysis/DDG.h"
 #include "llvm/Analysis/LazyCallGraph.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryLocation.h"
@@ -140,6 +144,7 @@
 #include <limits>
 #include <map>
 #include <optional>
+#include <tuple>
 
 namespace llvm {
 
@@ -6372,6 +6377,107 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
 
   using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
   virtual const NewOffsetsTy &getNewOffsets() const = 0;
+  struct FieldAccessGraphEdge;
+  struct FieldAccessGraphNode;
+
+  struct PriorityQueueGraphNode {
+    PriorityQueueGraphNode(int Priority, FieldAccessGraphNode *Node)
+        : Priority(Priority), Node(Node) {}
+
+  public:
+    int Priority;
+    FieldAccessGraphNode *Node;
+
+    int getPriority() { return Priority; }
+    FieldAccessGraphNode *getNode() { return Node; }
+
+    bool operator<(const PriorityQueueGraphNode *A) {
+      return A->Priority > Priority;
+    }
+
+    bool operator==(const PriorityQueueGraphNode *A) {
+      return A->Priority == Priority;
+    }
+
+    bool operator>(const PriorityQueueGraphNode *A) {
+      return A->Priority < Priority;
+    }
+  };
+
+  // An edge type for the field access graph.
+  struct FieldAccessGraphEdge
+      : public DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge> {
+    FieldAccessGraphEdge(FieldAccessGraphNode &TargetNode, int EdgeWeight)
+        : DGEdge<FieldAccessGraphNode, FieldAccessGraphEdge>(TargetNode),
+          EdgeWeight(EdgeWeight) {}
+
+  public:
+    FieldAccessGraphNode *SrcNode;
+    int EdgeWeight;
+    int getEdgeWeight() { return EdgeWeight; }
+    void setSrcNode(FieldAccessGraphNode *SourceNode) { SrcNode = SourceNode; }
+    FieldAccessGraphNode *getSourceNode() { return SrcNode; }
+  };
+
+  // A node type for the field access graph.
+  struct FieldAccessGraphNode
+      : public DGNode<FieldAccessGraphNode, FieldAccessGraphEdge> {
+    FieldAccessGraphNode(const AA::RangeTy &Node, FieldAccessGraphEdge &Edge)
+        : DGNode<FieldAccessGraphNode, FieldAccessGraphEdge>(Edge),
+          BinRange(Node) {}
+    FieldAccessGraphNode(const AA::RangeTy &Node) : BinRange(Node) {}
+
+  public:
+    const AA::RangeTy BinRange;
+    const AA::RangeTy &getBinRange() const { return BinRange; }
+  };
+
+  struct FieldAccessGraph
+      : public DirectedGraph<FieldAccessGraphNode, FieldAccessGraphEdge> {
+    FieldAccessGraph() {}
+
+  public:
+    FieldAccessGraphNode *getNode(const AA::RangeTy &Range) {
+      for (FieldAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == Range) {
+          return N;
+        }
+      }
+      return nullptr;
+    }
+
+    bool findNode(const AA::RangeTy &Range) {
+      for (FieldAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == Range) {
+          return true;
+        }
+      }
+      return false;
+    }
+
+    bool edgeExists(const AA::RangeTy &HeadNode,
+                    FieldAccessGraphNode *TargetNode) {
+      for (FieldAccessGraphNode *N : Nodes) {
+        if (N->getBinRange() == HeadNode) {
+          return N->hasEdgeTo(*TargetNode);
+        }
+      }
+      return false;
+    }
+
+    // Return all nodes that have no incoming edges.
+    void getAllRoots(std::vector<FieldAccessGraphNode *> &Roots) {
+      assert(Roots.empty() && "Root set should be empty at the beginning!");
+      for (FieldAccessGraphNode *N : Nodes) {
+        SmallVector<FieldAccessGraphEdge *> EL;
+        if (!findIncomingEdgesToNode(*N, EL)) {
+          Roots.push_back(N);
+        }
+      }
+    }
+  };
+
+  virtual const FieldAccessGraph &getBinAccessGraph() const = 0;
 
   /// See AbstractAttribute::getName()
   const std::string getName() const override { return "AAAllocationInfo"; }
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index e462cbded0f30..7393939e81dc2 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,6 +11,9 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DirectedGraph.h"
+#include "llvm/ADT/PriorityQueue.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 
 #include "llvm/ADT/APInt.h"
@@ -72,9 +75,11 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
+#include <climits>
 #include <numeric>
 #include <optional>
 #include <string>
+#include <utility>
 
 using namespace llvm;
 
@@ -12628,6 +12633,27 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
   AAAllocationInfoImpl(const IRPosition &IRP, Attributor &A)
       : AAAllocationInfo(IRP, A) {}
 
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+
+    // Map each instruction to its position in the module. This gives a
+    // relative sense of distance between instructions, which is useful as
+    // a measure of temporal access order.
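+    // Note: this walks the entire module for each AAAllocationInfo
+    // instance; if that proves costly, the position map could be computed
+    // once per module and shared.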
+    auto &IRP = getIRPosition();
+    auto *M = IRP.getCtxI()->getModule();
+    int InstructionPosition = 0;
+    for (const auto &F : *M) {
+      for (const auto &BB : F) {
+        for (const auto &I : BB) {
+          InstructionPositionMap.insert(
+              std::make_pair(&I, InstructionPosition));
+          InstructionPosition++;
+        }
+      }
+    }
+  }
+
   std::optional<TypeSize> getAllocatedSize() const override {
     assert(isValidState() && "the AA is invalid");
     return AssumedAllocatedSize;
@@ -12638,6 +12664,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     return NewComputedOffsets;
   }
 
+  const FieldAccessGraph &getBinAccessGraph() const override {
+    assert(isValidState() && "the AA is invalid");
+    return BinAccessGraph;
+  }
+
   std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
                                                     const DataLayout &DL) {
 
@@ -12696,11 +12727,19 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       return ChangeStatus::CHANGED;
     }
 
-    // For each access bin
-    // Compute its new start Offset and store the results in a new map
-    // (NewOffsetBins)
-    unsigned long PrevBinEndOffset = 0;
-    bool ChangedOffsets = false;
+    // Map each accessing instruction to the bin it accesses.
+    DenseMap<Instruction *, AA::RangeTy> MapAccessedInstToBins;
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+
+      const AA::RangeTy &Range = It->getFirst();
+      auto AccessedIndices = It->getSecond();
+      for (auto AccIndex : AccessedIndices) {
+        const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+        Instruction *LocalInst = AccessInstruction.getLocalInst();
+        MapAccessedInstToBins.insert(std::make_pair(LocalInst, Range));
+      }
+    }
 
     for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
          It != PI->end(); It++) {
@@ -12712,12 +12751,153 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
         return indicatePessimisticFixpoint();
       }
 
+      auto CurrAccessedIndices = It->getSecond();
+      // Find the earliest instruction in the set that caused an access.
+      Instruction *Earliest = nullptr;
+      int EarliestInstructionPos = INT_MAX;
+      for (auto Idx : CurrAccessedIndices) {
+        auto &AccessedI = PI->getBinAccess(Idx);
+        Instruction *LI = AccessedI.getLocalInst();
+        if (!Earliest) {
+          Earliest = LI;
+          EarliestInstructionPos = InstructionPositionMap.lookup(LI);
+          continue;
+        }
+
+        int CurrPosition = InstructionPositionMap.lookup(LI);
+        if (CurrPosition < EarliestInstructionPos) {
+          Earliest = LI;
+          EarliestInstructionPos = CurrPosition;
+        }
+      }
+
+      int ClosestNextPosition = INT_MAX;
+      AA::RangeTy CorrespondingBin = OldRange;
+      for (auto &Val : MapAccessedInstToBins) {
+        auto *Ins = Val.getFirst();
+        auto &Bin = Val.getSecond();
+        if (Bin.offsetOrSizeAreUnknown()) {
+          return indicatePessimisticFixpoint();
+        }
+
+        // No self loops are allowed in the graph
+        if (Bin == OldRange)
+          continue;
+
+        int InsPosition = InstructionPositionMap.lookup(Ins);
+        if (InsPosition > EarliestInstructionPos &&
+            InsPosition < ClosestNextPosition) {
+          ClosestNextPosition = InsPosition;
+          CorrespondingBin = Bin;
+        }
+      }
+
+      if (CorrespondingBin == OldRange) {
+        continue;
+      }
+
+      // TODO: Get the edge weights from profile information. We keep this
+      // simple for now: the edge weight models the likelihood of reaching
+      // the target instruction and is currently hardcoded to 0.
+      int EdgeWeight = 0;
+
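+      // Note: the graph nodes and edges below are allocated with new and
+      // are never freed; giving FieldAccessGraph ownership of them would
+      // be a natural follow-up.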
+      // Both nodes already exist in the graph, so assume the edge does too.
+      if (BinAccessGraph.findNode(OldRange) &&
+          BinAccessGraph.findNode(CorrespondingBin))
+        continue;
+
+      if (BinAccessGraph.findNode(CorrespondingBin)) {
+        FieldAccessGraphNode *ToNode = BinAccessGraph.getNode(CorrespondingBin);
+        FieldAccessGraphEdge *AccessedEdge =
+            new FieldAccessGraphEdge(*ToNode, EdgeWeight);
+        FieldAccessGraphNode *FromNode =
+            new FieldAccessGraphNode(OldRange, *AccessedEdge);
+        AccessedEdge->setSrcNode(FromNode);
+        BinAccessGraph.addNode(*FromNode);
+        BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+        continue;
+      }
+
+      if (BinAccessGraph.findNode(OldRange)) {
+        FieldAccessGraphNode *ToNode =
+            new FieldAccessGraphNode(CorrespondingBin);
+        FieldAccessGraphEdge *AccessedEdge =
+            new FieldAccessGraphEdge(*ToNode, EdgeWeight);
+        FieldAccessGraphNode *FromNode = BinAccessGraph.getNode(OldRange);
+        AccessedEdge->setSrcNode(FromNode);
+        BinAccessGraph.addNode(*ToNode);
+        BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+        continue;
+      }
+
+      // Neither node exists in the graph.
+      FieldAccessGraphNode *ToNode = new FieldAccessGraphNode(CorrespondingBin);
+      FieldAccessGraphEdge *AccessedEdge =
+          new FieldAccessGraphEdge(*ToNode, EdgeWeight);
+      FieldAccessGraphNode *FromNode =
+          new FieldAccessGraphNode(OldRange, *AccessedEdge);
+      AccessedEdge->setSrcNode(FromNode);
+      BinAccessGraph.addNode(*FromNode);
+      BinAccessGraph.addNode(*ToNode);
+      BinAccessGraph.connect(*FromNode, *ToNode, *AccessedEdge);
+    }
+
+    // Traverse the graph greedily, mapping old bins to new bins and
+    // computing the size of the allocation as we go.
+
+    // Get all the root nodes.
+    std::vector<FieldAccessGraphNode *> RootsVector;
+    // A priority queue to establish greedy order
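+    // Note: the queue stores node pointers, so without an explicit
+    // comparator it orders by std::less<PriorityQueueGraphNode *>, i.e.
+    // by address; the struct's comparison operators are not consulted.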
+    PriorityQueue<PriorityQueueGraphNode *> PriorityQueue;
+    // Map to mark which nodes have been visited so far
+    DenseMap<FieldAccessGraphNode *, bool> VisitedMap;
+    BinAccessGraph.getAllRoots(RootsVector);
+
+    for (auto *Root : RootsVector) {
+      PriorityQueueGraphNode *Node = new PriorityQueueGraphNode(0, Root);
+      PriorityQueue.push(Node);
+    }
+
+    unsigned long PrevBinEndOffset = 0;
+    bool ChangedOffsets = false;
+
+    while (!PriorityQueue.empty()) {
+
+      // Pop an element from the priority queue
+      PriorityQueueGraphNode *Node = PriorityQueue.top();
+      PriorityQueue.pop();
+
+      // Visit the current graph node.
+      FieldAccessGraphNode *GraphNode = Node->getNode();
+      VisitedMap[GraphNode] = true;
+
+      // For each access bin, compute its new start offset and store the
+      // result in a new map (NewOffsetBins).
+
+      auto &NodeRange = GraphNode->getBinRange();
       unsigned long NewStartOffset = PrevBinEndOffset;
-      unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
+      unsigned long NewEndOffset = NewStartOffset + NodeRange.Size;
       PrevBinEndOffset = NewEndOffset;
 
-      ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
-                                      OldRange.Size);
+      // Set the new offsets in the map.
+      ChangedOffsets |= setNewOffsets(NodeRange, NodeRange.Offset,
+                                      NewStartOffset, NodeRange.Size);
+
+      auto &Edges = GraphNode->getEdges();
+
+      // Push all successors onto the priority queue.
+      for (auto &Edge : Edges) {
+        int EdgeWeight = Edge->getEdgeWeight();
+        FieldAccessGraphNode &TargetNode = Edge->getTargetNode();
+        if (!VisitedMap[&TargetNode]) {
+          PriorityQueueGraphNode *SuccNode =
+              new PriorityQueueGraphNode(EdgeWeight, &TargetNode);
+          PriorityQueue.push(SuccNode);
+        }
+      }
     }
 
     // Set the new size of the allocation, the new size of the Allocation should
@@ -12991,9 +13171,32 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     }
   }
 
+  void dumpFieldAccessGraph(raw_ostream &O) {
+
+    for (const FieldAccessGraphNode *Node : BinAccessGraph) {
+      O << "Node: " << Node->getBinRange() << "\n";
+      SmallVector<FieldAccessGraphEdge *> EL;
+      bool EdgesFound = BinAccessGraph.findIncomingEdgesToNode(*Node, EL);
+
+      if (EdgesFound) {
+        O << "Print all incoming edges to node " << Node->getBinRange() << "\n";
+        for (auto &Edge : EL) {
+          O << Edge->getSourceNode()->getBinRange();
+          O << " ---> " << Edge->getTargetNode().getBinRange()
+            << " , Edge weight: " << Edge->getEdgeWeight() << "\n";
+        }
+      } else {
+        O << "No incoming edges found for node " << Node->getBinRange() << "\n";
+      }
+      O << "\n";
+    }
+  }
+
 private:
   std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
   NewOffsetsTy NewComputedOffsets;
+  FieldAccessGraph BinAccessGraph;
+  DenseMap<const Instruction *, int> InstructionPositionMap;
 
   // Maintain the computed allocation size of the object.
  // Returns (bool) whether the size of the allocation was modified or not.
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index ad5665d25f517..7e13e98230df6 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_alloca_1
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1
-; CHECK-NEXT:    [[F2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
@@ -49,11 +49,11 @@ define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_malloc_1
 ; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR1]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F2]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[CALL]], align 4
@@ -86,11 +86,11 @@ define dso_local void @positive_malloc_2(ptr noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_malloc_2
 ; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F3:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR1]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 60)
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F3]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
@@ -125,19 +125,19 @@ define dso_local ptr @negative_test_escaping_pointer(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local ptr @negative_test_escaping_pointer
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 16)
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F2]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[F2]], align 8
 ; CHECK-NEXT:    store i32 2, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 10, [[VAL]]
-; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[F2]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
 ; CHECK-NEXT:    store i32 [[ADD2]], ptr [[TMP1]], align 8
-; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[F]], align 8
+; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[F2]], align 8
 ; CHECK-NEXT:    ret ptr [[TMP3]]
 ;
 entry:
@@ -168,18 +168,18 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [5 x i8], align 1
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
-; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[F2]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [[STRUCT_FOO:%.*]], ptr [[F2]], i32 0, i32 2
 ; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
-; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
-; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F1]], i32 0, i32 2
+; CHECK-NEXT:    [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[C]], i64 -4
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP3]], align 4
+; CHECK-NEXT:    [[C2:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[F2]], i32 0, i32 2
 ; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[C2]], i64 -4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
@@ -211,7 +211,7 @@ entry:
 define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
@@ -240,6 +240,39 @@ entry:
 }
 
 
+define dso_local i32 @simple_reordering_alloca(i32 %val) {
+; The bins should be reordered, not the instructions, so the offsets should differ.
+; CHECK-LABEL: define dso_local noundef i32 @simple_reordering_alloca
+; CHECK-SAME: (i32 [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [12 x i8], align 1
+; CHECK-NEXT:    [[INDEX1:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 9
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr ptr, ptr [[INDEX1]], i64 -36
+; CHECK-NEXT:    store i32 100, ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[INDEX2:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 5
+; CHECK-NEXT:    [[NEWGEP3:%.*]] = getelementptr ptr, ptr [[INDEX2]], i64 -16
+; CHECK-NEXT:    store i32 20, ptr [[NEWGEP3]], align 4
+; CHECK-NEXT:    [[INDEX3:%.*]] = getelementptr inbounds [10 x i32], ptr [[ARRAY1]], i32 0, i32 3
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr ptr, ptr [[INDEX3]], i64 -4
+; CHECK-NEXT:    store i32 22, ptr [[NEWGEP]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef 32)
+; CHECK-NEXT:    ret i32 32
+;
+entry:
+  %array = alloca [10 x i32]
+  %index1 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 9
+  store i32 100, ptr %index1
+  %index2 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 5
+  store i32 20, ptr %index2
+  %index3 = getelementptr inbounds [10 x i32], ptr %array, i32 0, i32 3
+  store i32 22, ptr %index3
+  %retval = load i32, ptr %index3
+  %retval2 = add i32 10, %retval
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %retval2)
+  ret i32 %retval2
+}
+
+
 ; Function Attrs: noinline nounwind uwtable
 ; TODO: Here the array size is not known at compile time.
 ; However the array does not escape and is only partially used.
@@ -248,15 +281,15 @@ define dso_local void @baz(ptr noundef %val, i32 noundef %arrayLength) #0 {
 ; CHECK-LABEL: define dso_local void @baz
 ; CHECK-SAME: (ptr nocapture nofree noundef readonly [[VAL:%.*]], i32 noundef [[ARRAYLENGTH:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[ARRAYLENGTH_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[F:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
-; CHECK-NEXT:    store i32 [[ARRAYLENGTH]], ptr [[ARRAYLENGTH_ADDR]], align 4
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[ARRAYLENGTH_ADDR2:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[F3:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR1]], align 8
+; CHECK-NEXT:    store i32 [[ARRAYLENGTH]], ptr [[ARRAYLENGTH_ADDR2]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 [[ARRAYLENGTH]] to i64
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 4, [[CONV]]
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef [[MUL]])
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[F3]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
@@ -292,8 +325,8 @@ entry:
 define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I2:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[I2:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
 ; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
@@ -443,7 +476,7 @@ define dso_local void @pthread_test(){
 ; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
-; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 1
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
 ; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
@@ -474,9 +507,9 @@ define dso_local void @select_case(i1 %cond){
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
 ; CHECK-LABEL: define dso_local void @select_case
 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i8], align 1
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A1]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A1]], i64 0, i64 1
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
 ; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
 ; CHECK-NEXT:    ret void
@@ -493,9 +526,9 @@ define dso_local void @select_case_2(i1 %cond){
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
 ; CHECK-LABEL: define dso_local void @select_case_2
 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i32], align 1
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A1]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A1]], i64 0, i64 1
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
 ; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
 ; CHECK-NEXT:    ret void
@@ -561,19 +594,19 @@ define dso_local void @alloca_array_multi_offset(){
 ; CHECK-LABEL: define dso_local void @alloca_array_multi_offset
 ; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    [[I2:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 2
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
 ; CHECK-NEXT:    ret void


