[llvm] [Attributor] Change allocation size and load/store offsets using AAPointerInfo for Alloca instructions and keep track of instructions causing an Access (PR #72029)

Vidush Singhal via llvm-commits llvm-commits at lists.llvm.org
Sat Nov 22 19:41:17 PST 2025


https://github.com/vidsinghal updated https://github.com/llvm/llvm-project/pull/72029

>From 2a7b1227b8578c8b6999f3dde562e21d25636fd1 Mon Sep 17 00:00:00 2001
From: vidsinghal <vidush.sl at gmail.com>
Date: Mon, 24 Jun 2024 11:00:52 -0400
Subject: [PATCH 01/14] Store the full chain of instructions that make up the
 access.

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 193 +++++++--
 .../Transforms/IPO/AttributorAttributes.cpp   | 261 +++++++++---
 .../pointer-info-track-access-chain.ll        | 387 ++++++++++++++++++
 3 files changed, 761 insertions(+), 80 deletions(-)
 create mode 100644 llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index eb35e3644bd02..1c8c585b25f20 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -104,7 +104,9 @@
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SetVector.h"
+#include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallSet.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/iterator.h"
 #include "llvm/Analysis/AssumeBundleQueries.h"
 #include "llvm/Analysis/CFG.h"
@@ -329,6 +331,10 @@ inline bool operator==(const RangeTy &A, const RangeTy &B) {
   return A.Offset == B.Offset && A.Size == B.Size;
 }
 
+/// Order by Offset, then Size, so sorted ranges that are == end up adjacent.
+inline bool operator<(const RangeTy &A, const RangeTy &B) {
+  return A.Offset != B.Offset ? A.Offset < B.Offset : A.Size < B.Size;
+}
 inline bool operator!=(const RangeTy &A, const RangeTy &B) { return !(A == B); }
 
 /// Return the initial value of \p Obj with type \p Ty if that is a constant.
@@ -5858,49 +5864,139 @@ struct AAPointerInfo : public AbstractAttribute {
   /// list should be strictly ascending, but we ensure that only when we
   /// actually translate the list of offsets to a RangeList.
   struct OffsetInfo {
-    using VecTy = SmallSet<int64_t, 4>;
+    using VecTy = SmallVector<AA::RangeTy>;
+    // Depth-1 predecessors per range; Origins[I] pairs with Ranges[I].
+    using OriginsTy = SmallVector<SmallPtrSet<Value *, 4>>;
     using const_iterator = VecTy::const_iterator;
-    VecTy Offsets;
+    OriginsTy Origins;
+    VecTy Ranges;
 
-    const_iterator begin() const { return Offsets.begin(); }
-    const_iterator end() const { return Offsets.end(); }
+    const_iterator begin() const { return Ranges.begin(); }
+    const_iterator end() const { return Ranges.end(); }
 
     bool operator==(const OffsetInfo &RHS) const {
-      return Offsets == RHS.Offsets;
+      return Ranges == RHS.Ranges && Origins == RHS.Origins;
     }
 
     bool operator!=(const OffsetInfo &RHS) const { return !(*this == RHS); }
 
-    bool insert(int64_t Offset) { return Offsets.insert(Offset).second; }
-    bool isUnassigned() const { return Offsets.size() == 0; }
+    // Insert a new Range and Origin
+    void insert(AA::RangeTy Range, Value &V) {
+      auto *It = std::find(Ranges.begin(), Ranges.end(), Range);
+      // The range is already recorded; only add V to its origin set.
+      if (It != Ranges.end()) {
+        size_t Index = It - Ranges.begin();
+        if (Index < Origins.size())
+          Origins[Index].insert(&V);
+      } else {
+        Ranges.push_back(Range);
+        Origins.emplace_back();
+        Origins.back().insert(&V);
+      }
+    }
+
+    // Set the size of every range to Size.
+    void setSizeAll(uint64_t Size) {
+      for (auto &Range : Ranges)
+        Range.Size = Size;
+    }
+
+    // Helper function to get just the offsets from Ranges.
+    void getOnlyOffsets(SmallVector<int64_t> &Offsets) {
+      for (auto &Range : Ranges)
+        Offsets.push_back(Range.Offset);
+      // Sort ascending and drop duplicates so every offset appears once.
+      sort(Offsets.begin(), Offsets.end());
+      Offsets.erase(std::unique(Offsets.begin(), Offsets.end()), Offsets.end());
+    }
+
+    bool isUnassigned() const { return Ranges.empty(); }
 
     bool isUnknown() const {
       if (isUnassigned())
         return false;
-      if (Offsets.size() == 1)
-        return *Offsets.begin() == AA::RangeTy::Unknown;
+      if (Ranges.size() == 1)
+        return Ranges.front().Offset == AA::RangeTy::Unknown;
       return false;
     }
 
-    void setUnknown() {
-      Offsets.clear();
-      Offsets.insert(AA::RangeTy::Unknown);
+    void setUnknown(Value &V) {
+      Ranges.clear();
+      Origins.clear();
+      insert(AA::RangeTy{AA::RangeTy::Unknown, AA::RangeTy::Unknown}, V);
+    }
+
+    // Increment all ranges by Inc.
+    // Add an origin V to all offsets.
+    void addToAll(int64_t Inc, Value &V) {
+      for (auto &Range : Ranges)
+        Range.Offset += Inc;
+
+      if (!Origins.empty()) {
+        for (auto &Origin : Origins)
+          Origin.insert(&V);
+      } else {
+        for (size_t Index = 0; Index < Ranges.size(); Index++) {
+          Origins.emplace_back();
+          Origins[Index].insert(&V);
+        }
+      }
     }
 
+    // Increment all ranges by Inc.
     void addToAll(int64_t Inc) {
-      VecTy NewOffsets;
-      for (auto &Offset : Offsets)
-        NewOffsets.insert(Offset + Inc);
-      Offsets = std::move(NewOffsets);
+      for (auto &Range : Ranges)
+        Range.Offset += Inc;
     }
 
     /// Copy offsets from \p R into the current list.
     ///
     /// Ideally all lists should be strictly ascending, but we defer that to the
     /// actual use of the list. So we just blindly append here.
-    bool merge(const OffsetInfo &R) { return set_union(Offsets, R.Offsets); }
+
+    bool merge(const OffsetInfo &R) {
+      bool Changed =  set_union(Ranges, R.Ranges);
+      // ensure elements are unique.
+      sort(Ranges.begin(), Ranges.end());
+      Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
+
+      OriginsTy ToBeMergeOrigins = R.Origins;
+      for (auto &Origin : ToBeMergeOrigins)
+        Origins.emplace_back(Origin);
+
+      return Changed;
+    }
+
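+    // Merge the ranges of R into this OffsetInfo. Unlike merge(), the origins
+    // of R are not copied; instead CurPtr is recorded as an origin of every
+    // range that R contributed (via the matching index, or a new trailing
+    // origin set when that index does not exist yet).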
+    bool mergeWithOffset(const OffsetInfo &R, Value &CurPtr) {
+      bool Changed = set_union(Ranges, R.Ranges);
+      // ensure elements are unique.
+      sort(Ranges.begin(), Ranges.end());
+      Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
+      auto &ROffsets = R.Ranges;
+      for (auto Offset : ROffsets) {
+        auto *It = std::find(Ranges.begin(), Ranges.end(), Offset);
+        if (It == Ranges.end())
+          continue;
+        size_t Index = It - Ranges.begin();
+        if (Index >= Origins.size()) {
+          Origins.emplace_back();
+          Origins.back().insert(&CurPtr);
+        } else {
+          Origins[Index].insert(&CurPtr);
+        }
+      }
+      return Changed;
+    }
   };
 
+  using OffsetInfoMapTy = DenseMap<Value *, OffsetInfo>;
+  using AccessPathTy = SmallVector<Value *, 4>;
+  using AccessPathSetTy = SmallPtrSet<AccessPathTy *, 4>;
+
   /// A container for a list of ranges.
   struct RangeList {
     // The set of ranges rarely contains more than one element, and is unlikely
@@ -6055,15 +6151,17 @@ struct AAPointerInfo : public AbstractAttribute {
   /// An access description.
   struct Access {
     Access(Instruction *I, int64_t Offset, int64_t Size,
-           std::optional<Value *> Content, AccessKind Kind, Type *Ty)
+           std::optional<Value *> Content, AccessKind Kind, Type *Ty,
+           AccessPathSetTy *AccessPaths)
         : LocalI(I), RemoteI(I), Content(Content), Ranges(Offset, Size),
-          Kind(Kind), Ty(Ty) {
+          Kind(Kind), Ty(Ty), AccessPaths(AccessPaths) {
       verify();
     }
     Access(Instruction *LocalI, Instruction *RemoteI, const RangeList &Ranges,
-           std::optional<Value *> Content, AccessKind K, Type *Ty)
+           std::optional<Value *> Content, AccessKind K, Type *Ty,
+           AccessPathSetTy *AccessPaths)
         : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Ranges(Ranges),
-          Kind(K), Ty(Ty) {
+          Kind(K), Ty(Ty), AccessPaths(AccessPaths) {
       if (Ranges.size() > 1) {
         Kind = AccessKind(Kind | AK_MAY);
         Kind = AccessKind(Kind & ~AK_MUST);
@@ -6072,9 +6170,9 @@ struct AAPointerInfo : public AbstractAttribute {
     }
     Access(Instruction *LocalI, Instruction *RemoteI, int64_t Offset,
            int64_t Size, std::optional<Value *> Content, AccessKind Kind,
-           Type *Ty)
+           Type *Ty, AccessPathSetTy *AccessPaths)
         : LocalI(LocalI), RemoteI(RemoteI), Content(Content),
-          Ranges(Offset, Size), Kind(Kind), Ty(Ty) {
+          Ranges(Offset, Size), Kind(Kind), Ty(Ty), AccessPaths(AccessPaths) {
       verify();
     }
     Access(const Access &Other) = default;
@@ -6082,7 +6180,8 @@ struct AAPointerInfo : public AbstractAttribute {
     Access &operator=(const Access &Other) = default;
     bool operator==(const Access &R) const {
       return LocalI == R.LocalI && RemoteI == R.RemoteI && Ranges == R.Ranges &&
-             Content == R.Content && Kind == R.Kind;
+             Content == R.Content && Kind == R.Kind &&
+             checkAccessPathsAreSame(R.AccessPaths);
     }
     bool operator!=(const Access &R) const { return !(*this == R); }
 
@@ -6194,11 +6293,53 @@ struct AAPointerInfo : public AbstractAttribute {
       }
     }
 
+    // Add every path of AccessPathsNew that is not already in AccessPaths.
+    void mergeAccessPaths(const AccessPathSetTy *AccessPathsNew) const {
+      for (auto *Path : *AccessPathsNew)
+        if (!existsChain(Path))
+          AccessPaths->insert(Path);
+    }
+
+    // Check if the given access paths are the same as ours (by content).
+    bool checkAccessPathsAreSame(const AccessPathSetTy *AccessPathsR) const {
+      bool IsSame = true;
+      if (AccessPaths->size() != AccessPathsR->size())
+        return false;
+
+      for (auto *Path : *AccessPathsR) {
+        if (!existsChain(Path))
+          IsSame = false;
+      }
+      return IsSame;
+    }
+
+    // Check if the chain exists in the AccessPathsSet.
+    bool existsChain(const AccessPathTy *NewPath) const {
+      for (auto *OldPath : *AccessPaths)
+        if (*OldPath == *NewPath)
+          return true;
+
+      return false;
+    }
+
+    void dumpAccessPaths(raw_ostream &O) const {
+      O << "Print all access paths found:"
+        << "\n";
+      for (auto *It : *AccessPaths) {
+        O << "Backtrack a unique access path:\n";
+        for (Value *Ins : *It) {
+          O << *Ins << "\n";
+        }
+      }
+    }
+
+    const AccessPathSetTy *getAccessChain() const { return AccessPaths; }
     const RangeList &getRanges() const { return Ranges; }
 
     using const_iterator = RangeList::const_iterator;
     const_iterator begin() const { return Ranges.begin(); }
     const_iterator end() const { return Ranges.end(); }
+    size_t size() const { return Ranges.size(); }
 
   private:
     /// The instruction responsible for the access with respect to the local
@@ -6221,6 +6362,10 @@ struct AAPointerInfo : public AbstractAttribute {
     /// The type of the content, thus the type read/written, can be null if not
     /// available.
     Type *Ty;
+
+    /// The full chain of instructions that participate in the Access.
+    /// There may be more than one access chain.
+    AccessPathSetTy *AccessPaths;
   };
 
   /// Create an abstract attribute view for the position \p IRP.
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index a6ac7610a2c7a..2e28e95ded171 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -11,6 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
+#include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/IR/GlobalVariable.h"
 #include "llvm/Transforms/IPO/Attributor.h"
 
 #include "llvm/ADT/APInt.h"
@@ -856,8 +858,13 @@ struct AA::PointerInfo::State : public AbstractState {
   ChangeStatus addAccess(Attributor &A, const AAPointerInfo::RangeList &Ranges,
                          Instruction &I, std::optional<Value *> Content,
                          AAPointerInfo::AccessKind Kind, Type *Ty,
+                         AAPointerInfo::OffsetInfoMapTy &OffsetInfoMap,
                          Instruction *RemoteI = nullptr);
 
+  AAPointerInfo::AccessPathSetTy *
+  findAllAccessPaths(AAPointerInfo::OffsetInfoMapTy &OffsetInfoMap,
+                     Instruction *LocalI);
+
   AAPointerInfo::const_bin_iterator begin() const { return OffsetBins.begin(); }
   AAPointerInfo::const_bin_iterator end() const { return OffsetBins.end(); }
   int64_t numOffsetBins() const { return OffsetBins.size(); }
@@ -935,10 +942,95 @@ struct AA::PointerInfo::State : public AbstractState {
   BooleanState BS;
 };
 
+AAPointerInfo::AccessPathSetTy *AA::PointerInfo::State::findAllAccessPaths(
+    AAPointerInfo::OffsetInfoMapTy &OffsetInfoMap, Instruction *LocalI) {
+  AAPointerInfo::AccessPathSetTy *AccessPathsSet =
+      new AAPointerInfo::AccessPathSetTy();
+
+  // Store the instruction and its storage (i.e., which path it belongs to)
+  // on the stack.
+  // We also store the visited set on the stack.
+  // Since we want to find new paths, we want to make sure an instruction is
+  // not visited twice on the same path. However, we can visit the same
+  // instruction more than once if it exists on different paths.
+  using VisitedTy = SmallPtrSet<Value *, 4>;
+  using StackElementTy =
+      std::tuple<Value *, AAPointerInfo::AccessPathTy *, VisitedTy>;
+
+  SmallVector<StackElementTy, 16> Stack;
+
+  // Populate the stack with elements.
+  for (auto *It = LocalI->op_begin(); It != LocalI->op_end(); It++) {
+    Value *V = cast<Value>(It);
+    if (!OffsetInfoMap.contains(V))
+      continue;
+
+    SmallPtrSet<Value *, 4> LocalVisitedMap;
+    AAPointerInfo::AccessPathTy *NewPath = new AAPointerInfo::AccessPathTy();
+    AccessPathsSet->insert(NewPath);
+    NewPath->push_back(LocalI);
+    Stack.push_back(std::make_tuple(V, NewPath, LocalVisitedMap));
+  }
+
+  while (!Stack.empty()) {
+    auto Entry = Stack.pop_back_val();
+    Value *Top = std::get<0>(Entry);
+    AAPointerInfo::AccessPathTy *CurrentChain = std::get<1>(Entry);
+    auto &Visited = std::get<2>(Entry);
+
+    if (!OffsetInfoMap.contains(Top))
+      continue;
+
+    if (!Visited.insert(Top).second)
+      continue;
+
+    CurrentChain->push_back(Top);
+    auto OI = OffsetInfoMap.lookup(Top);
+    auto &Origins = OI.Origins;
+
+    SmallPtrSet<Value *, 16> Successors;
+    for (auto &Origin : Origins) {
+      for (auto *Val : Origin) {
+        // Since we store depth-1 predecessors in Origins, an origin that
+        // is the current value itself marks the end of the chain, so it
+        // is not followed any further.
+        if (Val != Top)
+          Successors.insert(Val);
+      }
+    }
+
+    if (Successors.empty())
+      continue;
+
+    // Create new paths to be forked
+    SmallVector<AAPointerInfo::AccessPathTy *> NewPaths;
+    NewPaths.push_back(CurrentChain);
+    for (size_t Index = 1; Index < Successors.size(); Index++) {
+      AAPointerInfo::AccessPathTy *NewPath = new AAPointerInfo::AccessPathTy(
+          CurrentChain->begin(), CurrentChain->end());
+      NewPaths.push_back(NewPath);
+    }
+
+    int Index = 0;
+    // Traverse the successors
+    for (auto *Successor : Successors) {
+      AAPointerInfo::AccessPathTy *NextChain = NewPaths[Index];
+      AccessPathsSet->insert(NextChain);
+      // Push successors to traverse and their corresponding storage on
+      // stack.
+      VisitedTy NewVisitedSet(Visited.begin(), Visited.end());
+      Stack.push_back(std::make_tuple(Successor, NextChain, NewVisitedSet));
+      Index++;
+    }
+  }
+
+  return AccessPathsSet;
+}
+
 ChangeStatus AA::PointerInfo::State::addAccess(
     Attributor &A, const AAPointerInfo::RangeList &Ranges, Instruction &I,
     std::optional<Value *> Content, AAPointerInfo::AccessKind Kind, Type *Ty,
-    Instruction *RemoteI) {
+    AAPointerInfo::OffsetInfoMapTy &OffsetInfoMap, Instruction *RemoteI) {
   RemoteI = RemoteI ? RemoteI : &I;
 
   // Check if we have an access for this instruction, if not, simply add it.
@@ -965,7 +1057,11 @@ ChangeStatus AA::PointerInfo::State::addAccess(
   };
 
   if (!AccExists) {
-    AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty);
+    AAPointerInfo::AccessPathSetTy *AccessPaths =
+        AA::PointerInfo::State::findAllAccessPaths(OffsetInfoMap, &I);
+    AccessList.emplace_back(&I, RemoteI, Ranges, Content, Kind, Ty,
+                            AccessPaths);
+
     assert((AccessList.size() == AccIndex + 1) &&
            "New Access should have been at AccIndex");
     LocalList.push_back(AccIndex);
@@ -975,13 +1071,18 @@ ChangeStatus AA::PointerInfo::State::addAccess(
 
   // Combine the new Access with the existing Access, and then update the
   // mapping in the offset bins.
-  AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty);
+  AAPointerInfo::AccessPathSetTy *AccessPaths =
+      AA::PointerInfo::State::findAllAccessPaths(OffsetInfoMap, &I);
+  AAPointerInfo::Access Acc(&I, RemoteI, Ranges, Content, Kind, Ty,
+                            AccessPaths);
   auto &Current = AccessList[AccIndex];
   auto Before = Current;
   Current &= Acc;
   if (Current == Before)
     return ChangeStatus::UNCHANGED;
 
+  // Merge the newly generated access paths with the old access paths.
+  Before.mergeAccessPaths(Acc.getAccessChain());
   auto &ExistingRanges = Before.getRanges();
   auto &NewRanges = Current.getRanges();
 
@@ -1014,7 +1115,16 @@ namespace {
 #ifndef NDEBUG
 static raw_ostream &operator<<(raw_ostream &OS,
                                const AAPointerInfo::OffsetInfo &OI) {
-  OS << llvm::interleaved_array(OI);
+  ListSeparator LS;
+  // Origins may be shorter than Ranges, so only index it when in bounds.
+  for (const auto &[I, Range] : llvm::enumerate(OI)) {
+    OS << LS << "[Offset, Size]: " << Range << "\n";
+    if (I < OI.Origins.size())
+      for (auto *Val : OI.Origins[I])
+        OS << "Origin: " << *Val << "\n";
+  }
+  OS << "\n";
+
   return OS;
 }
 #endif // NDEBUG
@@ -1358,7 +1468,8 @@ struct AAPointerInfoImpl
 
   ChangeStatus translateAndAddStateFromCallee(Attributor &A,
                                               const AAPointerInfo &OtherAA,
-                                              CallBase &CB) {
+                                              CallBase &CB,
+                                              OffsetInfoMapTy &OffsetInfoMap) {
     using namespace AA::PointerInfo;
     if (!OtherAA.getState().isValidState() || !isValidState())
       return indicatePessimisticFixpoint();
@@ -1382,15 +1493,16 @@ struct AAPointerInfoImpl
         AK = AccessKind(AK & (IsByval ? AccessKind::AK_R : AccessKind::AK_RW));
         AK = AccessKind(AK | (RAcc.isMayAccess() ? AK_MAY : AK_MUST));
 
-        Changed |= addAccess(A, RAcc.getRanges(), CB, Content, AK,
-                             RAcc.getType(), RAcc.getRemoteInst());
+        Changed |=
+            addAccess(A, RAcc.getRanges(), CB, Content, AK, RAcc.getType(),
+                      OffsetInfoMap, RAcc.getRemoteInst());
       }
     }
     return Changed;
   }
 
   ChangeStatus translateAndAddState(Attributor &A, const AAPointerInfo &OtherAA,
-                                    const OffsetInfo &Offsets, CallBase &CB,
+                                    const OffsetInfo &Ranges, CallBase &CB,
                                     bool IsMustAcc) {
     using namespace AA::PointerInfo;
     if (!OtherAA.getState().isValidState() || !isValidState())
@@ -1406,18 +1518,19 @@ struct AAPointerInfoImpl
         const auto &RAcc = State.getAccess(Index);
         if (!IsMustAcc && RAcc.isAssumption())
           continue;
-        for (auto Offset : Offsets) {
-          auto NewRanges = Offset == AA::RangeTy::Unknown
+        for (auto Range : Ranges) {
+          auto NewRanges = Range.Offset == AA::RangeTy::Unknown
                                ? AA::RangeTy::getUnknown()
                                : RAcc.getRanges();
           if (!NewRanges.isUnknown()) {
-            NewRanges.addToAllOffsets(Offset);
+            NewRanges.addToAllOffsets(Range.Offset);
           }
           AccessKind AK = RAcc.getKind();
           if (!IsMustAcc)
             AK = AccessKind((AK & ~AK_MUST) | AK_MAY);
-          Changed |= addAccess(A, NewRanges, CB, RAcc.getContent(), AK,
-                               RAcc.getType(), RAcc.getRemoteInst());
+          Changed |=
+              addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(),
+                        RAcc.getType(), OffsetInfoMap, RAcc.getRemoteInst());
         }
       }
     }
@@ -1448,9 +1561,11 @@ struct AAPointerInfoImpl
           else
             O << "       - c: <unknown>\n";
         }
+        Acc.dumpAccessPaths(O);
       }
     }
   }
+  OffsetInfoMapTy OffsetInfoMap;
 };
 
 struct AAPointerInfoFloating : public AAPointerInfoImpl {
@@ -1461,8 +1576,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
   /// Deal with an access and signal if it was handled successfully.
   bool handleAccess(Attributor &A, Instruction &I,
                     std::optional<Value *> Content, AccessKind Kind,
-                    OffsetInfo::VecTy &Offsets, ChangeStatus &Changed,
-                    Type &Ty) {
+                    OffsetInfo &OI, ChangeStatus &Changed, Type &Ty,
+                    OffsetInfoMapTy &OffsetInfoMap) {
     using namespace AA::PointerInfo;
     auto Size = AA::RangeTy::Unknown;
     const DataLayout &DL = A.getDataLayout();
@@ -1471,16 +1586,23 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
       Size = AccessSize.getFixedValue();
 
     // Make a strictly ascending list of offsets as required by addAccess()
-    SmallVector<int64_t> OffsetsSorted(Offsets.begin(), Offsets.end());
-    llvm::sort(OffsetsSorted);
+    auto Ranges = OI.Ranges;
+    auto Origins = OI.Origins;
+
+    llvm::sort(Ranges);
+    auto *Last = llvm::unique(Ranges);
+    Ranges.erase(Last, Ranges.end());
+
+    SmallVector<int64_t> OffsetsOnly;
+    OI.getOnlyOffsets(OffsetsOnly);
 
     VectorType *VT = dyn_cast<VectorType>(&Ty);
     if (!VT || VT->getElementCount().isScalable() ||
         !Content.value_or(nullptr) || !isa<Constant>(*Content) ||
         (*Content)->getType() != VT ||
         DL.getTypeStoreSize(VT->getElementType()).isScalable()) {
-      Changed =
-          Changed | addAccess(A, {OffsetsSorted, Size}, I, Content, Kind, &Ty);
+      Changed = Changed | addAccess(A, {OffsetsOnly, Size}, I, Content, Kind,
+                                    &Ty, OffsetInfoMap);
     } else {
       // Handle vector stores with constant content element-wise.
       // TODO: We could look for the elements or create instructions
@@ -1492,7 +1614,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
       int64_t ElementSize = DL.getTypeStoreSize(ElementType).getFixedValue();
       auto *ConstContent = cast<Constant>(*Content);
       Type *Int32Ty = Type::getInt32Ty(ElementType->getContext());
-      SmallVector<int64_t> ElementOffsets(Offsets.begin(), Offsets.end());
+      SmallVector<int64_t> ElementOffsets;
+      OI.getOnlyOffsets(ElementOffsets);
 
       for (int i = 0, e = VT->getElementCount().getFixedValue(); i != e; ++i) {
         Value *ElementContent = ConstantExpr::getExtractElement(
@@ -1500,7 +1623,8 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
 
         // Add the element access.
         Changed = Changed | addAccess(A, {ElementOffsets, ElementSize}, I,
-                                      ElementContent, Kind, ElementType);
+                                      ElementContent, Kind, ElementType,
+                                      OffsetInfoMap);
 
         // Advance the offsets for the next element.
         for (auto &ElementOffset : ElementOffsets)
@@ -1519,7 +1643,7 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
   /// \return true iff \p UsrOI is updated.
   bool collectConstantsForGEP(Attributor &A, const DataLayout &DL,
                               OffsetInfo &UsrOI, const OffsetInfo &PtrOI,
-                              const GEPOperator *GEP);
+                              GEPOperator *GEP, Value *CurPtr);
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override {
@@ -1527,11 +1651,9 @@ struct AAPointerInfoFloating : public AAPointerInfoImpl {
   }
 };
 
-bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
-                                                   const DataLayout &DL,
-                                                   OffsetInfo &UsrOI,
-                                                   const OffsetInfo &PtrOI,
-                                                   const GEPOperator *GEP) {
+bool AAPointerInfoFloating::collectConstantsForGEP(
+    Attributor &A, const DataLayout &DL, OffsetInfo &UsrOI,
+    const OffsetInfo &PtrOI, GEPOperator *GEP, Value *CurPtr) {
   unsigned BitWidth = DL.getIndexTypeSizeInBits(GEP->getType());
   SmallMapVector<Value *, APInt, 4> VariableOffsets;
   APInt ConstantOffset(BitWidth, 0);
@@ -1541,7 +1663,7 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
          "determined to be unknown.");
 
   if (!GEP->collectOffset(DL, BitWidth, VariableOffsets, ConstantOffset)) {
-    UsrOI.setUnknown();
+    UsrOI.setUnknown(*CurPtr);
     return true;
   }
 
@@ -1550,7 +1672,9 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
                     << *GEP << "\n");
 
   auto Union = PtrOI;
-  Union.addToAll(ConstantOffset.getSExtValue());
+  // Clear the origins since we want to keep only one predecessor.
+  Union.Origins.clear();
+  Union.addToAll(ConstantOffset.getSExtValue(), *CurPtr);
 
   // Each VI in VariableOffsets has a set of potential constant values. Every
   // combination of elements, picked one each from these sets, is separately
@@ -1559,7 +1683,7 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
     auto *PotentialConstantsAA = A.getAAFor<AAPotentialConstantValues>(
         *this, IRPosition::value(*VI.first), DepClassTy::OPTIONAL);
     if (!PotentialConstantsAA || !PotentialConstantsAA->isValidState()) {
-      UsrOI.setUnknown();
+      UsrOI.setUnknown(*CurPtr);
       return true;
     }
 
@@ -1578,14 +1702,16 @@ bool AAPointerInfoFloating::collectConstantsForGEP(Attributor &A,
     OffsetInfo Product;
     for (const auto &ConstOffset : AssumedSet) {
       auto CopyPerOffset = Union;
-      CopyPerOffset.addToAll(ConstOffset.getSExtValue() *
-                             VI.second.getZExtValue());
+      CopyPerOffset.addToAll(
+          ConstOffset.getSExtValue() * VI.second.getZExtValue(), *CurPtr);
       Product.merge(CopyPerOffset);
     }
     Union = Product;
   }
 
   UsrOI = std::move(Union);
+  TypeSize Size = DL.getTypeAllocSize(GEP->getResultElementType());
+  UsrOI.setSizeAll(Size);
   return true;
 }
 
@@ -1594,9 +1720,27 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
   ChangeStatus Changed = ChangeStatus::UNCHANGED;
   const DataLayout &DL = A.getDataLayout();
   Value &AssociatedValue = getAssociatedValue();
-
-  DenseMap<Value *, OffsetInfo> OffsetInfoMap;
-  OffsetInfoMap[&AssociatedValue].insert(0);
+  OffsetInfoMap.clear();
+
+  uint64_t Size;
+  Function *F = getAssociatedFunction();
+  TargetLibraryInfo *TLI = nullptr;
+  if (F)
+    TLI = A.getInfoCache().getTargetLibraryInfoForFunction(*F);
+
+  if (TLI && getObjectSize(&AssociatedValue, Size, DL, TLI)) {
+    OffsetInfoMap[&AssociatedValue].insert(AA::RangeTy(0, Size),
+                                           AssociatedValue);
+  } else if (isa<GlobalVariable>(AssociatedValue)) {
+    auto &Glob = cast<GlobalVariable>(AssociatedValue);
+    TypeSize SizeOfType = DL.getTypeAllocSize(Glob.getValueType());
+    OffsetInfoMap[&AssociatedValue].insert(AA::RangeTy(0, SizeOfType),
+                                           AssociatedValue);
+  } else {
+    TypeSize SizeOfType = DL.getTypeAllocSize(AssociatedValue.getType());
+    OffsetInfoMap[&AssociatedValue].insert(AA::RangeTy(0, SizeOfType),
+                                           AssociatedValue);
+  }
 
   auto HandlePassthroughUser = [&](Value *Usr, Value *CurPtr, bool &Follow) {
     // One does not simply walk into a map and assign a reference to a possibly
@@ -1615,7 +1759,13 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
     auto &PtrOI = OffsetInfoMap[CurPtr];
     assert(!PtrOI.isUnassigned() &&
            "Cannot pass through if the input Ptr was not visited!");
-    UsrOI.merge(PtrOI);
+    if (isa<PHINode>(Usr) || isa<SelectInst>(Usr)) {
+      UsrOI.mergeWithOffset(PtrOI, *CurPtr);
+    } else {
+      UsrOI = PtrOI;
+      UsrOI.Origins.clear();
+      UsrOI.addToAll(0, *CurPtr);
+    }
     Follow = true;
     return true;
   };
@@ -1650,11 +1800,11 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
 
       if (PtrOI.isUnknown()) {
         Follow = true;
-        UsrOI.setUnknown();
+        UsrOI.setUnknown(*GEP);
         return true;
       }
 
-      Follow = collectConstantsForGEP(A, DL, UsrOI, PtrOI, GEP);
+      Follow = collectConstantsForGEP(A, DL, UsrOI, PtrOI, GEP, CurPtr);
       return true;
     }
     if (isa<PtrToIntInst>(Usr))
@@ -1689,7 +1839,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand offset unknown "
                           << *CurPtr << " in " << *PHI << "\n");
         Follow = !UsrOI.isUnknown();
-        UsrOI.setUnknown();
+        UsrOI.setUnknown(*CurPtr);
         return true;
       }
 
@@ -1700,7 +1850,6 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant (so far)");
         return true;
       }
-
       // Check if the PHI operand can be traced back to AssociatedValue.
       APInt Offset(
           DL.getIndexSizeInBits(CurPtr->getType()->getPointerAddressSpace()),
@@ -1712,7 +1861,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI operand is too complex "
                           << *CurPtr << " in " << *PHI
                           << " (base: " << *CurPtrBase << ")\n");
-        UsrOI.setUnknown();
+        UsrOI.setUnknown(*CurPtr);
         Follow = true;
         return true;
       }
@@ -1729,7 +1878,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
               *PHI->getFunction());
       if (mayBeInCycle(CI, cast<Instruction>(Usr), /* HeaderOnly */ true)) {
         auto BaseOI = It->getSecond();
-        BaseOI.addToAll(Offset.getZExtValue());
+        BaseOI.addToAll(Offset.getZExtValue(), *CurPtr);
         if (IsFirstPHIUser || BaseOI == UsrOI) {
           LLVM_DEBUG(dbgs() << "[AAPointerInfo] PHI is invariant " << *CurPtr
                             << " in " << *Usr << "\n");
@@ -1739,12 +1888,12 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         LLVM_DEBUG(
             dbgs() << "[AAPointerInfo] PHI operand pointer offset mismatch "
                    << *CurPtr << " in " << *PHI << "\n");
-        UsrOI.setUnknown();
+        UsrOI.setUnknown(*CurPtr);
         Follow = true;
         return true;
       }
 
-      UsrOI.merge(PtrOI);
+      UsrOI.mergeWithOffset(PtrOI, *CurPtr);
       Follow = true;
       return true;
     }
@@ -1758,8 +1907,8 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
       else
         AK = AccessKind(AK | AccessKind::AK_MAY);
       if (!handleAccess(A, *LoadI, /* Content */ nullptr, AK,
-                        OffsetInfoMap[CurPtr].Offsets, Changed,
-                        *LoadI->getType()))
+                        OffsetInfoMap[CurPtr], Changed, *LoadI->getType(),
+                        OffsetInfoMap))
         return false;
 
       auto IsAssumption = [](Instruction &I) {
@@ -1842,9 +1991,9 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         Content =
             A.getAssumedSimplified(*Assumption.first, *this,
                                    UsedAssumedInformation, AA::Interprocedural);
-      return handleAccess(
-          A, *Assumption.second, Content, AccessKind::AK_ASSUMPTION,
-          OffsetInfoMap[CurPtr].Offsets, Changed, *LoadI->getType());
+      return handleAccess(A, *Assumption.second, Content,
+                          AccessKind::AK_ASSUMPTION, OffsetInfoMap[CurPtr],
+                          Changed, *LoadI->getType(), OffsetInfoMap);
     }
 
     auto HandleStoreLike = [&](Instruction &I, Value *ValueOp, Type &ValueTy,
@@ -1870,8 +2019,8 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
       if (ValueOp)
         Content = A.getAssumedSimplified(
             *ValueOp, *this, UsedAssumedInformation, AA::Interprocedural);
-      return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr].Offsets,
-                          Changed, ValueTy);
+      return handleAccess(A, I, Content, AK, OffsetInfoMap[CurPtr], Changed,
+                          ValueTy, OffsetInfoMap);
     };
 
     if (auto *StoreI = dyn_cast<StoreInst>(Usr))
@@ -2018,8 +2167,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
       } else {
         auto Kind =
             ArgNo == 0 ? AccessKind::AK_MUST_WRITE : AccessKind::AK_MUST_READ;
-        Changed =
-            Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind, nullptr);
+        Changed = Changed | addAccess(A, {0, LengthVal}, *MI, nullptr, Kind,
+                                      nullptr, OffsetInfoMap);
       }
       LLVM_DEBUG({
         dbgs() << "Accesses by bin after update:\n";
@@ -2039,8 +2188,8 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
       auto *ArgAA =
           A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
       if (ArgAA && ArgAA->getState().isValidState())
-        return translateAndAddStateFromCallee(A, *ArgAA,
-                                              *cast<CallBase>(getCtxI()));
+        return translateAndAddStateFromCallee(
+            A, *ArgAA, *cast<CallBase>(getCtxI()), OffsetInfoMap);
       if (!Arg->getParent()->isDeclaration())
         return indicatePessimisticFixpoint();
     }
@@ -2057,7 +2206,7 @@ struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
     auto Kind =
         ReadOnly ? AccessKind::AK_MAY_READ : AccessKind::AK_MAY_READ_WRITE;
     return addAccess(A, AA::RangeTy::getUnknown(), *getCtxI(), nullptr, Kind,
-                     nullptr);
+                     nullptr, OffsetInfoMap);
   }
 
   /// See AbstractAttribute::trackStatistics()
diff --git a/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll b/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll
new file mode 100644
index 0000000000000..b7c3f1f33191e
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll
@@ -0,0 +1,387 @@
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal -debug-only=attributor  -attributor-annotate-decl-cs  -S < %s 2>&1 | FileCheck %s
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal -debug-only=attributor  -attributor-annotate-decl-cs -S < %s 2>&1  | FileCheck %s
+; REQUIRES: asserts
+
+
+ at globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+
+; CHECK: Accesses by bin after update:
+; CHECK: [8-12] : 1
+; CHECK:      - 5 -   %1 = load i32, ptr %field22, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %1 = load i32, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 0
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [4-5] : 1
+; CHECK:      - 9 -   store i8 10, ptr %field11, align 4
+; CHECK:        - c: i8 10
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 10, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 0
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [32-36] : 1
+; CHECK:      - 9 -   store i32 %3, ptr %field8, align 4
+; CHECK:        - c:   %3 = load i32, ptr %val, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %3, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [4-8] : 1
+; CHECK:      - 5 -   %0 = load i32, ptr %field11, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %0 = load i32, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 0
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [8-9] : 1
+; CHECK:      - 9 -   store i8 12, ptr %field22, align 4
+; CHECK:        - c: i8 12
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 12, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 0
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+define dso_local i32 @track_chain(ptr nocapture %val) #0 {
+entry:
+  %f = alloca [10 x i32]
+  %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+  %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+  %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+  %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+
+  %field11 = getelementptr i32, ptr %field1, i32 0
+  %field22 = getelementptr i32, ptr %field2, i32 0
+  store i8 10, ptr %field11, align 4
+  store i8 12, ptr %field22, align 4
+
+  %1 = load i32, ptr %field11, align 4
+  %2 = load i32, ptr %field22, align 4
+  %3 = add i32 %1, %2
+
+  %4 = load i32, ptr %val, align 4
+  store i32 %4, ptr %field8, align 4
+
+  %5 = add i32 %4, %3
+
+  ret i32 %5
+}
+
+
+; CHECK: Accesses by bin after update:
+; CHECK: [12-16] : 1
+; CHECK:      - 5 -   %0 = load i32, ptr %field11, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %0 = load i32, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 2
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [16-17] : 1
+; CHECK:      - 9 -   store i8 12, ptr %field22, align 4
+; CHECK:        - c: i8 12
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 12, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 2
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [32-36] : 1
+; CHECK:      - 9 -   store i32 %3, ptr %field8, align 4
+; CHECK:        - c:   %3 = load i32, ptr %val, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %3, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [16-20] : 1
+; CHECK:      - 5 -   %1 = load i32, ptr %field22, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %1 = load i32, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 2
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [12-13] : 1
+; CHECK:      - 9 -   store i8 10, ptr %field11, align 4
+; CHECK:        - c: i8 10
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 10, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 2
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+define dso_local i32 @track_chain_2(ptr nocapture %val) #0 {
+entry:
+  %f = alloca [10 x i32]
+  %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+  %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+  %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+  %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+
+  %field11 = getelementptr i32, ptr %field1, i32 2
+  %field22 = getelementptr i32, ptr %field2, i32 2
+  store i8 10, ptr %field11, align 4
+  store i8 12, ptr %field22, align 4
+
+  %1 = load i32, ptr %field11, align 4
+  %2 = load i32, ptr %field22, align 4
+  %3 = add i32 %1, %2
+
+  %4 = load i32, ptr %val, align 4
+  store i32 %4, ptr %field8, align 4
+
+  %5 = add i32 %4, %3
+
+  ret i32 %5
+}
+
+
+; CHECK: Accesses by bin after update:
+; CHECK: [12-16] : 3
+; CHECK:      - 5 -   %0 = load i32, ptr %field11, align 4
+; CHECK:       - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %0 = load i32, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 2
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK:      - 5 -   %b = load i32, ptr %field3, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %b = load i32, ptr %field3, align 4
+; CHECK:   %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK:      - 10 -   store i32 1000, ptr %6, align 4
+; CHECK:        - c: i32 1000
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 1000, ptr %6, align 4
+; CHECK:   %6 = select i1 %cond, ptr %field3, ptr %field8
+; CHECK:   %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 1000, ptr %6, align 4
+; CHECK:   %6 = select i1 %cond, ptr %field3, ptr %field8
+; CHECK:   %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [16-17] : 1
+; CHECK:      - 9 -   store i8 12, ptr %field22, align 4
+; CHECK:       - c: i8 12
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 12, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 2
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [32-36] : 4
+; CHECK:      - 9 -   store i32 %3, ptr %field8, align 4
+; CHECK:        - c:   %3 = load i32, ptr %val, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %3, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK:      - 5 -   %a1 = load i32, ptr %field8, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %a1 = load i32, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK:      - 10 -   store i32 1000, ptr %6, align 4
+; CHECK:        - c: i32 1000
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 1000, ptr %6, align 4
+; CHECK:   %6 = select i1 %cond, ptr %field3, ptr %field8
+; CHECK:   %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 1000, ptr %6, align 4
+; CHECK:   %6 = select i1 %cond, ptr %field3, ptr %field8
+; CHECK:  %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK:      - 5 -   %8 = load i32, ptr %field8, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:  %8 = load i32, ptr %field8, align 4
+; CHECK:  %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+; CHECK:  %f = alloca [10 x i32], align 4
+; CHECK: [16-20] : 1
+; CHECK:      - 5 -   %1 = load i32, ptr %field22, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %1 = load i32, ptr %field22, align 4
+; CHECK:   %field22 = getelementptr i32, ptr %field2, i32 2
+; CHECK:   %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+; CHECK:   %f = alloca [10 x i32], align 4
+; CHECK: [12-13] : 1
+; CHECK:      - 9 -   store i8 10, ptr %field11, align 4
+; CHECK:        - c: i8 10
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i8 10, ptr %field11, align 4
+; CHECK:   %field11 = getelementptr i32, ptr %field1, i32 2
+; CHECK:   %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+; CHECK:   %f = alloca [10 x i32], align 4
+define dso_local i32 @track_chain_3(ptr nocapture %val, i1 %cond) #0 {
+entry:
+  %f = alloca [10 x i32]
+  %field1 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 1
+  %field2 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 2
+  %field3 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 3
+  %field8 = getelementptr inbounds [10 x i32], ptr %f, i32 0, i32 8
+
+  %field11 = getelementptr i32, ptr %field1, i32 2
+  %field22 = getelementptr i32, ptr %field2, i32 2
+  store i8 10, ptr %field11, align 4
+  store i8 12, ptr %field22, align 4
+  %1 = load i32, ptr %field11, align 4
+  %2 = load i32, ptr %field22, align 4
+  %3 = add i32 %1, %2
+  %4 = load i32, ptr %val, align 4
+  store i32 %4, ptr %field8, align 4
+  %5 = add i32 %4, %3
+  %6 = load i32, ptr %val
+  %a1 = load i32, ptr %field8
+  %a = add i32 %a1, %6
+  %b = load i32, ptr %field3
+  ;%b  = sub i32 %b1, %6
+  %7 = select i1 %cond, ptr %field3, ptr %field8
+  store i32 1000, ptr %7
+  %8 = add i32 %5, %b
+  %9 = load i32, ptr %field8
+  %10 = add i32 %9, %8
+  ret i32 %10
+}
+
+; CHECK: Accesses by bin after update:
+; CHECK: [8-12] : 2
+; CHECK:      - 9 -   store i32 %0, ptr %field2, align 4
+; CHECK:        - c:   %0 = load i32, ptr %val, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %0, ptr %field2, align 4
+; CHECK:   %field2 = getelementptr i32, ptr @globalBytes, i32 2
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK:      - 6 -   %ret = load i32, ptr %x, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field2 = getelementptr i32, ptr @globalBytes, i32 2
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field8 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK: [32-36] : 5
+; CHECK:      - 6 -   %ret = load i32, ptr %x, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field2 = getelementptr i32, ptr @globalBytes, i32 2
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field8 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK:      - 9 -   store i32 %1, ptr %field8, align 4
+; CHECK:        - c:   %1 = load i32, ptr %val2, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %1, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK:      - 9 -   store i32 %0, ptr %field2, align 4
+; CHECK:        - c:   %0 = load i32, ptr %val, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %0, ptr %field2, align 4
+; CHECK:   %field2 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK:      - 6 -   %ret = load i32, ptr %x, align 4
+; CHECK:        - c: <unknown>
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field2 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK: Backtrack a unique access path:
+; CHECK:   %ret = load i32, ptr %x, align 4
+; CHECK:   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+; CHECK:   %field8 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+; CHECK:      - 9 -   store i32 %1, ptr %field8, align 4
+; CHECK:        - c:   %1 = load i32, ptr %val2, align 4
+; CHECK: Print all access paths found:
+; CHECK: Backtrack a unique access path:
+; CHECK:   store i32 %1, ptr %field8, align 4
+; CHECK:   %field8 = getelementptr i32, ptr @globalBytes, i32 8
+; CHECK: @globalBytes = internal global [1024 x i8] zeroinitializer, align 16
+
+define dso_local i32 @phi_different_offsets(ptr nocapture %val, ptr nocapture %val2, i1 %cmp) {
+entry:
+  br i1 %cmp, label %then, label %else
+
+then:
+  %field2 = getelementptr i32, ptr @globalBytes, i32 2
+  %0 = load i32, ptr %val
+  store i32 %0, ptr %field2
+  br label %end
+
+else:
+  %field8 = getelementptr i32, ptr @globalBytes, i32 8
+  %2 = load i32, ptr %val2
+  store i32 %2, ptr %field8
+  br label %end
+
+end:
+  %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+  %ret = load i32, ptr %x
+  ret i32 %ret
+
+}
+
+define dso_local i32 @phi_same_offsets(ptr nocapture %val, ptr nocapture %val2, i1 %cmp) {
+entry:
+  br i1 %cmp, label %then, label %else
+
+then:
+  %field2 = getelementptr i32, ptr @globalBytes, i32 8
+  %0 = load i32, ptr %val
+  store i32 %0, ptr %field2
+  br label %end
+
+else:
+  %field8 = getelementptr i32, ptr @globalBytes, i32 8
+  %2 = load i32, ptr %val2
+  store i32 %2, ptr %field8
+  br label %end
+
+end:
+  %x = phi ptr [ %field2, %then ], [ %field8, %else ]
+  %ret = load i32, ptr %x
+  ret i32 %ret
+}
\ No newline at end of file

>From ea6611075158b54627ad486827a466286c90d0be Mon Sep 17 00:00:00 2001
From: Vidush Singhal <singhal2 at ruby964.llnl.gov>
Date: Tue, 4 Jun 2024 16:51:06 -0700
Subject: [PATCH 02/14] [Attributor]: Change allocation size and load/store
 offsets using AAPointerInfo for Alloca instructions

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |   8 +
 .../Transforms/IPO/AttributorAttributes.cpp   | 474 ++++++++++++++++--
 .../Attributor/ArgumentPromotion/crash.ll     |   6 +-
 .../live_called_from_dead.ll                  |   3 +-
 .../live_called_from_dead_2.ll                |   7 +-
 .../nonzero-address-spaces.ll                 |   3 +-
 .../Attributor/IPConstantProp/pthreads.ll     |   4 +-
 llvm/test/Transforms/Attributor/allocator.ll  | 197 ++++++--
 .../Attributor/call-simplify-pointer-info.ll  |  28 +-
 .../Transforms/Attributor/heap_to_stack.ll    |   3 +-
 .../Attributor/heap_to_stack_gpu.ll           |   7 +-
 llvm/test/Transforms/Attributor/liveness.ll   |  12 +-
 llvm/test/Transforms/Attributor/nodelete.ll   |   5 +-
 .../Transforms/Attributor/pointer-info.ll     |   6 +-
 .../Attributor/value-simplify-pointer-info.ll |  18 +-
 15 files changed, 644 insertions(+), 137 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 1c8c585b25f20..d785f37035102 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5827,8 +5827,10 @@ struct AANonConvergent : public StateWrapper<BooleanState, AbstractAttribute> {
 
 /// An abstract interface for struct information.
 struct AAPointerInfo : public AbstractAttribute {
+protected:
   AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
 
+public:
   /// See AbstractAttribute::isValidIRPositionForInit
   static bool isValidIRPositionForInit(Attributor &A, const IRPosition &IRP) {
     if (!IRP.getAssociatedType()->isPtrOrPtrVectorTy())
@@ -6385,6 +6387,9 @@ struct AAPointerInfo : public AbstractAttribute {
   virtual int64_t numOffsetBins() const = 0;
   virtual bool reachesReturn() const = 0;
   virtual void addReturnedOffsetsTo(OffsetInfo &) const = 0;
+  virtual void dumpState(raw_ostream &O) const = 0;
+  virtual const Access &getBinAccess(unsigned Index) const = 0;
+  virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0;
 
   /// Call \p CB on all accesses that might interfere with \p Range and return
   /// true if all such accesses were known and the callback returned true for
@@ -6639,6 +6644,9 @@ struct AAAllocationInfo : public StateWrapper<BooleanState, AbstractAttribute> {
 
   virtual std::optional<TypeSize> getAllocatedSize() const = 0;
 
+  using NewOffsetsTy = DenseMap<AA::RangeTy, AA::RangeTy>;
+  virtual const NewOffsetsTy &getNewOffsets() const = 0;
+
   /// See AbstractAttribute::getName()
   StringRef getName() const override { return "AAAllocationInfo"; }
 
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 2e28e95ded171..6adc60931d645 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1187,6 +1187,15 @@ struct AAPointerInfoImpl
     return ChangeStatus::UNCHANGED;
   }
 
+  virtual const Access &getBinAccess(unsigned Index) const override {
+    return getAccess(Index);
+  }
+
+  virtual const DenseMap<Value *, OffsetInfo> &
+  getOffsetInfoMap() const override {
+    return OffsetInfoMap;
+  }
+
   bool forallInterferingAccesses(
       AA::RangeTy Range,
       function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
@@ -1537,12 +1546,15 @@ struct AAPointerInfoImpl
     return Changed;
   }
 
+  // /// Offsets Info Map
+  // DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+
   /// Statistic tracking for all AAPointerInfo implementations.
   /// See AbstractAttribute::trackStatistics().
   void trackPointerInfoStatistics(const IRPosition &IRP) const {}
 
   /// Dump the state into \p O.
-  void dumpState(raw_ostream &O) {
+  virtual void dumpState(raw_ostream &O) const override {
     for (auto &It : OffsetBins) {
       O << "[" << It.first.Offset << "-" << It.first.Offset + It.first.Size
         << "] : " << It.getSecond().size() << "\n";
@@ -13556,6 +13568,11 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     return AssumedAllocatedSize;
   }
 
+  const NewOffsetsTy &getNewOffsets() const override {
+    assert(isValidState() && "the AA is invalid");
+    return NewComputedOffsets;
+  }
+
   std::optional<TypeSize> findInitialAllocationSize(Instruction *I,
                                                     const DataLayout &DL) {
 
@@ -13600,41 +13617,53 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     if (!AllocationSize)
       return indicatePessimisticFixpoint();
 
-    // For zero sized allocations, we give up.
-    // Since we can't reduce further
+    // For zero sized allocations, we give up
+    // because we cannot reduce them any further.
     if (*AllocationSize == 0)
       return indicatePessimisticFixpoint();
 
-    int64_t BinSize = PI->numOffsetBins();
-
-    // TODO: implement for multiple bins
-    if (BinSize > 1)
-      return indicatePessimisticFixpoint();
-
-    if (BinSize == 0) {
-      auto NewAllocationSize = std::make_optional<TypeSize>(0, false);
+    int64_t NumBins = PI->numOffsetBins();
+    if (NumBins == 0) {
+      auto NewAllocationSize = std::optional<TypeSize>(TypeSize(0, false));
       if (!changeAllocationSize(NewAllocationSize))
         return ChangeStatus::UNCHANGED;
       return ChangeStatus::CHANGED;
     }
 
-    // TODO: refactor this to be part of multiple bin case
-    const auto &It = PI->begin();
+    // For each access bin we compute its new start offset by packing the bins
+    // back to back, and record the result via setNewOffsets(). The resulting
+    // map (type NewOffsetsTy) goes from the AA::RangeTy OldRange to the
+    // AA::RangeTy NewRange.
+    unsigned long PrevBinEndOffset = 0;
+    bool ChangedOffsets = false;
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+      const AA::RangeTy &OldRange = It->getFirst();
 
-    // TODO: handle if Offset is not zero
-    if (It->first.Offset != 0)
-      return indicatePessimisticFixpoint();
+      // If any byte range has an unknown offset or size, we should leave the
+      // original allocation unmodified.
+      if (OldRange.offsetOrSizeAreUnknown())
+        return indicatePessimisticFixpoint();
 
-    uint64_t SizeOfBin = It->first.Offset + It->first.Size;
+      unsigned long NewStartOffset = PrevBinEndOffset;
+      unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
+      PrevBinEndOffset = NewEndOffset;
 
-    if (SizeOfBin >= *AllocationSize)
-      return indicatePessimisticFixpoint();
+      ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
+                                      OldRange.Size);
+    }
 
-    auto NewAllocationSize = std::make_optional<TypeSize>(SizeOfBin * 8, false);
+    // Set the new size of the allocation. PrevBinEndOffset is the packed size
+    // in bytes, so the new allocation size in bits is PrevBinEndOffset * 8.
+    auto NewAllocationSize =
+        std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
 
     if (!changeAllocationSize(NewAllocationSize))
       return ChangeStatus::UNCHANGED;
 
+    if (!ChangedOffsets)
+      return ChangeStatus::UNCHANGED;
+
     return ChangeStatus::CHANGED;
   }
 
@@ -13644,39 +13673,314 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     assert(isValidState() &&
            "Manifest should only be called if the state is valid.");
 
-    Instruction *I = getIRPosition().getCtxI();
+    bool Changed = false;
+    const IRPosition &IRP = getIRPosition();
+    Instruction *I = IRP.getCtxI();
 
-    auto FixedAllocatedSizeInBits = getAllocatedSize()->getFixedValue();
+    // Check if simplified values exist.
+    if (checkIfSimplifiedValuesExists(A, I))
+      return ChangeStatus::UNCHANGED;
 
-    unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+    if (getAllocatedSize() == HasNoAllocationSize)
+      return ChangeStatus::UNCHANGED;
 
-    switch (I->getOpcode()) {
-    // TODO: add case for malloc like calls
-    case Instruction::Alloca: {
+    const AAPointerInfo *PI =
+        A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
 
-      AllocaInst *AI = cast<AllocaInst>(I);
+    if (!PI)
+      return ChangeStatus::UNCHANGED;
 
-      Type *CharType = Type::getInt8Ty(I->getContext());
+    assert(PI->getState().isValidState() &&
+           "[AAAllocationinfo]: AAPointerinfo was not in valid state!");
 
-      auto *NumBytesToValue =
-          ConstantInt::get(I->getContext(), APInt(32, NumBytesToAllocate));
+    // Store a map where each instruction is mapped to a map containing
+    // old bins accessed by that instruction to the corresponding new
+    // bins in the allocation.
+    DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>>
+        AccessedInstructionsToBinsMap;
 
-      BasicBlock::iterator insertPt = AI->getIterator();
-      insertPt = std::next(insertPt);
-      AllocaInst *NewAllocaInst =
-          new AllocaInst(CharType, AI->getAddressSpace(), NumBytesToValue,
-                         AI->getAlign(), AI->getName(), insertPt);
+    auto AddBins =
+        [](DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>> &Map,
+           Instruction *LocalInst, const AA::RangeTy &OldRange,
+           const AA::RangeTy &NewRange) {
+          DenseMap<AA::RangeTy, AA::RangeTy> &NewBinsForInstruction =
+              Map.getOrInsertDefault(LocalInst);
 
-      if (A.changeAfterManifest(IRPosition::inst(*AI), *NewAllocaInst))
-        return ChangeStatus::CHANGED;
+          NewBinsForInstruction.insert(std::make_pair(OldRange, NewRange));
+        };
+
+    const auto &NewOffsetsMap = getNewOffsets();
+    const auto &OffsetInfoMap = PI->getOffsetInfoMap();
+
+    // Map access causing instructions to a tuple of (Old, New) bins.
+    // The access causing instruction contains the pointer operand
+    // which comes from the allocation. We may want to backtrack that
+    // pointer operand; there are 2 cases that may arise.
+    // A) A GEP exists that calculates the pointer operand from the original
+    // allocation instruction: I
+    // B) A GEP does not exist, in which case we need to insert a GEP just
+    // before the access causing instruction with the shift value from the
+    // original offset.
+    for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
+         It != PI->end(); It++) {
+      const auto &OldOffsetRange = It->getFirst();
+      // If the OldOffsetRange is not in the map, offsets for that bin did not
+      // change. We should just continue and skip changing the offsets in that
+      // case.
+      if (!NewOffsetsMap.contains(OldOffsetRange))
+        continue;
+
+      const auto &NewOffsetRange = NewOffsetsMap.lookup(OldOffsetRange);
+      for (const auto AccIndex : It->getSecond()) {
+        const auto &AccessInstruction = PI->getBinAccess(AccIndex);
+        Instruction *LocalInst = AccessInstruction.getLocalInst();
+
+        if (checkIfSimplifiedValuesExists(A, LocalInst))
+          return ChangeStatus::UNCHANGED;
+
+        if (checkIfAccessChainUsesMultipleBins(A, LocalInst, OffsetInfoMap))
+          return ChangeStatus::UNCHANGED;
+
+        // Check if we can backtrack the access causing instruction to a GEP
+        // from the original allocation, if yes, then we prefer to change the
+        // GEP rather than inserting a new one just before the access causing
+        // instruction.
+        switch (LocalInst->getOpcode()) {
+        case Instruction::Call: {
+          CallInst *CallInstruction = cast<CallInst>(LocalInst);
+          for (auto *It = CallInstruction->op_begin();
+               It != CallInstruction->op_end(); It++) {
+            if (Instruction *OperandInstruction = dyn_cast<Instruction>(It)) {
+              // Operand does not cause an access in the current byte range.
+              if (!OffsetInfoMap.contains(OperandInstruction))
+                continue;
+
+              // Find the old offset and the corresponding new offset for the
+              // call argument.
+              auto OffsetsVecArg =
+                  OffsetInfoMap.lookup(OperandInstruction).Ranges;
+              int64_t OldOffsetArg = OffsetsVecArg.front().Offset;
+              int NewOffsetArg = 0;
+              for (auto OldToNewRange : NewOffsetsMap) {
+                auto Old = OldToNewRange.getFirst();
+                if (Old.Offset == OldOffsetArg)
+                  NewOffsetArg = OldToNewRange.getSecond().Offset;
+              }
+
+              // If the offsets did not change, continue.
+              if (NewOffsetArg == OldOffsetArg)
+                continue;
+
+              // We don't have access to the size of the offset here but it is
+              // ok since we do not need it here.
+              AA::RangeTy &CallArgOldRange = OffsetsVecArg.front();
+              AA::RangeTy CallArgNewRange =
+                  AA::RangeTy(NewOffsetArg, CallArgOldRange.Size);
+
+              // Find the chain the call instruction is part of
+              const AAPointerInfo::AccessPathSetTy *AccessPaths =
+                  AccessInstruction.getAccessChain();
+
+              const AAPointerInfo::AccessPathTy *ChainWithArg = nullptr;
+              for (auto *Chain : *AccessPaths) {
+
+                if (std::find(Chain->begin(), Chain->end(),
+                              OperandInstruction) != Chain->end()) {
+                  ChainWithArg = Chain;
+                }
+              }
+
+              bool BackTrackInstructionToGEP = false;
+              if (ChainWithArg) {
+                bool Exists = false;
+                for (auto *V : *ChainWithArg) {
+
+                  GetElementPtrInst *GepI = dyn_cast<GetElementPtrInst>(V);
+
+                  if (!GepI)
+                    continue;
+
+                  if (AccessedInstructionsToBinsMap.contains(GepI)) {
+                    Exists = true;
+                    continue;
+                  }
+
+                  // check if it's a GEP and whether the GEP accesses the
+                  // Allocation
+                  if (GepI->getPointerOperand() == I) {
+                    if (checkIfSimplifiedValuesExists(A, GepI))
+                      return ChangeStatus::UNCHANGED;
+
+                    AddBins(AccessedInstructionsToBinsMap, GepI,
+                            CallArgOldRange, CallArgNewRange);
+                    BackTrackInstructionToGEP = true;
+                  }
+                }
+
+                if (Exists)
+                  continue;
+              }
+
+              if (!BackTrackInstructionToGEP) {
+                AddBins(AccessedInstructionsToBinsMap, OperandInstruction,
+                        CallArgOldRange, CallArgNewRange);
+                continue;
+              }
+            }
+          }
+          break;
+        }
+        default: {
+
+          bool BackTrackInstructionToGEP = false;
+          bool Exists = false;
+          const AAPointerInfo::AccessPathSetTy *AccessPaths =
+              AccessInstruction.getAccessChain();
+          for (auto *Chain : *AccessPaths) {
+            for (auto *V : *Chain) {
+
+              GetElementPtrInst *GepI = dyn_cast<GetElementPtrInst>(V);
+
+              if (!GepI)
+                continue;
+
+              if (AccessedInstructionsToBinsMap.contains(GepI)) {
+                Exists = true;
+                continue;
+              }
+
+              // check if it's a GEP and whether the GEP accesses the Allocation
+              if (GepI->getPointerOperand() == I) {
+                if (checkIfSimplifiedValuesExists(A, GepI))
+                  return ChangeStatus::UNCHANGED;
 
+                AddBins(AccessedInstructionsToBinsMap, GepI, OldOffsetRange,
+                        NewOffsetRange);
+                BackTrackInstructionToGEP = true;
+              }
+            }
+          }
+
+          if (Exists)
+            continue;
+
+          if (!BackTrackInstructionToGEP)
+            AddBins(AccessedInstructionsToBinsMap, LocalInst, OldOffsetRange,
+                    NewOffsetRange);
+
+          break;
+        }
+        }
+      }
+    }
+
+    unsigned long FixedAllocatedSizeInBits =
+        getAllocatedSize()->getFixedValue();
+    unsigned long NumBytesToAllocate = (FixedAllocatedSizeInBits + 7) / 8;
+    Type *NewAllocationType = nullptr;
+    switch (I->getOpcode()) {
+    // TODO: add case for malloc like calls
+    case Instruction::Alloca: {
+      AllocaInst *OldAllocaInst = cast<AllocaInst>(I);
+      const DataLayout &DL = A.getDataLayout();
+      auto OriginalAllocationSize = OldAllocaInst->getAllocationSizeInBits(DL);
+
+      if (*OriginalAllocationSize <= FixedAllocatedSizeInBits)
+        return ChangeStatus::UNCHANGED;
+
+      Type *CharType = Type::getInt8Ty(I->getContext());
+      Type *CharArrayType = ArrayType::get(CharType, NumBytesToAllocate);
+      NewAllocationType = CharArrayType;
+      BasicBlock::iterator InsertPt = OldAllocaInst->getIterator();
+      InsertPt = std::next(InsertPt);
+      Instruction *NewAllocationInstruction =
+          new AllocaInst(CharArrayType, OldAllocaInst->getAddressSpace(),
+                         OldAllocaInst->getName(), InsertPt);
+
+      Changed |= A.changeAfterManifest(IRPosition::inst(*I),
+                                       *NewAllocationInstruction);
+      A.deleteAfterManifest(*I);
       break;
     }
     default:
       break;
     }
 
-    return ChangeStatus::UNCHANGED;
+    for (auto &It : AccessedInstructionsToBinsMap) {
+      Instruction *LocalInst = It.first;
+      // Get a hold of a map, mapping old to new bins.
+      DenseMap<AA::RangeTy, AA::RangeTy> &OldToNewBins = It.second;
+      IntegerType *Int64TyInteger =
+          IntegerType::get(LocalInst->getContext(), 64);
+      switch (LocalInst->getOpcode()) {
+      case Instruction::Load: {
+        // The number of bytes to shift the load/store by.
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        LoadInst *OldLoadInst = cast<LoadInst>(LocalInst);
+        Instruction *PointerOperand =
+            cast<Instruction>(OldLoadInst->getPointerOperand());
+        Type *PointeeTy = OldLoadInst->getPointerOperandType();
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldLoadInst);
+
+        LoadInst *NewLoadInst = new LoadInst(
+            OldLoadInst->getType(), GepToNewAddress, OldLoadInst->getName(),
+            false, OldLoadInst->getAlign(), OldLoadInst);
+
+        Changed |=
+            A.changeAfterManifest(IRPosition::inst(*OldLoadInst), *NewLoadInst);
+
+        A.deleteAfterManifest(*OldLoadInst);
+        break;
+      }
+      case Instruction::Store: {
+        // The number of bytes to shift the load/store by.
+        int64_t OffsetOld = OldToNewBins.begin()->getFirst().Offset;
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        int64_t ShiftValue = OffsetNew - OffsetOld;
+        StoreInst *OldStoreInst = cast<StoreInst>(LocalInst);
+        Instruction *PointerOperand =
+            cast<Instruction>(OldStoreInst->getPointerOperand());
+        Type *PointeeTy = OldStoreInst->getPointerOperandType();
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            PointeeTy, PointerOperand, IndexList, "NewGep", OldStoreInst);
+
+        StoreInst *NewStoreInst =
+            new StoreInst(OldStoreInst->getValueOperand(), GepToNewAddress,
+                          false, OldStoreInst->getAlign(), OldStoreInst);
+
+        Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
+                                         *NewStoreInst);
+
+        A.deleteAfterManifest(*OldStoreInst);
+        break;
+      }
+      case Instruction::GetElementPtr: {
+        GetElementPtrInst *OldGEP = cast<GetElementPtrInst>(LocalInst);
+        int64_t OffsetNew = OldToNewBins.begin()->getSecond().Offset;
+        Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, OffsetNew)};
+        Value *OldPointerOperand = OldGEP->getPointerOperand();
+        Value *GepToNewAddress = GetElementPtrInst::Create(
+            NewAllocationType, OldPointerOperand, IndexList, "NewGep", OldGEP);
+
+        Changed |=
+            A.changeAfterManifest(IRPosition::inst(*OldGEP), *GepToNewAddress);
+
+        A.deleteAfterManifest(*OldGEP);
+        break;
+      }
+      default:
+        break;
+      }
+    }
+
+    if (!Changed)
+      return ChangeStatus::UNCHANGED;
+    return ChangeStatus::CHANGED;
   }
 
   /// See AbstractAttribute::getAsStr().
@@ -13690,8 +13994,28 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
            ")";
   }
 
+  void dumpNewOffsetBins(raw_ostream &O) {
+
+    O << "Printing Map from [OldOffsetsRange] : [NewOffsetsRange] if the "
+         "offsets changed."
+      << "\n";
+    const auto &NewOffsetsMap = getNewOffsets();
+    for (auto It = NewOffsetsMap.begin(); It != NewOffsetsMap.end(); It++) {
+
+      const auto &OldRange = It->getFirst();
+      const auto &NewRange = It->getSecond();
+
+      O << "[" << OldRange.Offset << "," << OldRange.Offset + OldRange.Size
+        << "] : ";
+      O << "[" << NewRange.Offset << "," << NewRange.Offset + NewRange.Size
+        << "]";
+      O << "\n";
+    }
+  }
+
 private:
   std::optional<TypeSize> AssumedAllocatedSize = HasNoAllocationSize;
+  NewOffsetsTy NewComputedOffsets;
 
   // Maintain the computed allocation size of the object.
   // Returns (bool) weather the size of the allocation was modified or not.
@@ -13703,6 +14027,80 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     }
     return false;
   }
+
+  // Maps an old byte range to its new offset range in the new allocation.
+  // Returns (bool) whether the old byte range's offsets changed or not.
+  bool setNewOffsets(const AA::RangeTy &OldRange, int64_t OldOffset,
+                     int64_t NewComputedOffset, int64_t Size) {
+
+    if (OldOffset == NewComputedOffset)
+      return false;
+
+    AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange);
+    NewRange.Offset = NewComputedOffset;
+    NewRange.Size = Size;
+
+    return true;
+  }
+
+  // A helper function to check if simplified values exist for the current
+  // instruction.
+  // Right now we don't change the value and give up
+  // on modifying the size and offsets of the allocation
+  // but this may be sub-optimal.
+  // TODO: handle case for a simplified value
+  bool checkIfSimplifiedValuesExists(Attributor &A, Instruction *LocalInst) {
+
+    // If there are potential values that replace the accessed instruction, we
+    // should use those values instead.
+    bool UsedAssumedInformation = false;
+    SmallVector<AA::ValueAndContext> Values;
+    if (A.getAssumedSimplifiedValues(IRPosition::inst(*LocalInst), *this,
+                                     Values, AA::AnyScope,
+                                     UsedAssumedInformation))
+
+      for (auto &ValAndContext : Values)
+        // Don't modify the instruction if any simplified value exists.
+        if (ValAndContext.getValue() && ValAndContext.getValue() != LocalInst)
+          return true;
+
+    return false;
+  }
+
+  bool checkIfAccessChainUsesMultipleBins(
+      Attributor &A, Instruction *LocalInst,
+      const DenseMap<Value *, AAPointerInfo::OffsetInfo> &OffsetInfoMap) {
+
+    // BackTrack and check if any instruction in the access causing chain
+    // accessed multiple byte ranges. If they do, we currently give up.
+    SmallVector<Instruction *> ReadyList;
+    DenseMap<Instruction *, bool> Visited;
+    ReadyList.push_back(LocalInst);
+    while (!ReadyList.empty()) {
+      Instruction *GetBack = ReadyList.back();
+      ReadyList.pop_back();
+
+      if (!Visited.insert(std::make_pair(GetBack, true)).second)
+        continue;
+
+      // Check if the instruction has multiple bins; if so, give up.
+      // For calls it is okay to have multiple bins since they may
+      // come from different call arguments and we can address them
+      // separately.
+      // TODO: handle when one instruction has multiple bins
+      auto OffsetsVecArg = OffsetInfoMap.lookup(GetBack).Ranges;
+      if (GetBack->getOpcode() != Instruction::Call && OffsetsVecArg.size() > 1)
+        return true;
+
+      for (auto *It = GetBack->op_begin(); It != GetBack->op_end(); It++) {
+        if (Instruction *Ins = dyn_cast<Instruction>(*It)) {
+          ReadyList.push_back(Ins);
+        }
+      }
+    }
+
+    return false;
+  }
 };
 
 struct AAAllocationInfoFloating : AAAllocationInfoImpl {
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index 595cb37c6c93e..f0efa2a0ae3c1 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -106,10 +106,8 @@ define i32 @test_inf_promote_caller(i32 %arg) {
 ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller
 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CGSCC-NEXT:  bb:
-; CGSCC-NEXT:    [[TMP:%.*]] = alloca [[S:%.*]], align 8
-; CGSCC-NEXT:    [[TMP3:%.*]] = alloca i8, i32 0, align 8
-; CGSCC-NEXT:    [[TMP1:%.*]] = alloca [[S]], align 8
-; CGSCC-NEXT:    [[TMP14:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT:    [[TMP3:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[TMP14:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index 1c34fff8dd755..63dbc4da7da37 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -36,8 +36,7 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: () #[[ATTR0]] {
-; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 6a8605ed19546..115461c7adce7 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -52,10 +52,9 @@ define internal i32 @caller(ptr %B) {
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
-; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
-; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[A1:%.*]] = alloca i8, i32 0, align 4
-; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
+; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index b588a399e5bd9..7b5e1276ac212 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -29,8 +29,7 @@ define internal i32 @foo(ptr) {
 ; CHECK-LABEL: define {{[^@]+}}@foo
 ; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca i8, i32 0, align 4
+; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
 ; CHECK-NEXT:    unreachable
 ;
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 502751147f884..7c613d31c3aeb 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,8 +34,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define dso_local i32 @main() {
 ; TUNIT-LABEL: define {{[^@]+}}@main() {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[ALLOC1:%.*]] = alloca i8, align 8
-; TUNIT-NEXT:    [[ALLOC2:%.*]] = alloca i8, align 8
+; TUNIT-NEXT:    [[ALLOC11:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    [[ALLOC22:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @foo, ptr nofree readnone undef)
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @bar, ptr noalias nofree nonnull readnone align 8 captures(none) dereferenceable(8) undef)
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index b6c7e4ccc0218..9fa886c35c904 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_alloca_1
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca i8, i32 4, align 4
-; CHECK-NEXT:    [[F2:%.*]] = alloca i8, i32 4, align 4
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[F2:%.*]] = alloca [4 x i8], align 1
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
@@ -164,37 +164,52 @@ entry:
 ;TODO: The allocation can be reduced here.
 ;However, the offsets (load/store etc.) Need to be changed.
 ; Function Attrs: noinline nounwind uwtable
-define dso_local { i64, ptr } @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
-; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define dso_local { i64, ptr } @positive_test_not_a_single_start_offset
-; CHECK-SAME: (i32 noundef [[VAL:%.*]]) #[[ATTR0:[0-9]+]] {
+define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL:%.*]] = alloca [[STRUCT_FOO:%.*]], align 8
 ; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; CHECK-NEXT:    store i32 2, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    [[FIELD3:%.*]] = getelementptr inbounds [[STRUCT_FOO]], ptr [[RETVAL]], i32 0, i32 2
-; CHECK-NEXT:    store ptr [[VAL_ADDR]], ptr [[FIELD3]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load { i64, ptr }, ptr [[RETVAL]], align 8
-; CHECK-NEXT:    ret { i64, ptr } [[TMP0]]
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; CHECK-NEXT:    ret void
 ;
 entry:
-  %retval = alloca %struct.Foo, align 8
   %val.addr = alloca i32, align 4
+  %f = alloca %struct.Foo, align 4
   store i32 %val, ptr %val.addr, align 4
-  %field1 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 0
-  store i32 2, ptr %field1, align 8
-  %field3 = getelementptr inbounds %struct.Foo, ptr %retval, i32 0, i32 2
-  store ptr %val.addr, ptr %field3, align 8
-  %0 = load { i64, ptr }, ptr %retval, align 8
-  ret { i64, ptr } %0
+  %0 = load i32, ptr %val.addr, align 4
+  %mul = mul nsw i32 2, %0
+  %a = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 %mul, ptr %a, align 4
+  %a1 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 0
+  %1 = load i32, ptr %a1, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %1)
+  %c = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+  %conv1 = trunc i32 %1 to i8
+  store i8 %conv1, ptr %c, align 4
+  %c2 = getelementptr inbounds %struct.Foo, ptr %f, i32 0, i32 2
+  %2 = load i8, ptr %c2, align 4
+  %conv = sext i8 %2 to i32
+  %call3 = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %conv)
+  ret void
 }
 
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca i8, i32 4, align 8
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [4 x i8], align 1
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
@@ -275,37 +290,37 @@ entry:
 define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_2() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[I2:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 40000)
-; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY]], align 8
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store ptr [[CALL]], ptr [[ARRAY1]], align 8
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND:%.*]]
 ; CHECK:       for.cond:
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10000
 ; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
 ; CHECK:       for.body:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM]]
 ; CHECK-NEXT:    store i32 [[TMP1]], ptr [[ARRAYIDX]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC:%.*]]
 ; CHECK:       for.inc:
-; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP3]], 2
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND]]
 ; CHECK:       for.end:
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND1:%.*]]
 ; CHECK:       for.cond1:
-; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP2:%.*]] = icmp slt i32 [[TMP4]], 10000
 ; CHECK-NEXT:    br i1 [[CMP2]], label [[FOR_BODY3:%.*]], label [[FOR_END9:%.*]]
 ; CHECK:       for.body3:
-; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
 ; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
@@ -313,28 +328,28 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-NEXT:    store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC7:%.*]]
 ; CHECK:       for.inc7:
-; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP7:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD8:%.*]] = add nsw i32 [[TMP7]], 2
-; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD8]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND1]]
 ; CHECK:       for.end9:
-; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    store i32 0, ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND10:%.*]]
 ; CHECK:       for.cond10:
-; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP8:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[CMP11:%.*]] = icmp slt i32 [[TMP8]], 10000
 ; CHECK-NEXT:    br i1 [[CMP11]], label [[FOR_BODY12:%.*]], label [[FOR_END18:%.*]]
 ; CHECK:       for.body12:
-; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
 ; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
 ; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
 ; CHECK-NEXT:    br label [[FOR_INC16:%.*]]
 ; CHECK:       for.inc16:
-; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[TMP11:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[ADD17:%.*]] = add nsw i32 [[TMP11]], 2
-; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I]], align 4
+; CHECK-NEXT:    store i32 [[ADD17]], ptr [[I2]], align 4
 ; CHECK-NEXT:    br label [[FOR_COND10]]
 ; CHECK:       for.end18:
 ; CHECK-NEXT:    ret void
@@ -425,9 +440,9 @@ define dso_local void @pthread_test(){
 ; TUNIT-LABEL: define dso_local void @pthread_test() {
 ; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
-; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
-; TUNIT-NEXT:    [[F1:%.*]] = alloca i8, i32 4, align 4
-; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(12) undef)
+; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 1
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
 ; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
 ; TUNIT-NEXT:    ret void
@@ -452,6 +467,46 @@ define dso_local void @pthread_test(){
   ret void
 }
 
+
+define dso_local void @select_case(i1 %cond){ ; store address is a select of two offsets into %a; the CHECK lines keep %a as [100 x i8]
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i8], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [100 x i8], align 1
+  %b = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 3 ; element 3 of %a
+  %c = getelementptr inbounds [100 x i8], ptr %a, i64 0, i64 1 ; element 1 of %a
+  %sel = select i1 %cond, ptr %b, ptr %c ; points at element 3 or element 1 depending on %cond
+  store i8 100, ptr %sel, align 1 ; the only access to %a in this function
+  ret void
+}
+
+define dso_local void @select_case_2(i1 %cond){ ; like @select_case but over [100 x i32] and with an extra zero-index GEP after the select
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK-LABEL: define dso_local void @select_case_2
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i32], align 1
+; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1
+; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
+; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
+; CHECK-NEXT:    ret void
+;
+  %a = alloca [100 x i32], align 1
+  %b = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 3 ; element 3 of %a
+  %c = getelementptr inbounds [100 x i32], ptr %a, i64 0, i64 1 ; element 1 of %a
+  %sel = select i1 %cond, ptr %b, ptr %c ; points at element 3 or element 1 depending on %cond
+  %sel2 = getelementptr inbounds i32, ptr %sel, i64 0 ; zero-index GEP; the CHECK lines expect the store to use the select directly
+  store i8 100, ptr %sel2, align 1 ; i8 store into an i32 element
+  ret void
+}
+
 define internal ptr @pthread_allocation_should_remain_same(ptr %arg) {
 ; CHECK-LABEL: define internal noundef nonnull align 8 dereferenceable(1) ptr @pthread_allocation_should_remain_same
 ; CHECK-SAME: (ptr noundef nonnull returned align 8 dereferenceable(1) [[ARG:%.*]]) {
@@ -499,6 +554,58 @@ entry:
   ret void
 }
 
+define dso_local void @alloca_array_multi_offset(){ ; counting loop whose body stores through a pointer loaded from %arr
+; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none)
+; CHECK-LABEL: define dso_local void @alloca_array_multi_offset
+; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND:%.*]]
+; CHECK:       for.cond:
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[CMP:%.*]] = icmp slt i32 [[TMP0]], 10
+; CHECK-NEXT:    br i1 [[CMP]], label [[FOR_BODY:%.*]], label [[FOR_END:%.*]]
+; CHECK:       for.body:
+; CHECK-NEXT:    br label [[FOR_INC:%.*]]
+; CHECK:       for.inc:
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[I]], align 4
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 2
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[I]], align 4
+; CHECK-NEXT:    br label [[FOR_COND]]
+; CHECK:       for.end:
+; CHECK-NEXT:    ret void
+;
+entry:
+  %arr = alloca i8, i32 10, align 4 ; 10 x i8 array; it does not appear in the expected output above
+  %i = alloca i32, align 4 ; loop counter
+  store i32 0, ptr %i, align 4
+  br label %for.cond
+
+for.cond:
+  %0 = load i32, ptr %i, align 4
+  %cmp = icmp slt i32 %0, 10 ; loop while %i < 10
+  br i1 %cmp, label %for.body, label %for.end
+
+for.body:
+  %1 = load i32, ptr %i, align 4
+  %2 = load ptr, ptr %arr, align 8 ; NOTE(review): reads a ptr from %arr, which is never stored to, and claims align 8 on an align-4 alloca - confirm intended
+  %3 = load i32, ptr %i, align 4
+  %arrayidx = getelementptr inbounds i32, ptr %2, i32 %3 ; indexes the loaded pointer, not %arr itself
+  store i32 %1, ptr %arrayidx, align 4 ; the CHECK lines expect for.body to contain only the branch
+  br label %for.inc
+
+for.inc:
+  %4 = load i32, ptr %i, align 4
+  %add = add nsw i32 %4, 2 ; counter advances by 2 per iteration
+  store i32 %add, ptr %i, align 4
+  br label %for.cond
+
+for.end:
+  ret void
+
+}
+
 
 declare external void @external_call(ptr)
 
@@ -511,9 +618,11 @@ declare i32 @printf(ptr noundef, ...) #1
 ; Function Attrs: nounwind allocsize(0)
 declare noalias ptr @malloc(i64 noundef) #1
 ;.
-; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
 ; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
 ; TUNIT: [[META1]] = !{i64 2, i64 3, i1 false}
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
index 1d435815d89e3..13e12c7922e29 100644
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -36,8 +36,8 @@ define i8 @call_simplifiable_1() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [1 x i8], ptr [[BYTES1]], i64 0
 ; TUNIT-NEXT:    ret i8 2
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -93,9 +93,9 @@ define i8 @call_simplifiable_2() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_2
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I0:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT:    [[I1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [2 x i8], align 1
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 0
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 1
 ; TUNIT-NEXT:    ret i8 4
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -125,8 +125,8 @@ define i8 @call_simplifiable_3() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_simplifiable_3
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [1 x i8], align 1
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [1 x i8], ptr [[BYTES1]], i64 0
 ; TUNIT-NEXT:    ret i8 2
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -198,13 +198,13 @@ define i8 @call_partially_simplifiable_1() {
 ; TUNIT-LABEL: define {{[^@]+}}@call_partially_simplifiable_1
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
-; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 2
-; TUNIT-NEXT:    store i8 2, ptr [[I2]], align 2
-; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 3
-; TUNIT-NEXT:    store i8 3, ptr [[I3]], align 1
-; TUNIT-NEXT:    [[I4:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 4
-; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nofree noundef nonnull readonly align 2 captures(none) dereferenceable(1022) [[I2]], ptr nofree noundef nonnull readonly captures(none) dereferenceable(1021) [[I3]]) #[[ATTR3:[0-9]+]]
+; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [3 x i8], align 1
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 0
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP]], align 2
+; TUNIT-NEXT:    [[NEWGEP3:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 1
+; TUNIT-NEXT:    store i8 3, ptr [[NEWGEP3]], align 1
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 2
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP3]]) #[[ATTR3]]
 ; TUNIT-NEXT:    ret i8 [[R]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index d54f713e7bbfe..1f2631c109169 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -482,8 +482,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 9a6e0680bb44d..5b266efd26359 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -430,10 +430,9 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-LABEL: define {{[^@]+}}@malloc_in_loop
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4, addrspace(5)
-; CHECK-NEXT:    [[I1:%.*]] = alloca ptr, align 8, addrspace(5)
-; CHECK-NEXT:    [[I11:%.*]] = alloca i8, i32 0, align 8, addrspace(5)
-; CHECK-NEXT:    store i32 [[ARG]], ptr addrspace(5) [[I]], align 4
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr addrspace(5) [[I]], align 4
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index c112d995148d5..92f2b2acd936b 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2587,9 +2587,9 @@ define void @bad_gep() {
 ; TUNIT-LABEL: define {{[^@]+}}@bad_gep
 ; TUNIT-SAME: () #[[ATTR13]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
-; TUNIT-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
-; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull captures(none) dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
+; TUNIT-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
 ; TUNIT-NEXT:    br label [[EXIT:%.*]]
 ; TUNIT:       while.body:
 ; TUNIT-NEXT:    unreachable
@@ -2605,9 +2605,9 @@ define void @bad_gep() {
 ; CGSCC-LABEL: define {{[^@]+}}@bad_gep
 ; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[N1:%.*]] = alloca i8, i32 0, align 1
-; CGSCC-NEXT:    [[M2:%.*]] = alloca i8, i32 0, align 1
-; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull captures(none) dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
+; CGSCC-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT:    br label [[EXIT:%.*]]
 ; CGSCC:       while.body:
 ; CGSCC-NEXT:    unreachable
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index c28cb28379348..6357bf742bbf1 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,15 +10,14 @@ define hidden i64 @f1() align 2 {
 ; TUNIT-LABEL: define {{[^@]+}}@f1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    ret i64 undef
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@f1
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[REF_TMP:%.*]] = alloca [[A:%.*]], align 8
-; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca i8, i32 0, align 8
+; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i64 [[CALL2]]
 ;
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index cd7fd1a4c1123..204016c6922a5 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -10,11 +10,11 @@ define void @foo(ptr %ptr) {
 ; TUNIT-LABEL: define {{[^@]+}}@foo
 ; TUNIT-SAME: (ptr nofree readnone captures(none) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [[STRUCT_TEST_A:%.*]], align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 1
 ; TUNIT-NEXT:    br label [[CALL_BR:%.*]]
 ; TUNIT:       call.br:
-; TUNIT-NEXT:    [[TMP1:%.*]] = getelementptr inbounds [[STRUCT_TEST_A]], ptr [[TMP0]], i64 0, i32 2
-; TUNIT-NEXT:    tail call void @bar(ptr noalias nofree noundef nonnull readonly byval([[STRUCT_TEST_A]]) align 8 captures(none) dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [8 x i8], ptr [[TMP0]], i64 0
+; TUNIT-NEXT:    tail call void @bar(ptr noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A:%.*]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 3e07fe42261e9..d0934ecc0c986 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2647,18 +2647,18 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
 ; TUNIT-SAME: ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
 ; TUNIT-NEXT:  [[ENTRY:.*:]]
 ; TUNIT-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT:    [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; TUNIT-NEXT:    [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
-; TUNIT-NEXT:    store ptr @global, ptr [[INNER]], align 8
+; TUNIT-NEXT:    [[LOCAL1:%.*]] = alloca [8 x i8], align 1
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [8 x i8], ptr [[LOCAL1]], i64 0
+; TUNIT-NEXT:    store ptr @global, ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT:    store ptr [[DST]], ptr [[CALL_H2S]], align 8
 ; TUNIT-NEXT:    [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
 ; TUNIT-NEXT:    store ptr [[SRC]], ptr [[SRC2]], align 8
-; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
-; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL]], align 8
-; TUNIT-NEXT:    [[LOCAL_B8:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 8
-; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL_B8]], align 8
-; TUNIT-NEXT:    [[LOCAL_B16:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 16
-; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL_B16]], align 8
+; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY:%.*]], ptr @global, i64 0, i32 2), align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
+; TUNIT-NEXT:    [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
+; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8
+; TUNIT-NEXT:    [[LOCAL1_B16:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 16
+; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL1_B16]], align 8
 ; TUNIT-NEXT:    call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;

>From 90f5cc62fcbee5872dfb421e2c03ee921bcd08f5 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sun, 9 Nov 2025 16:57:19 -0500
Subject: [PATCH 03/14] Fix Rebase and tests

---
 llvm/include/llvm/Transforms/IPO/Attributor.h |  15 ++-
 .../Transforms/IPO/AttributorAttributes.cpp   |  33 ++++--
 .../live_called_from_dead_2.ll                |   4 +-
 .../Attributor/IPConstantProp/pthreads.ll     |   4 +-
 llvm/test/Transforms/Attributor/allocator.ll  | 103 ++++++++++--------
 .../Attributor/call-simplify-pointer-info.ll  |  28 ++---
 .../Attributor/heap_to_stack_gpu.ll           |   6 +-
 llvm/test/Transforms/Attributor/liveness.ll   |   6 +-
 .../Transforms/Attributor/pointer-info.ll     |   2 +-
 .../Attributor/value-simplify-pointer-info.ll |   7 +-
 10 files changed, 120 insertions(+), 88 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index d785f37035102..3896b364054b1 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5957,7 +5957,12 @@ struct AAPointerInfo : public AbstractAttribute {
     /// actual use of the list. So we just blindly append here.
 
     bool merge(const OffsetInfo &R) {
-      bool Changed =  set_union(Ranges, R.Ranges);
+
+      SmallSet<AA::RangeTy, 2> Set1(Ranges.begin(), Ranges.end());
+      SmallSet<AA::RangeTy, 2> Set2(R.Ranges.begin(), R.Ranges.end());
+
+      bool Changed = set_union(Set1, Set2);
+      Ranges.append(R.Ranges);
       // ensure elements are unique.
       sort(Ranges.begin(), Ranges.end());
       Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
@@ -5974,7 +5979,11 @@ struct AAPointerInfo : public AbstractAttribute {
     // and adds it to the corresponding offset in the
     // origins map.
     bool mergeWithOffset(const OffsetInfo &R, Value &CurPtr) {
-      bool Changed = set_union(Ranges, R.Ranges);
+      SmallSet<AA::RangeTy, 2> Set1(Ranges.begin(), Ranges.end());
+      SmallSet<AA::RangeTy, 2> Set2(R.Ranges.begin(), R.Ranges.end());
+
+      bool Changed = set_union(Set1, Set2);
+      Ranges.append(R.Ranges);
       // ensure elements are unique.
       sort(Ranges.begin(), Ranges.end());
       Ranges.erase(std::unique(Ranges.begin(), Ranges.end()), Ranges.end());
@@ -6386,7 +6395,7 @@ struct AAPointerInfo : public AbstractAttribute {
   virtual const_bin_iterator end() const = 0;
   virtual int64_t numOffsetBins() const = 0;
   virtual bool reachesReturn() const = 0;
-  virtual void addReturnedOffsetsTo(OffsetInfo &) const = 0;
+  virtual void addReturnedOffsetsTo(OffsetInfo &, Value &V) const = 0;
   virtual void dumpState(raw_ostream &O) const = 0;
   virtual const Access &getBinAccess(unsigned Index) const = 0;
   virtual const DenseMap<Value *, OffsetInfo> &getOffsetInfoMap() const = 0;
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 6adc60931d645..0444d394ef7ae 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1143,7 +1143,13 @@ struct AAPointerInfoImpl
            (reachesReturn()
                 ? (" (returned:" +
                    join(map_range(ReturnedOffsets,
-                                  [](int64_t O) { return std::to_string(O); }),
+                                  [](AA::RangeTy O) {
+                                    return std::string("(") +
+                                           std::to_string(O.Offset) +
+                                           std::string(",") +
+                                           std::to_string(O.Size) +
+                                           std::string(")");
+                                  }),
                         ", ") +
                    ")")
                 : "");
@@ -1160,26 +1166,27 @@ struct AAPointerInfoImpl
   bool reachesReturn() const override {
     return !ReturnedOffsets.isUnassigned();
   }
-  void addReturnedOffsetsTo(OffsetInfo &OI) const override {
+  void addReturnedOffsetsTo(OffsetInfo &OI, Value &Origin) const override {
     if (ReturnedOffsets.isUnknown()) {
-      OI.setUnknown();
+      OI.setUnknown(Origin); // NOTE(review): Origin is used only on this path; the loop below calls merge() - confirm intended
       return;
     }
 
     OffsetInfo MergedOI;
     for (auto Offset : ReturnedOffsets) {
       OffsetInfo TmpOI = OI;
-      TmpOI.addToAll(Offset);
+      TmpOI.addToAll(Offset.Offset); // shifts by the range's Offset only; Offset.Size is not read here
       MergedOI.merge(TmpOI);
     }
     OI = std::move(MergedOI);
   }
 
-  ChangeStatus setReachesReturn(const OffsetInfo &ReachedReturnedOffsets) {
+  ChangeStatus setReachesReturn(const OffsetInfo &ReachedReturnedOffsets,
+                                Value &V) {
     if (ReturnedOffsets.isUnknown())
       return ChangeStatus::UNCHANGED;
     if (ReachedReturnedOffsets.isUnknown()) {
-      ReturnedOffsets.setUnknown();
+      ReturnedOffsets.setUnknown(V);
       return ChangeStatus::CHANGED;
     }
     if (ReturnedOffsets.merge(ReachedReturnedOffsets))
@@ -1486,7 +1493,7 @@ struct AAPointerInfoImpl
     ChangeStatus Changed = ChangeStatus::UNCHANGED;
     const auto &OtherAAImpl = static_cast<const AAPointerInfoImpl &>(OtherAA);
     bool IsByval = OtherAAImpl.getAssociatedArgument()->hasByValAttr();
-    Changed |= setReachesReturn(OtherAAImpl.ReturnedOffsets);
+    Changed |= setReachesReturn(OtherAAImpl.ReturnedOffsets, CB);
 
     // Combine the accesses bin by bin.
     const auto &State = OtherAAImpl.getState();
@@ -1829,7 +1836,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
     if (auto *RI = dyn_cast<ReturnInst>(Usr)) {
       if (RI->getFunction() == getAssociatedFunction()) {
         auto &PtrOI = OffsetInfoMap[CurPtr];
-        Changed |= setReachesReturn(PtrOI);
+        Changed |= setReachesReturn(PtrOI, *CurPtr);
         return true;
       }
       return false;
@@ -2087,7 +2094,7 @@ ChangeStatus AAPointerInfoFloating::updateImpl(Attributor &A) {
         if (!CSRetPI)
           return false;
         OffsetInfo OI = OffsetInfoMap[CurPtr];
-        CSArgPI->addReturnedOffsetsTo(OI);
+        CSArgPI->addReturnedOffsetsTo(OI, *CurPtr);
         Changed =
             translateAndAddState(A, *CSRetPI, OI, *CB, IsRetMustAcc) | Changed;
         return isValidState();
@@ -13703,8 +13710,9 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
         [](DenseMap<Instruction *, DenseMap<AA::RangeTy, AA::RangeTy>> &Map,
            Instruction *LocalInst, const AA::RangeTy &OldRange,
            const AA::RangeTy &NewRange) {
+          auto [It, Inserted] = Map.try_emplace(LocalInst);
           DenseMap<AA::RangeTy, AA::RangeTy> &NewBinsForInstruction =
-              Map.getOrInsertDefault(LocalInst);
+              It->second;
 
           NewBinsForInstruction.insert(std::make_pair(OldRange, NewRange));
         };
@@ -14036,11 +14044,12 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     if (OldOffset == NewComputedOffset)
       return false;
 
-    AA::RangeTy &NewRange = NewComputedOffsets.getOrInsertDefault(OldRange);
+    auto [It, Inserted] = NewComputedOffsets.try_emplace(OldRange);
+    AA::RangeTy &NewRange = It->second;
     NewRange.Offset = NewComputedOffset;
     NewRange.Size = Size;
 
-    return true;
+    return Inserted;
   }
 
   // A helper function to check if simplified values exists for the current
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 115461c7adce7..90c75a7c86be9 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -52,9 +52,9 @@ define internal i32 @caller(ptr %B) {
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
-; CGSCC-SAME: (ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
 ; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
-; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nocapture nofree noundef nonnull writeonly align 4 dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
+; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
index 7c613d31c3aeb..502751147f884 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/pthreads.ll
@@ -34,8 +34,8 @@ target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
 define dso_local i32 @main() {
 ; TUNIT-LABEL: define {{[^@]+}}@main() {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[ALLOC11:%.*]] = alloca [0 x i8], align 1
-; TUNIT-NEXT:    [[ALLOC22:%.*]] = alloca [0 x i8], align 1
+; TUNIT-NEXT:    [[ALLOC1:%.*]] = alloca i8, align 8
+; TUNIT-NEXT:    [[ALLOC2:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @foo, ptr nofree readnone undef)
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @bar, ptr noalias nofree nonnull readnone align 8 captures(none) dereferenceable(8) undef)
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index 9fa886c35c904..de99e90e16e46 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -54,10 +54,10 @@ define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
 ; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
 ; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[CALL]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
@@ -91,9 +91,9 @@ define dso_local void @positive_malloc_2(ptr noundef %val) #0 {
 ; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 60)
 ; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
@@ -134,7 +134,7 @@ define dso_local ptr @negative_test_escaping_pointer(i32 noundef %val) #0 {
 ; CHECK-NEXT:    store i32 2, ptr [[TMP0]], align 8
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 10, [[VAL]]
 ; CHECK-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[TMP1]], align 8, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[ADD2:%.*]] = add nsw i32 [[TMP2]], [[ADD]]
 ; CHECK-NEXT:    store i32 [[ADD2]], ptr [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP3:%.*]] = load ptr, ptr [[F]], align 8
@@ -165,24 +165,43 @@ entry:
 ;However, the offsets (load/store etc.) Need to be changed.
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
-; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
-; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
-; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
-; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
-; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
-; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
-; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
-; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
-; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
-; CHECK-NEXT:    ret void
+; TUNIT-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; TUNIT-SAME: (i32 noundef [[VAL:%.*]]) {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
+; TUNIT-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; TUNIT-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; TUNIT-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
+; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; TUNIT-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; CGSCC-SAME: (i32 noundef [[VAL:%.*]]) {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CGSCC-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
+; CGSCC-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CGSCC-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; CGSCC-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
+; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; CGSCC-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; CGSCC-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; CGSCC-NEXT:    ret void
 ;
 entry:
   %val.addr = alloca i32, align 4
@@ -255,9 +274,9 @@ define dso_local void @baz(ptr noundef %val, i32 noundef %arrayLength) #0 {
 ; CHECK-NEXT:    [[MUL:%.*]] = mul i64 4, [[CONV]]
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef [[MUL]])
 ; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    store i32 [[TMP0]], ptr [[CALL]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[CALL2:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP1]])
 ; CHECK-NEXT:    ret void
 ;
@@ -323,7 +342,7 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-NEXT:    [[TMP5:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM4:%.*]] = sext i32 [[TMP5]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM4]]
-; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4
+; CHECK-NEXT:    [[TMP6:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[ADD6:%.*]] = add nsw i32 [[TMP6]], 1
 ; CHECK-NEXT:    store i32 [[ADD6]], ptr [[ARRAYIDX5]], align 4
 ; CHECK-NEXT:    br label [[FOR_INC7:%.*]]
@@ -343,7 +362,7 @@ define dso_local void @positive_test_reduce_array_allocation_2() #0 {
 ; CHECK-NEXT:    [[TMP9:%.*]] = load i32, ptr [[I2]], align 4
 ; CHECK-NEXT:    [[IDXPROM13:%.*]] = sext i32 [[TMP9]] to i64
 ; CHECK-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[CALL]], i64 [[IDXPROM13]]
-; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4
+; CHECK-NEXT:    [[TMP10:%.*]] = load i32, ptr [[ARRAYIDX14]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[CALL15:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP10]])
 ; CHECK-NEXT:    br label [[FOR_INC16:%.*]]
 ; CHECK:       for.inc16:
@@ -440,9 +459,9 @@ define dso_local void @pthread_test(){
 ; TUNIT-LABEL: define dso_local void @pthread_test() {
 ; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
-; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
+; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
 ; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 1
-; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef align 4294967296 null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nocapture nofree nonnull readnone align 4 dereferenceable(12) undef)
+; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(12) undef)
 ; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
 ; TUNIT-NEXT:    ret void
@@ -469,14 +488,13 @@ define dso_local void @pthread_test(){
 
 
 define dso_local void @select_case(i1 %cond){
-; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define dso_local void @select_case
 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i8], align 1
 ; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
 ; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
 ; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
-; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %a = alloca [100 x i8], align 1
@@ -488,14 +506,9 @@ define dso_local void @select_case(i1 %cond){
 }
 
 define dso_local void @select_case_2(i1 %cond){
-; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
+; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define dso_local void @select_case_2
 ; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i32], align 1
-; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 3
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i32], ptr [[A]], i64 0, i64 1
-; CHECK-NEXT:    [[SEL:%.*]] = select i1 [[COND]], ptr [[B]], ptr [[C]]
-; CHECK-NEXT:    store i8 100, ptr [[SEL]], align 1
 ; CHECK-NEXT:    ret void
 ;
   %a = alloca [100 x i32], align 1
@@ -530,7 +543,7 @@ define internal void @pthread_allocation_should_be_reduced(ptr %arg) {
 ; CGSCC-LABEL: define internal void @pthread_allocation_should_be_reduced
 ; CGSCC-SAME: (ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[ARG:%.*]]) {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4
+; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARG]], align 4, !invariant.load [[META0]]
 ; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
 ; CGSCC-NEXT:    ret void
 ;
@@ -618,15 +631,17 @@ declare i32 @printf(ptr noundef, ...) #1
 ; Function Attrs: nounwind allocsize(0)
 declare noalias ptr @malloc(i64 noundef) #1
 ;.
-; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
 ; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
 ; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
 ;.
-; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; TUNIT: [[META1]] = !{i64 2, i64 3, i1 false}
+; TUNIT: [[META0]] = !{}
+; TUNIT: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; TUNIT: [[META2]] = !{i64 2, i64 3, i1 false}
 ;.
-; CGSCC: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; CGSCC: [[META1]] = !{i64 2, i64 3, i1 false}
+; CGSCC: [[META0]] = !{}
+; CGSCC: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; CGSCC: [[META2]] = !{i64 2, i64 3, i1 false}
 ;.
diff --git a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
index 13e12c7922e29..c1a99b85bcb0d 100644
--- a/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/call-simplify-pointer-info.ll
@@ -8,7 +8,7 @@ define internal i8 @read_arg(ptr %p) {
 ; CGSCC-LABEL: define {{[^@]+}}@read_arg
 ; CGSCC-SAME: (ptr nofree noundef nonnull readonly captures(none) dereferenceable(1022) [[P:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[L:%.*]] = load i8, ptr [[P]], align 1
+; CGSCC-NEXT:    [[L:%.*]] = load i8, ptr [[P]], align 1, !invariant.load [[META0:![0-9]+]]
 ; CGSCC-NEXT:    ret i8 [[L]]
 ;
 entry:
@@ -22,7 +22,7 @@ define internal i8 @read_arg_index(ptr %p, i64 %index) {
 ; CGSCC-SAME: (ptr nofree noundef nonnull readonly align 16 captures(none) dereferenceable(1024) [[P:%.*]]) #[[ATTR0]] {
 ; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[G:%.*]] = getelementptr inbounds i8, ptr [[P]], i64 2
-; CGSCC-NEXT:    [[L:%.*]] = load i8, ptr [[G]], align 1
+; CGSCC-NEXT:    [[L:%.*]] = load i8, ptr [[G]], align 1, !invariant.load [[META0]]
 ; CGSCC-NEXT:    ret i8 [[L]]
 ;
 entry:
@@ -94,8 +94,8 @@ define i8 @call_simplifiable_2() {
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [2 x i8], align 1
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 0
-; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 1
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 0
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [2 x i8], ptr [[BYTES1]], i64 1
 ; TUNIT-NEXT:    ret i8 4
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -199,12 +199,12 @@ define i8 @call_partially_simplifiable_1() {
 ; TUNIT-SAME: () #[[ATTR0]] {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[BYTES1:%.*]] = alloca [3 x i8], align 1
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 0
-; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP]], align 2
-; TUNIT-NEXT:    [[NEWGEP3:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 1
-; TUNIT-NEXT:    store i8 3, ptr [[NEWGEP3]], align 1
+; TUNIT-NEXT:    [[NEWGEP3:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 0
+; TUNIT-NEXT:    store i8 2, ptr [[NEWGEP3]], align 2
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 1
+; TUNIT-NEXT:    store i8 3, ptr [[NEWGEP]], align 1
 ; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [3 x i8], ptr [[BYTES1]], i64 2
-; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nocapture nofree noundef nonnull readonly align 2 dereferenceable(1022) [[NEWGEP]], ptr nocapture nofree noundef nonnull readonly dereferenceable(1021) [[NEWGEP3]]) #[[ATTR3]]
+; TUNIT-NEXT:    [[R:%.*]] = call i8 @sum_two_different_loads(ptr nofree noundef nonnull readonly align 2 captures(none) dereferenceable(1022) [[NEWGEP3]], ptr nofree noundef nonnull readonly captures(none) dereferenceable(1021) [[NEWGEP]]) #[[ATTR3:[0-9]+]]
 ; TUNIT-NEXT:    ret i8 [[R]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
@@ -281,14 +281,16 @@ entry:
   ret i8 %r
 }
 
+;.
+; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
+; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR2]] = { mustprogress nofree nosync nounwind willreturn memory(argmem: read) }
+; CGSCC: attributes #[[ATTR3]] = { nofree willreturn memory(read) }
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
 ; TUNIT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
 ; TUNIT: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn memory(read) }
 ; TUNIT: attributes #[[ATTR3]] = { nofree norecurse nosync nounwind willreturn memory(read) }
 ;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read) }
-; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree nosync nounwind willreturn memory(none) }
-; CGSCC: attributes #[[ATTR2]] = { mustprogress nofree nosync nounwind willreturn memory(argmem: read) }
-; CGSCC: attributes #[[ATTR3]] = { nofree willreturn memory(read) }
+; CGSCC: [[META0]] = !{}
 ;.
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 5b266efd26359..63a234acbcb47 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -430,9 +430,9 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-LABEL: define {{[^@]+}}@malloc_in_loop
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
-; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
+; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4, addrspace(5)
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    store i32 [[ARG]], ptr addrspace(5) [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
 ; CHECK-NEXT:    [[I3:%.*]] = load i32, ptr addrspace(5) [[I]], align 4
diff --git a/llvm/test/Transforms/Attributor/liveness.ll b/llvm/test/Transforms/Attributor/liveness.ll
index 92f2b2acd936b..94c7e16f51de3 100644
--- a/llvm/test/Transforms/Attributor/liveness.ll
+++ b/llvm/test/Transforms/Attributor/liveness.ll
@@ -2474,7 +2474,7 @@ define internal void @dead_with_blockaddress_users(ptr nocapture %pc) nounwind r
 ; CGSCC-NEXT:    [[TMP1_PN:%.*]] = load i32, ptr [[PC_ADDR_0]], align 4
 ; CGSCC-NEXT:    [[INDIRECT_GOTO_DEST_IN:%.*]] = getelementptr inbounds [2 x ptr], ptr @dead_with_blockaddress_users.l, i32 0, i32 [[TMP1_PN]]
 ; CGSCC-NEXT:    [[INDIRECT_GOTO_DEST:%.*]] = load ptr, ptr [[INDIRECT_GOTO_DEST_IN]], align 8
-; CGSCC-NEXT:    indirectbr ptr [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label %end]
+; CGSCC-NEXT:    indirectbr ptr [[INDIRECT_GOTO_DEST]], [label [[LAB0]], label [[END:%.*]]]
 ;
 entry:
   br label %indirectgoto
@@ -2589,7 +2589,7 @@ define void @bad_gep() {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
-; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
+; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull captures(none) dereferenceable(1) [[N1]]) #[[ATTR18:[0-9]+]]
 ; TUNIT-NEXT:    br label [[EXIT:%.*]]
 ; TUNIT:       while.body:
 ; TUNIT-NEXT:    unreachable
@@ -2607,7 +2607,7 @@ define void @bad_gep() {
 ; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[N1:%.*]] = alloca [0 x i8], align 1
 ; CGSCC-NEXT:    [[M2:%.*]] = alloca [0 x i8], align 1
-; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(i64 noundef 1, ptr noalias nocapture nofree noundef nonnull dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
+; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull captures(none) dereferenceable(1) [[N1]]) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT:    br label [[EXIT:%.*]]
 ; CGSCC:       while.body:
 ; CGSCC-NEXT:    unreachable
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index 204016c6922a5..a90a106d3a5c1 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -14,7 +14,7 @@ define void @foo(ptr %ptr) {
 ; TUNIT-NEXT:    br label [[CALL_BR:%.*]]
 ; TUNIT:       call.br:
 ; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [8 x i8], ptr [[TMP0]], i64 0
-; TUNIT-NEXT:    tail call void @bar(ptr noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_TEST_A:%.*]]) align 8 dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
+; TUNIT-NEXT:    tail call void @bar(ptr noalias nofree noundef nonnull readonly byval([[STRUCT_TEST_A:%.*]]) align 8 captures(none) dereferenceable(24) [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index d0934ecc0c986..de301c7f817ec 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -3248,10 +3248,7 @@ define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c
 ; TUNIT-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]]) #[[ATTR23:[0-9]+]]
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
 ; TUNIT-NEXT:    call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[CALL]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[CALL1]]) #[[ATTR18]]
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
-; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
-; TUNIT-NEXT:    ret i32 [[ADD]]
+; TUNIT-NEXT:    ret i32 10
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
 ; CGSCC-LABEL: define i32 @may_access_after_return_choice(
@@ -3456,7 +3453,7 @@ define void @returnedPtrAccesses() {
 ; TUNIT-NEXT:    store i8 2, ptr [[A2]], align 1
 ; TUNIT-NEXT:    store i8 4, ptr [[A4]], align 1
 ; TUNIT-NEXT:    store i8 6, ptr [[A6]], align 1
-; TUNIT-NEXT:    call void @use3i8(i8 2, i8 4, i8 6)
+; TUNIT-NEXT:    call void @use3i8(i8 noundef 2, i8 noundef 4, i8 noundef 6)
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC-LABEL: define void @returnedPtrAccesses() {

>From b3419e9830e504bb18e7128f79b9177399453ea8 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sun, 9 Nov 2025 17:34:21 -0500
Subject: [PATCH 04/14] fix deprecation warning

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 13 ++++++++++++
 .../Transforms/IPO/AttributorAttributes.cpp   | 21 +++++++++++--------
 2 files changed, 25 insertions(+), 9 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 3896b364054b1..1d8c9b5a4f365 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -6306,6 +6306,8 @@ struct AAPointerInfo : public AbstractAttribute {
 
     // Merge two access paths into one.
     void mergeAccessPaths(const AccessPathSetTy *AccessPathsNew) const {
+      assert(AccessPathsNew != nullptr &&
+             "Expected the set of access paths to be non null!");
       for (auto *Path : *AccessPathsNew)
         if (!existsChain(Path))
           AccessPaths->insert(Path);
@@ -6313,6 +6315,8 @@ struct AAPointerInfo : public AbstractAttribute {
 
     // Check if the given access paths are same.
     bool checkAccessPathsAreSame(const AccessPathSetTy *AccessPathsR) const {
+      assert(AccessPathsR != nullptr &&
+             "Expected the set of access paths to be non null!");
       bool IsSame = true;
       if (AccessPaths->size() != AccessPathsR->size())
         return false;
@@ -6326,6 +6330,10 @@ struct AAPointerInfo : public AbstractAttribute {
 
     // Check if the chain exists in the AccessPathsSet.
     bool existsChain(const AccessPathTy *NewPath) const {
+
+      if (AccessPaths == nullptr)
+        return false;
+
       for (auto *OldPath : *AccessPaths)
         if (*OldPath == *NewPath)
           return true;
@@ -6336,6 +6344,11 @@ struct AAPointerInfo : public AbstractAttribute {
     void dumpAccessPaths(raw_ostream &O) const {
       O << "Print all access paths found:"
         << "\n";
+
+      if (AccessPaths == nullptr) {
+        O << "Could not find any access paths!\n";
+      }
+
       for (auto *It : *AccessPaths) {
         O << "Backtrack a unique access path:\n";
         for (Value *Ins : *It) {
diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 0444d394ef7ae..7eedc0db2ca9f 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13931,12 +13931,13 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
         Type *PointeeTy = OldLoadInst->getPointerOperandType();
         int64_t ShiftValue = OffsetNew - OffsetOld;
         Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
-        Value *GepToNewAddress = GetElementPtrInst::Create(
-            PointeeTy, PointerOperand, IndexList, "NewGep", OldLoadInst);
+        Value *GepToNewAddress =
+            GetElementPtrInst::Create(PointeeTy, PointerOperand, IndexList,
+                                      "NewGep", OldLoadInst->getIterator());
 
         LoadInst *NewLoadInst = new LoadInst(
             OldLoadInst->getType(), GepToNewAddress, OldLoadInst->getName(),
-            false, OldLoadInst->getAlign(), OldLoadInst);
+            false, OldLoadInst->getAlign(), OldLoadInst->getIterator());
 
         Changed |=
             A.changeAfterManifest(IRPosition::inst(*OldLoadInst), *NewLoadInst);
@@ -13954,12 +13955,13 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
             cast<Instruction>(OldStoreInst->getPointerOperand());
         Type *PointeeTy = OldStoreInst->getPointerOperandType();
         Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, ShiftValue)};
-        Value *GepToNewAddress = GetElementPtrInst::Create(
-            PointeeTy, PointerOperand, IndexList, "NewGep", OldStoreInst);
+        Value *GepToNewAddress =
+            GetElementPtrInst::Create(PointeeTy, PointerOperand, IndexList,
+                                      "NewGep", OldStoreInst->getIterator());
 
-        StoreInst *NewStoreInst =
-            new StoreInst(OldStoreInst->getValueOperand(), GepToNewAddress,
-                          false, OldStoreInst->getAlign(), OldStoreInst);
+        StoreInst *NewStoreInst = new StoreInst(
+            OldStoreInst->getValueOperand(), GepToNewAddress, false,
+            OldStoreInst->getAlign(), OldStoreInst->getIterator());
 
         Changed |= A.changeAfterManifest(IRPosition::inst(*OldStoreInst),
                                          *NewStoreInst);
@@ -13973,7 +13975,8 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
         Value *IndexList[1] = {ConstantInt::get(Int64TyInteger, OffsetNew)};
         Value *OldPointerOperand = OldGEP->getPointerOperand();
         Value *GepToNewAddress = GetElementPtrInst::Create(
-            NewAllocationType, OldPointerOperand, IndexList, "NewGep", OldGEP);
+            NewAllocationType, OldPointerOperand, IndexList, "NewGep",
+            OldGEP->getIterator());
 
         Changed |=
             A.changeAfterManifest(IRPosition::inst(*OldGEP), *GepToNewAddress);

>From 289c158679cadeb9ced12f0f4d35cf43340a258c Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sat, 15 Nov 2025 10:44:20 -0500
Subject: [PATCH 05/14] Fix comments, spacing, and logic of Offset merging

---
 llvm/include/llvm/Transforms/IPO/Attributor.h | 51 +++++++++++--------
 1 file changed, 31 insertions(+), 20 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 1d8c9b5a4f365..a486f6177ac16 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -5928,8 +5928,8 @@ struct AAPointerInfo : public AbstractAttribute {
       insert(AA::RangeTy{AA::RangeTy::Unknown, AA::RangeTy::Unknown}, V);
     }
 
-    // Increment all ranges by Inc.
-    // Add an origin V to all offsets.
+    // We need to increment all ranges by Inc and add an origin V to all
+    // offsets.
     void addToAll(int64_t Inc, Value &V) {
       for (auto &Range : Ranges)
         Range.Offset += Inc;
@@ -5955,13 +5955,20 @@ struct AAPointerInfo : public AbstractAttribute {
     ///
     /// Ideally all lists should be strictly ascending, but we defer that to the
     /// actual use of the list. So we just blindly append here.
-
     bool merge(const OffsetInfo &R) {
+      bool Changed = false;
 
-      SmallSet<AA::RangeTy, 2> Set1(Ranges.begin(), Ranges.end());
-      SmallSet<AA::RangeTy, 2> Set2(R.Ranges.begin(), R.Ranges.end());
+      for (const auto &Range : R.Ranges) {
+        if (!is_contained(Ranges, Range)) {
+          Changed = true;
+          break;
+        }
+      }
+
+      // No need to merge if the Ranges did not change.
+      if (!Changed)
+        return Changed;
 
-      bool Changed = set_union(Set1, Set2);
       Ranges.append(R.Ranges);
       // ensure elements are unique.
       sort(Ranges.begin(), Ranges.end());
@@ -5974,15 +5981,21 @@ struct AAPointerInfo : public AbstractAttribute {
       return Changed;
     }
 
-    // Merge two OffsetInfo structs.
-    // takes an additional origin argument
-    // and adds it to the corresponding offset in the
-    // origins map.
+    // Merge two OffsetInfo structs. The function takes an additional origin
+    // argument (CurPtr) and adds it to the corresponding offset in the origins
+    // map.
     bool mergeWithOffset(const OffsetInfo &R, Value &CurPtr) {
-      SmallSet<AA::RangeTy, 2> Set1(Ranges.begin(), Ranges.end());
-      SmallSet<AA::RangeTy, 2> Set2(R.Ranges.begin(), R.Ranges.end());
+      bool Changed = false;
+
+      // No early return: an origin may still be added when no range is new.
+      // NOTE(review): this loop scans Ranges, not R.Ranges -- please confirm.
+      for (const auto &Range : Ranges) {
+        if (!is_contained(Ranges, Range)) {
+          Changed = true;
+          break;
+        }
+      }
 
-      bool Changed = set_union(Set1, Set2);
       Ranges.append(R.Ranges);
       // ensure elements are unique.
       sort(Ranges.begin(), Ranges.end());
@@ -6330,8 +6343,7 @@ struct AAPointerInfo : public AbstractAttribute {
 
     // Check if the chain exists in the AccessPathsSet.
     bool existsChain(const AccessPathTy *NewPath) const {
-
-      if (AccessPaths == nullptr)
+      if (!AccessPaths)
         return false;
 
       for (auto *OldPath : *AccessPaths)
@@ -6342,18 +6354,17 @@ struct AAPointerInfo : public AbstractAttribute {
     }
 
     void dumpAccessPaths(raw_ostream &O) const {
-      O << "Print all access paths found:"
-        << "\n";
+      O << "Print all access paths found:\n";
 
-      if (AccessPaths == nullptr) {
+      if (!AccessPaths) {
         O << "Could not find any access paths!\n";
+        return;
       }
 
       for (auto *It : *AccessPaths) {
         O << "Backtrack a unique access path:\n";
-        for (Value *Ins : *It) {
+        for (Value *Ins : *It)
           O << *Ins << "\n";
-        }
       }
     }
 

>From e861887f037e94e73e9689d9477251f0032a197d Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sat, 15 Nov 2025 19:32:24 -0500
Subject: [PATCH 06/14] fix issue with AAPrivatizablePtr

---
 .../Transforms/IPO/AttributorAttributes.cpp   | 56 +++++++++++++++++++
 .../IPConstantProp/openmp_parallel_for.ll     | 22 ++++----
 llvm/test/Transforms/Attributor/nodelete.ll   |  1 -
 .../Attributor/value-simplify-pointer-info.ll |  6 +-
 4 files changed, 71 insertions(+), 14 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 7eedc0db2ca9f..8bbf790b390a6 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13608,6 +13608,62 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
             A, this, IRP, DepClassTy::OPTIONAL, IsKnownNoCapture))
       return indicatePessimisticFixpoint();
 
+    const auto *AAPrivatizablePtrI = A.getOrCreateAAFor<AAPrivatizablePtr>(
+        getIRPosition(), *this, DepClassTy::OPTIONAL);
+
+    // If this allocation is privatizable we don't want to modify its allocation
+    // size.
+    // TODO: update AAPointerInfo to update bins once AAPrivatizablePtr makes a
+    // change.
+    if (AAPrivatizablePtrI && (AAPrivatizablePtrI->isAssumedPrivatizablePtr() ||
+                               AAPrivatizablePtrI->isKnownPrivatizablePtr()))
+      return indicateOptimisticFixpoint();
+
+    // For all call sites, check if the called function can privatize the
+    // pointer.
+    for (Use &U : I->uses()) {
+      auto *CB = dyn_cast<CallBase>(U.getUser());
+      if (!CB)
+        continue;
+
+      unsigned ArgIdx = 0;
+      for (auto *It = CB->arg_begin(); It != CB->arg_end(); It++) {
+        Value *ArgVal = *It;
+
+        // Remove any pointer casts.
+        Value *Stripped = ArgVal->stripPointerCasts();
+        if (Stripped != I)
+          continue;
+
+        Function *Callee = CB->getCalledFunction();
+        if (!Callee)
+          continue;
+
+        if (ArgIdx >= Callee->arg_size())
+          continue;
+
+        Argument &FunctionDefArg = *Callee->getArg(ArgIdx);
+
+        IRPosition FunctionDefArgPos = IRPosition::argument(FunctionDefArg);
+
+        const auto *AAPrivateArgPos = A.getOrCreateAAFor<AAPrivatizablePtr>(
+            FunctionDefArgPos, *this, DepClassTy::OPTIONAL);
+
+        if (!AAPrivateArgPos)
+          continue;
+
+        // If the callee can privatize this argument we don't want to modify
+        // the allocation size.
+        // TODO: update AAPointerInfo to update bins once AAPrivatizablePtr
+        // makes a change.
+        if (AAPrivateArgPos->isAssumedPrivatizablePtr() ||
+            AAPrivateArgPos->isKnownPrivatizablePtr())
+          return indicateOptimisticFixpoint();
+
+        ArgIdx++;
+      }
+    }
+
     const AAPointerInfo *PI =
         A.getOrCreateAAFor<AAPointerInfo>(IRP, *this, DepClassTy::REQUIRED);
 
diff --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index c0d778b9d72b1..24b498e1b8140 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -76,7 +76,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; TUNIT-NEXT:    store i32 4, ptr [[DOTOMP_UB]], align 4
 ; TUNIT-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 ; TUNIT-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-; TUNIT-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; TUNIT-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META0:![0-9]+]]
 ; TUNIT-NEXT:    call void @__kmpc_for_static_init_4(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP5]], i32 noundef 34, ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
 ; TUNIT-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 ; TUNIT-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], 4
@@ -111,7 +111,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; TUNIT:       omp.inner.for.end:
 ; TUNIT-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 ; TUNIT:       omp.loop.exit:
-; TUNIT-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; TUNIT-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META0]]
 ; TUNIT-NEXT:    call void @__kmpc_for_static_fini(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP12]])
 ; TUNIT-NEXT:    br label [[OMP_PRECOND_END:%.*]]
 ; TUNIT:       omp.precond.end:
@@ -126,7 +126,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; CGSCC-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    store i64 4617315517961601024, ptr [[Q_ADDR]], align 8
-; CGSCC-NEXT:    [[TMP:%.*]] = load i32, ptr [[N]], align 4
+; CGSCC-NEXT:    [[TMP:%.*]] = load i32, ptr [[N]], align 4, !invariant.load [[META0:![0-9]+]]
 ; CGSCC-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
 ; CGSCC-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
 ; CGSCC-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
@@ -135,7 +135,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; CGSCC-NEXT:    store i32 [[SUB3]], ptr [[DOTOMP_UB]], align 4
 ; CGSCC-NEXT:    store i32 1, ptr [[DOTOMP_STRIDE]], align 4
 ; CGSCC-NEXT:    store i32 0, ptr [[DOTOMP_IS_LAST]], align 4
-; CGSCC-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CGSCC-NEXT:    [[TMP5:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META0]]
 ; CGSCC-NEXT:    call void @__kmpc_for_static_init_4(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP5]], i32 noundef 34, ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], ptr noundef nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 noundef 1, i32 noundef 1)
 ; CGSCC-NEXT:    [[TMP6:%.*]] = load i32, ptr [[DOTOMP_UB]], align 4
 ; CGSCC-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
@@ -159,7 +159,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; CGSCC-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
 ; CGSCC:       omp.inner.for.body:
 ; CGSCC-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
-; CGSCC-NEXT:    [[TMP10:%.*]] = load float, ptr [[P]], align 4
+; CGSCC-NEXT:    [[TMP10:%.*]] = load float, ptr [[P]], align 4, !invariant.load [[META0]]
 ; CGSCC-NEXT:    [[TMP11:%.*]] = load double, ptr [[Q_ADDR]], align 8
 ; CGSCC-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
 ; CGSCC-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
@@ -171,7 +171,7 @@ define internal void @.omp_outlined.(ptr noalias %.global_tid., ptr noalias %.bo
 ; CGSCC:       omp.inner.for.end:
 ; CGSCC-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
 ; CGSCC:       omp.loop.exit:
-; CGSCC-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4
+; CGSCC-NEXT:    [[TMP12:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META0]]
 ; CGSCC-NEXT:    call void @__kmpc_for_static_fini(ptr noundef nonnull align 8 dereferenceable(24) @[[GLOB0]], i32 [[TMP12]])
 ; CGSCC-NEXT:    br label [[OMP_PRECOND_END]]
 ; CGSCC:       omp.precond.end:
@@ -259,11 +259,13 @@ declare !callback !0 dso_local void @__kmpc_fork_call(ptr, i32, ptr, ...)
 !1 = !{i64 2, i64 -1, i64 -1, i1 true}
 !0 = !{!1}
 ;.
-; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; TUNIT: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
+; TUNIT: [[META0]] = !{}
+; TUNIT: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; TUNIT: [[META2]] = !{i64 2, i64 -1, i64 -1, i1 true}
 ;.
-; CGSCC: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; CGSCC: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
+; CGSCC: [[META0]] = !{}
+; CGSCC: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; CGSCC: [[META2]] = !{i64 2, i64 -1, i64 -1, i1 true}
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index 6357bf742bbf1..9a4a1098d010a 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -10,7 +10,6 @@ define hidden i64 @f1() align 2 {
 ; TUNIT-LABEL: define {{[^@]+}}@f1
 ; TUNIT-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
 ; TUNIT-NEXT:    ret i64 undef
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index de301c7f817ec..8cbdccf75aac3 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -2647,13 +2647,13 @@ define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
 ; TUNIT-SAME: ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
 ; TUNIT-NEXT:  [[ENTRY:.*:]]
 ; TUNIT-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT:    [[LOCAL1:%.*]] = alloca [8 x i8], align 1
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [8 x i8], ptr [[LOCAL1]], i64 0
+; TUNIT-NEXT:    [[LOCAL1:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL1]], i64 0, i32 2
 ; TUNIT-NEXT:    store ptr @global, ptr [[NEWGEP]], align 8
 ; TUNIT-NEXT:    store ptr [[DST]], ptr [[CALL_H2S]], align 8
 ; TUNIT-NEXT:    [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
 ; TUNIT-NEXT:    store ptr [[SRC]], ptr [[SRC2]], align 8
-; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY:%.*]], ptr @global, i64 0, i32 2), align 8
+; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
 ; TUNIT-NEXT:    [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8

>From ca9940bc92be8bdef59c1ea1b36fd79226847809 Mon Sep 17 00:00:00 2001
From: vidush <vidush.sl at gmail.com>
Date: Tue, 18 Nov 2025 17:53:49 -0500
Subject: [PATCH 07/14] Fix rebase error, pass AK to addAccess

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp           | 4 ++--
 .../Transforms/Attributor/value-simplify-pointer-info.ll   | 7 +++++--
 2 files changed, 7 insertions(+), 4 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 8bbf790b390a6..27e973f0bca77 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -1545,8 +1545,8 @@ struct AAPointerInfoImpl
           if (!IsMustAcc)
             AK = AccessKind((AK & ~AK_MUST) | AK_MAY);
           Changed |=
-              addAccess(A, NewRanges, CB, RAcc.getContent(), RAcc.getKind(),
-                        RAcc.getType(), OffsetInfoMap, RAcc.getRemoteInst());
+              addAccess(A, NewRanges, CB, RAcc.getContent(), AK, RAcc.getType(),
+                        OffsetInfoMap, RAcc.getRemoteInst());
         }
       }
     }
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index 8cbdccf75aac3..f64552735c3a2 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -3248,7 +3248,10 @@ define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c
 ; TUNIT-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]]) #[[ATTR23:[0-9]+]]
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
 ; TUNIT-NEXT:    call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[CALL]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[CALL1]]) #[[ATTR18]]
-; TUNIT-NEXT:    ret i32 10
+; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[A]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[B]], align 4
+; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], [[TMP1]]
+; TUNIT-NEXT:    ret i32 [[ADD]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
 ; CGSCC-LABEL: define i32 @may_access_after_return_choice(
@@ -3453,7 +3456,7 @@ define void @returnedPtrAccesses() {
 ; TUNIT-NEXT:    store i8 2, ptr [[A2]], align 1
 ; TUNIT-NEXT:    store i8 4, ptr [[A4]], align 1
 ; TUNIT-NEXT:    store i8 6, ptr [[A6]], align 1
-; TUNIT-NEXT:    call void @use3i8(i8 noundef 2, i8 noundef 4, i8 noundef 6)
+; TUNIT-NEXT:    call void @use3i8(i8 2, i8 4, i8 6)
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC-LABEL: define void @returnedPtrAccesses() {

>From dc737d926a9f76c2e7ea856db8b1392ea37de3af Mon Sep 17 00:00:00 2001
From: vidush <vidush.sl at gmail.com>
Date: Fri, 21 Nov 2025 17:08:39 -0500
Subject: [PATCH 08/14] make sure alignment of new instruction is same as old

---
 .../Transforms/IPO/AttributorAttributes.cpp   |    4 +-
 .../Attributor/ArgumentPromotion/crash.ll     |    4 +-
 .../live_called_from_dead.ll                  |    2 +-
 .../live_called_from_dead_2.ll                |    2 +-
 .../nonzero-address-spaces.ll                 |    2 +-
 llvm/test/Transforms/Attributor/allocator.ll  |   28 +-
 .../Transforms/Attributor/heap_to_stack.ll    |    2 +-
 llvm/test/Transforms/Attributor/nodelete.ll   |    2 +-
 .../pointer-info-track-access-chain.ll        |    3 +-
 .../Transforms/Attributor/pointer-info.ll     |    2 +-
 .../Attributor/value-simplify-pointer-info.ll | 1978 ++++++++---------
 11 files changed, 1016 insertions(+), 1013 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 27e973f0bca77..aff88cc044fb7 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13957,10 +13957,12 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       NewAllocationType = CharArrayType;
       BasicBlock::iterator InsertPt = OldAllocaInst->getIterator();
       InsertPt = std::next(InsertPt);
-      Instruction *NewAllocationInstruction =
+      AllocaInst *NewAllocationInstruction =
           new AllocaInst(CharArrayType, OldAllocaInst->getAddressSpace(),
                          OldAllocaInst->getName(), InsertPt);
 
+      NewAllocationInstruction->setAlignment(OldAllocaInst->getAlign());
+
       Changed |= A.changeAfterManifest(IRPosition::inst(*I),
                                        *NewAllocationInstruction);
       A.deleteAfterManifest(*I);
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
index f0efa2a0ae3c1..ce17a12d4a3b6 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/crash.ll
@@ -106,8 +106,8 @@ define i32 @test_inf_promote_caller(i32 %arg) {
 ; CGSCC-LABEL: define {{[^@]+}}@test_inf_promote_caller
 ; CGSCC-SAME: (i32 [[ARG:%.*]]) #[[ATTR3:[0-9]+]] {
 ; CGSCC-NEXT:  bb:
-; CGSCC-NEXT:    [[TMP3:%.*]] = alloca [0 x i8], align 1
-; CGSCC-NEXT:    [[TMP14:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[TMP3:%.*]] = alloca [0 x i8], align 8
+; CGSCC-NEXT:    [[TMP14:%.*]] = alloca [0 x i8], align 8
 ; CGSCC-NEXT:    ret i32 0
 ;
 bb:
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
index 63dbc4da7da37..c769534e8d401 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead.ll
@@ -36,7 +36,7 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: () #[[ATTR0]] {
-; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 4
 ; CGSCC-NEXT:    ret i32 0
 ;
   %A = alloca i32
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 90c75a7c86be9..6e07c6addb5d2 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -53,7 +53,7 @@ define internal i32 @caller(ptr %B) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
 ; CGSCC-LABEL: define {{[^@]+}}@caller
 ; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B:%.*]]) #[[ATTR0]] {
-; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[A1:%.*]] = alloca [0 x i8], align 4
 ; CGSCC-NEXT:    [[C:%.*]] = call i32 @test(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i32 0
 ;
diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
index 7b5e1276ac212..7428e77bdaaeb 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/nonzero-address-spaces.ll
@@ -29,7 +29,7 @@ define internal i32 @foo(ptr) {
 ; CHECK-LABEL: define {{[^@]+}}@foo
 ; CHECK-SAME: () addrspace(1) #[[ATTR0:[0-9]+]] {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[RETVAL1:%.*]] = alloca [0 x i8], align 4
 ; CHECK-NEXT:    call addrspace(0) void asm sideeffect "ldr r0, [r0] \0Abx lr \0A", ""()
 ; CHECK-NEXT:    unreachable
 ;
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index de99e90e16e46..3611dbad79bf7 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -13,8 +13,8 @@ define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_alloca_1
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 1
-; CHECK-NEXT:    [[F2:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[VAL_ADDR1:%.*]] = alloca [4 x i8], align 4
+; CHECK-NEXT:    [[F2:%.*]] = alloca [4 x i8], align 4
 ; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR1]], align 4
 ; CHECK-NEXT:    store i32 10, ptr [[F2]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F2]], align 4
@@ -169,17 +169,17 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; TUNIT-SAME: (i32 noundef [[VAL:%.*]]) {
 ; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; TUNIT-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 4
 ; TUNIT-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 ; TUNIT-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
 ; TUNIT-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
 ; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
 ; TUNIT-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
 ; TUNIT-NEXT:    ret void
@@ -188,17 +188,17 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; CGSCC-SAME: (i32 noundef [[VAL:%.*]]) {
 ; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 1
+; CGSCC-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 4
 ; CGSCC-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
 ; CGSCC-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
 ; CGSCC-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
 ; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
 ; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
+; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
 ; CGSCC-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 ; CGSCC-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
 ; CGSCC-NEXT:    ret void
@@ -228,7 +228,7 @@ entry:
 define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-LABEL: define dso_local void @positive_test_reduce_array_allocation_1() {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [4 x i8], align 1
+; CHECK-NEXT:    [[ARRAY1:%.*]] = alloca [4 x i8], align 8
 ; CHECK-NEXT:    store i32 0, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAY1]], align 8
 ; CHECK-NEXT:    [[TMP1:%.*]] = add i32 [[TMP0]], 2
@@ -460,7 +460,7 @@ define dso_local void @pthread_test(){
 ; TUNIT-NEXT:    [[ARG1:%.*]] = alloca i8, align 8
 ; TUNIT-NEXT:    [[THREAD:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[CALL1:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_remain_same, ptr noundef nonnull align 8 dereferenceable(1) [[ARG1]])
-; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 1
+; TUNIT-NEXT:    [[F1:%.*]] = alloca [4 x i8], align 4
 ; TUNIT-NEXT:    [[CALL2:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_allocation_should_be_reduced, ptr noalias nofree nonnull readnone align 4 captures(none) dereferenceable(12) undef)
 ; TUNIT-NEXT:    [[F2:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 @pthread_create(ptr noundef nonnull align 8 dereferenceable(8) [[THREAD]], ptr noundef null, ptr noundef nonnull @pthread_check_captured_pointer, ptr noundef nonnull align 4 dereferenceable(12) [[F2]])
diff --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index 1f2631c109169..86f34254d294c 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -482,7 +482,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
-; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 8
 ; CHECK-NEXT:    store i32 [[ARG]], ptr [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:
diff --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index 9a4a1098d010a..d7ad06306285e 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -16,7 +16,7 @@ define hidden i64 @f1() align 2 {
 ; CGSCC-LABEL: define {{[^@]+}}@f1
 ; CGSCC-SAME: () #[[ATTR0:[0-9]+]] align 2 {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 1
+; CGSCC-NEXT:    [[REF_TMP1:%.*]] = alloca [0 x i8], align 8
 ; CGSCC-NEXT:    [[CALL2:%.*]] = call i64 @f2() #[[ATTR2:[0-9]+]]
 ; CGSCC-NEXT:    ret i64 [[CALL2]]
 ;
diff --git a/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll b/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll
index b7c3f1f33191e..f36c69a313767 100644
--- a/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info-track-access-chain.ll
@@ -384,4 +384,5 @@ end:
   %x = phi ptr [ %field2, %then ], [ %field8, %else ]
   %ret = load i32, ptr %x
   ret i32 %ret
-}
\ No newline at end of file
+}
+
diff --git a/llvm/test/Transforms/Attributor/pointer-info.ll b/llvm/test/Transforms/Attributor/pointer-info.ll
index a90a106d3a5c1..e97f42d9684c2 100644
--- a/llvm/test/Transforms/Attributor/pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/pointer-info.ll
@@ -10,7 +10,7 @@ define void @foo(ptr %ptr) {
 ; TUNIT-LABEL: define {{[^@]+}}@foo
 ; TUNIT-SAME: (ptr nofree readnone captures(none) [[PTR:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 1
+; TUNIT-NEXT:    [[TMP0:%.*]] = alloca [8 x i8], align 8
 ; TUNIT-NEXT:    br label [[CALL_BR:%.*]]
 ; TUNIT:       call.br:
 ; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [8 x i8], ptr [[TMP0]], i64 0
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index f64552735c3a2..4ffe0ee18caee 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -1,4 +1,4 @@
-; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-attributes --check-globals all --version 6
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-annotate-decl-cs  -S < %s | FileCheck %s --check-prefixes=CHECK,TUNIT
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,CGSCC
 ;
@@ -66,10 +66,10 @@
 ;.
 define void @write_arg(ptr %p, i32 %v) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
-; CHECK-LABEL: define void @write_arg(
-; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4, !tbaa [[INT_TBAA3:![0-9]+]]
+; CHECK-LABEL: define {{[^@]+}}@write_arg
+; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[P:%.*]], i32 [[V:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 [[V]], ptr [[P]], align 4, !tbaa [[TBAA3:![0-9]+]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -78,11 +78,11 @@ entry:
 }
 
 define void @write_random(ptr %p) {
-; CHECK-LABEL: define void @write_random(
-; CHECK-SAME: ptr nofree writeonly captures(none) [[P:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@write_random
+; CHECK-SAME: (ptr nofree writeonly captures(none) [[P:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 (...) @random()
-; CHECK-NEXT:    store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT:    store i32 [[CALL]], ptr [[P]], align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -112,9 +112,9 @@ declare i32 @random(...)
 ;    }
 define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define void @local_alloca_simplifiable_1(
-; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1
+; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17:[0-9]+]]
 ; TUNIT-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
@@ -126,71 +126,71 @@ define void @local_alloca_simplifiable_1(ptr noalias sret(%struct.S) align 4 %ag
 ; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]]
 ; TUNIT-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]]
+; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F12]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; TUNIT-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]]
+; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F24]], align 4, !tbaa [[TBAA10:![0-9]+]]
 ; TUNIT-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]]
-; TUNIT-NEXT:    store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
+; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F37]], align 4, !tbaa [[TBAA11:![0-9]+]]
+; TUNIT-NEXT:    store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12:![0-9]+]]
 ; TUNIT-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT:    store i32 4, ptr [[I212]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
+; TUNIT-NEXT:    store i32 4, ptr [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]]
 ; TUNIT-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT:    store i32 4, ptr [[I316]], align 4, !tbaa [[INT_TBAA14:![0-9]+]]
+; TUNIT-NEXT:    store i32 4, ptr [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR17]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define void @local_alloca_simplifiable_1(
-; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_1
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR1:[0-9]+]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[S:%.*]] = alloca [[STRUCT_S]], align 4
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20:[0-9]+]]
 ; CGSCC-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7:![0-9]+]]
+; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7:![0-9]+]]
 ; CGSCC-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10:![0-9]+]]
+; CGSCC-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10:![0-9]+]]
 ; CGSCC-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11:![0-9]+]]
+; CGSCC-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11:![0-9]+]]
 ; CGSCC-NEXT:    call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21:[0-9]+]]
 ; CGSCC-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]]
 ; CGSCC-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]]
 ; CGSCC-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT:    store float [[I4]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    store float [[I4]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr [[F23]], align 4, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00
 ; CGSCC-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
 ; CGSCC-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    [[I7:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[ADD:%.*]] = fadd float [[I6]], [[I7]]
 ; CGSCC-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
-; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12:![0-9]+]]
-; CGSCC-NEXT:    store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; CGSCC-NEXT:    store i32 [[I8]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
 ; CGSCC-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13:![0-9]+]]
+; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]]
 ; CGSCC-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I9]], 1
 ; CGSCC-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
+; CGSCC-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
 ; CGSCC-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[INT_TBAA14:![0-9]+]]
-; CGSCC-NEXT:    [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]]
+; CGSCC-NEXT:    [[I11:%.*]] = load i32, ptr [[S]], align 4, !tbaa [[TBAA12]]
 ; CGSCC-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]]
 ; CGSCC-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
+; CGSCC-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(24) [[S]]) #[[ATTR20]]
 ; CGSCC-NEXT:    ret void
 ;
 entry:
   %s = alloca %struct.S, align 4
-  call void @llvm.lifetime.start.p0(ptr nonnull %s)
+  call void @llvm.lifetime.start.p0(i64 24, ptr nonnull %s)
   %f1 = getelementptr inbounds %struct.S, ptr %s, i64 0, i32 3
   store float 0x3FF19999A0000000, ptr %f1, align 4, !tbaa !7
   %f2 = getelementptr inbounds %struct.S, ptr %s, i64 0, i32 4
@@ -231,13 +231,13 @@ entry:
   %add15 = add nsw i32 %i10, %i11
   %i316 = getelementptr inbounds %struct.S, ptr %agg.result, i64 0, i32 2
   store i32 %add15, ptr %i316, align 4, !tbaa !14
-  call void @llvm.lifetime.end.p0(ptr nonnull %s)
+  call void @llvm.lifetime.end.p0(i64 24, ptr nonnull %s)
   ret void
 }
 
-declare void @llvm.lifetime.start.p0(ptr nocapture)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture)
 
-declare void @llvm.lifetime.end.p0(ptr nocapture)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture)
 
 ;    void local_alloca_simplifiable_2(void) {
 ;      char Bytes[1024];
@@ -256,162 +256,162 @@ declare void @llvm.lifetime.end.p0(ptr nocapture)
 ;
 define void @local_alloca_simplifiable_2() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define void @local_alloca_simplifiable_2(
-; TUNIT-SAME: ) #[[ATTR3:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
+; TUNIT-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2
+; TUNIT-SAME: () #[[ATTR2:[0-9]+]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR17]]
-; TUNIT-NEXT:    br label %[[FOR_COND:.*]]
-; TUNIT:       [[FOR_COND]]:
-; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; TUNIT-NEXT:    br label [[FOR_COND:%.*]]
+; TUNIT:       for.cond:
+; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; TUNIT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP]]:
-; TUNIT-NEXT:    br label %[[FOR_END:.*]]
-; TUNIT:       [[FOR_BODY]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; TUNIT:       for.cond.cleanup:
+; TUNIT-NEXT:    br label [[FOR_END:%.*]]
+; TUNIT:       for.body:
 ; TUNIT-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; TUNIT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]]
-; TUNIT-NEXT:    br label %[[FOR_INC]]
-; TUNIT:       [[FOR_INC]]:
+; TUNIT-NEXT:    br label [[FOR_INC]]
+; TUNIT:       for.inc:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
-; TUNIT:       [[FOR_END]]:
-; TUNIT-NEXT:    br label %[[FOR_COND2:.*]]
-; TUNIT:       [[FOR_COND2]]:
-; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; TUNIT-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP15:![0-9]+]]
+; TUNIT:       for.end:
+; TUNIT-NEXT:    br label [[FOR_COND2:%.*]]
+; TUNIT:       for.cond2:
+; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; TUNIT-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP4]]:
-; TUNIT-NEXT:    br label %[[FOR_END11:.*]]
-; TUNIT:       [[FOR_BODY5]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; TUNIT:       for.cond.cleanup4:
+; TUNIT-NEXT:    br label [[FOR_END11:%.*]]
+; TUNIT:       for.body5:
 ; TUNIT-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; TUNIT-NEXT:    [[I18:%.*]] = or i64 [[I17]], 1
 ; TUNIT-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]]
-; TUNIT-NEXT:    br label %[[FOR_INC9]]
-; TUNIT:       [[FOR_INC9]]:
+; TUNIT-NEXT:    br label [[FOR_INC9]]
+; TUNIT:       for.inc9:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
-; TUNIT:       [[FOR_END11]]:
-; TUNIT-NEXT:    br label %[[FOR_COND13:.*]]
-; TUNIT:       [[FOR_COND13]]:
-; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ]
+; TUNIT-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP17:![0-9]+]]
+; TUNIT:       for.end11:
+; TUNIT-NEXT:    br label [[FOR_COND13:%.*]]
+; TUNIT:       for.cond13:
+; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ]
 ; TUNIT-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP15]]:
-; TUNIT-NEXT:    br label %[[FOR_END24:.*]]
-; TUNIT:       [[FOR_BODY16]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; TUNIT:       for.cond.cleanup15:
+; TUNIT-NEXT:    br label [[FOR_END24:%.*]]
+; TUNIT:       for.body16:
 ; TUNIT-NEXT:    [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; TUNIT-NEXT:    [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
 ; TUNIT-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]]
-; TUNIT-NEXT:    br label %[[FOR_INC22]]
-; TUNIT:       [[FOR_INC22]]:
+; TUNIT-NEXT:    br label [[FOR_INC22]]
+; TUNIT:       for.inc22:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
-; TUNIT:       [[FOR_END24]]:
+; TUNIT-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP18:![0-9]+]]
+; TUNIT:       for.end24:
 ; TUNIT-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023
 ; TUNIT-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT:    br label %[[FOR_COND28:.*]]
-; TUNIT:       [[FOR_COND28]]:
-; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ]
+; TUNIT-NEXT:    br label [[FOR_COND28:%.*]]
+; TUNIT:       for.cond28:
+; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
 ; TUNIT-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP30]]:
-; TUNIT-NEXT:    br label %[[FOR_END38:.*]]
-; TUNIT:       [[FOR_BODY31]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]]
+; TUNIT:       for.cond.cleanup30:
+; TUNIT-NEXT:    br label [[FOR_END38:%.*]]
+; TUNIT:       for.body31:
 ; TUNIT-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA19:![0-9]+]]
-; TUNIT-NEXT:    br label %[[FOR_INC36]]
-; TUNIT:       [[FOR_INC36]]:
+; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA19:![0-9]+]]
+; TUNIT-NEXT:    br label [[FOR_INC36]]
+; TUNIT:       for.inc36:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
-; TUNIT:       [[FOR_END38]]:
+; TUNIT-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP20:![0-9]+]]
+; TUNIT:       for.end38:
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR17]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define void @local_alloca_simplifiable_2(
-; CGSCC-SAME: ) #[[ATTR3:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
+; CGSCC-LABEL: define {{[^@]+}}@local_alloca_simplifiable_2
+; CGSCC-SAME: () #[[ATTR2:[0-9]+]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]]
-; CGSCC-NEXT:    br label %[[FOR_COND:.*]]
-; CGSCC:       [[FOR_COND]]:
-; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; CGSCC-NEXT:    br label [[FOR_COND:%.*]]
+; CGSCC:       for.cond:
+; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CGSCC-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP]]:
-; CGSCC-NEXT:    br label %[[FOR_END:.*]]
-; CGSCC:       [[FOR_BODY]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CGSCC:       for.cond.cleanup:
+; CGSCC-NEXT:    br label [[FOR_END:%.*]]
+; CGSCC:       for.body:
 ; CGSCC-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; CGSCC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[I15]]
-; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15:![0-9]+]]
-; CGSCC-NEXT:    br label %[[FOR_INC]]
-; CGSCC:       [[FOR_INC]]:
+; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15:![0-9]+]]
+; CGSCC-NEXT:    br label [[FOR_INC]]
+; CGSCC:       for.inc:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
-; CGSCC:       [[FOR_END]]:
-; CGSCC-NEXT:    br label %[[FOR_COND2:.*]]
-; CGSCC:       [[FOR_COND2]]:
-; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; CGSCC-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP16:![0-9]+]]
+; CGSCC:       for.end:
+; CGSCC-NEXT:    br label [[FOR_COND2:%.*]]
+; CGSCC:       for.cond2:
+; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; CGSCC-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP4]]:
-; CGSCC-NEXT:    br label %[[FOR_END11:.*]]
-; CGSCC:       [[FOR_BODY5]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; CGSCC:       for.cond.cleanup4:
+; CGSCC-NEXT:    br label [[FOR_END11:%.*]]
+; CGSCC:       for.body5:
 ; CGSCC-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; CGSCC-NEXT:    [[I18:%.*]] = or i64 [[I17]], 1
 ; CGSCC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I18]]
-; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18:![0-9]+]]
-; CGSCC-NEXT:    br label %[[FOR_INC9]]
-; CGSCC:       [[FOR_INC9]]:
+; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18:![0-9]+]]
+; CGSCC-NEXT:    br label [[FOR_INC9]]
+; CGSCC:       for.inc9:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
-; CGSCC:       [[FOR_END11]]:
-; CGSCC-NEXT:    br label %[[FOR_COND13:.*]]
-; CGSCC:       [[FOR_COND13]]:
-; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC22:.*]] ], [ 0, %[[FOR_END11]] ]
+; CGSCC-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP19:![0-9]+]]
+; CGSCC:       for.end11:
+; CGSCC-NEXT:    br label [[FOR_COND13:%.*]]
+; CGSCC:       for.cond13:
+; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC22:%.*]] ], [ 0, [[FOR_END11]] ]
 ; CGSCC-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP15]]:
-; CGSCC-NEXT:    br label %[[FOR_END24:.*]]
-; CGSCC:       [[FOR_BODY16]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; CGSCC:       for.cond.cleanup15:
+; CGSCC-NEXT:    br label [[FOR_END24:%.*]]
+; CGSCC:       for.body16:
 ; CGSCC-NEXT:    [[I20:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; CGSCC-NEXT:    [[I21:%.*]] = add nuw nsw i64 [[I20]], 2
 ; CGSCC-NEXT:    [[ARRAYIDX21:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I21]]
-; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[LONG_LONG_TBAA20:![0-9]+]]
-; CGSCC-NEXT:    br label %[[FOR_INC22]]
-; CGSCC:       [[FOR_INC22]]:
+; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX21]], align 16, !tbaa [[TBAA20:![0-9]+]]
+; CGSCC-NEXT:    br label [[FOR_INC22]]
+; CGSCC:       for.inc22:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
-; CGSCC:       [[FOR_END24]]:
+; CGSCC-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP22:![0-9]+]]
+; CGSCC:       for.end24:
 ; CGSCC-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 1023
-; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 500
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) [[ARRAYIDX26]], i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT:    br label %[[FOR_COND28:.*]]
-; CGSCC:       [[FOR_COND28]]:
-; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC36:.*]] ], [ 0, %[[FOR_END24]] ]
+; CGSCC-NEXT:    br label [[FOR_COND28:%.*]]
+; CGSCC:       for.cond28:
+; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
 ; CGSCC-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY31:.*]], label %[[FOR_COND_CLEANUP30:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP30]]:
-; CGSCC-NEXT:    br label %[[FOR_END38:.*]]
-; CGSCC:       [[FOR_BODY31]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY31:%.*]], label [[FOR_COND_CLEANUP30:%.*]]
+; CGSCC:       for.cond.cleanup30:
+; CGSCC-NEXT:    br label [[FOR_END38:%.*]]
+; CGSCC:       for.body31:
 ; CGSCC-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    [[I23:%.*]] = load i8, ptr [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[CHAR_TBAA15]]
-; CGSCC-NEXT:    br label %[[FOR_INC36]]
-; CGSCC:       [[FOR_INC36]]:
+; CGSCC-NEXT:    store i8 [[I23]], ptr [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_INC36]]
+; CGSCC:       for.inc36:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
-; CGSCC:       [[FOR_END38]]:
+; CGSCC-NEXT:    br label [[FOR_COND28]], !llvm.loop [[LOOP23:![0-9]+]]
+; CGSCC:       for.end38:
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(1024) [[BYTES]]) #[[ATTR20]]
 ; CGSCC-NEXT:    ret void
 ;
 entry:
   %Bytes = alloca [1024 x i8], align 16
-  call void @llvm.lifetime.start.p0(ptr nonnull %Bytes)
+  call void @llvm.lifetime.start.p0(i64 1024, ptr nonnull %Bytes)
   br label %for.cond
 
 for.cond:                                         ; preds = %for.inc, %entry
@@ -503,7 +503,7 @@ for.inc36:                                        ; preds = %for.body31
   br label %for.cond28, !llvm.loop !23
 
 for.end38:                                        ; preds = %for.cond.cleanup30
-  call void @llvm.lifetime.end.p0(ptr nonnull %Bytes)
+  call void @llvm.lifetime.end.p0(i64 1024, ptr nonnull %Bytes)
   ret void
 }
 
@@ -516,10 +516,10 @@ for.end38:                                        ; preds = %for.cond.cleanup30
 ;
 define i32 @local_alloca_simplifiable_3() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define noundef i32 @local_alloca_simplifiable_3(
-; CHECK-SAME: ) #[[ATTR4:[0-9]+]] {
-; CHECK-NEXT:    br label %[[SPLIT:.*]]
-; CHECK:       [[SPLIT]]:
+; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_3
+; CHECK-SAME: () #[[ATTR3:[0-9]+]] {
+; CHECK-NEXT:    br label [[SPLIT:%.*]]
+; CHECK:       split:
 ; CHECK-NEXT:    ret i32 2
 ;
   %A = alloca i32, align 4
@@ -537,8 +537,8 @@ split:
 ;
 define i32 @local_alloca_simplifiable_4() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i32 @local_alloca_simplifiable_4(
-; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@local_alloca_simplifiable_4
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:    ret i32 undef
 ;
   %A = alloca i32, align 4
@@ -554,40 +554,40 @@ define i32 @local_alloca_simplifiable_4() {
 ;    }
 define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define noundef i32 @multi_obj_simplifiable_1(
-; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
+; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
 ; TUNIT-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; TUNIT-NEXT:    br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
-; TUNIT:       [[COND_TRUE]]:
-; TUNIT-NEXT:    br label %[[COND_END:.*]]
-; TUNIT:       [[COND_FALSE]]:
-; TUNIT-NEXT:    br label %[[COND_END]]
-; TUNIT:       [[COND_END]]:
+; TUNIT-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; TUNIT:       cond.true:
+; TUNIT-NEXT:    br label [[COND_END:%.*]]
+; TUNIT:       cond.false:
+; TUNIT-NEXT:    br label [[COND_END]]
+; TUNIT:       cond.end:
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
 ; TUNIT-NEXT:    ret i32 5
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define noundef i32 @multi_obj_simplifiable_1(
-; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
+; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR4:[0-9]+]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
 ; CGSCC-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CGSCC-NEXT:    br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
-; CGSCC:       [[COND_TRUE]]:
-; CGSCC-NEXT:    br label %[[COND_END:.*]]
-; CGSCC:       [[COND_FALSE]]:
-; CGSCC-NEXT:    br label %[[COND_END]]
-; CGSCC:       [[COND_END]]:
+; CGSCC-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; CGSCC:       cond.true:
+; CGSCC-NEXT:    br label [[COND_END:%.*]]
+; CGSCC:       cond.false:
+; CGSCC-NEXT:    br label [[COND_END]]
+; CGSCC:       cond.end:
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
 ; CGSCC-NEXT:    ret i32 5
 ;
 entry:
   %L = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(ptr nonnull %L)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %L)
   store i32 5, ptr @GI1, align 4, !tbaa !3
   store i32 5, ptr %L, align 4, !tbaa !3
   %tobool.not = icmp eq i32 %cnd, 0
@@ -602,7 +602,7 @@ cond.false:                                       ; preds = %entry
 cond.end:                                         ; preds = %cond.false, %cond.true
   %cond = phi ptr [ @GI1, %cond.true ], [ %L, %cond.false ]
   %i1 = load i32, ptr %cond, align 4, !tbaa !3
-  call void @llvm.lifetime.end.p0(ptr nonnull %L)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %L)
   ret i32 %i1
 }
 
@@ -616,40 +616,40 @@ cond.end:                                         ; preds = %cond.false, %cond.t
 ;
 define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i32 @multi_obj_simplifiable_2(
-; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
+; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
 ; TUNIT-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; TUNIT-NEXT:    br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
-; TUNIT:       [[COND_TRUE]]:
-; TUNIT-NEXT:    br label %[[COND_END:.*]]
-; TUNIT:       [[COND_FALSE]]:
-; TUNIT-NEXT:    br label %[[COND_END]]
-; TUNIT:       [[COND_END]]:
+; TUNIT-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; TUNIT:       cond.true:
+; TUNIT-NEXT:    br label [[COND_END:%.*]]
+; TUNIT:       cond.false:
+; TUNIT-NEXT:    br label [[COND_END]]
+; TUNIT:       cond.end:
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR17]]
 ; TUNIT-NEXT:    ret i32 5
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i32 @multi_obj_simplifiable_2(
-; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR5]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
+; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR4]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[L:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
 ; CGSCC-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CGSCC-NEXT:    br i1 [[TOBOOL_NOT]], label %[[COND_FALSE:.*]], label %[[COND_TRUE:.*]]
-; CGSCC:       [[COND_TRUE]]:
-; CGSCC-NEXT:    br label %[[COND_END:.*]]
-; CGSCC:       [[COND_FALSE]]:
-; CGSCC-NEXT:    br label %[[COND_END]]
-; CGSCC:       [[COND_END]]:
+; CGSCC-NEXT:    br i1 [[TOBOOL_NOT]], label [[COND_FALSE:%.*]], label [[COND_TRUE:%.*]]
+; CGSCC:       cond.true:
+; CGSCC-NEXT:    br label [[COND_END:%.*]]
+; CGSCC:       cond.false:
+; CGSCC-NEXT:    br label [[COND_END]]
+; CGSCC:       cond.end:
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[L]]) #[[ATTR20]]
 ; CGSCC-NEXT:    ret i32 5
 ;
 entry:
   %L = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(ptr nonnull %L)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %L)
   %tobool.not = icmp eq i32 %cnd, 0
   br i1 %tobool.not, label %cond.false, label %cond.true
 
@@ -663,7 +663,7 @@ cond.end:                                         ; preds = %cond.false, %cond.t
   %cond = phi ptr [ @GI2, %cond.true ], [ %L, %cond.false ]
   store i32 5, ptr %cond, align 4, !tbaa !3
   %l = load i32, ptr %cond, align 4, !tbaa !3
-  call void @llvm.lifetime.end.p0(ptr nonnull %L)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %L)
   ret i32 %l
 }
 
@@ -687,58 +687,58 @@ cond.end:                                         ; preds = %cond.false, %cond.t
 ;
 define void @static_global_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @static_global_simplifiable_1(
-; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR5:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_1
+; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]]) #[[ATTR4:[0-9]+]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR18]]
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR18]]
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR18]]
 ; TUNIT-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
+; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
 ; TUNIT-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]]
+; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F2]], align 4, !tbaa [[TBAA10]]
 ; TUNIT-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
-; TUNIT-NEXT:    store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
+; TUNIT-NEXT:    store float 0x40119999A0000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
+; TUNIT-NEXT:    store i32 1, ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
 ; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT:    store i32 4, ptr [[I2]], align 4, !tbaa [[INT_TBAA13]]
+; TUNIT-NEXT:    store i32 4, ptr [[I2]], align 4, !tbaa [[TBAA13]]
 ; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT:    store i32 4, ptr [[I3]], align 4, !tbaa [[INT_TBAA14]]
+; TUNIT-NEXT:    store i32 4, ptr [[I3]], align 4, !tbaa [[TBAA14]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define void @static_global_simplifiable_1(
-; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
-; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
-; CGSCC-NEXT:    store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[FLOAT_TBAA10]]
-; CGSCC-NEXT:    store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_1
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT:    store float 0x40019999A0000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT:    store float 0x400A666660000000, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(24) @Gs1, i32 noundef 1) #[[ATTR21]]
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR21]]
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR21]]
-; CGSCC-NEXT:    [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    [[I:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT:    store float [[I]], ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
-; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    store float [[I]], ptr [[F1]], align 4, !tbaa [[TBAA7]]
+; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
 ; CGSCC-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT:    store float [[MUL]], ptr [[F2]], align 4, !tbaa [[FLOAT_TBAA10]]
-; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[FLOAT_TBAA11]]
-; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    store float [[MUL]], ptr [[F2]], align 4, !tbaa [[TBAA10]]
+; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
 ; CGSCC-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT:    store float [[ADD]], ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
-; CGSCC-NEXT:    [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]]
-; CGSCC-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
-; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[INT_TBAA13]]
+; CGSCC-NEXT:    store float [[ADD]], ptr [[F3]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT:    [[I7:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
+; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]]
 ; CGSCC-NEXT:    [[MUL1:%.*]] = shl nsw i32 [[I8]], 1
 ; CGSCC-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT:    store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[INT_TBAA13]]
-; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[INT_TBAA14]]
-; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT:    store i32 [[MUL1]], ptr [[I2]], align 4, !tbaa [[TBAA13]]
+; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S]], ptr @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]]
+; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr @Gs1, align 4, !tbaa [[TBAA12]]
 ; CGSCC-NEXT:    [[ADD2:%.*]] = add nsw i32 [[I9]], [[I10]]
 ; CGSCC-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT:    store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[INT_TBAA14]]
+; CGSCC-NEXT:    store i32 [[ADD2]], ptr [[I3]], align 4, !tbaa [[TBAA14]]
 ; CGSCC-NEXT:    ret void
 ;
 entry:
@@ -776,13 +776,13 @@ entry:
 
 define i32 @test_range_merge1() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define noundef i32 @test_range_merge1(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@test_range_merge1
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    ret i32 2
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define noundef i32 @test_range_merge1(
-; CGSCC-SAME: ) #[[ATTR6:[0-9]+]] {
+; CGSCC-LABEL: define {{[^@]+}}@test_range_merge1
+; CGSCC-SAME: () #[[ATTR5:[0-9]+]] {
 ; CGSCC-NEXT:    ret i32 2
 ;
   store <2 x i32> <i32 1, i32 1>, ptr @Vs1
@@ -795,8 +795,8 @@ define i32 @test_range_merge1() {
 
 define i32 @test_range_merge2() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i32 @test_range_merge2(
-; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-LABEL: define {{[^@]+}}@test_range_merge2
+; TUNIT-SAME: () #[[ATTR2]] {
 ; TUNIT-NEXT:    store <2 x i32> <i32 3, i32 4>, ptr @Vs2, align 8
 ; TUNIT-NEXT:    [[L0:%.*]] = load i32, ptr @Vs2, align 4
 ; TUNIT-NEXT:    [[L1:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4
@@ -804,8 +804,8 @@ define i32 @test_range_merge2() {
 ; TUNIT-NEXT:    ret i32 [[ADD]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i32 @test_range_merge2(
-; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-LABEL: define {{[^@]+}}@test_range_merge2
+; CGSCC-SAME: () #[[ATTR4]] {
 ; CGSCC-NEXT:    store <2 x i32> <i32 3, i32 4>, ptr @Vs2, align 8
 ; CGSCC-NEXT:    [[L0:%.*]] = load i32, ptr @Vs2, align 4
 ; CGSCC-NEXT:    [[L1:%.*]] = load i32, ptr getelementptr inbounds ([[STRUCT_S:%.*]], ptr @Vs2, i64 0, i32 1), align 4
@@ -837,147 +837,147 @@ define i32 @test_range_merge2() {
 ;
 define void @static_global_simplifiable_2() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @static_global_simplifiable_2(
-; TUNIT-SAME: ) #[[ATTR5]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
-; TUNIT-NEXT:    br label %[[FOR_COND:.*]]
-; TUNIT:       [[FOR_COND]]:
-; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_2
+; TUNIT-SAME: () #[[ATTR4]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    br label [[FOR_COND:%.*]]
+; TUNIT:       for.cond:
+; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; TUNIT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP]]:
-; TUNIT-NEXT:    br label %[[FOR_END:.*]]
-; TUNIT:       [[FOR_BODY]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; TUNIT:       for.cond.cleanup:
+; TUNIT-NEXT:    br label [[FOR_END:%.*]]
+; TUNIT:       for.body:
 ; TUNIT-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; TUNIT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]]
-; TUNIT-NEXT:    br label %[[FOR_INC]]
-; TUNIT:       [[FOR_INC]]:
+; TUNIT-NEXT:    br label [[FOR_INC]]
+; TUNIT:       for.inc:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
-; TUNIT:       [[FOR_END]]:
-; TUNIT-NEXT:    br label %[[FOR_COND2:.*]]
-; TUNIT:       [[FOR_COND2]]:
-; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; TUNIT-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP21:![0-9]+]]
+; TUNIT:       for.end:
+; TUNIT-NEXT:    br label [[FOR_COND2:%.*]]
+; TUNIT:       for.cond2:
+; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; TUNIT-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP4]]:
-; TUNIT-NEXT:    br label %[[FOR_END11:.*]]
-; TUNIT:       [[FOR_BODY5]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; TUNIT:       for.cond.cleanup4:
+; TUNIT-NEXT:    br label [[FOR_END11:%.*]]
+; TUNIT:       for.body5:
 ; TUNIT-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; TUNIT-NEXT:    [[I16:%.*]] = or i64 [[I15]], 1
 ; TUNIT-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]]
-; TUNIT-NEXT:    br label %[[FOR_INC9]]
-; TUNIT:       [[FOR_INC9]]:
+; TUNIT-NEXT:    br label [[FOR_INC9]]
+; TUNIT:       for.inc9:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
-; TUNIT:       [[FOR_END11]]:
-; TUNIT-NEXT:    br label %[[FOR_COND13:.*]]
-; TUNIT:       [[FOR_COND13]]:
-; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
+; TUNIT-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP22:![0-9]+]]
+; TUNIT:       for.end11:
+; TUNIT-NEXT:    br label [[FOR_COND13:%.*]]
+; TUNIT:       for.cond13:
+; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
 ; TUNIT-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP15]]:
-; TUNIT-NEXT:    br label %[[FOR_END23:.*]]
-; TUNIT:       [[FOR_BODY16]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; TUNIT:       for.cond.cleanup15:
+; TUNIT-NEXT:    br label [[FOR_END23:%.*]]
+; TUNIT:       for.body16:
 ; TUNIT-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; TUNIT-NEXT:    [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
 ; TUNIT-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]]
-; TUNIT-NEXT:    br label %[[FOR_INC21]]
-; TUNIT:       [[FOR_INC21]]:
+; TUNIT-NEXT:    br label [[FOR_INC21]]
+; TUNIT:       for.inc21:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
-; TUNIT:       [[FOR_END23]]:
+; TUNIT-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP23:![0-9]+]]
+; TUNIT:       for.end23:
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT:    br label %[[FOR_COND25:.*]]
-; TUNIT:       [[FOR_COND25]]:
-; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ]
+; TUNIT-NEXT:    br label [[FOR_COND25:%.*]]
+; TUNIT:       for.cond25:
+; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
 ; TUNIT-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP27]]:
-; TUNIT-NEXT:    br label %[[FOR_END35:.*]]
-; TUNIT:       [[FOR_BODY28]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]]
+; TUNIT:       for.cond.cleanup27:
+; TUNIT-NEXT:    br label [[FOR_END35:%.*]]
+; TUNIT:       for.body28:
 ; TUNIT-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]]
-; TUNIT-NEXT:    br label %[[FOR_INC33]]
-; TUNIT:       [[FOR_INC33]]:
+; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
+; TUNIT-NEXT:    br label [[FOR_INC33]]
+; TUNIT:       for.inc33:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
-; TUNIT:       [[FOR_END35]]:
+; TUNIT-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP24:![0-9]+]]
+; TUNIT:       for.end35:
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define void @static_global_simplifiable_2(
-; CGSCC-SAME: ) #[[ATTR3]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
-; CGSCC-NEXT:    br label %[[FOR_COND:.*]]
-; CGSCC:       [[FOR_COND]]:
-; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_2
+; CGSCC-SAME: () #[[ATTR2]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    br label [[FOR_COND:%.*]]
+; CGSCC:       for.cond:
+; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CGSCC-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP]]:
-; CGSCC-NEXT:    br label %[[FOR_END:.*]]
-; CGSCC:       [[FOR_BODY]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CGSCC:       for.cond.cleanup:
+; CGSCC-NEXT:    br label [[FOR_END:%.*]]
+; CGSCC:       for.body:
 ; CGSCC-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; CGSCC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[I]]
-; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[CHAR_TBAA15]]
-; CGSCC-NEXT:    br label %[[FOR_INC]]
-; CGSCC:       [[FOR_INC]]:
+; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 2, !tbaa [[TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_INC]]
+; CGSCC:       for.inc:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
-; CGSCC:       [[FOR_END]]:
-; CGSCC-NEXT:    br label %[[FOR_COND2:.*]]
-; CGSCC:       [[FOR_COND2]]:
-; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; CGSCC-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP24:![0-9]+]]
+; CGSCC:       for.end:
+; CGSCC-NEXT:    br label [[FOR_COND2:%.*]]
+; CGSCC:       for.cond2:
+; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; CGSCC-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP4]]:
-; CGSCC-NEXT:    br label %[[FOR_END11:.*]]
-; CGSCC:       [[FOR_BODY5]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; CGSCC:       for.cond.cleanup4:
+; CGSCC-NEXT:    br label [[FOR_END11:%.*]]
+; CGSCC:       for.body5:
 ; CGSCC-NEXT:    [[I15:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; CGSCC-NEXT:    [[I16:%.*]] = or i64 [[I15]], 1
 ; CGSCC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr @GBytes, i64 [[I16]]
-; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]]
-; CGSCC-NEXT:    br label %[[FOR_INC9]]
-; CGSCC:       [[FOR_INC9]]:
+; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
+; CGSCC-NEXT:    br label [[FOR_INC9]]
+; CGSCC:       for.inc9:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
-; CGSCC:       [[FOR_END11]]:
-; CGSCC-NEXT:    br label %[[FOR_COND13:.*]]
-; CGSCC:       [[FOR_COND13]]:
-; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
+; CGSCC-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP25:![0-9]+]]
+; CGSCC:       for.end11:
+; CGSCC-NEXT:    br label [[FOR_COND13:%.*]]
+; CGSCC:       for.cond13:
+; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
 ; CGSCC-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP15]]:
-; CGSCC-NEXT:    br label %[[FOR_END23:.*]]
-; CGSCC:       [[FOR_BODY16]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; CGSCC:       for.cond.cleanup15:
+; CGSCC-NEXT:    br label [[FOR_END23:%.*]]
+; CGSCC:       for.body16:
 ; CGSCC-NEXT:    [[I17:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; CGSCC-NEXT:    [[I18:%.*]] = add nuw nsw i64 [[I17]], 2
 ; CGSCC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr @GBytes, i64 [[I18]]
-; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[LONG_LONG_TBAA20]]
-; CGSCC-NEXT:    br label %[[FOR_INC21]]
-; CGSCC:       [[FOR_INC21]]:
+; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 16, !tbaa [[TBAA20]]
+; CGSCC-NEXT:    br label [[FOR_INC21]]
+; CGSCC:       for.inc21:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
-; CGSCC:       [[FOR_END23]]:
-; CGSCC-NEXT:    store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
+; CGSCC:       for.end23:
+; CGSCC-NEXT:    store i8 0, ptr getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(524) getelementptr inbounds ([1024 x i8], ptr @GBytes, i64 0, i64 500), i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT:    br label %[[FOR_COND25:.*]]
-; CGSCC:       [[FOR_COND25]]:
-; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC33:.*]] ], [ 0, %[[FOR_END23]] ]
+; CGSCC-NEXT:    br label [[FOR_COND25:%.*]]
+; CGSCC:       for.cond25:
+; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
 ; CGSCC-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY28:.*]], label %[[FOR_COND_CLEANUP27:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP27]]:
-; CGSCC-NEXT:    br label %[[FOR_END35:.*]]
-; CGSCC:       [[FOR_BODY28]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY28:%.*]], label [[FOR_COND_CLEANUP27:%.*]]
+; CGSCC:       for.cond.cleanup27:
+; CGSCC-NEXT:    br label [[FOR_END35:%.*]]
+; CGSCC:       for.body28:
 ; CGSCC-NEXT:    [[ARRAYIDX30:%.*]] = getelementptr inbounds [1024 x i8], ptr @GBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    [[I19:%.*]] = load i8, ptr [[ARRAYIDX30]], align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]]
-; CGSCC-NEXT:    br label %[[FOR_INC33]]
-; CGSCC:       [[FOR_INC33]]:
+; CGSCC-NEXT:    store i8 [[I19]], ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_INC33]]
+; CGSCC:       for.inc33:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
-; CGSCC:       [[FOR_END35]]:
+; CGSCC-NEXT:    br label [[FOR_COND25]], !llvm.loop [[LOOP27:![0-9]+]]
+; CGSCC:       for.end35:
 ; CGSCC-NEXT:    ret void
 ;
 entry:
@@ -1080,15 +1080,15 @@ for.end35:                                        ; preds = %for.cond.cleanup27
 ;    }
 define i32 @static_global_simplifiable_3() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define noundef i32 @static_global_simplifiable_3(
-; TUNIT-SAME: ) #[[ATTR5]] {
-; TUNIT-NEXT:    store i32 1, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-LABEL: define {{[^@]+}}@static_global_simplifiable_3
+; TUNIT-SAME: () #[[ATTR4]] {
+; TUNIT-NEXT:    store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    ret i32 1
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define noundef i32 @static_global_simplifiable_3(
-; CGSCC-SAME: ) #[[ATTR6]] {
-; CGSCC-NEXT:    store i32 1, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-LABEL: define {{[^@]+}}@static_global_simplifiable_3
+; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-NEXT:    store i32 1, ptr @Flag3, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    ret i32 1
 ;
   store i32 1, ptr @Flag3, align 4, !tbaa !3
@@ -1115,95 +1115,95 @@ define i32 @static_global_simplifiable_3() {
 ;
 define void @noalias_arg_simplifiable_1(ptr noalias sret(%struct.S) align 4 %agg.result, ptr byval(%struct.S) align 8 %s) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define void @noalias_arg_simplifiable_1(
-; TUNIT-SAME: ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1
+; TUNIT-SAME: (ptr noalias nofree writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable_or_null(24) [[AGG_RESULT:%.*]], ptr noalias nofree nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
+; TUNIT-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
 ; TUNIT-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; TUNIT-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[FLOAT_TBAA10]]
+; TUNIT-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]]
 ; TUNIT-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; TUNIT-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
+; TUNIT-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
 ; TUNIT-NEXT:    call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR18]]
 ; TUNIT-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR18]]
 ; TUNIT-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR18]]
 ; TUNIT-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT:    [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
+; TUNIT-NEXT:    [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
 ; TUNIT-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; TUNIT-NEXT:    store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
+; TUNIT-NEXT:    store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
 ; TUNIT-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; TUNIT-NEXT:    [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]]
+; TUNIT-NEXT:    [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]]
 ; TUNIT-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
 ; TUNIT-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; TUNIT-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
+; TUNIT-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
 ; TUNIT-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; TUNIT-NEXT:    [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
+; TUNIT-NEXT:    [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
 ; TUNIT-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; TUNIT-NEXT:    [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
+; TUNIT-NEXT:    [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
 ; TUNIT-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
 ; TUNIT-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; TUNIT-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
-; TUNIT-NEXT:    [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
-; TUNIT-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
+; TUNIT-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
+; TUNIT-NEXT:    [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
+; TUNIT-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
 ; TUNIT-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; TUNIT-NEXT:    [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]]
+; TUNIT-NEXT:    [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]]
 ; TUNIT-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I8]], 1
 ; TUNIT-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; TUNIT-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
+; TUNIT-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
 ; TUNIT-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; TUNIT-NEXT:    [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[INT_TBAA14]]
-; TUNIT-NEXT:    [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
+; TUNIT-NEXT:    [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]]
+; TUNIT-NEXT:    [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
 ; TUNIT-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]]
 ; TUNIT-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; TUNIT-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
+; TUNIT-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define void @noalias_arg_simplifiable_1(
-; CGSCC-SAME: ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 captures(none) dereferenceable(24) [[AGG_RESULT:%.*]], ptr noalias nofree noundef nonnull byval([[STRUCT_S]]) align 8 captures(none) dereferenceable(24) [[S:%.*]]) #[[ATTR1]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    store float 0x3FF19999A0000000, ptr [[F1]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    store float 0x40019999A0000000, ptr [[F2]], align 8, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT:    store float 0x400A666660000000, ptr [[F3]], align 4, !tbaa [[TBAA11]]
 ; CGSCC-NEXT:    call void @write_arg(ptr noalias nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(24) [[S]], i32 noundef 1) #[[ATTR21]]
 ; CGSCC-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR21]]
 ; CGSCC-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 8 captures(none) dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR21]]
 ; CGSCC-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    [[I:%.*]] = load float, ptr [[F11]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 3
-; CGSCC-NEXT:    store float [[I]], ptr [[F12]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    store float [[I]], ptr [[F12]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 4
-; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    [[I4:%.*]] = load float, ptr [[F23]], align 8, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
 ; CGSCC-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 4
-; CGSCC-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[FLOAT_TBAA10]]
+; CGSCC-NEXT:    store float [[MUL]], ptr [[F24]], align 4, !tbaa [[TBAA10]]
 ; CGSCC-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 5
-; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[FLOAT_TBAA11]]
+; CGSCC-NEXT:    [[I5:%.*]] = load float, ptr [[F35]], align 4, !tbaa [[TBAA11]]
 ; CGSCC-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 3
-; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[FLOAT_TBAA7]]
+; CGSCC-NEXT:    [[I6:%.*]] = load float, ptr [[F16]], align 4, !tbaa [[TBAA7]]
 ; CGSCC-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
 ; CGSCC-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 5
-; CGSCC-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[FLOAT_TBAA11]]
-; CGSCC-NEXT:    [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
-; CGSCC-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT:    store float [[ADD]], ptr [[F37]], align 4, !tbaa [[TBAA11]]
+; CGSCC-NEXT:    [[I7:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
+; CGSCC-NEXT:    store i32 [[I7]], ptr [[AGG_RESULT]], align 4, !tbaa [[TBAA12]]
 ; CGSCC-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 1
-; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[INT_TBAA13]]
+; CGSCC-NEXT:    [[I8:%.*]] = load i32, ptr [[I210]], align 4, !tbaa [[TBAA13]]
 ; CGSCC-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I8]], 1
 ; CGSCC-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 1
-; CGSCC-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[INT_TBAA13]]
+; CGSCC-NEXT:    store i32 [[MUL11]], ptr [[I212]], align 4, !tbaa [[TBAA13]]
 ; CGSCC-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
-; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[INT_TBAA14]]
-; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[INT_TBAA12]]
+; CGSCC-NEXT:    [[I9:%.*]] = load i32, ptr [[I313]], align 8, !tbaa [[TBAA14]]
+; CGSCC-NEXT:    [[I10:%.*]] = load i32, ptr [[S]], align 8, !tbaa [[TBAA12]]
 ; CGSCC-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I9]], [[I10]]
 ; CGSCC-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[AGG_RESULT]], i64 0, i32 2
-; CGSCC-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[INT_TBAA14]]
+; CGSCC-NEXT:    store i32 [[ADD15]], ptr [[I316]], align 4, !tbaa [[TBAA14]]
 ; CGSCC-NEXT:    ret void
 ;
 entry:
@@ -1266,157 +1266,157 @@ entry:
 ;
 define void @noalias_arg_simplifiable_2(ptr %Bytes) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define void @noalias_arg_simplifiable_2(
-; TUNIT-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
-; TUNIT-NEXT:    br label %[[FOR_COND:.*]]
-; TUNIT:       [[FOR_COND]]:
-; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; TUNIT-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2
+; TUNIT-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    br label [[FOR_COND:%.*]]
+; TUNIT:       for.cond:
+; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; TUNIT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; TUNIT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP]]:
-; TUNIT-NEXT:    br label %[[FOR_END:.*]]
-; TUNIT:       [[FOR_BODY]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; TUNIT:       for.cond.cleanup:
+; TUNIT-NEXT:    br label [[FOR_END:%.*]]
+; TUNIT:       for.body:
 ; TUNIT-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; TUNIT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]]
-; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA19]]
-; TUNIT-NEXT:    br label %[[FOR_INC]]
-; TUNIT:       [[FOR_INC]]:
+; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA19]]
+; TUNIT-NEXT:    br label [[FOR_INC]]
+; TUNIT:       for.inc:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
-; TUNIT:       [[FOR_END]]:
-; TUNIT-NEXT:    br label %[[FOR_COND2:.*]]
-; TUNIT:       [[FOR_COND2]]:
-; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; TUNIT-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP25:![0-9]+]]
+; TUNIT:       for.end:
+; TUNIT-NEXT:    br label [[FOR_COND2:%.*]]
+; TUNIT:       for.cond2:
+; TUNIT-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; TUNIT-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; TUNIT-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP4]]:
-; TUNIT-NEXT:    br label %[[FOR_END11:.*]]
-; TUNIT:       [[FOR_BODY5]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; TUNIT:       for.cond.cleanup4:
+; TUNIT-NEXT:    br label [[FOR_END11:%.*]]
+; TUNIT:       for.body5:
 ; TUNIT-NEXT:    [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; TUNIT-NEXT:    [[I17:%.*]] = or i64 [[I16]], 1
 ; TUNIT-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]]
-; TUNIT-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA26:![0-9]+]]
-; TUNIT-NEXT:    br label %[[FOR_INC9]]
-; TUNIT:       [[FOR_INC9]]:
+; TUNIT-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA26:![0-9]+]]
+; TUNIT-NEXT:    br label [[FOR_INC9]]
+; TUNIT:       for.inc9:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
-; TUNIT:       [[FOR_END11]]:
-; TUNIT-NEXT:    br label %[[FOR_COND13:.*]]
-; TUNIT:       [[FOR_COND13]]:
-; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
+; TUNIT-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP27:![0-9]+]]
+; TUNIT:       for.end11:
+; TUNIT-NEXT:    br label [[FOR_COND13:%.*]]
+; TUNIT:       for.cond13:
+; TUNIT-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
 ; TUNIT-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; TUNIT-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP15]]:
-; TUNIT-NEXT:    br label %[[FOR_END23:.*]]
-; TUNIT:       [[FOR_BODY16]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; TUNIT:       for.cond.cleanup15:
+; TUNIT-NEXT:    br label [[FOR_END23:%.*]]
+; TUNIT:       for.body16:
 ; TUNIT-NEXT:    [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; TUNIT-NEXT:    [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
 ; TUNIT-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]]
-; TUNIT-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA28:![0-9]+]]
-; TUNIT-NEXT:    br label %[[FOR_INC21]]
-; TUNIT:       [[FOR_INC21]]:
+; TUNIT-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA28:![0-9]+]]
+; TUNIT-NEXT:    br label [[FOR_INC21]]
+; TUNIT:       for.inc21:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
-; TUNIT:       [[FOR_END23]]:
+; TUNIT-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
+; TUNIT:       for.end23:
 ; TUNIT-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023
-; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA19]]
+; TUNIT-NEXT:    store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA19]]
 ; TUNIT-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500
 ; TUNIT-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR18]]
-; TUNIT-NEXT:    br label %[[FOR_COND27:.*]]
-; TUNIT:       [[FOR_COND27]]:
-; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ]
+; TUNIT-NEXT:    br label [[FOR_COND27:%.*]]
+; TUNIT:       for.cond27:
+; TUNIT-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
 ; TUNIT-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; TUNIT-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP29]]:
-; TUNIT-NEXT:    br label %[[FOR_END37:.*]]
-; TUNIT:       [[FOR_BODY30]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]]
+; TUNIT:       for.cond.cleanup29:
+; TUNIT-NEXT:    br label [[FOR_END37:%.*]]
+; TUNIT:       for.body30:
 ; TUNIT-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]]
-; TUNIT-NEXT:    [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA19]]
+; TUNIT-NEXT:    [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA19]]
 ; TUNIT-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; TUNIT-NEXT:    store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA19]]
-; TUNIT-NEXT:    br label %[[FOR_INC35]]
-; TUNIT:       [[FOR_INC35]]:
+; TUNIT-NEXT:    store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA19]]
+; TUNIT-NEXT:    br label [[FOR_INC35]]
+; TUNIT:       for.inc35:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
-; TUNIT:       [[FOR_END37]]:
+; TUNIT-NEXT:    br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
+; TUNIT:       for.end37:
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define void @noalias_arg_simplifiable_2(
-; CGSCC-SAME: ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
-; CGSCC-NEXT:    br label %[[FOR_COND:.*]]
-; CGSCC:       [[FOR_COND]]:
-; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_INC:.*]] ], [ 0, %[[ENTRY]] ]
+; CGSCC-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_2
+; CGSCC-SAME: (ptr nofree captures(none) [[BYTES:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    br label [[FOR_COND:%.*]]
+; CGSCC:       for.cond:
+; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_INC:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CGSCC-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], 100
-; CGSCC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY:.*]], label %[[FOR_COND_CLEANUP:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP]]:
-; CGSCC-NEXT:    br label %[[FOR_END:.*]]
-; CGSCC:       [[FOR_BODY]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY:%.*]], label [[FOR_COND_CLEANUP:%.*]]
+; CGSCC:       for.cond.cleanup:
+; CGSCC-NEXT:    br label [[FOR_END:%.*]]
+; CGSCC:       for.body:
 ; CGSCC-NEXT:    [[I:%.*]] = mul nuw nsw i64 [[INDVARS_IV]], 10
 ; CGSCC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[I]]
-; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[CHAR_TBAA15]]
-; CGSCC-NEXT:    br label %[[FOR_INC]]
-; CGSCC:       [[FOR_INC]]:
+; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_INC]]
+; CGSCC:       for.inc:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
-; CGSCC:       [[FOR_END]]:
-; CGSCC-NEXT:    br label %[[FOR_COND2:.*]]
-; CGSCC:       [[FOR_COND2]]:
-; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], %[[FOR_INC9:.*]] ], [ 0, %[[FOR_END]] ]
+; CGSCC-NEXT:    br label [[FOR_COND]], !llvm.loop [[LOOP28:![0-9]+]]
+; CGSCC:       for.end:
+; CGSCC-NEXT:    br label [[FOR_COND2:%.*]]
+; CGSCC:       for.cond2:
+; CGSCC-NEXT:    [[INDVARS_IV2:%.*]] = phi i64 [ [[INDVARS_IV_NEXT3:%.*]], [[FOR_INC9:%.*]] ], [ 0, [[FOR_END]] ]
 ; CGSCC-NEXT:    [[EXITCOND6:%.*]] = icmp ne i64 [[INDVARS_IV2]], 10
-; CGSCC-NEXT:    br i1 [[EXITCOND6]], label %[[FOR_BODY5:.*]], label %[[FOR_COND_CLEANUP4:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP4]]:
-; CGSCC-NEXT:    br label %[[FOR_END11:.*]]
-; CGSCC:       [[FOR_BODY5]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND6]], label [[FOR_BODY5:%.*]], label [[FOR_COND_CLEANUP4:%.*]]
+; CGSCC:       for.cond.cleanup4:
+; CGSCC-NEXT:    br label [[FOR_END11:%.*]]
+; CGSCC:       for.body5:
 ; CGSCC-NEXT:    [[I16:%.*]] = mul nuw nsw i64 [[INDVARS_IV2]], 10
 ; CGSCC-NEXT:    [[I17:%.*]] = or i64 [[I16]], 1
 ; CGSCC-NEXT:    [[ARRAYIDX8:%.*]] = getelementptr inbounds float, ptr [[BYTES]], i64 [[I17]]
-; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[FLOAT_TBAA18]]
-; CGSCC-NEXT:    br label %[[FOR_INC9]]
-; CGSCC:       [[FOR_INC9]]:
+; CGSCC-NEXT:    store float 0.000000e+00, ptr [[ARRAYIDX8]], align 4, !tbaa [[TBAA18]]
+; CGSCC-NEXT:    br label [[FOR_INC9]]
+; CGSCC:       for.inc9:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT3]] = add nuw nsw i64 [[INDVARS_IV2]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
-; CGSCC:       [[FOR_END11]]:
-; CGSCC-NEXT:    br label %[[FOR_COND13:.*]]
-; CGSCC:       [[FOR_COND13]]:
-; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], %[[FOR_INC21:.*]] ], [ 0, %[[FOR_END11]] ]
+; CGSCC-NEXT:    br label [[FOR_COND2]], !llvm.loop [[LOOP29:![0-9]+]]
+; CGSCC:       for.end11:
+; CGSCC-NEXT:    br label [[FOR_COND13:%.*]]
+; CGSCC:       for.cond13:
+; CGSCC-NEXT:    [[INDVARS_IV7:%.*]] = phi i64 [ [[INDVARS_IV_NEXT8:%.*]], [[FOR_INC21:%.*]] ], [ 0, [[FOR_END11]] ]
 ; CGSCC-NEXT:    [[EXITCOND11:%.*]] = icmp ne i64 [[INDVARS_IV7]], 20
-; CGSCC-NEXT:    br i1 [[EXITCOND11]], label %[[FOR_BODY16:.*]], label %[[FOR_COND_CLEANUP15:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP15]]:
-; CGSCC-NEXT:    br label %[[FOR_END23:.*]]
-; CGSCC:       [[FOR_BODY16]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND11]], label [[FOR_BODY16:%.*]], label [[FOR_COND_CLEANUP15:%.*]]
+; CGSCC:       for.cond.cleanup15:
+; CGSCC-NEXT:    br label [[FOR_END23:%.*]]
+; CGSCC:       for.body16:
 ; CGSCC-NEXT:    [[I19:%.*]] = mul nuw nsw i64 [[INDVARS_IV7]], 10
 ; CGSCC-NEXT:    [[I20:%.*]] = add nuw nsw i64 [[I19]], 2
 ; CGSCC-NEXT:    [[ARRAYIDX20:%.*]] = getelementptr inbounds i64, ptr [[BYTES]], i64 [[I20]]
-; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[LONG_LONG_TBAA20]]
-; CGSCC-NEXT:    br label %[[FOR_INC21]]
-; CGSCC:       [[FOR_INC21]]:
+; CGSCC-NEXT:    store i64 0, ptr [[ARRAYIDX20]], align 8, !tbaa [[TBAA20]]
+; CGSCC-NEXT:    br label [[FOR_INC21]]
+; CGSCC:       for.inc21:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT8]] = add nuw nsw i64 [[INDVARS_IV7]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
-; CGSCC:       [[FOR_END23]]:
+; CGSCC-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP30:![0-9]+]]
+; CGSCC:       for.end23:
 ; CGSCC-NEXT:    [[ARRAYIDX24:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 1023
-; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    store i8 0, ptr [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 500
 ; CGSCC-NEXT:    call void @write_arg(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[ARRAYIDX25]], i32 noundef 0) #[[ATTR21]]
-; CGSCC-NEXT:    br label %[[FOR_COND27:.*]]
-; CGSCC:       [[FOR_COND27]]:
-; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], %[[FOR_INC35:.*]] ], [ 0, %[[FOR_END23]] ]
+; CGSCC-NEXT:    br label [[FOR_COND27:%.*]]
+; CGSCC:       for.cond27:
+; CGSCC-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
 ; CGSCC-NEXT:    [[EXITCOND14:%.*]] = icmp ne i64 [[INDVARS_IV12]], 1024
-; CGSCC-NEXT:    br i1 [[EXITCOND14]], label %[[FOR_BODY30:.*]], label %[[FOR_COND_CLEANUP29:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP29]]:
-; CGSCC-NEXT:    br label %[[FOR_END37:.*]]
-; CGSCC:       [[FOR_BODY30]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND14]], label [[FOR_BODY30:%.*]], label [[FOR_COND_CLEANUP29:%.*]]
+; CGSCC:       for.cond.cleanup29:
+; CGSCC-NEXT:    br label [[FOR_END37:%.*]]
+; CGSCC:       for.body30:
 ; CGSCC-NEXT:    [[ARRAYIDX32:%.*]] = getelementptr inbounds i8, ptr [[BYTES]], i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[CHAR_TBAA15]]
+; CGSCC-NEXT:    [[I22:%.*]] = load i8, ptr [[ARRAYIDX32]], align 1, !tbaa [[TBAA15]]
 ; CGSCC-NEXT:    [[ARRAYIDX34:%.*]] = getelementptr inbounds [1024 x i8], ptr @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; CGSCC-NEXT:    store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[CHAR_TBAA15]]
-; CGSCC-NEXT:    br label %[[FOR_INC35]]
-; CGSCC:       [[FOR_INC35]]:
+; CGSCC-NEXT:    store i8 [[I22]], ptr [[ARRAYIDX34]], align 1, !tbaa [[TBAA15]]
+; CGSCC-NEXT:    br label [[FOR_INC35]]
+; CGSCC:       for.inc35:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
-; CGSCC:       [[FOR_END37]]:
+; CGSCC-NEXT:    br label [[FOR_COND27]], !llvm.loop [[LOOP31:![0-9]+]]
+; CGSCC:       for.end37:
 ; CGSCC-NEXT:    ret void
 ;
 entry:
@@ -1524,40 +1524,40 @@ for.end37:                                        ; preds = %for.cond.cleanup29
 ;    }
 ;
 define i32 @local_alloca_not_simplifiable_1() {
-; TUNIT-LABEL: define i32 @local_alloca_not_simplifiable_1() {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[X:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[Y:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR17]]
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR17]]
-; TUNIT-NEXT:    store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
-; TUNIT-NEXT:    store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT:    store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT:    store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]])
 ; TUNIT-NEXT:    call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]])
-; TUNIT-NEXT:    [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT:    [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0
 ; TUNIT-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1
-; TUNIT-NEXT:    [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT:    [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]]
 ; TUNIT-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]]
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]])
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]])
 ; TUNIT-NEXT:    ret i32 [[ADD1]]
 ;
-; CGSCC-LABEL: define i32 @local_alloca_not_simplifiable_1() {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_1() {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[X:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[Y:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[X]]) #[[ATTR20]]
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]]) #[[ATTR20]]
-; CGSCC-NEXT:    store i32 1, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
-; CGSCC-NEXT:    store i32 1, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT:    store i32 1, ptr [[Y]], align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT:    store i32 1, ptr [[X]], align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    call void @escape(ptr noundef nonnull align 4 dereferenceable(4) [[X]])
 ; CGSCC-NEXT:    call void @write_random(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Y]])
-; CGSCC-NEXT:    [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT:    [[I3:%.*]] = load i32, ptr [[X]], align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[I3]], 0
 ; CGSCC-NEXT:    [[COND:%.*]] = select i1 [[TOBOOL_NOT]], i32 2, i32 1
-; CGSCC-NEXT:    [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT:    [[I4:%.*]] = load i32, ptr [[Y]], align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    [[ADD:%.*]] = add nsw i32 [[I3]], [[I4]]
 ; CGSCC-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD]], [[COND]]
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[Y]])
@@ -1567,8 +1567,8 @@ define i32 @local_alloca_not_simplifiable_1() {
 entry:
   %X = alloca i32, align 4
   %Y = alloca i32, align 4
-  call void @llvm.lifetime.start.p0(ptr nonnull %X)
-  call void @llvm.lifetime.start.p0(ptr nonnull %Y)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %X)
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %Y)
   store i32 1, ptr %Y, align 4, !tbaa !3
   store i32 1, ptr %X, align 4, !tbaa !3
   call void @escape(ptr nonnull %X)
@@ -1579,27 +1579,27 @@ entry:
   %i4 = load i32, ptr %Y, align 4, !tbaa !3
   %add = add nsw i32 %i3, %i4
   %add1 = add nsw i32 %add, %cond
-  call void @llvm.lifetime.end.p0(ptr nonnull %Y)
-  call void @llvm.lifetime.end.p0(ptr nonnull %X)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %Y)
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %X)
   ret i32 %add1
 }
 
 define i8 @local_alloca_not_simplifiable_2(i64 %index1, i64 %index2, i1 %cnd) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i8 @local_alloca_not_simplifiable_2(
-; CHECK-SAME: i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@local_alloca_not_simplifiable_2
+; CHECK-SAME: (i64 [[INDEX1:%.*]], i64 [[INDEX2:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CHECK-NEXT:    store i8 7, ptr [[BYTES]], align 16
-; CHECK-NEXT:    br i1 [[CND]], label %[[LEFT:.*]], label %[[RIGHT:.*]]
-; CHECK:       [[LEFT]]:
+; CHECK-NEXT:    br i1 [[CND]], label [[LEFT:%.*]], label [[RIGHT:%.*]]
+; CHECK:       left:
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX1]]
-; CHECK-NEXT:    br label %[[JOIN:.*]]
-; CHECK:       [[RIGHT]]:
+; CHECK-NEXT:    br label [[JOIN:%.*]]
+; CHECK:       right:
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 [[INDEX2]]
-; CHECK-NEXT:    br label %[[JOIN]]
-; CHECK:       [[JOIN]]:
-; CHECK-NEXT:    [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], %[[LEFT]] ], [ [[GEP2]], %[[RIGHT]] ]
+; CHECK-NEXT:    br label [[JOIN]]
+; CHECK:       join:
+; CHECK-NEXT:    [[GEP_JOIN:%.*]] = phi ptr [ [[GEP1]], [[LEFT]] ], [ [[GEP2]], [[RIGHT]] ]
 ; CHECK-NEXT:    store i8 9, ptr [[GEP_JOIN]], align 4
 ; CHECK-NEXT:    [[I:%.*]] = load i8, ptr [[BYTES]], align 16
 ; CHECK-NEXT:    ret i8 [[I]]
@@ -1630,9 +1630,9 @@ join:                                             ; preds = %right, %left
 ; We could simplify these if we separate accessed bins wrt. alignment (here mod 4).
 define i32 @unknown_access_mixed_simplifiable(i32 %arg1, i32 %arg2) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i32 @unknown_access_mixed_simplifiable(
-; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_simplifiable
+; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]]
@@ -1666,9 +1666,9 @@ entry:
 ; The access to bc4b could go anywhere, nothing is simplifiable.
 define i32 @unknown_access_mixed_not_simplifiable(i32 %arg1, i32 %arg2, i32 %arg3) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i32 @unknown_access_mixed_not_simplifiable(
-; CHECK-SAME: i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@unknown_access_mixed_not_simplifiable
+; CHECK-SAME: (i32 [[ARG1:%.*]], i32 [[ARG2:%.*]], i32 [[ARG3:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[S:%.*]] = alloca [[STRUCT_S:%.*]], align 4
 ; CHECK-NEXT:    [[GEP1:%.*]] = getelementptr inbounds [[STRUCT_S]], ptr [[S]], i64 0, i32 2
 ; CHECK-NEXT:    [[GEP2:%.*]] = getelementptr inbounds i32, ptr [[S]], i32 [[ARG1]]
@@ -1716,17 +1716,17 @@ declare void @escape(ptr)
 ;
 define i32 @global_not_simplifiable_1(i32 %cnd) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define i32 @global_not_simplifiable_1(
-; TUNIT-SAME: i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
-; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-LABEL: define {{[^@]+}}@global_not_simplifiable_1
+; TUNIT-SAME: (i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    ret i32 [[I]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define i32 @global_not_simplifiable_1(
-; CGSCC-SAME: i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
-; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-LABEL: define {{[^@]+}}@global_not_simplifiable_1
+; CGSCC-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag0, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    ret i32 [[I]]
 ;
 entry:
@@ -1744,15 +1744,15 @@ entry:
 ;    }
 ;
 define i32 @static_global_not_simplifiable_1(i32 %cnd) {
-; CHECK-LABEL: define i32 @static_global_not_simplifiable_1(
-; CHECK-SAME: i32 [[CND:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@static_global_not_simplifiable_1
+; CHECK-SAME: (i32 [[CND:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    call void @sync()
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    br label %[[IF_END]]
-; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 1
 ;
 entry:
@@ -1780,13 +1780,13 @@ declare void @sync()
 ;      return v;
 ;    }
 define i32 @static_global_simplifiable_4(i32 %cnd) {
-; CHECK-LABEL: define noundef i32 @static_global_simplifiable_4(
-; CHECK-SAME: i32 [[CND:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
-; CHECK-NEXT:    store i32 1, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-LABEL: define {{[^@]+}}@static_global_simplifiable_4
+; CHECK-SAME: (i32 [[CND:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    store i32 1, ptr @Flag2, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    call void @sync()
-; CHECK-NEXT:    [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
-; CHECK-NEXT:    store i32 2, ptr @Flag2, align 4, !tbaa [[INT_TBAA3]]
+; CHECK-NEXT:    [[I:%.*]] = load i32, ptr @Flag2, align 4, !tbaa [[TBAA3]]
+; CHECK-NEXT:    store i32 2, ptr @Flag2, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    ret i32 [[I]]
 ;
 entry:
@@ -1806,22 +1806,22 @@ entry:
 ;      return v;
 ;    }
 define i32 @static_global_not_simplifiable_2(i32 %cnd) {
-; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_2(
-; TUNIT-SAME: i32 [[CND:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
-; TUNIT-NEXT:    store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2
+; TUNIT-SAME: (i32 [[CND:%.*]]) {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    call void @sync() #[[ATTR19:[0-9]+]]
-; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
-; TUNIT-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    ret i32 [[I]]
 ;
-; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_2(
-; CGSCC-SAME: i32 [[CND:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
-; CGSCC-NEXT:    store i32 1, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2
+; CGSCC-SAME: (i32 [[CND:%.*]]) {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    store i32 1, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    call void @sync() #[[ATTR22:[0-9]+]]
-; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
-; CGSCC-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag4, align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    ret i32 [[I]]
 ;
 entry:
@@ -1833,15 +1833,15 @@ entry:
 }
 define void @static_global_not_simplifiable_2_helper() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @static_global_not_simplifiable_2_helper(
-; TUNIT-SAME: ) #[[ATTR5]] {
-; TUNIT-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper
+; TUNIT-SAME: () #[[ATTR4]] {
+; TUNIT-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define void @static_global_not_simplifiable_2_helper(
-; CGSCC-SAME: ) #[[ATTR6]] {
-; CGSCC-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_2_helper
+; CGSCC-SAME: () #[[ATTR5]] {
+; CGSCC-NEXT:    store i32 2, ptr @Flag4, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    ret void
 ;
   store i32 2, ptr @Flag4, align 4, !tbaa !3
@@ -1851,19 +1851,19 @@ define void @static_global_not_simplifiable_2_helper() {
 ; Similiar to static_global_simplifiable_3 but with a may-store.
 define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define noundef i32 @static_global_not_simplifiable_3(
-; TUNIT-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR3]] {
+; TUNIT-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3
+; TUNIT-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR2]] {
 ; TUNIT-NEXT:    [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]]
-; TUNIT-NEXT:    store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]]
-; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
+; TUNIT-NEXT:    store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]]
+; TUNIT-NEXT:    [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]]
 ; TUNIT-NEXT:    ret i32 [[I]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define noundef i32 @static_global_not_simplifiable_3(
-; CGSCC-SAME: i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR5]] {
+; CGSCC-LABEL: define {{[^@]+}}@static_global_not_simplifiable_3
+; CGSCC-SAME: (i1 [[C:%.*]], ptr nofree writeonly captures(none) [[P:%.*]]) #[[ATTR4]] {
 ; CGSCC-NEXT:    [[SEL:%.*]] = select i1 [[C]], ptr @Flag3, ptr [[P]]
-; CGSCC-NEXT:    store i32 1, ptr [[SEL]], align 4, !tbaa [[INT_TBAA3]]
-; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[INT_TBAA3]]
+; CGSCC-NEXT:    store i32 1, ptr [[SEL]], align 4, !tbaa [[TBAA3]]
+; CGSCC-NEXT:    [[I:%.*]] = load i32, ptr @Flag3, align 4, !tbaa [[TBAA3]]
 ; CGSCC-NEXT:    ret i32 [[I]]
 ;
   %sel = select i1 %c, ptr @Flag3, ptr %p
@@ -1887,15 +1887,15 @@ define i32 @static_global_not_simplifiable_3(i1 %c, ptr %p) {
 ; FIXME: We could replace these loads.
 define i32 @write_read_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i32 @write_read_global(
-; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_read_global
+; TUNIT-SAME: () #[[ATTR2]] {
 ; TUNIT-NEXT:    store i32 7, ptr @Gint1, align 4
 ; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr @Gint1, align 4
 ; TUNIT-NEXT:    ret i32 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i32 @write_read_global(
-; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_read_global
+; CGSCC-SAME: () #[[ATTR4]] {
 ; CGSCC-NEXT:    store i32 7, ptr @Gint1, align 4
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr @Gint1, align 4
 ; CGSCC-NEXT:    ret i32 [[L]]
@@ -1906,14 +1906,14 @@ define i32 @write_read_global() {
 }
 define void @write_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @write_global(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_global
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    store i32 7, ptr @Gint2, align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define void @write_global(
-; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_global
+; CGSCC-SAME: () #[[ATTR5]] {
 ; CGSCC-NEXT:    store i32 7, ptr @Gint2, align 4
 ; CGSCC-NEXT:    ret void
 ;
@@ -1922,14 +1922,14 @@ define void @write_global() {
 }
 define i32 @read_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define i32 @read_global(
-; TUNIT-SAME: ) #[[ATTR6]] {
+; TUNIT-LABEL: define {{[^@]+}}@read_global
+; TUNIT-SAME: () #[[ATTR5]] {
 ; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr @Gint2, align 4
 ; TUNIT-NEXT:    ret i32 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define i32 @read_global(
-; CGSCC-SAME: ) #[[ATTR7]] {
+; CGSCC-LABEL: define {{[^@]+}}@read_global
+; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr @Gint2, align 4
 ; CGSCC-NEXT:    ret i32 [[L]]
 ;
@@ -1938,13 +1938,13 @@ define i32 @read_global() {
 }
 define i32 @write_read_static_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define noundef i32 @write_read_static_global(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_read_static_global
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    ret i32 7
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define noundef i32 @write_read_static_global(
-; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_read_static_global
+; CGSCC-SAME: () #[[ATTR5]] {
 ; CGSCC-NEXT:    ret i32 7
 ;
   store i32 7, ptr @Gstatic_int1
@@ -1953,14 +1953,14 @@ define i32 @write_read_static_global() {
 }
 define void @write_static_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @write_static_global(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_static_global
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    store i32 7, ptr @Gstatic_int2, align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define void @write_static_global(
-; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_static_global
+; CGSCC-SAME: () #[[ATTR5]] {
 ; CGSCC-NEXT:    store i32 7, ptr @Gstatic_int2, align 4
 ; CGSCC-NEXT:    ret void
 ;
@@ -1969,14 +1969,14 @@ define void @write_static_global() {
 }
 define i32 @read_static_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; TUNIT-LABEL: define noundef i32 @read_static_global(
-; TUNIT-SAME: ) #[[ATTR6]] {
+; TUNIT-LABEL: define {{[^@]+}}@read_static_global
+; TUNIT-SAME: () #[[ATTR5]] {
 ; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4
 ; TUNIT-NEXT:    ret i32 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define noundef i32 @read_static_global(
-; CGSCC-SAME: ) #[[ATTR7]] {
+; CGSCC-LABEL: define {{[^@]+}}@read_static_global
+; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr @Gstatic_int2, align 4
 ; CGSCC-NEXT:    ret i32 [[L]]
 ;
@@ -1985,13 +1985,13 @@ define i32 @read_static_global() {
 }
 define i32 @write_read_static_undef_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define noundef i32 @write_read_static_undef_global(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_read_static_undef_global
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    ret i32 7
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define noundef i32 @write_read_static_undef_global(
-; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_read_static_undef_global
+; CGSCC-SAME: () #[[ATTR5]] {
 ; CGSCC-NEXT:    ret i32 7
 ;
   store i32 7, ptr @Gstatic_undef_int1
@@ -2000,13 +2000,13 @@ define i32 @write_read_static_undef_global() {
 }
 define void @write_static_undef_global() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; TUNIT-LABEL: define void @write_static_undef_global(
-; TUNIT-SAME: ) #[[ATTR5]] {
+; TUNIT-LABEL: define {{[^@]+}}@write_static_undef_global
+; TUNIT-SAME: () #[[ATTR4]] {
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(write)
-; CGSCC-LABEL: define void @write_static_undef_global(
-; CGSCC-SAME: ) #[[ATTR6]] {
+; CGSCC-LABEL: define {{[^@]+}}@write_static_undef_global
+; CGSCC-SAME: () #[[ATTR5]] {
 ; CGSCC-NEXT:    store i32 7, ptr @Gstatic_undef_int2, align 4
 ; CGSCC-NEXT:    ret void
 ;
@@ -2015,8 +2015,8 @@ define void @write_static_undef_global() {
 }
 define i32 @read_static_undef_global() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i32 @read_static_undef_global(
-; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@read_static_undef_global
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:    ret i32 7
 ;
   %l = load i32, ptr @Gstatic_undef_int2
@@ -2025,8 +2025,8 @@ define i32 @read_static_undef_global() {
 
 define i32 @single_read_of_static_global() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define noundef i32 @single_read_of_static_global(
-; CHECK-SAME: ) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@single_read_of_static_global
+; CHECK-SAME: () #[[ATTR3]] {
 ; CHECK-NEXT:    ret i32 0
 ;
   %l = load i32, ptr @Gstatic_int3
@@ -2035,20 +2035,20 @@ define i32 @single_read_of_static_global() {
 
 define i8 @phi_store() {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i8 @phi_store(
-; CHECK-SAME: ) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*]]:
+; CHECK-LABEL: define {{[^@]+}}@phi_store
+; CHECK-SAME: () #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A:%.*]] = alloca i16, align 2
-; CHECK-NEXT:    br label %[[LOOP:.*]]
-; CHECK:       [[LOOP]]:
-; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[A]], %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; CHECK-NEXT:    br label [[LOOP:%.*]]
+; CHECK:       loop:
+; CHECK-NEXT:    [[P:%.*]] = phi ptr [ [[A]], [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; CHECK-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; CHECK-NEXT:    store i8 1, ptr [[P]], align 1
 ; CHECK-NEXT:    [[G]] = getelementptr i8, ptr [[P]], i64 1
 ; CHECK-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CHECK-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 2
-; CHECK-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; CHECK:       [[END]]:
+; CHECK-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; CHECK:       end:
 ; CHECK-NEXT:    [[S:%.*]] = getelementptr i8, ptr [[A]], i64 1
 ; CHECK-NEXT:    [[L:%.*]] = load i8, ptr [[S]], align 1
 ; CHECK-NEXT:    ret i8 [[L]]
@@ -2074,19 +2074,19 @@ end:
 define i8 @phi_no_store_1() {
 ;
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i8 @phi_no_store_1(
-; TUNIT-SAME: ) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
-; TUNIT-NEXT:    br label %[[LOOP:.*]]
-; TUNIT:       [[LOOP]]:
-; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_1
+; TUNIT-SAME: () #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    br label [[LOOP:%.*]]
+; TUNIT:       loop:
+; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; TUNIT-NEXT:    store i8 1, ptr [[P]], align 1
 ; TUNIT-NEXT:    [[G]] = getelementptr i8, ptr [[P]], i64 1
 ; TUNIT-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; TUNIT-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 3
-; TUNIT-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; TUNIT:       [[END]]:
+; TUNIT-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; TUNIT:       end:
 ; TUNIT-NEXT:    [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2
 ; TUNIT-NEXT:    [[L11:%.*]] = load i8, ptr [[S11]], align 2
 ; TUNIT-NEXT:    [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3
@@ -2095,19 +2095,19 @@ define i8 @phi_no_store_1() {
 ; TUNIT-NEXT:    ret i8 [[ADD]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i8 @phi_no_store_1(
-; CGSCC-SAME: ) #[[ATTR5]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
-; CGSCC-NEXT:    br label %[[LOOP:.*]]
-; CGSCC:       [[LOOP]]:
-; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a1, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_1
+; CGSCC-SAME: () #[[ATTR4]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    br label [[LOOP:%.*]]
+; CGSCC:       loop:
+; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a1, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; CGSCC-NEXT:    store i8 1, ptr [[P]], align 1
 ; CGSCC-NEXT:    [[G]] = getelementptr i8, ptr [[P]], i64 1
 ; CGSCC-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CGSCC-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 3
-; CGSCC-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; CGSCC:       [[END]]:
+; CGSCC-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; CGSCC:       end:
 ; CGSCC-NEXT:    [[S11:%.*]] = getelementptr i8, ptr @a1, i64 2
 ; CGSCC-NEXT:    [[L11:%.*]] = load i8, ptr [[S11]], align 2
 ; CGSCC-NEXT:    [[S12:%.*]] = getelementptr i8, ptr @a1, i64 3
@@ -2138,19 +2138,19 @@ end:
 define i8 @phi_no_store_2() {
 ;
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i8 @phi_no_store_2(
-; TUNIT-SAME: ) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
-; TUNIT-NEXT:    br label %[[LOOP:.*]]
-; TUNIT:       [[LOOP]]:
-; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_2
+; TUNIT-SAME: () #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    br label [[LOOP:%.*]]
+; TUNIT:       loop:
+; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; TUNIT-NEXT:    store i8 1, ptr [[P]], align 1
 ; TUNIT-NEXT:    [[G]] = getelementptr i8, ptr @a2, i64 2
 ; TUNIT-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; TUNIT-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; TUNIT-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; TUNIT:       [[END]]:
+; TUNIT-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; TUNIT:       end:
 ; TUNIT-NEXT:    [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2
 ; TUNIT-NEXT:    [[L21:%.*]] = load i8, ptr [[S21]], align 2
 ; TUNIT-NEXT:    [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3
@@ -2159,19 +2159,19 @@ define i8 @phi_no_store_2() {
 ; TUNIT-NEXT:    ret i8 [[ADD]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i8 @phi_no_store_2(
-; CGSCC-SAME: ) #[[ATTR5]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
-; CGSCC-NEXT:    br label %[[LOOP:.*]]
-; CGSCC:       [[LOOP]]:
-; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a2, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_2
+; CGSCC-SAME: () #[[ATTR4]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    br label [[LOOP:%.*]]
+; CGSCC:       loop:
+; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a2, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; CGSCC-NEXT:    store i8 1, ptr [[P]], align 1
 ; CGSCC-NEXT:    [[G]] = getelementptr i8, ptr @a2, i64 2
 ; CGSCC-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CGSCC-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; CGSCC-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; CGSCC:       [[END]]:
+; CGSCC-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; CGSCC:       end:
 ; CGSCC-NEXT:    [[S21:%.*]] = getelementptr i8, ptr @a2, i64 2
 ; CGSCC-NEXT:    [[L21:%.*]] = load i8, ptr [[S21]], align 2
 ; CGSCC-NEXT:    [[S22:%.*]] = getelementptr i8, ptr @a2, i64 3
@@ -2200,21 +2200,21 @@ end:
 
 define i8 @phi_no_store_3() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i8 @phi_no_store_3(
-; TUNIT-SAME: ) #[[ATTR3]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
+; TUNIT-LABEL: define {{[^@]+}}@phi_no_store_3
+; TUNIT-SAME: () #[[ATTR2]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3
 ; TUNIT-NEXT:    store i8 0, ptr [[S30]], align 1
-; TUNIT-NEXT:    br label %[[LOOP:.*]]
-; TUNIT:       [[LOOP]]:
-; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; TUNIT-NEXT:    br label [[LOOP:%.*]]
+; TUNIT:       loop:
+; TUNIT-NEXT:    [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; TUNIT-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; TUNIT-NEXT:    store i8 1, ptr [[P]], align 1
 ; TUNIT-NEXT:    [[G]] = getelementptr i8, ptr @a3, i64 2
 ; TUNIT-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; TUNIT-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; TUNIT-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; TUNIT:       [[END]]:
+; TUNIT-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; TUNIT:       end:
 ; TUNIT-NEXT:    [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2
 ; TUNIT-NEXT:    [[L31:%.*]] = load i8, ptr [[S31]], align 2
 ; TUNIT-NEXT:    [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3
@@ -2226,21 +2226,21 @@ define i8 @phi_no_store_3() {
 ; TUNIT-NEXT:    ret i8 [[ADD2]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i8 @phi_no_store_3(
-; CGSCC-SAME: ) #[[ATTR5]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
+; CGSCC-LABEL: define {{[^@]+}}@phi_no_store_3
+; CGSCC-SAME: () #[[ATTR4]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[S30:%.*]] = getelementptr i8, ptr @a3, i64 3
 ; CGSCC-NEXT:    store i8 0, ptr [[S30]], align 1
-; CGSCC-NEXT:    br label %[[LOOP:.*]]
-; CGSCC:       [[LOOP]]:
-; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a3, %[[ENTRY]] ], [ [[G:%.*]], %[[LOOP]] ]
-; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, %[[ENTRY]] ], [ [[O:%.*]], %[[LOOP]] ]
+; CGSCC-NEXT:    br label [[LOOP:%.*]]
+; CGSCC:       loop:
+; CGSCC-NEXT:    [[P:%.*]] = phi ptr [ @a3, [[ENTRY:%.*]] ], [ [[G:%.*]], [[LOOP]] ]
+; CGSCC-NEXT:    [[I:%.*]] = phi i8 [ 0, [[ENTRY]] ], [ [[O:%.*]], [[LOOP]] ]
 ; CGSCC-NEXT:    store i8 1, ptr [[P]], align 1
 ; CGSCC-NEXT:    [[G]] = getelementptr i8, ptr @a3, i64 2
 ; CGSCC-NEXT:    [[O]] = add nsw i8 [[I]], 1
 ; CGSCC-NEXT:    [[C:%.*]] = icmp eq i8 [[O]], 7
-; CGSCC-NEXT:    br i1 [[C]], label %[[END:.*]], label %[[LOOP]]
-; CGSCC:       [[END]]:
+; CGSCC-NEXT:    br i1 [[C]], label [[END:%.*]], label [[LOOP]]
+; CGSCC:       end:
 ; CGSCC-NEXT:    [[S31:%.*]] = getelementptr i8, ptr @a3, i64 2
 ; CGSCC-NEXT:    [[L31:%.*]] = load i8, ptr [[S31]], align 2
 ; CGSCC-NEXT:    [[S32:%.*]] = getelementptr i8, ptr @a3, i64 3
@@ -2277,15 +2277,15 @@ end:
 
 define i8 @cast_and_load_1() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i8 @cast_and_load_1(
-; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_1
+; TUNIT-SAME: () #[[ATTR2]] {
 ; TUNIT-NEXT:    store i32 42, ptr @bytes1, align 4
 ; TUNIT-NEXT:    [[L:%.*]] = load i8, ptr @bytes1, align 4
 ; TUNIT-NEXT:    ret i8 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i8 @cast_and_load_1(
-; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_1
+; CGSCC-SAME: () #[[ATTR4]] {
 ; CGSCC-NEXT:    store i32 42, ptr @bytes1, align 4
 ; CGSCC-NEXT:    [[L:%.*]] = load i8, ptr @bytes1, align 4
 ; CGSCC-NEXT:    ret i8 [[L]]
@@ -2297,15 +2297,15 @@ define i8 @cast_and_load_1() {
 
 define i64 @cast_and_load_2() {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define i64 @cast_and_load_2(
-; TUNIT-SAME: ) #[[ATTR3]] {
+; TUNIT-LABEL: define {{[^@]+}}@cast_and_load_2
+; TUNIT-SAME: () #[[ATTR2]] {
 ; TUNIT-NEXT:    store i32 42, ptr @bytes2, align 4
 ; TUNIT-NEXT:    [[L:%.*]] = load i64, ptr @bytes2, align 4
 ; TUNIT-NEXT:    ret i64 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define i64 @cast_and_load_2(
-; CGSCC-SAME: ) #[[ATTR5]] {
+; CGSCC-LABEL: define {{[^@]+}}@cast_and_load_2
+; CGSCC-SAME: () #[[ATTR4]] {
 ; CGSCC-NEXT:    store i32 42, ptr @bytes2, align 4
 ; CGSCC-NEXT:    [[L:%.*]] = load i64, ptr @bytes2, align 4
 ; CGSCC-NEXT:    ret i64 [[L]]
@@ -2318,33 +2318,33 @@ define i64 @cast_and_load_2() {
 define void @recursive_load_store(i64 %N, i32 %v) {
 ;
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(write)
-; TUNIT-LABEL: define void @recursive_load_store(
-; TUNIT-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
-; TUNIT-NEXT:    br label %[[FOR_COND:.*]]
-; TUNIT:       [[FOR_COND]]:
-; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ]
+; TUNIT-LABEL: define {{[^@]+}}@recursive_load_store
+; TUNIT-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR6:[0-9]+]] {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    br label [[FOR_COND:%.*]]
+; TUNIT:       for.cond:
+; TUNIT-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; TUNIT-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; TUNIT-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
-; TUNIT:       [[FOR_BODY]]:
+; TUNIT-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; TUNIT:       for.body:
 ; TUNIT-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND]]
-; TUNIT:       [[FOR_END]]:
+; TUNIT-NEXT:    br label [[FOR_COND]]
+; TUNIT:       for.end:
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: nofree norecurse nosync nounwind memory(write)
-; CGSCC-LABEL: define void @recursive_load_store(
-; CGSCC-SAME: i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR8:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
-; CGSCC-NEXT:    br label %[[FOR_COND:.*]]
-; CGSCC:       [[FOR_COND]]:
-; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], %[[FOR_BODY:.*]] ], [ 0, %[[ENTRY]] ]
+; CGSCC-LABEL: define {{[^@]+}}@recursive_load_store
+; CGSCC-SAME: (i64 [[N:%.*]], i32 [[V:%.*]]) #[[ATTR7:[0-9]+]] {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    br label [[FOR_COND:%.*]]
+; CGSCC:       for.cond:
+; CGSCC-NEXT:    [[INDVARS_IV:%.*]] = phi i64 [ [[INDVARS_IV_NEXT:%.*]], [[FOR_BODY:%.*]] ], [ 0, [[ENTRY:%.*]] ]
 ; CGSCC-NEXT:    [[EXITCOND:%.*]] = icmp ne i64 [[INDVARS_IV]], [[N]]
-; CGSCC-NEXT:    br i1 [[EXITCOND]], label %[[FOR_BODY]], label %[[FOR_END:.*]]
-; CGSCC:       [[FOR_BODY]]:
+; CGSCC-NEXT:    br i1 [[EXITCOND]], label [[FOR_BODY]], label [[FOR_END:%.*]]
+; CGSCC:       for.body:
 ; CGSCC-NEXT:    [[INDVARS_IV_NEXT]] = add nuw nsw i64 [[INDVARS_IV]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND]]
-; CGSCC:       [[FOR_END]]:
+; CGSCC-NEXT:    br label [[FOR_COND]]
+; CGSCC:       for.end:
 ; CGSCC-NEXT:    ret void
 ;
 entry:
@@ -2369,9 +2369,9 @@ for.end:
 }
 
 define dso_local i32 @round_trip_malloc(i32 %x) {
-; CHECK-LABEL: define dso_local i32 @round_trip_malloc(
-; CHECK-SAME: i32 returned [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc
+; CHECK-SAME: (i32 returned [[X:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    store i32 [[X]], ptr [[CALL_H2S]], align 4
 ; CHECK-NEXT:    ret i32 [[X]]
@@ -2385,8 +2385,8 @@ entry:
 }
 
 define dso_local i32 @round_trip_malloc_constant() {
-; CHECK-LABEL: define dso_local noundef i32 @round_trip_malloc_constant() {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@round_trip_malloc_constant() {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    ret i32 7
 ;
 entry:
@@ -2402,16 +2402,16 @@ declare noalias ptr @malloc(i64) allockind("alloc,uninitialized") allocsize(0) "
 declare void @free(ptr) allockind("free") "alloc-family"="malloc"
 
 define dso_local i32 @conditional_malloc(i32 %x) {
-; CHECK-LABEL: define dso_local i32 @conditional_malloc(
-; CHECK-SAME: i32 returned [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@conditional_malloc
+; CHECK-SAME: (i32 returned [[X:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    store i32 [[X]], ptr [[CALL_H2S]], align 4
-; CHECK-NEXT:    br label %[[IF_END]]
-; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
 entry:
@@ -2429,9 +2429,9 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 define dso_local i32 @round_trip_calloc(i32 %x) {
-; CHECK-LABEL: define dso_local i32 @round_trip_calloc(
-; CHECK-SAME: i32 returned [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc
+; CHECK-SAME: (i32 returned [[X:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    store i32 [[X]], ptr [[CALL_H2S]], align 4
@@ -2445,8 +2445,8 @@ entry:
 }
 
 define dso_local i32 @round_trip_calloc_constant() {
-; CHECK-LABEL: define dso_local noundef i32 @round_trip_calloc_constant() {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@round_trip_calloc_constant() {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    ret i32 11
@@ -2461,17 +2461,17 @@ entry:
 declare noalias ptr @calloc(i64, i64) allockind("alloc,zeroed") allocsize(0, 1) "alloc-family"="malloc"
 
 define dso_local i32 @conditional_calloc(i32 %x) {
-; CHECK-LABEL: define dso_local i32 @conditional_calloc(
-; CHECK-SAME: i32 [[X:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@conditional_calloc
+; CHECK-SAME: (i32 [[X:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
 ; CHECK-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CHECK-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; CHECK:       [[IF_THEN]]:
+; CHECK-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
 ; CHECK-NEXT:    store i32 [[X]], ptr [[CALL_H2S]], align 4
-; CHECK-NEXT:    br label %[[IF_END]]
-; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL_H2S]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP0]]
 ;
@@ -2491,15 +2491,15 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 define dso_local i32 @conditional_calloc_zero(i1 %c) {
-; CHECK-LABEL: define dso_local noundef i32 @conditional_calloc_zero(
-; CHECK-SAME: i1 [[C:%.*]]) {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@conditional_calloc_zero
+; CHECK-SAME: (i1 [[C:%.*]]) {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 4, align 1
 ; CHECK-NEXT:    call void @llvm.memset.p0.i64(ptr [[CALL_H2S]], i8 0, i64 4, i1 false)
-; CHECK-NEXT:    br i1 [[C]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; CHECK:       [[IF_THEN]]:
-; CHECK-NEXT:    br label %[[IF_END]]
-; CHECK:       [[IF_END]]:
+; CHECK-NEXT:    br i1 [[C]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CHECK:       if.then:
+; CHECK-NEXT:    br label [[IF_END]]
+; CHECK:       if.end:
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:
@@ -2517,16 +2517,16 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 define dso_local ptr @malloc_like(i32 %s) {
-; TUNIT-LABEL: define dso_local noalias ptr @malloc_like(
-; TUNIT-SAME: i32 [[S:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@malloc_like
+; TUNIT-SAME: (i32 [[S:%.*]]) {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CONV:%.*]] = sext i32 [[S]] to i64
 ; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR20:[0-9]+]]
 ; TUNIT-NEXT:    ret ptr [[CALL]]
 ;
-; CGSCC-LABEL: define dso_local noalias ptr @malloc_like(
-; CGSCC-SAME: i32 [[S:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@malloc_like
+; CGSCC-SAME: (i32 [[S:%.*]]) {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[CONV:%.*]] = sext i32 [[S]] to i64
 ; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 [[CONV]]) #[[ATTR23:[0-9]+]]
 ; CGSCC-NEXT:    ret ptr [[CALL]]
@@ -2538,18 +2538,18 @@ entry:
 }
 
 define dso_local i32 @round_trip_malloc_like(i32 %x) {
-; TUNIT-LABEL: define dso_local i32 @round_trip_malloc_like(
-; TUNIT-SAME: i32 [[X:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@round_trip_malloc_like
+; TUNIT-SAME: (i32 [[X:%.*]]) {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR20]]
 ; TUNIT-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
 ; TUNIT-NEXT:    call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
 ; TUNIT-NEXT:    ret i32 [[TMP0]]
 ;
-; CGSCC-LABEL: define dso_local i32 @round_trip_malloc_like(
-; CGSCC-SAME: i32 [[X:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@round_trip_malloc_like
+; CGSCC-SAME: (i32 [[X:%.*]]) {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc_like(i32 noundef 4) #[[ATTR23]]
 ; CGSCC-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
@@ -2565,18 +2565,18 @@ entry:
 }
 
 define dso_local i32 @round_trip_unknown_alloc(i32 %x) {
-; TUNIT-LABEL: define dso_local i32 @round_trip_unknown_alloc(
-; TUNIT-SAME: i32 [[X:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
+; TUNIT-SAME: (i32 [[X:%.*]]) {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]]
 ; TUNIT-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
 ; TUNIT-NEXT:    call void @free(ptr noundef nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
 ; TUNIT-NEXT:    ret i32 [[TMP0]]
 ;
-; CGSCC-LABEL: define dso_local i32 @round_trip_unknown_alloc(
-; CGSCC-SAME: i32 [[X:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@round_trip_unknown_alloc
+; CGSCC-SAME: (i32 [[X:%.*]]) {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]]
 ; CGSCC-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
@@ -2594,30 +2594,30 @@ entry:
 declare noalias ptr @unknown_alloc(i32)
 
 define dso_local i32 @conditional_unknown_alloc(i32 %x) {
-; TUNIT-LABEL: define dso_local i32 @conditional_unknown_alloc(
-; TUNIT-SAME: i32 [[X:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@conditional_unknown_alloc
+; TUNIT-SAME: (i32 [[X:%.*]]) {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR20]]
 ; TUNIT-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; TUNIT-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; TUNIT:       [[IF_THEN]]:
+; TUNIT-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; TUNIT:       if.then:
 ; TUNIT-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
-; TUNIT-NEXT:    br label %[[IF_END]]
-; TUNIT:       [[IF_END]]:
+; TUNIT-NEXT:    br label [[IF_END]]
+; TUNIT:       if.end:
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
 ; TUNIT-NEXT:    call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR20]]
 ; TUNIT-NEXT:    ret i32 [[TMP0]]
 ;
-; CGSCC-LABEL: define dso_local i32 @conditional_unknown_alloc(
-; CGSCC-SAME: i32 [[X:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@conditional_unknown_alloc
+; CGSCC-SAME: (i32 [[X:%.*]]) {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[CALL:%.*]] = call noalias ptr @unknown_alloc(i32 noundef 4) #[[ATTR23]]
 ; CGSCC-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
-; CGSCC-NEXT:    br i1 [[TOBOOL]], label %[[IF_END:.*]], label %[[IF_THEN:.*]]
-; CGSCC:       [[IF_THEN]]:
+; CGSCC-NEXT:    br i1 [[TOBOOL]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
+; CGSCC:       if.then:
 ; CGSCC-NEXT:    store i32 [[X]], ptr [[CALL]], align 4
-; CGSCC-NEXT:    br label %[[IF_END]]
-; CGSCC:       [[IF_END]]:
+; CGSCC-NEXT:    br label [[IF_END]]
+; CGSCC:       if.end:
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[CALL]], align 4
 ; CGSCC-NEXT:    call void @free(ptr nonnull align 4 dereferenceable(4) [[CALL]]) #[[ATTR23]]
 ; CGSCC-NEXT:    ret i32 [[TMP0]]
@@ -2643,28 +2643,28 @@ if.end:                                           ; preds = %if.then, %entry
 
 ; We mark %dst as writeonly and %src as readonly, that is (for now) all we can expect.
 define dso_local void @test_nested_memory(ptr %dst, ptr %src) {
-; TUNIT-LABEL: define dso_local void @test_nested_memory(
-; TUNIT-SAME: ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@test_nested_memory
+; TUNIT-SAME: (ptr nofree writeonly captures(none) [[DST:%.*]], ptr nofree readonly captures(none) [[SRC:%.*]]) {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[CALL_H2S:%.*]] = alloca i8, i64 24, align 1
-; TUNIT-NEXT:    [[LOCAL1:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL1]], i64 0, i32 2
-; TUNIT-NEXT:    store ptr @global, ptr [[NEWGEP]], align 8
+; TUNIT-NEXT:    [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
+; TUNIT-NEXT:    [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
+; TUNIT-NEXT:    store ptr @global, ptr [[INNER]], align 8
 ; TUNIT-NEXT:    store ptr [[DST]], ptr [[CALL_H2S]], align 8
 ; TUNIT-NEXT:    [[SRC2:%.*]] = getelementptr inbounds i8, ptr [[CALL_H2S]], i64 8
 ; TUNIT-NEXT:    store ptr [[SRC]], ptr [[SRC2]], align 8
 ; TUNIT-NEXT:    store ptr [[CALL_H2S]], ptr getelementptr inbounds ([[STRUCT_STY]], ptr @global, i64 0, i32 2), align 8
-; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL1]], align 8
-; TUNIT-NEXT:    [[LOCAL1_B8:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 8
-; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL1_B8]], align 8
-; TUNIT-NEXT:    [[LOCAL1_B16:%.*]] = getelementptr i8, ptr [[LOCAL1]], i64 16
-; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL1_B16]], align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[LOCAL]], align 8
+; TUNIT-NEXT:    [[LOCAL_B8:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 8
+; TUNIT-NEXT:    [[TMP1:%.*]] = load ptr, ptr [[LOCAL_B8]], align 8
+; TUNIT-NEXT:    [[LOCAL_B16:%.*]] = getelementptr i8, ptr [[LOCAL]], i64 16
+; TUNIT-NEXT:    [[TMP2:%.*]] = load ptr, ptr [[LOCAL_B16]], align 8
 ; TUNIT-NEXT:    call fastcc void @nested_memory_callee(ptr [[TMP0]], ptr [[TMP1]], ptr [[TMP2]]) #[[ATTR21:[0-9]+]]
 ; TUNIT-NEXT:    ret void
 ;
-; CGSCC-LABEL: define dso_local void @test_nested_memory(
-; CGSCC-SAME: ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@test_nested_memory
+; CGSCC-SAME: (ptr nofree [[DST:%.*]], ptr nofree [[SRC:%.*]]) {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[LOCAL:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
 ; CGSCC-NEXT:    [[INNER:%.*]] = getelementptr inbounds [[STRUCT_STY]], ptr [[LOCAL]], i64 0, i32 2
 ; CGSCC-NEXT:    [[CALL:%.*]] = call noalias dereferenceable_or_null(24) ptr @malloc(i64 noundef 24)
@@ -2690,9 +2690,9 @@ entry:
 
 define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nofree norecurse nounwind uwtable {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
-; TUNIT-LABEL: define internal fastcc void @nested_memory_callee(
-; TUNIT-SAME: ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@nested_memory_callee
+; TUNIT-SAME: (ptr [[TMP0:%.*]], ptr [[TMP1:%.*]], ptr [[TMP2:%.*]]) #[[ATTR10:[0-9]+]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
 ; TUNIT-NEXT:    store ptr [[TMP0]], ptr [[S_PRIV]], align 8
 ; TUNIT-NEXT:    [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8
@@ -2712,9 +2712,9 @@ define internal fastcc void @nested_memory_callee(ptr nocapture readonly %S) nof
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn uwtable
-; CGSCC-LABEL: define internal fastcc void @nested_memory_callee(
-; CGSCC-SAME: ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR12:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@nested_memory_callee
+; CGSCC-SAME: (ptr nofree [[TMP0:%.*]], ptr nofree [[TMP1:%.*]], ptr nofree [[TMP2:%.*]]) #[[ATTR11:[0-9]+]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[S_PRIV:%.*]] = alloca [[STRUCT_STY:%.*]], align 8
 ; CGSCC-NEXT:    store ptr [[TMP0]], ptr [[S_PRIV]], align 8
 ; CGSCC-NEXT:    [[S_PRIV_B8:%.*]] = getelementptr i8, ptr [[S_PRIV]], i64 8
@@ -2751,34 +2751,34 @@ entry:
 ; varying and the accesses thus not "exact". This used to simplify %cmp12 to true.
 define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i32 %idx) #0 {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define hidden void @no_propagation_of_unknown_index_access(
-; TUNIT-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] {
-; TUNIT-NEXT:  [[ENTRY:.*]]:
+; TUNIT-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access
+; TUNIT-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR1]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[BUF:%.*]] = alloca [128 x i32], align 16
 ; TUNIT-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]]
-; TUNIT-NEXT:    br label %[[FOR_COND:.*]]
-; TUNIT:       [[FOR_COND]]:
-; TUNIT-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; TUNIT-NEXT:    br label [[FOR_COND:%.*]]
+; TUNIT:       for.cond:
+; TUNIT-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
 ; TUNIT-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 128
-; TUNIT-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP]]:
-; TUNIT-NEXT:    br label %[[FOR_COND4:.*]]
-; TUNIT:       [[FOR_BODY]]:
+; TUNIT-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; TUNIT:       for.cond.cleanup:
+; TUNIT-NEXT:    br label [[FOR_COND4:%.*]]
+; TUNIT:       for.body:
 ; TUNIT-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
 ; TUNIT-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]]
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]]
 ; TUNIT-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]]
 ; TUNIT-NEXT:    store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4
 ; TUNIT-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]]
-; TUNIT:       [[FOR_COND4]]:
-; TUNIT-NEXT:    [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ]
+; TUNIT-NEXT:    br label [[FOR_COND]], !llvm.loop [[TBAA10]]
+; TUNIT:       for.cond4:
+; TUNIT-NEXT:    [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ]
 ; TUNIT-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128
-; TUNIT-NEXT:    br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]]
-; TUNIT:       [[FOR_COND_CLEANUP6]]:
+; TUNIT-NEXT:    br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]]
+; TUNIT:       for.cond.cleanup6:
 ; TUNIT-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR17]]
 ; TUNIT-NEXT:    ret void
-; TUNIT:       [[FOR_BODY7]]:
+; TUNIT:       for.body7:
 ; TUNIT-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64
 ; TUNIT-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]]
 ; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
@@ -2790,37 +2790,37 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3
 ; TUNIT-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]]
 ; TUNIT-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4
 ; TUNIT-NEXT:    [[INC16]] = add nsw i32 [[I3_0]], 1
-; TUNIT-NEXT:    br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]]
+; TUNIT-NEXT:    br label [[FOR_COND4]], !llvm.loop [[TBAA12]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define hidden void @no_propagation_of_unknown_index_access(
-; CGSCC-SAME: ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR13:[0-9]+]] {
-; CGSCC-NEXT:  [[ENTRY:.*]]:
+; CGSCC-LABEL: define {{[^@]+}}@no_propagation_of_unknown_index_access
+; CGSCC-SAME: (ptr nofree readonly captures(none) [[IN:%.*]], ptr nofree writeonly captures(none) [[OUT:%.*]], i32 [[IDX:%.*]]) #[[ATTR12:[0-9]+]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[BUF:%.*]] = alloca [128 x i32], align 16
 ; CGSCC-NEXT:    call void @llvm.lifetime.start.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]]
-; CGSCC-NEXT:    br label %[[FOR_COND:.*]]
-; CGSCC:       [[FOR_COND]]:
-; CGSCC-NEXT:    [[I_0:%.*]] = phi i32 [ 0, %[[ENTRY]] ], [ [[INC:%.*]], %[[FOR_BODY:.*]] ]
+; CGSCC-NEXT:    br label [[FOR_COND:%.*]]
+; CGSCC:       for.cond:
+; CGSCC-NEXT:    [[I_0:%.*]] = phi i32 [ 0, [[ENTRY:%.*]] ], [ [[INC:%.*]], [[FOR_BODY:%.*]] ]
 ; CGSCC-NEXT:    [[CMP:%.*]] = icmp slt i32 [[I_0]], 128
-; CGSCC-NEXT:    br i1 [[CMP]], label %[[FOR_BODY]], label %[[FOR_COND_CLEANUP:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP]]:
-; CGSCC-NEXT:    br label %[[FOR_COND4:.*]]
-; CGSCC:       [[FOR_BODY]]:
+; CGSCC-NEXT:    br i1 [[CMP]], label [[FOR_BODY]], label [[FOR_COND_CLEANUP:%.*]]
+; CGSCC:       for.cond.cleanup:
+; CGSCC-NEXT:    br label [[FOR_COND4:%.*]]
+; CGSCC:       for.body:
 ; CGSCC-NEXT:    [[IDXPROM:%.*]] = sext i32 [[I_0]] to i64
 ; CGSCC-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds i32, ptr [[IN]], i64 [[IDXPROM]]
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !invariant.load [[META32]]
 ; CGSCC-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM]]
 ; CGSCC-NEXT:    store i32 [[TMP0]], ptr [[ARRAYIDX2]], align 4
 ; CGSCC-NEXT:    [[INC]] = add nsw i32 [[I_0]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND]], !llvm.loop [[FLOAT_TBAA10]]
-; CGSCC:       [[FOR_COND4]]:
-; CGSCC-NEXT:    [[I3_0:%.*]] = phi i32 [ 0, %[[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], %[[FOR_BODY7:.*]] ]
+; CGSCC-NEXT:    br label [[FOR_COND]], !llvm.loop [[TBAA10]]
+; CGSCC:       for.cond4:
+; CGSCC-NEXT:    [[I3_0:%.*]] = phi i32 [ 0, [[FOR_COND_CLEANUP]] ], [ [[INC16:%.*]], [[FOR_BODY7:%.*]] ]
 ; CGSCC-NEXT:    [[CMP5:%.*]] = icmp slt i32 [[I3_0]], 128
-; CGSCC-NEXT:    br i1 [[CMP5]], label %[[FOR_BODY7]], label %[[FOR_COND_CLEANUP6:.*]]
-; CGSCC:       [[FOR_COND_CLEANUP6]]:
+; CGSCC-NEXT:    br i1 [[CMP5]], label [[FOR_BODY7]], label [[FOR_COND_CLEANUP6:%.*]]
+; CGSCC:       for.cond.cleanup6:
 ; CGSCC-NEXT:    call void @llvm.lifetime.end.p0(ptr noalias nofree noundef nonnull align 16 captures(none) dereferenceable(512) [[BUF]]) #[[ATTR20]]
 ; CGSCC-NEXT:    ret void
-; CGSCC:       [[FOR_BODY7]]:
+; CGSCC:       for.body7:
 ; CGSCC-NEXT:    [[IDXPROM8:%.*]] = sext i32 [[I3_0]] to i64
 ; CGSCC-NEXT:    [[ARRAYIDX9:%.*]] = getelementptr inbounds [128 x i32], ptr [[BUF]], i64 0, i64 [[IDXPROM8]]
 ; CGSCC-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX9]], align 4
@@ -2832,11 +2832,11 @@ define hidden void @no_propagation_of_unknown_index_access(ptr %in, ptr %out, i3
 ; CGSCC-NEXT:    [[ARRAYIDX14:%.*]] = getelementptr inbounds i32, ptr [[OUT]], i64 [[IDXPROM8]]
 ; CGSCC-NEXT:    store i32 [[CONV]], ptr [[ARRAYIDX14]], align 4
 ; CGSCC-NEXT:    [[INC16]] = add nsw i32 [[I3_0]], 1
-; CGSCC-NEXT:    br label %[[FOR_COND4]], !llvm.loop [[INT_TBAA12]]
+; CGSCC-NEXT:    br label [[FOR_COND4]], !llvm.loop [[TBAA12]]
 ;
 entry:
   %buf = alloca [128 x i32], align 16
-  call void @llvm.lifetime.start.p0(ptr %buf) #2
+  call void @llvm.lifetime.start.p0(i64 512, ptr %buf) #2
   br label %for.cond
 
 for.cond:                                         ; preds = %for.body, %entry
@@ -2862,7 +2862,7 @@ for.cond4:                                        ; preds = %for.body7, %for.con
   br i1 %cmp5, label %for.body7, label %for.cond.cleanup6
 
 for.cond.cleanup6:                                ; preds = %for.cond4
-  call void @llvm.lifetime.end.p0(ptr %buf) #2
+  call void @llvm.lifetime.end.p0(i64 512, ptr %buf) #2
   ret void
 
 for.body7:                                        ; preds = %for.cond4
@@ -2883,29 +2883,29 @@ for.body7:                                        ; preds = %for.cond4
 ; Ensure we do not return true.
 define internal i1 @alloca_non_unique(ptr %p, i32 %in, i1 %c) {
 ; TUNIT: Function Attrs: nofree nosync nounwind memory(argmem: readwrite)
-; TUNIT-LABEL: define internal i1 @alloca_non_unique(
-; TUNIT-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR12:[0-9]+]] {
+; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique
+; TUNIT-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR11:[0-9]+]] {
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    store i32 [[IN]], ptr [[A]], align 4
-; TUNIT-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
-; TUNIT:       [[T]]:
-; TUNIT-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR14:[0-9]+]]
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       t:
+; TUNIT-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR13:[0-9]+]]
 ; TUNIT-NEXT:    ret i1 [[R]]
-; TUNIT:       [[F]]:
+; TUNIT:       f:
 ; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]]
 ; TUNIT-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
 ; TUNIT-NEXT:    ret i1 [[CMP]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind memory(argmem: readwrite)
-; CGSCC-LABEL: define internal i1 @alloca_non_unique(
-; CGSCC-SAME: ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] {
+; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique
+; CGSCC-SAME: (ptr noalias nofree readonly align 4 captures(none) [[P:%.*]], i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR13:[0-9]+]] {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    store i32 [[IN]], ptr [[A]], align 4
-; CGSCC-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
-; CGSCC:       [[T]]:
-; CGSCC-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR17:[0-9]+]]
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       t:
+; CGSCC-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[A]], i32 noundef 42, i1 noundef false) #[[ATTR16:[0-9]+]]
 ; CGSCC-NEXT:    ret i1 [[R]]
-; CGSCC:       [[F]]:
+; CGSCC:       f:
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr [[P]], align 4, !invariant.load [[META32]]
 ; CGSCC-NEXT:    [[CMP:%.*]] = icmp eq i32 [[IN]], [[L]]
 ; CGSCC-NEXT:    ret i1 [[CMP]]
@@ -2925,14 +2925,14 @@ f:
 ; Ensure we do not return true.
 define i1 @alloca_non_unique_caller(i32 %in, i1 %c) {
 ; TUNIT: Function Attrs: nofree norecurse nosync nounwind memory(none)
-; TUNIT-LABEL: define i1 @alloca_non_unique_caller(
-; TUNIT-SAME: i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR13:[0-9]+]] {
-; TUNIT-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR14]]
+; TUNIT-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; TUNIT-SAME: (i32 [[IN:%.*]], i1 [[C:%.*]]) #[[ATTR12:[0-9]+]] {
+; TUNIT-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR13]]
 ; TUNIT-NEXT:    ret i1 [[R]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind memory(none)
-; CGSCC-LABEL: define i1 @alloca_non_unique_caller(
-; CGSCC-SAME: i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR15:[0-9]+]] {
+; CGSCC-LABEL: define {{[^@]+}}@alloca_non_unique_caller
+; CGSCC-SAME: (i32 [[IN:%.*]], i1 noundef [[C:%.*]]) #[[ATTR14:[0-9]+]] {
 ; CGSCC-NEXT:    [[R:%.*]] = call i1 @alloca_non_unique(ptr nofree undef, i32 [[IN]], i1 noundef [[C]]) #[[ATTR25:[0-9]+]]
 ; CGSCC-NEXT:    ret i1 [[R]]
 ;
@@ -2943,8 +2943,8 @@ define i1 @alloca_non_unique_caller(i32 %in, i1 %c) {
 ; Ensure we do not return %bad or %l, but %sel
 define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define i32 @scope_value_traversal(
-; TUNIT-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR4]] {
+; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal
+; TUNIT-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR3]] {
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    store i32 [[BAD]], ptr [[A]], align 4
 ; TUNIT-NEXT:    call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR22:[0-9]+]]
@@ -2953,8 +2953,8 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
 ; TUNIT-NEXT:    ret i32 [[SEL]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define i32 @scope_value_traversal(
-; CGSCC-SAME: i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR16:[0-9]+]] {
+; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal
+; CGSCC-SAME: (i32 [[BAD:%.*]], i1 [[C:%.*]], i1 [[C2:%.*]]) #[[ATTR15:[0-9]+]] {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    store i32 [[BAD]], ptr [[A]], align 4
 ; CGSCC-NEXT:    call void @scope_value_traversal_helper(ptr noalias nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A]], i1 [[C2]]) #[[ATTR26:[0-9]+]]
@@ -2972,16 +2972,16 @@ define i32 @scope_value_traversal(i32 %bad, i1 %c, i1 %c2) {
 
 define void @scope_value_traversal_helper(ptr %a, i1 %c) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; TUNIT-LABEL: define void @scope_value_traversal_helper(
-; TUNIT-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] {
+; TUNIT-LABEL: define {{[^@]+}}@scope_value_traversal_helper
+; TUNIT-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR1]] {
 ; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr [[A]], align 4
 ; TUNIT-NEXT:    [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42
 ; TUNIT-NEXT:    store i32 [[SEL]], ptr [[A]], align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite)
-; CGSCC-LABEL: define void @scope_value_traversal_helper(
-; CGSCC-SAME: ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR13]] {
+; CGSCC-LABEL: define {{[^@]+}}@scope_value_traversal_helper
+; CGSCC-SAME: (ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]], i1 [[C:%.*]]) #[[ATTR12]] {
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr [[A]], align 4
 ; CGSCC-NEXT:    [[SEL:%.*]] = select i1 [[C]], i32 [[L]], i32 42
 ; CGSCC-NEXT:    store i32 [[SEL]], ptr [[A]], align 4
@@ -2995,9 +2995,9 @@ define void @scope_value_traversal_helper(ptr %a, i1 %c) {
 
 define i8 @gep_index_from_binary_operator(i1 %cnd1, i1 %cnd2) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define noundef i8 @gep_index_from_binary_operator(
-; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@gep_index_from_binary_operator
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CHECK-NEXT:    [[GEP_FIXED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
 ; CHECK-NEXT:    ret i8 100
@@ -3014,9 +3014,9 @@ entry:
 
 define i8 @gep_index_from_memory(i1 %cnd1, i1 %cnd2) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define i8 @gep_index_from_memory(
-; CHECK-SAME: i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@gep_index_from_memory
+; CHECK-SAME: (i1 [[CND1:%.*]], i1 [[CND2:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[BYTES:%.*]] = alloca [1024 x i8], align 16
 ; CHECK-NEXT:    [[GEP_LOADED:%.*]] = getelementptr inbounds [1024 x i8], ptr [[BYTES]], i64 0, i64 12
 ; CHECK-NEXT:    ret i8 100
@@ -3040,27 +3040,27 @@ entry:
 ; Ensure this is not flattened to return 3
 define i32 @a(i1 %c) {
 ; TUNIT: Function Attrs: nofree nosync nounwind
-; TUNIT-LABEL: define noundef i32 @a(
-; TUNIT-SAME: i1 noundef [[C:%.*]]) #[[ATTR14]] {
+; TUNIT-LABEL: define {{[^@]+}}@a
+; TUNIT-SAME: (i1 noundef [[C:%.*]]) #[[ATTR13]] {
 ; TUNIT-NEXT:    store i32 3, ptr @G, align 4
-; TUNIT-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
-; TUNIT:       [[T]]:
-; TUNIT-NEXT:    [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR14]]
-; TUNIT-NEXT:    br label %[[F]]
-; TUNIT:       [[F]]:
+; TUNIT-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; TUNIT:       t:
+; TUNIT-NEXT:    [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR13]]
+; TUNIT-NEXT:    br label [[F]]
+; TUNIT:       f:
 ; TUNIT-NEXT:    [[R:%.*]] = load i32, ptr @G, align 4
 ; TUNIT-NEXT:    store i32 5, ptr @G, align 4
 ; TUNIT-NEXT:    ret i32 [[R]]
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind
-; CGSCC-LABEL: define noundef i32 @a(
-; CGSCC-SAME: i1 noundef [[C:%.*]]) #[[ATTR17]] {
+; CGSCC-LABEL: define {{[^@]+}}@a
+; CGSCC-SAME: (i1 noundef [[C:%.*]]) #[[ATTR16]] {
 ; CGSCC-NEXT:    store i32 3, ptr @G, align 4
-; CGSCC-NEXT:    br i1 [[C]], label %[[T:.*]], label %[[F:.*]]
-; CGSCC:       [[T]]:
-; CGSCC-NEXT:    [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR17]]
-; CGSCC-NEXT:    br label %[[F]]
-; CGSCC:       [[F]]:
+; CGSCC-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
+; CGSCC:       t:
+; CGSCC-NEXT:    [[REC:%.*]] = call i32 @a(i1 noundef false) #[[ATTR16]]
+; CGSCC-NEXT:    br label [[F]]
+; CGSCC:       f:
 ; CGSCC-NEXT:    [[R:%.*]] = load i32, ptr @G, align 4
 ; CGSCC-NEXT:    store i32 5, ptr @G, align 4
 ; CGSCC-NEXT:    ret i32 [[R]]
@@ -3081,22 +3081,22 @@ f:
 @GC = internal global i32 undef, align 4
 define void @atomicrmw(ptr %p, i32 %i, i1 %cnd) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; TUNIT-LABEL: define void @atomicrmw(
-; TUNIT-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR3]] {
-; TUNIT-NEXT:    br i1 [[CND]], label %[[T:.*]], label %[[M:.*]]
-; TUNIT:       [[T]]:
-; TUNIT-NEXT:    br label %[[M]]
-; TUNIT:       [[M]]:
+; TUNIT-LABEL: define {{[^@]+}}@atomicrmw
+; TUNIT-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR2]] {
+; TUNIT-NEXT:    br i1 [[CND]], label [[T:%.*]], label [[M:%.*]]
+; TUNIT:       t:
+; TUNIT-NEXT:    br label [[M]]
+; TUNIT:       m:
 ; TUNIT-NEXT:    [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4
 ; TUNIT-NEXT:    ret void
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn
-; CGSCC-LABEL: define void @atomicrmw(
-; CGSCC-SAME: ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR5]] {
-; CGSCC-NEXT:    br i1 [[CND]], label %[[T:.*]], label %[[M:.*]]
-; CGSCC:       [[T]]:
-; CGSCC-NEXT:    br label %[[M]]
-; CGSCC:       [[M]]:
+; CGSCC-LABEL: define {{[^@]+}}@atomicrmw
+; CGSCC-SAME: (ptr nofree [[P:%.*]], i32 [[I:%.*]], i1 noundef [[CND:%.*]]) #[[ATTR4]] {
+; CGSCC-NEXT:    br i1 [[CND]], label [[T:%.*]], label [[M:%.*]]
+; CGSCC:       t:
+; CGSCC-NEXT:    br label [[M]]
+; CGSCC:       m:
 ; CGSCC-NEXT:    [[ARMW:%.*]] = atomicrmw add ptr @GC, i32 [[I]] monotonic, align 4
 ; CGSCC-NEXT:    ret void
 ;
@@ -3123,24 +3123,24 @@ m:
 
 define i32 @recSimplify(i32 %v, i1 %cond) {
 ; TUNIT: Function Attrs: nofree nosync nounwind
-; TUNIT-LABEL: define i32 @recSimplify(
-; TUNIT-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR14]] {
-; TUNIT-NEXT:    br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]]
-; TUNIT:       [[REC]]:
-; TUNIT-NEXT:    [[RV:%.*]] = call i32 @recSimplify(i32 undef, i1 noundef false) #[[ATTR14]]
+; TUNIT-LABEL: define {{[^@]+}}@recSimplify
+; TUNIT-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR13]] {
+; TUNIT-NEXT:    br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]]
+; TUNIT:       rec:
+; TUNIT-NEXT:    [[RV:%.*]] = call i32 @recSimplify(i32 undef, i1 noundef false) #[[ATTR13]]
 ; TUNIT-NEXT:    ret i32 1
-; TUNIT:       [[COMP]]:
+; TUNIT:       comp:
 ; TUNIT-NEXT:    store i32 1, ptr @GRS2, align 4
 ; TUNIT-NEXT:    ret i32 1
 ;
 ; CGSCC: Function Attrs: nofree nosync nounwind
-; CGSCC-LABEL: define i32 @recSimplify(
-; CGSCC-SAME: i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR17]] {
-; CGSCC-NEXT:    br i1 [[COND]], label %[[REC:.*]], label %[[COMP:.*]]
-; CGSCC:       [[REC]]:
-; CGSCC-NEXT:    [[RV:%.*]] = call i32 @recSimplify(i32 [[V]], i1 noundef false) #[[ATTR17]]
+; CGSCC-LABEL: define {{[^@]+}}@recSimplify
+; CGSCC-SAME: (i32 [[V:%.*]], i1 noundef [[COND:%.*]]) #[[ATTR16]] {
+; CGSCC-NEXT:    br i1 [[COND]], label [[REC:%.*]], label [[COMP:%.*]]
+; CGSCC:       rec:
+; CGSCC-NEXT:    [[RV:%.*]] = call i32 @recSimplify(i32 [[V]], i1 noundef false) #[[ATTR16]]
 ; CGSCC-NEXT:    ret i32 [[RV]]
-; CGSCC:       [[COMP]]:
+; CGSCC:       comp:
 ; CGSCC-NEXT:    store i32 [[V]], ptr @GRS, align 4
 ; CGSCC-NEXT:    store i32 1, ptr @GRS2, align 4
 ; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr @GRS, align 4
@@ -3167,8 +3167,8 @@ comp:
 
 define internal i32 @recSimplify2() {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(read)
-; CGSCC-LABEL: define internal i32 @recSimplify2(
-; CGSCC-SAME: ) #[[ATTR7]] {
+; CGSCC-LABEL: define {{[^@]+}}@recSimplify2
+; CGSCC-SAME: () #[[ATTR6]] {
 ; CGSCC-NEXT:    [[R:%.*]] = load i32, ptr @GRS, align 4
 ; CGSCC-NEXT:    ret i32 [[R]]
 ;
@@ -3179,18 +3179,18 @@ define internal i32 @recSimplify2() {
 ; Verify we do not return 10.
 define i32 @may_access_after_return(i32 noundef %N, i32 noundef %M) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define noundef i32 @may_access_after_return(
-; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return
+; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]]
 ; TUNIT-NEXT:    ret i32 8
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define i32 @may_access_after_return(
-; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return
+; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR15]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]]
@@ -3213,9 +3213,9 @@ entry:
 
 define internal void @write_both(ptr noundef %Q, ptr noundef %R) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write)
-; CHECK-LABEL: define internal void @write_both(
-; CHECK-SAME: ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@write_both
+; CHECK-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[Q:%.*]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[R:%.*]]) #[[ATTR0]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    store i32 3, ptr [[Q]], align 4
 ; CHECK-NEXT:    store i32 5, ptr [[R]], align 4
 ; CHECK-NEXT:    ret void
@@ -3228,9 +3228,9 @@ entry:
 
 define internal ptr @passthrough(ptr noundef %P) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough(
-; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@passthrough
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    ret ptr [[P]]
 ;
 entry:
@@ -3240,9 +3240,9 @@ entry:
 ; Verify we do not return 10.
 define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define noundef i32 @may_access_after_return_choice(
-; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_choice
+; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[B]]) #[[ATTR23:[0-9]+]]
@@ -3254,9 +3254,9 @@ define i32 @may_access_after_return_choice(i32 noundef %N, i32 noundef %M, i1 %c
 ; TUNIT-NEXT:    ret i32 [[ADD]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn
-; CGSCC-LABEL: define i32 @may_access_after_return_choice(
-; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR3]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_choice
+; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]], i1 [[C:%.*]]) #[[ATTR2]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[CALL:%.*]] = call nonnull align 4 dereferenceable(4) ptr @passthrough_choice(i1 [[C]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) [[B]]) #[[ATTR28:[0-9]+]]
@@ -3281,9 +3281,9 @@ entry:
 
 define internal ptr @passthrough_choice(i1 %c, ptr noundef %P, ptr noundef %Q) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_choice(
-; CHECK-SAME: i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
-; CHECK-NEXT:  [[ENTRY:.*:]]
+; CHECK-LABEL: define {{[^@]+}}@passthrough_choice
+; CHECK-SAME: (i1 [[C:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], ptr noalias nofree noundef nonnull readnone align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR3]] {
+; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[R:%.*]] = select i1 [[C]], ptr [[P]], ptr [[Q]]
 ; CHECK-NEXT:    ret ptr [[R]]
 ;
@@ -3295,18 +3295,18 @@ entry:
 ; Verify we do not return 10.
 define i32 @may_access_after_return_no_choice1(i32 noundef %N, i32 noundef %M) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice1(
-; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1
+; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR18]]
 ; TUNIT-NEXT:    ret i32 8
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define i32 @may_access_after_return_no_choice1(
-; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice1
+; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR15]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]]) #[[ATTR21]]
@@ -3330,18 +3330,18 @@ entry:
 ; Verify we do not return 10.
 define i32 @may_access_after_return_no_choice2(i32 noundef %N, i32 noundef %M) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; TUNIT-LABEL: define noundef i32 @may_access_after_return_no_choice2(
-; TUNIT-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR4]] {
-; TUNIT-NEXT:  [[ENTRY:.*:]]
+; TUNIT-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2
+; TUNIT-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR3]] {
+; TUNIT-NEXT:  entry:
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; TUNIT-NEXT:    call void @write_both(ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR18]]
 ; TUNIT-NEXT:    ret i32 8
 ;
 ; CGSCC: Function Attrs: mustprogress nofree nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define i32 @may_access_after_return_no_choice2(
-; CGSCC-SAME: i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR16]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@may_access_after_return_no_choice2
+; CGSCC-SAME: (i32 noundef [[N:%.*]], i32 noundef [[M:%.*]]) #[[ATTR15]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    call void @write_both(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[B]], ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[A]]) #[[ATTR21]]
@@ -3364,9 +3364,9 @@ entry:
 
 define internal ptr @passthrough_no_choice_true(i1 %c, ptr noundef %P, ptr noundef %Q) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_true(
-; CGSCC-SAME: ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_true
+; CGSCC-SAME: (ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[P:%.*]], i32 [[TMP0:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[Q_PRIV:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    store i32 [[TMP0]], ptr [[Q_PRIV]], align 4
 ; CGSCC-NEXT:    ret ptr [[P]]
@@ -3377,9 +3377,9 @@ entry:
 }
 define internal ptr @passthrough_no_choice_false(i1 %c, ptr noundef %P, ptr noundef %Q) {
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CGSCC-LABEL: define internal noundef nonnull align 4 dereferenceable(4) ptr @passthrough_no_choice_false(
-; CGSCC-SAME: i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR4]] {
-; CGSCC-NEXT:  [[ENTRY:.*:]]
+; CGSCC-LABEL: define {{[^@]+}}@passthrough_no_choice_false
+; CGSCC-SAME: (i32 [[TMP0:%.*]], ptr noalias nofree noundef nonnull readnone returned align 4 dereferenceable(4) "no-capture-maybe-returned" [[Q:%.*]]) #[[ATTR3]] {
+; CGSCC-NEXT:  entry:
 ; CGSCC-NEXT:    [[P_PRIV:%.*]] = alloca i32, align 4
 ; CGSCC-NEXT:    store i32 [[TMP0]], ptr [[P_PRIV]], align 4
 ; CGSCC-NEXT:    ret ptr [[Q]]
@@ -3391,8 +3391,8 @@ entry:
 
 define ptr @move2(ptr %p) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define ptr @move2(
-; CHECK-SAME: ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@move2
+; CHECK-SAME: (ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:    [[G:%.*]] = getelementptr i8, ptr [[P]], i32 2
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
@@ -3401,8 +3401,8 @@ define ptr @move2(ptr %p) {
 }
 define internal ptr @move4(ptr %p) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define internal ptr @move4(
-; CHECK-SAME: ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@move4
+; CHECK-SAME: (ptr noalias nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:    [[G:%.*]] = getelementptr i8, ptr [[P]], i32 4
 ; CHECK-NEXT:    ret ptr [[G]]
 ;
@@ -3412,20 +3412,20 @@ define internal ptr @move4(ptr %p) {
 
 define ptr @move246(i32 %i, ptr %p) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
-; CHECK-LABEL: define ptr @move246(
-; CHECK-SAME: i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR4]] {
+; CHECK-LABEL: define {{[^@]+}}@move246
+; CHECK-SAME: (i32 [[I:%.*]], ptr nofree readnone "no-capture-maybe-returned" [[P:%.*]]) #[[ATTR3]] {
 ; CHECK-NEXT:    [[C0:%.*]] = icmp eq i32 [[I]], 0
-; CHECK-NEXT:    br i1 [[C0]], label %[[BG2:.*]], label %[[BG46:.*]]
-; CHECK:       [[BG2]]:
+; CHECK-NEXT:    br i1 [[C0]], label [[BG2:%.*]], label [[BG46:%.*]]
+; CHECK:       bg2:
 ; CHECK-NEXT:    [[G2:%.*]] = getelementptr i8, ptr [[P]], i32 2
 ; CHECK-NEXT:    ret ptr [[G2]]
-; CHECK:       [[BG46]]:
+; CHECK:       bg46:
 ; CHECK-NEXT:    [[C1:%.*]] = icmp eq i32 [[I]], 1
-; CHECK-NEXT:    br i1 [[C1]], label %[[BG4:.*]], label %[[BG6:.*]]
-; CHECK:       [[BG4]]:
+; CHECK-NEXT:    br i1 [[C1]], label [[BG4:%.*]], label [[BG6:%.*]]
+; CHECK:       bg4:
 ; CHECK-NEXT:    [[G4:%.*]] = getelementptr i8, ptr [[P]], i32 4
 ; CHECK-NEXT:    ret ptr [[G4]]
-; CHECK:       [[BG6]]:
+; CHECK:       bg6:
 ; CHECK-NEXT:    [[G6:%.*]] = getelementptr i8, ptr [[P]], i32 6
 ; CHECK-NEXT:    ret ptr [[G6]]
 ;
@@ -3448,7 +3448,7 @@ bg6:
 declare void @use3i8(i8, i8, i8)
 
 define void @returnedPtrAccesses() {
-; TUNIT-LABEL: define void @returnedPtrAccesses() {
+; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccesses() {
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[A2:%.*]] = call ptr @move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
 ; TUNIT-NEXT:    [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
@@ -3459,7 +3459,7 @@ define void @returnedPtrAccesses() {
 ; TUNIT-NEXT:    call void @use3i8(i8 2, i8 4, i8 6)
 ; TUNIT-NEXT:    ret void
 ;
-; CGSCC-LABEL: define void @returnedPtrAccesses() {
+; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccesses() {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; CGSCC-NEXT:    [[A2:%.*]] = call nonnull dereferenceable(1) ptr @move2(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
 ; CGSCC-NEXT:    [[A4:%.*]] = call ptr @move4(ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
@@ -3494,16 +3494,16 @@ define void @returnedPtrAccesses() {
 }
 
 define void @returnedPtrAccessesMultiple(i32 %i) {
-; TUNIT-LABEL: define void @returnedPtrAccessesMultiple(
-; TUNIT-SAME: i32 [[I:%.*]]) {
+; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple
+; TUNIT-SAME: (i32 [[I:%.*]]) {
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) "no-capture-maybe-returned" [[A]]) #[[ATTR23]]
 ; TUNIT-NEXT:    store i8 2, ptr [[AP]], align 1
 ; TUNIT-NEXT:    call void @use3i8(i8 2, i8 2, i8 2)
 ; TUNIT-NEXT:    ret void
 ;
-; CGSCC-LABEL: define void @returnedPtrAccessesMultiple(
-; CGSCC-SAME: i32 [[I:%.*]]) {
+; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple
+; CGSCC-SAME: (i32 [[I:%.*]]) {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; CGSCC-NEXT:    [[AP:%.*]] = call ptr @move246(i32 [[I]], ptr noalias nofree noundef nonnull readnone align 8 dereferenceable(8) [[A]]) #[[ATTR20]]
 ; CGSCC-NEXT:    [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
@@ -3530,8 +3530,8 @@ define void @returnedPtrAccessesMultiple(i32 %i) {
 }
 
 define void @returnedPtrAccessesMultiple2(i32 %i) {
-; TUNIT-LABEL: define void @returnedPtrAccessesMultiple2(
-; TUNIT-SAME: i32 [[I:%.*]]) {
+; TUNIT-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2
+; TUNIT-SAME: (i32 [[I:%.*]]) {
 ; TUNIT-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; TUNIT-NEXT:    [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
 ; TUNIT-NEXT:    [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4
@@ -3547,8 +3547,8 @@ define void @returnedPtrAccessesMultiple2(i32 %i) {
 ; TUNIT-NEXT:    call void @use3i8(i8 noundef [[L2]], i8 noundef [[L4]], i8 noundef [[L6]])
 ; TUNIT-NEXT:    ret void
 ;
-; CGSCC-LABEL: define void @returnedPtrAccessesMultiple2(
-; CGSCC-SAME: i32 [[I:%.*]]) {
+; CGSCC-LABEL: define {{[^@]+}}@returnedPtrAccessesMultiple2
+; CGSCC-SAME: (i32 [[I:%.*]]) {
 ; CGSCC-NEXT:    [[A:%.*]] = alloca i64, align 8
 ; CGSCC-NEXT:    [[G2:%.*]] = getelementptr i8, ptr [[A]], i32 2
 ; CGSCC-NEXT:    [[G4:%.*]] = getelementptr i8, ptr [[A]], i32 4
@@ -3621,20 +3621,20 @@ declare void @llvm.assume(i1 noundef)
 ;.
 ; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
 ; TUNIT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
-; TUNIT: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; TUNIT: attributes #[[ATTR3]] = { mustprogress nofree norecurse nosync nounwind willreturn }
-; TUNIT: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR5]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
-; TUNIT: attributes #[[ATTR6]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(read) }
-; TUNIT: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind memory(write) }
-; TUNIT: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
-; TUNIT: attributes #[[ATTR9:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
-; TUNIT: attributes #[[ATTR10:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
-; TUNIT: attributes #[[ATTR11]] = { mustprogress nofree norecurse nosync nounwind willreturn uwtable }
-; TUNIT: attributes #[[ATTR12]] = { nofree nosync nounwind memory(argmem: readwrite) }
-; TUNIT: attributes #[[ATTR13]] = { nofree norecurse nosync nounwind memory(none) }
-; TUNIT: attributes #[[ATTR14]] = { nofree nosync nounwind }
-; TUNIT: attributes #[[ATTR15:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; TUNIT: attributes #[[ATTR2]] = { mustprogress nofree norecurse nosync nounwind willreturn }
+; TUNIT: attributes #[[ATTR3]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; TUNIT: attributes #[[ATTR5]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(read) }
+; TUNIT: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind memory(write) }
+; TUNIT: attributes #[[ATTR7:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
+; TUNIT: attributes #[[ATTR8:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+; TUNIT: attributes #[[ATTR9:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+; TUNIT: attributes #[[ATTR10]] = { mustprogress nofree norecurse nosync nounwind willreturn uwtable }
+; TUNIT: attributes #[[ATTR11]] = { nofree nosync nounwind memory(argmem: readwrite) }
+; TUNIT: attributes #[[ATTR12]] = { nofree norecurse nosync nounwind memory(none) }
+; TUNIT: attributes #[[ATTR13]] = { nofree nosync nounwind }
+; TUNIT: attributes #[[ATTR14:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; TUNIT: attributes #[[ATTR15:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
 ; TUNIT: attributes #[[ATTR16:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
 ; TUNIT: attributes #[[ATTR17]] = { nofree willreturn }
 ; TUNIT: attributes #[[ATTR18]] = { nofree nosync nounwind willreturn memory(write) }
@@ -3646,23 +3646,23 @@ declare void @llvm.assume(i1 noundef)
 ;.
 ; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: write) }
 ; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; CGSCC: attributes #[[ATTR2:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-; CGSCC: attributes #[[ATTR3]] = { mustprogress nofree nosync nounwind willreturn }
-; CGSCC: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
-; CGSCC: attributes #[[ATTR5]] = { mustprogress nofree norecurse nosync nounwind willreturn }
-; CGSCC: attributes #[[ATTR6]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
-; CGSCC: attributes #[[ATTR7]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(read) }
-; CGSCC: attributes #[[ATTR8]] = { nofree norecurse nosync nounwind memory(write) }
-; CGSCC: attributes #[[ATTR9:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
-; CGSCC: attributes #[[ATTR10:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
-; CGSCC: attributes #[[ATTR11:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
-; CGSCC: attributes #[[ATTR12]] = { mustprogress nofree norecurse nosync nounwind willreturn uwtable }
-; CGSCC: attributes #[[ATTR13]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
-; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind memory(argmem: readwrite) }
-; CGSCC: attributes #[[ATTR15]] = { nofree nosync nounwind memory(none) }
-; CGSCC: attributes #[[ATTR16]] = { mustprogress nofree nosync nounwind willreturn memory(none) }
-; CGSCC: attributes #[[ATTR17]] = { nofree nosync nounwind }
-; CGSCC: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; CGSCC: attributes #[[ATTR2]] = { mustprogress nofree nosync nounwind willreturn }
+; CGSCC: attributes #[[ATTR3]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR4]] = { mustprogress nofree norecurse nosync nounwind willreturn }
+; CGSCC: attributes #[[ATTR5]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(write) }
+; CGSCC: attributes #[[ATTR6]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(read) }
+; CGSCC: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind memory(write) }
+; CGSCC: attributes #[[ATTR8:[0-9]+]] = { allockind("alloc,uninitialized") allocsize(0) "alloc-family"="malloc" }
+; CGSCC: attributes #[[ATTR9:[0-9]+]] = { allockind("free") "alloc-family"="malloc" }
+; CGSCC: attributes #[[ATTR10:[0-9]+]] = { allockind("alloc,zeroed") allocsize(0,1) "alloc-family"="malloc" }
+; CGSCC: attributes #[[ATTR11]] = { mustprogress nofree norecurse nosync nounwind willreturn uwtable }
+; CGSCC: attributes #[[ATTR12]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) }
+; CGSCC: attributes #[[ATTR13]] = { nofree nosync nounwind memory(argmem: readwrite) }
+; CGSCC: attributes #[[ATTR14]] = { nofree nosync nounwind memory(none) }
+; CGSCC: attributes #[[ATTR15]] = { mustprogress nofree nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR16]] = { nofree nosync nounwind }
+; CGSCC: attributes #[[ATTR17:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: write) }
+; CGSCC: attributes #[[ATTR18:[0-9]+]] = { nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
 ; CGSCC: attributes #[[ATTR19:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) }
 ; CGSCC: attributes #[[ATTR20]] = { nofree willreturn }
 ; CGSCC: attributes #[[ATTR21]] = { nofree nounwind willreturn memory(write) }
@@ -3677,32 +3677,32 @@ declare void @llvm.assume(i1 noundef)
 ; TUNIT: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; TUNIT: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
 ; TUNIT: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; TUNIT: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; TUNIT: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
 ; TUNIT: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
 ; TUNIT: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
 ; TUNIT: [[META6]] = !{!"Simple C/C++ TBAA"}
-; TUNIT: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
+; TUNIT: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
 ; TUNIT: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20}
 ; TUNIT: [[META9]] = !{!"float", [[META5]], i64 0}
-; TUNIT: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16}
-; TUNIT: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20}
-; TUNIT: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0}
-; TUNIT: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4}
-; TUNIT: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8}
+; TUNIT: [[TBAA10]] = !{[[META8]], [[META9]], i64 16}
+; TUNIT: [[TBAA11]] = !{[[META8]], [[META9]], i64 20}
+; TUNIT: [[TBAA12]] = !{[[META8]], [[META4]], i64 0}
+; TUNIT: [[TBAA13]] = !{[[META8]], [[META4]], i64 4}
+; TUNIT: [[TBAA14]] = !{[[META8]], [[META4]], i64 8}
 ; TUNIT: [[LOOP15]] = distinct !{[[LOOP15]], [[META16:![0-9]+]]}
 ; TUNIT: [[META16]] = !{!"llvm.loop.mustprogress"}
 ; TUNIT: [[LOOP17]] = distinct !{[[LOOP17]], [[META16]]}
 ; TUNIT: [[LOOP18]] = distinct !{[[LOOP18]], [[META16]]}
-; TUNIT: [[CHAR_TBAA19]] = !{[[META5]], [[META5]], i64 0}
+; TUNIT: [[TBAA19]] = !{[[META5]], [[META5]], i64 0}
 ; TUNIT: [[LOOP20]] = distinct !{[[LOOP20]], [[META16]]}
 ; TUNIT: [[LOOP21]] = distinct !{[[LOOP21]], [[META16]]}
 ; TUNIT: [[LOOP22]] = distinct !{[[LOOP22]], [[META16]]}
 ; TUNIT: [[LOOP23]] = distinct !{[[LOOP23]], [[META16]]}
 ; TUNIT: [[LOOP24]] = distinct !{[[LOOP24]], [[META16]]}
 ; TUNIT: [[LOOP25]] = distinct !{[[LOOP25]], [[META16]]}
-; TUNIT: [[FLOAT_TBAA26]] = !{[[META9]], [[META9]], i64 0}
+; TUNIT: [[TBAA26]] = !{[[META9]], [[META9]], i64 0}
 ; TUNIT: [[LOOP27]] = distinct !{[[LOOP27]], [[META16]]}
-; TUNIT: [[LONG_LONG_TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0}
+; TUNIT: [[TBAA28]] = !{[[META29:![0-9]+]], [[META29]], i64 0}
 ; TUNIT: [[META29]] = !{!"long long", [[META5]], i64 0}
 ; TUNIT: [[LOOP30]] = distinct !{[[LOOP30]], [[META16]]}
 ; TUNIT: [[LOOP31]] = distinct !{[[LOOP31]], [[META16]]}
@@ -3711,24 +3711,24 @@ declare void @llvm.assume(i1 noundef)
 ; CGSCC: [[META0:![0-9]+]] = !{i32 1, !"wchar_size", i32 4}
 ; CGSCC: [[META1:![0-9]+]] = !{i32 7, !"uwtable", i32 1}
 ; CGSCC: [[META2:![0-9]+]] = !{!"{{.*}}clang version {{.*}}"}
-; CGSCC: [[INT_TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
+; CGSCC: [[TBAA3]] = !{[[META4:![0-9]+]], [[META4]], i64 0}
 ; CGSCC: [[META4]] = !{!"int", [[META5:![0-9]+]], i64 0}
 ; CGSCC: [[META5]] = !{!"omnipotent char", [[META6:![0-9]+]], i64 0}
 ; CGSCC: [[META6]] = !{!"Simple C/C++ TBAA"}
-; CGSCC: [[FLOAT_TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
+; CGSCC: [[TBAA7]] = !{[[META8:![0-9]+]], [[META9:![0-9]+]], i64 12}
 ; CGSCC: [[META8]] = !{!"S", [[META4]], i64 0, [[META4]], i64 4, [[META4]], i64 8, [[META9]], i64 12, [[META9]], i64 16, [[META9]], i64 20}
 ; CGSCC: [[META9]] = !{!"float", [[META5]], i64 0}
-; CGSCC: [[FLOAT_TBAA10]] = !{[[META8]], [[META9]], i64 16}
-; CGSCC: [[FLOAT_TBAA11]] = !{[[META8]], [[META9]], i64 20}
-; CGSCC: [[INT_TBAA12]] = !{[[META8]], [[META4]], i64 0}
-; CGSCC: [[INT_TBAA13]] = !{[[META8]], [[META4]], i64 4}
-; CGSCC: [[INT_TBAA14]] = !{[[META8]], [[META4]], i64 8}
-; CGSCC: [[CHAR_TBAA15]] = !{[[META5]], [[META5]], i64 0}
+; CGSCC: [[TBAA10]] = !{[[META8]], [[META9]], i64 16}
+; CGSCC: [[TBAA11]] = !{[[META8]], [[META9]], i64 20}
+; CGSCC: [[TBAA12]] = !{[[META8]], [[META4]], i64 0}
+; CGSCC: [[TBAA13]] = !{[[META8]], [[META4]], i64 4}
+; CGSCC: [[TBAA14]] = !{[[META8]], [[META4]], i64 8}
+; CGSCC: [[TBAA15]] = !{[[META5]], [[META5]], i64 0}
 ; CGSCC: [[LOOP16]] = distinct !{[[LOOP16]], [[META17:![0-9]+]]}
 ; CGSCC: [[META17]] = !{!"llvm.loop.mustprogress"}
-; CGSCC: [[FLOAT_TBAA18]] = !{[[META9]], [[META9]], i64 0}
+; CGSCC: [[TBAA18]] = !{[[META9]], [[META9]], i64 0}
 ; CGSCC: [[LOOP19]] = distinct !{[[LOOP19]], [[META17]]}
-; CGSCC: [[LONG_LONG_TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
+; CGSCC: [[TBAA20]] = !{[[META21:![0-9]+]], [[META21]], i64 0}
 ; CGSCC: [[META21]] = !{!"long long", [[META5]], i64 0}
 ; CGSCC: [[LOOP22]] = distinct !{[[LOOP22]], [[META17]]}
 ; CGSCC: [[LOOP23]] = distinct !{[[LOOP23]], [[META17]]}

>From fde8e5d93fb375b40b44dd02bf6cc0a37615760b Mon Sep 17 00:00:00 2001
From: vidush <vidush.sl at gmail.com>
Date: Fri, 21 Nov 2025 17:31:14 -0500
Subject: [PATCH 09/14] rerun file

---
 llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index 63a234acbcb47..e52b461e40a89 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -431,7 +431,7 @@ define i32 @malloc_in_loop(i32 %arg) {
 ; CHECK-SAME: (i32 [[ARG:%.*]]) {
 ; CHECK-NEXT:  bb:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4, addrspace(5)
-; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 1, addrspace(5)
+; CHECK-NEXT:    [[I11:%.*]] = alloca [0 x i8], align 8, addrspace(5)
 ; CHECK-NEXT:    store i32 [[ARG]], ptr addrspace(5) [[I]], align 4
 ; CHECK-NEXT:    br label [[BB2:%.*]]
 ; CHECK:       bb2:

>From 789baf99f99a281f4ad88da7415ffd1e10da8106 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Fri, 21 Nov 2025 20:18:21 -0500
Subject: [PATCH 10/14] nested struct example

---
 llvm/test/Transforms/Attributor/allocator.ll | 110 +++++++++++++++++--
 1 file changed, 103 insertions(+), 7 deletions(-)

diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index 3611dbad79bf7..f50b43711de8b 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -4,11 +4,103 @@
 
 %struct.Foo = type { i32, i32, i8 }
 
+%struct.Bar = type { i32, i32, %struct.Foo}
+
 @.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+@.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
 
+; Function Attrs: noinline nounwind optnone uwtable
 ;.
 ; CHECK: @.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+; CHECK: @.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
 ;.
+define dso_local void @accessBarFromFoo(ptr noundef %val) #0 {
+; TUNIT-LABEL: define dso_local void @accessBarFromFoo
+; TUNIT-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
+; TUNIT-NEXT:  entry:
+; TUNIT-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; TUNIT-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 1
+; TUNIT-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; TUNIT-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
+; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; TUNIT-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; TUNIT-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
+; TUNIT-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
+; TUNIT-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
+; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; TUNIT-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
+; TUNIT-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
+; TUNIT-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
+; TUNIT-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
+; TUNIT-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP2]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
+; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
+; TUNIT-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP]], align 4
+; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
+; TUNIT-NEXT:    ret void
+;
+; CGSCC-LABEL: define dso_local void @accessBarFromFoo
+; CGSCC-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
+; CGSCC-NEXT:  entry:
+; CGSCC-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CGSCC-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 1
+; CGSCC-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CGSCC-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
+; CGSCC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; CGSCC-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
+; CGSCC-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; CGSCC-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
+; CGSCC-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
+; CGSCC-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
+; CGSCC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; CGSCC-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
+; CGSCC-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
+; CGSCC-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
+; CGSCC-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
+; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
+; CGSCC-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
+; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
+; CGSCC-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP2]], align 4
+; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
+; CGSCC-NEXT:    ret void
+;
+entry:
+  %val.addr = alloca ptr, align 8
+  %b = alloca %struct.Bar, align 4
+  %f = alloca %struct.Foo, align 4
+  store ptr %val, ptr %val.addr, align 8
+  %0 = load ptr, ptr %val.addr, align 8
+  %1 = load i32, ptr %0, align 4
+  %add = add nsw i32 %1, 10
+  %a = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 %add, ptr %a, align 4
+  %2 = load ptr, ptr %val.addr, align 8
+  %3 = load i32, ptr %2, align 4
+  %add1 = add nsw i32 %3, 100
+  %b2 = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 1
+  store i32 %add1, ptr %b2, align 4
+  %4 = load ptr, ptr %val.addr, align 8
+  %5 = load i32, ptr %4, align 4
+  %add3 = add nsw i32 %5, 1
+  %conv = trunc i32 %add3 to i8
+  %c = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 2
+  store i8 %conv, ptr %c, align 4
+  %f4 = getelementptr inbounds nuw %struct.Bar, ptr %b, i32 0, i32 2
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %f4, ptr align 4 %f, i64 12, i1 false)
+  %f5 = getelementptr inbounds nuw %struct.Bar, ptr %b, i32 0, i32 2
+  %a6 = getelementptr inbounds nuw %struct.Foo, ptr %f5, i32 0, i32 0
+  %6 = load i32, ptr %a6, align 4
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %6)
+  ret void
+}
+
+; Function Attrs: nocallback nofree nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.memcpy.p0.p0.i64(ptr noalias writeonly captures(none), ptr noalias readonly captures(none), i64, i1 immarg) #1
+
+
 define dso_local void @positive_alloca_1(i32 noundef %val) #0 {
 ; CHECK-LABEL: define dso_local void @positive_alloca_1
 ; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
@@ -490,7 +582,7 @@ define dso_local void @pthread_test(){
 define dso_local void @select_case(i1 %cond){
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define dso_local void @select_case
-; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR1:[0-9]+]] {
 ; CHECK-NEXT:    [[A:%.*]] = alloca [100 x i8], align 1
 ; CHECK-NEXT:    [[B:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 3
 ; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds [100 x i8], ptr [[A]], i64 0, i64 1
@@ -508,7 +600,7 @@ define dso_local void @select_case(i1 %cond){
 define dso_local void @select_case_2(i1 %cond){
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define dso_local void @select_case_2
-; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR0]] {
+; CHECK-SAME: (i1 [[COND:%.*]]) #[[ATTR1]] {
 ; CHECK-NEXT:    ret void
 ;
   %a = alloca [100 x i32], align 1
@@ -570,7 +662,7 @@ entry:
 define dso_local void @alloca_array_multi_offset(){
 ; CHECK: Function Attrs: nofree norecurse nosync nounwind memory(none)
 ; CHECK-LABEL: define dso_local void @alloca_array_multi_offset
-; CHECK-SAME: () #[[ATTR1:[0-9]+]] {
+; CHECK-SAME: () #[[ATTR2:[0-9]+]] {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[I:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 0, ptr [[I]], align 4
@@ -631,11 +723,15 @@ declare i32 @printf(ptr noundef, ...) #1
 ; Function Attrs: nounwind allocsize(0)
 declare noalias ptr @malloc(i64 noundef) #1
 ;.
-; TUNIT: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
-; TUNIT: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
+; TUNIT: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; TUNIT: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; TUNIT: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) }
+; TUNIT: attributes #[[ATTR3]] = { nofree willreturn }
 ;.
-; CGSCC: attributes #[[ATTR0]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
-; CGSCC: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind memory(none) }
+; CGSCC: attributes #[[ATTR0:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: readwrite) }
+; CGSCC: attributes #[[ATTR1]] = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) }
+; CGSCC: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind memory(none) }
+; CGSCC: attributes #[[ATTR3]] = { nofree willreturn }
 ;.
 ; TUNIT: [[META0]] = !{}
 ; TUNIT: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}

>From c4431e7c18944b276ee59d69162cca6c1aa2fe0e Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Fri, 21 Nov 2025 22:02:49 -0500
Subject: [PATCH 11/14] wip: nested struct

---
 .../Transforms/IPO/AttributorAttributes.cpp   |  16 ++-
 llvm/test/Transforms/Attributor/allocator.ll  | 104 +++++++-----------
 2 files changed, 54 insertions(+), 66 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index aff88cc044fb7..7f102a09accb4 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -13676,6 +13676,10 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     const DataLayout &DL = A.getDataLayout();
     const auto AllocationSize = findInitialAllocationSize(I, DL);
 
+    llvm::dbgs() << "Print Allocation Size: " << AllocationSize << "\n";
+    PI->dumpState(dbgs());
+    llvm::dbgs() << "End printing size and AAPointerInfo\n";
+
     // If allocation size is nullopt, we give up.
     if (!AllocationSize)
       return indicatePessimisticFixpoint();
@@ -13696,7 +13700,15 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     // For each access bin we compute its new start offset
     // and store the results in a new map (NewOffsetBins).
     // NewOffsetsBins is a Map from AA::RangeTy OldRange to AA::RangeTy
-    // NewRange.
+    // NewRange. 
+    
+    // Algorithm Logic:
+    // For all the ranges in AAPointerInfo, we need to find the minimum 
+    // longest range that contains all the accessed offsets. 
+    // In case, disjoint ranges exist, we want to merge them. 
+    // For this we need to calculate the minimum size of the new allocation. 
+    // Then adjust all the old offsets and map them to their new offsets.
+
     unsigned long PrevBinEndOffset = 0;
     bool ChangedOffsets = false;
     for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
@@ -13712,6 +13724,8 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
       PrevBinEndOffset = NewEndOffset;
 
+      dbgs() << "Print the Start, End, PrevBinEndOffset: " << NewStartOffset << "," << NewEndOffset << "," << PrevBinEndOffset << "\n";
+
       ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
                                       OldRange.Size);
     }
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index f50b43711de8b..aa6680878e0c0 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -15,81 +15,55 @@
 ; CHECK: @.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
 ;.
 define dso_local void @accessBarFromFoo(ptr noundef %val) #0 {
-; TUNIT-LABEL: define dso_local void @accessBarFromFoo
-; TUNIT-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; TUNIT-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 1
-; TUNIT-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; TUNIT-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
-; TUNIT-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
-; TUNIT-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
-; TUNIT-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
-; TUNIT-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
-; TUNIT-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
-; TUNIT-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
-; TUNIT-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
-; TUNIT-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
-; TUNIT-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
-; TUNIT-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
-; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
-; TUNIT-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP2]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
-; TUNIT-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP]], align 4
-; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
-; TUNIT-NEXT:    ret void
-;
-; CGSCC-LABEL: define dso_local void @accessBarFromFoo
-; CGSCC-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; CGSCC-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 1
-; CGSCC-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
-; CGSCC-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
-; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
-; CGSCC-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
-; CGSCC-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
-; CGSCC-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
-; CGSCC-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
-; CGSCC-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
-; CGSCC-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
-; CGSCC-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
-; CGSCC-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
-; CGSCC-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
-; CGSCC-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
-; CGSCC-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
-; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
-; CGSCC-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
-; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
-; CGSCC-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP2]], align 4
-; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
-; CGSCC-NEXT:    ret void
+; CHECK-LABEL: define dso_local void @accessBarFromFoo
+; CHECK-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
+; CHECK-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 4
+; CHECK-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
+; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
+; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
+; CHECK-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
+; CHECK-NEXT:    ret void
 ;
 entry:
   %val.addr = alloca ptr, align 8
   %b = alloca %struct.Bar, align 4
-  %f = alloca %struct.Foo, align 4
+  %fnested = alloca %struct.Foo, align 4
   store ptr %val, ptr %val.addr, align 8
   %0 = load ptr, ptr %val.addr, align 8
   %1 = load i32, ptr %0, align 4
   %add = add nsw i32 %1, 10
-  %a = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 0
+  %a = getelementptr inbounds nuw %struct.Foo, ptr %fnested, i32 0, i32 0
   store i32 %add, ptr %a, align 4
   %2 = load ptr, ptr %val.addr, align 8
   %3 = load i32, ptr %2, align 4
   %add1 = add nsw i32 %3, 100
-  %b2 = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 1
+  %b2 = getelementptr inbounds nuw %struct.Foo, ptr %fnested, i32 0, i32 1
   store i32 %add1, ptr %b2, align 4
   %4 = load ptr, ptr %val.addr, align 8
   %5 = load i32, ptr %4, align 4
   %add3 = add nsw i32 %5, 1
   %conv = trunc i32 %add3 to i8
-  %c = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 2
+  %c = getelementptr inbounds nuw %struct.Foo, ptr %fnested, i32 0, i32 2
   store i8 %conv, ptr %c, align 4
   %f4 = getelementptr inbounds nuw %struct.Bar, ptr %b, i32 0, i32 2
-  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %f4, ptr align 4 %f, i64 12, i1 false)
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %f4, ptr align 4 %fnested, i64 12, i1 false)
   %f5 = getelementptr inbounds nuw %struct.Bar, ptr %b, i32 0, i32 2
   %a6 = getelementptr inbounds nuw %struct.Foo, ptr %f5, i32 0, i32 0
   %6 = load i32, ptr %a6, align 4
@@ -146,7 +120,7 @@ define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
 ; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
 ; CHECK-NEXT:    [[CALL:%.*]] = call noalias ptr @malloc(i64 noundef 12)
 ; CHECK-NEXT:    store ptr [[CALL]], ptr [[F]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
 ; CHECK-NEXT:    store i32 [[ADD]], ptr [[CALL]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[CALL]], align 4, !invariant.load [[META0]]
@@ -267,11 +241,11 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; TUNIT-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
 ; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
 ; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
+; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
+; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
 ; TUNIT-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 ; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
 ; TUNIT-NEXT:    ret void
@@ -286,11 +260,11 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; CGSCC-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
 ; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
 ; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
 ; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
 ; CGSCC-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 ; CGSCC-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
 ; CGSCC-NEXT:    ret void

>From 6dee5461118f80014d1dae6463261644e053221f Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sat, 22 Nov 2025 15:22:48 -0500
Subject: [PATCH 12/14] fix logic to pack an Alloca Instruction

---
 .../Transforms/IPO/AttributorAttributes.cpp   | 118 +++++++++++++-----
 llvm/test/Transforms/Attributor/allocator.ll  |  71 ++++-------
 llvm/test/Transforms/Attributor/callbacks.ll  |  34 ++---
 llvm/test/Transforms/Attributor/noalias.ll    |   8 +-
 .../value-simplify-pointer-info-vec.ll        |   4 +-
 5 files changed, 140 insertions(+), 95 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 7f102a09accb4..a68c93989ddce 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -75,9 +75,11 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
+#include <cstdint>
 #include <numeric>
 #include <optional>
 #include <string>
+#include <tuple>
 
 using namespace llvm;
 
@@ -13676,10 +13678,6 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     const DataLayout &DL = A.getDataLayout();
     const auto AllocationSize = findInitialAllocationSize(I, DL);
 
-    llvm::dbgs() << "Print Allocation Size: " << AllocationSize << "\n";
-    PI->dumpState(dbgs());
-    llvm::dbgs() << "End printing size and AAPointerInfo\n";
-
     // If allocation size is nullopt, we give up.
     if (!AllocationSize)
       return indicatePessimisticFixpoint();
@@ -13697,43 +13695,103 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
       return ChangeStatus::CHANGED;
     }
 
-    // For each access bin we compute its new start offset
-    // and store the results in a new map (NewOffsetBins).
-    // NewOffsetsBins is a Map from AA::RangeTy OldRange to AA::RangeTy
-    // NewRange. 
-    
-    // Algorithm Logic:
-    // For all the ranges in AAPointerInfo, we need to find the minimum 
-    // longest range that contains all the accessed offsets. 
-    // In case, disjoint ranges exist, we want to merge them. 
-    // For this we need to calculate the minimum size of the new allocation. 
-    // Then adjust all the old offsets and map them to their new offsets.
-
-    unsigned long PrevBinEndOffset = 0;
-    bool ChangedOffsets = false;
+    // Algorithm:
+    // For all the accessed ranges in AAPointerInfo, we need to find the
+    // smallest set of ranges that contains all the accessed offsets.
+    // If disjoint ranges exist, we want to merge them, since we want to
+    // make a packed Alloca. For this we need to calculate the minimum size of
+    // the new allocation, then adjust all the old offsets and map them to
+    // their new offsets.
+
+    // A tuple to store a cluster, a cluster is a maximal unique range.
+    // Different Clusters are meant to be disjoint, but we eventually merge them
+    // together. A bigger cluster can subsume a smaller cluster inside it.
+    using ClusterTy = std::tuple<int64_t, int64_t>;
+    using ClustersTy = SmallVector<ClusterTy, 4>;
+
+    // AccessedInterval keeps track of the start and end of a RangeTy,
+    // since RangeTy stores the offset and the corresponding size but
+    // not the end of the range. The tuple layout is:
+    // (StartOffset, EndOffset, RangeTy struct)
+    using AccessedInterval = std::tuple<int64_t, int64_t, AA::RangeTy>;
+    using AccessedIntervals = SmallVector<AccessedInterval, 4>;
+
+    // Obtain all the offset bins that exists in AAPointerInfo.
+    SmallVector<AA::RangeTy, 4> OldBins;
     for (AAPointerInfo::OffsetBinsTy::const_iterator It = PI->begin();
          It != PI->end(); It++) {
-      const AA::RangeTy &OldRange = It->getFirst();
-
-      // If any byte range has an unknown offset or size, we should leave the
-      // original allocation unmodified.
-      if (OldRange.offsetOrSizeAreUnknown())
+      const AA::RangeTy &Bin = It->getFirst();
+      // If any unknown or unassigned bin exists, we don't want to
+      // change the allocation size, so we return a pessimistic
+      // fixpoint.
+      if (Bin.offsetOrSizeAreUnknown() || Bin.isUnassigned())
         return indicatePessimisticFixpoint();
+      OldBins.push_back(Bin);
+    }
+
+    AccessedIntervals Intervals;
+    // Obtain the intervals from the ranges.
+    for (auto &OldBin : OldBins) {
+      AccessedInterval Interval =
+          std::make_tuple(OldBin.Offset, OldBin.Offset + OldBin.Size, OldBin);
+      Intervals.push_back(Interval);
+    }
+
+    // Sort the intervals by ascending start offset before clustering them.
+    llvm::sort(Intervals,
+               [](const AccessedInterval &A, const AccessedInterval &B) {
+                 return get<0>(A) < get<0>(B);
+               });
 
-      unsigned long NewStartOffset = PrevBinEndOffset;
-      unsigned long NewEndOffset = NewStartOffset + OldRange.Size;
-      PrevBinEndOffset = NewEndOffset;
+    ClustersTy Clusters;
+    int64_t CurrentClusterStart = get<0>(Intervals[0]);
+    int64_t CurrentClusterEnd = get<1>(Intervals[0]);
 
-      dbgs() << "Print the Start, End, PrevBinEndOffset: " << NewStartOffset << "," << NewEndOffset << "," << PrevBinEndOffset << "\n";
+    for (uint I = 1; I < Intervals.size(); I++) {
+      if (get<0>(Intervals[I]) <= CurrentClusterEnd) {
+        CurrentClusterEnd = std::max(CurrentClusterEnd, get<1>(Intervals[I]));
+      } else {
+        Clusters.push_back({CurrentClusterStart, CurrentClusterEnd});
+        CurrentClusterStart = get<0>(Intervals[I]);
+        CurrentClusterEnd = get<1>(Intervals[I]);
+      }
+    }
+    Clusters.push_back({CurrentClusterStart, CurrentClusterEnd});
 
-      ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset, NewStartOffset,
-                                      OldRange.Size);
+    int64_t PackedRangeSize = 0;
+    // Sum up each cluster to get the total size of the packed Alloca.
+    for (ClusterTy &Cluster : Clusters)
+      PackedRangeSize += (get<1>(Cluster) - get<0>(Cluster));
+
+    // For each access bin we compute its new start offset
+    // and store the results in a new map (NewOffsetBins).
+    // NewOffsetsBins is a Map from AA::RangeTy OldRange to AA::RangeTy
+    // NewRange
+    int64_t PackedCursorStart = 0;
+    bool ChangedOffsets = false;
+    for (ClusterTy &Cluster : Clusters) {
+      auto &ClusterStart = get<0>(Cluster);
+      auto &ClusterEnd = get<1>(Cluster);
+      for (AccessedInterval &Interval : Intervals) {
+        auto &InterValStart = get<0>(Interval);
+        auto &InterValEnd = get<1>(Interval);
+        if (InterValStart >= ClusterStart && InterValEnd <= ClusterEnd) {
+          int64_t NewRangeStart =
+              PackedCursorStart + (InterValStart - ClusterStart);
+          int64_t NewRangeSize = InterValEnd - InterValStart;
+          auto &OldRange = get<2>(Interval);
+
+          ChangedOffsets |= setNewOffsets(OldRange, OldRange.Offset,
+                                          NewRangeStart, NewRangeSize);
+        }
+      }
+      PackedCursorStart += ClusterEnd - ClusterStart;
     }
 
     // Set the new size of the allocation. The new size of the Allocation should
     // be the size of PrevBinEndOffset * 8 in bits.
     auto NewAllocationSize =
-        std::optional<TypeSize>(TypeSize(PrevBinEndOffset * 8, false));
+        std::optional<TypeSize>(TypeSize(PackedRangeSize * 8, false));
 
     if (!changeAllocationSize(NewAllocationSize))
       return ChangeStatus::UNCHANGED;
diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index aa6680878e0c0..918257ebf2493 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -19,24 +19,24 @@ define dso_local void @accessBarFromFoo(ptr noundef %val) #0 {
 ; CHECK-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
-; CHECK-NEXT:    [[B1:%.*]] = alloca [16 x i8], align 4
-; CHECK-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[B1:%.*]] = alloca [12 x i8], align 4
+; CHECK-NEXT:    [[FNESTED:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
 ; CHECK-NEXT:    store ptr [[VAL]], ptr [[VAL_ADDR]], align 8
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0:![0-9]+]]
 ; CHECK-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP0]], 10
-; CHECK-NEXT:    store i32 [[ADD]], ptr [[F]], align 4
+; CHECK-NEXT:    store i32 [[ADD]], ptr [[FNESTED]], align 4
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[ADD1:%.*]] = add nsw i32 [[TMP1]], 100
-; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 1
+; CHECK-NEXT:    [[B2:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[FNESTED]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 [[ADD1]], ptr [[B2]], align 4
 ; CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[VAL]], align 4, !invariant.load [[META0]]
 ; CHECK-NEXT:    [[ADD3:%.*]] = add nsw i32 [[TMP2]], 1
 ; CHECK-NEXT:    [[CONV:%.*]] = trunc i32 [[ADD3]] to i8
-; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[FNESTED]], i32 0, i32 2
 ; CHECK-NEXT:    store i8 [[CONV]], ptr [[C]], align 4
-; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 4
-; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[F]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
-; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [16 x i8], ptr [[B1]], i64 0
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [12 x i8], ptr [[B1]], i64 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr nofree nonnull writeonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[FNESTED]], i64 noundef 12, i1 noundef false) #[[ATTR3:[0-9]+]]
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [12 x i8], ptr [[B1]], i64 0
 ; CHECK-NEXT:    [[TMP3:%.*]] = load i32, ptr [[NEWGEP2]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP3]])
 ; CHECK-NEXT:    ret void
@@ -231,43 +231,24 @@ entry:
 ;However, the offsets (load/store etc.) Need to be changed.
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val) #0 {
-; TUNIT-LABEL: define dso_local void @positive_test_not_a_single_start_offset
-; TUNIT-SAME: (i32 noundef [[VAL:%.*]]) {
-; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; TUNIT-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 4
-; TUNIT-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; TUNIT-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
-; TUNIT-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
-; TUNIT-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
-; TUNIT-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; TUNIT-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; TUNIT-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
-; TUNIT-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; TUNIT-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
-; TUNIT-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
-; TUNIT-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
-; TUNIT-NEXT:    ret void
-;
-; CGSCC-LABEL: define dso_local void @positive_test_not_a_single_start_offset
-; CGSCC-SAME: (i32 noundef [[VAL:%.*]]) {
-; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
-; CGSCC-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 4
-; CGSCC-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
-; CGSCC-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
-; CGSCC-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
-; CGSCC-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
-; CGSCC-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CGSCC-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CGSCC-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
-; CGSCC-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CGSCC-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
-; CGSCC-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
-; CGSCC-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
-; CGSCC-NEXT:    ret void
+; CHECK-LABEL: define dso_local void @positive_test_not_a_single_start_offset
+; CHECK-SAME: (i32 noundef [[VAL:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca i32, align 4
+; CHECK-NEXT:    [[F1:%.*]] = alloca [5 x i8], align 4
+; CHECK-NEXT:    store i32 [[VAL]], ptr [[VAL_ADDR]], align 4
+; CHECK-NEXT:    [[MUL:%.*]] = mul nsw i32 2, [[VAL]]
+; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
+; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
+; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
+; CHECK-NEXT:    ret void
 ;
 entry:
   %val.addr = alloca i32, align 4
diff --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index 80a0b2befbbee..7cd289c812daf 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -53,7 +53,7 @@ define internal void @t0_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; TUNIT-LABEL: define {{[^@]+}}@t0_callback_callee
 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0:![0-9]+]]
 ; TUNIT-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
 ; TUNIT-NEXT:    tail call void @t0_check(ptr align 256 [[A]], i64 noundef 99, ptr align 32 [[TMP0]])
@@ -62,9 +62,9 @@ define internal void @t0_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; CGSCC-LABEL: define {{[^@]+}}@t0_callback_callee
 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0:![0-9]+]]
 ; CGSCC-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
-; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
+; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64, !invariant.load [[META0]]
 ; CGSCC-NEXT:    tail call void @t0_check(ptr align 256 [[A]], i64 noundef 99, ptr [[TMP0]])
 ; CGSCC-NEXT:    ret void
 ;
@@ -127,7 +127,7 @@ define internal void @t1_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; TUNIT-LABEL: define {{[^@]+}}@t1_callback_callee
 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; TUNIT-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
 ; TUNIT-NEXT:    tail call void @t1_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr align 32 captures(none) [[TMP0]])
@@ -137,9 +137,9 @@ define internal void @t1_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; CGSCC-LABEL: define {{[^@]+}}@t1_callback_callee
 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) #[[ATTR0:[0-9]+]] {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; CGSCC-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
-; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
+; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64, !invariant.load [[META0]]
 ; CGSCC-NEXT:    tail call void @t1_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr captures(none) [[TMP0]])
 ; CGSCC-NEXT:    ret void
 ;
@@ -201,7 +201,7 @@ define internal void @t2_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; TUNIT-LABEL: define {{[^@]+}}@t2_callback_callee
 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; TUNIT-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
 ; TUNIT-NEXT:    tail call void @t2_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr align 32 captures(none) [[TMP0]])
@@ -210,9 +210,9 @@ define internal void @t2_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; CGSCC-LABEL: define {{[^@]+}}@t2_callback_callee
 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; CGSCC-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
-; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
+; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64, !invariant.load [[META0]]
 ; CGSCC-NEXT:    tail call void @t2_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr captures(none) [[TMP0]])
 ; CGSCC-NEXT:    ret void
 ;
@@ -277,7 +277,7 @@ define internal void @t3_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; TUNIT-LABEL: define {{[^@]+}}@t3_callback_callee
 ; TUNIT-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; TUNIT-NEXT:  entry:
-; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; TUNIT-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; TUNIT-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
 ; TUNIT-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
 ; TUNIT-NEXT:    tail call void @t3_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr align 32 captures(none) [[TMP0]])
@@ -286,9 +286,9 @@ define internal void @t3_callback_callee(ptr %is_not_null, ptr %ptr, ptr %a, i64
 ; CGSCC-LABEL: define {{[^@]+}}@t3_callback_callee
 ; CGSCC-SAME: (ptr nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(4) [[IS_NOT_NULL:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(4) [[PTR:%.*]], ptr align 256 captures(none) [[A:%.*]], i64 [[B:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(8) [[C:%.*]]) {
 ; CGSCC-NEXT:  entry:
-; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8
+; CGSCC-NEXT:    [[PTR_VAL:%.*]] = load i32, ptr [[PTR]], align 8, !invariant.load [[META0]]
 ; CGSCC-NEXT:    store i32 [[PTR_VAL]], ptr [[IS_NOT_NULL]], align 4
-; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64
+; CGSCC-NEXT:    [[TMP0:%.*]] = load ptr, ptr [[C]], align 64, !invariant.load [[META0]]
 ; CGSCC-NEXT:    tail call void @t3_check(ptr align 256 captures(none) [[A]], i64 noundef 99, ptr captures(none) [[TMP0]])
 ; CGSCC-NEXT:    ret void
 ;
@@ -311,11 +311,13 @@ declare !callback !0 void @t3_callback_broker(ptr nocapture , ptr nocapture , pt
 ;.
 ; CGSCC: attributes #[[ATTR0]] = { nosync }
 ;.
-; TUNIT: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; TUNIT: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
+; TUNIT: [[META0]] = !{}
+; TUNIT: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; TUNIT: [[META2]] = !{i64 2, i64 -1, i64 -1, i1 true}
 ;.
-; CGSCC: [[META0:![0-9]+]] = !{[[META1:![0-9]+]]}
-; CGSCC: [[META1]] = !{i64 2, i64 -1, i64 -1, i1 true}
+; CGSCC: [[META0]] = !{}
+; CGSCC: [[META1:![0-9]+]] = !{[[META2:![0-9]+]]}
+; CGSCC: [[META2]] = !{i64 2, i64 -1, i64 -1, i1 true}
 ;.
 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line:
 ; CHECK: {{.*}}
diff --git a/llvm/test/Transforms/Attributor/noalias.ll b/llvm/test/Transforms/Attributor/noalias.ll
index b7c295ae4179c..b7be8d115021c 100644
--- a/llvm/test/Transforms/Attributor/noalias.ll
+++ b/llvm/test/Transforms/Attributor/noalias.ll
@@ -550,13 +550,13 @@ define internal i32 @ret(ptr %arg) {
 ; TUNIT: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
 ; TUNIT-LABEL: define {{[^@]+}}@ret
 ; TUNIT-SAME: (ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[ARG:%.*]]) #[[ATTR6:[0-9]+]] {
-; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr [[ARG]], align 4
+; TUNIT-NEXT:    [[L:%.*]] = load i32, ptr [[ARG]], align 4, !invariant.load [[META0:![0-9]+]]
 ; TUNIT-NEXT:    ret i32 [[L]]
 ;
 ; CGSCC: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: read)
 ; CGSCC-LABEL: define {{[^@]+}}@ret
 ; CGSCC-SAME: (ptr nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[ARG:%.*]]) #[[ATTR7:[0-9]+]] {
-; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr [[ARG]], align 4
+; CGSCC-NEXT:    [[L:%.*]] = load i32, ptr [[ARG]], align 4, !invariant.load [[META0:![0-9]+]]
 ; CGSCC-NEXT:    ret i32 [[L]]
 ;
   %l = load i32, ptr %arg
@@ -970,3 +970,7 @@ l2:
 ; CGSCC: attributes #[[ATTR14]] = { nofree willreturn memory(readwrite) }
 ; CGSCC: attributes #[[ATTR15]] = { nofree nounwind willreturn memory(write) }
 ;.
+; TUNIT: [[META0]] = !{}
+;.
+; CGSCC: [[META0]] = !{}
+;.
diff --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
index 24bf4938ff2d4..96849b2ef5b09 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info-vec.ll
@@ -101,8 +101,8 @@ define i32 @vec_write_5(i32 %arg) {
 ; CHECK: Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none)
 ; CHECK-LABEL: define {{[^@]+}}@vec_write_5
 ; CHECK-SAME: (i32 [[ARG:%.*]]) #[[ATTR0]] {
-; CHECK-NEXT:    [[A:%.*]] = alloca <4 x i32>, align 16
-; CHECK-NEXT:    store i32 [[ARG]], ptr [[A]], align 16
+; CHECK-NEXT:    [[A1:%.*]] = alloca [12 x i8], align 16
+; CHECK-NEXT:    store i32 [[ARG]], ptr [[A1]], align 16
 ; CHECK-NEXT:    [[ADD1:%.*]] = add i32 [[ARG]], 5
 ; CHECK-NEXT:    [[ADD2:%.*]] = add i32 5, [[ADD1]]
 ; CHECK-NEXT:    ret i32 [[ADD2]]

>From 588ee0b163ff3d2bc1fbea5428927f06da7db864 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sat, 22 Nov 2025 20:59:34 -0500
Subject: [PATCH 13/14] change to size_t

---
 llvm/lib/Transforms/IPO/AttributorAttributes.cpp | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index a68c93989ddce..204e8ae82148b 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -75,6 +75,7 @@
 #include "llvm/Transforms/Utils/Local.h"
 #include "llvm/Transforms/Utils/ValueMapper.h"
 #include <cassert>
+#include <cstddef>
 #include <cstdint>
 #include <numeric>
 #include <optional>
@@ -13747,7 +13748,7 @@ struct AAAllocationInfoImpl : public AAAllocationInfo {
     int64_t CurrentClusterStart = get<0>(Intervals[0]);
     int64_t CurrentClusterEnd = get<1>(Intervals[0]);
 
-    for (uint I = 1; I < Intervals.size(); I++) {
+    for (size_t I = 1; I < Intervals.size(); I++) {
       if (get<0>(Intervals[I]) <= CurrentClusterEnd) {
         CurrentClusterEnd = std::max(CurrentClusterEnd, get<1>(Intervals[I]));
       } else {

>From b7b5275022ff4e0a5bd28576fc697dad4d78f8e0 Mon Sep 17 00:00:00 2001
From: Vidush Singhal <vidush.sl at gmail.com>
Date: Sat, 22 Nov 2025 22:40:51 -0500
Subject: [PATCH 14/14] add test for nested struct

---
 llvm/test/Transforms/Attributor/allocator.ll | 82 ++++++++++++++++++--
 1 file changed, 74 insertions(+), 8 deletions(-)

diff --git a/llvm/test/Transforms/Attributor/allocator.ll b/llvm/test/Transforms/Attributor/allocator.ll
index 918257ebf2493..44b4dbc95a322 100644
--- a/llvm/test/Transforms/Attributor/allocator.ll
+++ b/llvm/test/Transforms/Attributor/allocator.ll
@@ -11,11 +11,14 @@
 
 ; Function Attrs: noinline nounwind optnone uwtable
 ;.
-; CHECK: @.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
-; CHECK: @.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
+; TUNIT: @.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+; TUNIT: @.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
 ;.
-define dso_local void @accessBarFromFoo(ptr noundef %val) #0 {
-; CHECK-LABEL: define dso_local void @accessBarFromFoo
+; CGSCC: @.str = private unnamed_addr constant [17 x i8] c"The value is %d\0A\00", align 1
+; CGSCC: @.str.1 = private unnamed_addr constant [32 x i8] c"value of the first field is %d\0A\00", align 1
+;.
+define dso_local void @reduceNestedStruct(ptr noundef %val) #0 {
+; CHECK-LABEL: define dso_local void @reduceNestedStruct
 ; CHECK-SAME: (ptr nofree noundef readonly captures(none) [[VAL:%.*]]) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[VAL_ADDR:%.*]] = alloca ptr, align 8
@@ -109,6 +112,69 @@ entry:
   ret void
 }
 
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @nestedStruct() #0 {
+; CHECK-LABEL: define dso_local void @nestedStruct() {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B1:%.*]] = alloca [12 x i8], align 4
+; CHECK-NEXT:    [[F_COERCE:%.*]] = alloca { i64, i8 }, align 4
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [12 x i8], ptr [[B1]], i64 0
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(12) [[F_COERCE]], ptr nofree nonnull readonly align 4 captures(none) dereferenceable(12) [[NEWGEP]], i64 noundef 12, i1 noundef false) #[[ATTR3]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i64, ptr [[F_COERCE]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr inbounds nuw { i64, i8 }, ptr [[F_COERCE]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, ptr [[TMP1]], align 4
+; CHECK-NEXT:    call void @nestedStructAccessfoo(i64 [[TMP0]], i8 [[TMP2]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b = alloca %struct.Bar, align 4
+  %f.coerce = alloca { i64, i8 }, align 4
+  %f = getelementptr inbounds nuw %struct.Bar, ptr %b, i32 0, i32 2
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %f.coerce, ptr align 4 %f, i64 12, i1 false)
+  %0 = getelementptr inbounds nuw { i64, i8 }, ptr %f.coerce, i32 0, i32 0
+  %1 = load i64, ptr %0, align 4
+  %2 = getelementptr inbounds nuw { i64, i8 }, ptr %f.coerce, i32 0, i32 1
+  %3 = load i8, ptr %2, align 4
+  call void @nestedStructAccessfoo(i64 %1, i8 %3)
+  ret void
+}
+
+; Function Attrs: noinline nounwind optnone uwtable
+define dso_local void @nestedStructAccessfoo(i64 %f.coerce0, i8 %f.coerce1) #0 {
+; CHECK-LABEL: define dso_local void @nestedStructAccessfoo
+; CHECK-SAME: (i64 [[F_COERCE0:%.*]], i8 [[F_COERCE1:%.*]]) {
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[F:%.*]] = alloca [[STRUCT_FOO:%.*]], align 4
+; CHECK-NEXT:    [[COERCE:%.*]] = alloca { i64, i8 }, align 4
+; CHECK-NEXT:    store i64 [[F_COERCE0]], ptr [[COERCE]], align 4
+; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr inbounds nuw { i64, i8 }, ptr [[COERCE]], i32 0, i32 1
+; CHECK-NEXT:    store i8 [[F_COERCE1]], ptr [[TMP0]], align 4
+; CHECK-NEXT:    call void @llvm.memcpy.p0.p0.i64(ptr noalias nofree noundef nonnull writeonly align 4 captures(none) dereferenceable(12) [[F]], ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(12) [[COERCE]], i64 noundef 12, i1 noundef false) #[[ATTR3]]
+; CHECK-NEXT:    [[C:%.*]] = getelementptr inbounds nuw [[STRUCT_FOO]], ptr [[F]], i32 0, i32 2
+; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef 10)
+; CHECK-NEXT:    ret void
+;
+entry:
+  %f = alloca %struct.Foo, align 4
+  %coerce = alloca { i64, i8 }, align 4
+  %0 = getelementptr inbounds nuw { i64, i8 }, ptr %coerce, i32 0, i32 0
+  store i64 %f.coerce0, ptr %0, align 4
+  %1 = getelementptr inbounds nuw { i64, i8 }, ptr %coerce, i32 0, i32 1
+  store i8 %f.coerce1, ptr %1, align 4
+  call void @llvm.memcpy.p0.p0.i64(ptr align 4 %f, ptr align 4 %coerce, i64 12, i1 false)
+  %a = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 0
+  store i32 10, ptr %a, align 4
+  %c = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 2
+  store i8 10, ptr %c, align 4
+  %c1 = getelementptr inbounds nuw %struct.Foo, ptr %f, i32 0, i32 2
+  %2 = load i8, ptr %c1, align 4
+  %conv = sext i8 %2 to i32
+  %call = call i32 (ptr, ...) @printf(ptr noundef @.str, i32 noundef %conv)
+  ret void
+}
+
+
 ; TODO: change malloc like call
 ; Function Attrs: noinline nounwind uwtable
 define dso_local void @positive_malloc_1(ptr noundef %val) #0 {
@@ -241,11 +307,11 @@ define dso_local void @positive_test_not_a_single_start_offset(i32 noundef %val)
 ; CHECK-NEXT:    store i32 [[MUL]], ptr [[F1]], align 4
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[F1]], align 4
 ; CHECK-NEXT:    [[CALL:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[TMP0]])
-; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
-; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP]], align 4
 ; CHECK-NEXT:    [[NEWGEP2:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
-; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[CONV1:%.*]] = trunc i32 [[TMP0]] to i8
+; CHECK-NEXT:    store i8 [[CONV1]], ptr [[NEWGEP2]], align 4
+; CHECK-NEXT:    [[NEWGEP:%.*]] = getelementptr [5 x i8], ptr [[F1]], i64 4
+; CHECK-NEXT:    [[TMP1:%.*]] = load i8, ptr [[NEWGEP]], align 4
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i8 [[TMP1]] to i32
 ; CHECK-NEXT:    [[CALL3:%.*]] = call i32 (ptr, ...) @printf(ptr noundef nonnull dereferenceable(17) @.str, i32 noundef [[CONV]])
 ; CHECK-NEXT:    ret void



More information about the llvm-commits mailing list