[llvm] c2281f1 - [Attributor] Introduce AAPointerInfo

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Mon Jul 19 20:50:48 PDT 2021


Author: Johannes Doerfert
Date: 2021-07-19T22:48:35-05:00
New Revision: c2281f15659a0b1122e140d2419c536d090224b9

URL: https://github.com/llvm/llvm-project/commit/c2281f15659a0b1122e140d2419c536d090224b9
DIFF: https://github.com/llvm/llvm-project/commit/c2281f15659a0b1122e140d2419c536d090224b9.diff

LOG: [Attributor] Introduce AAPointerInfo

This patch introduces AAPointerInfo which tracks the uses of a pointer
and places them in "bins" based on their offset from the base and access
size.

As with other AAs, any pointer can be tracked but it is up to the user
to make sense of the results. The user in this patch is AAValueSimplify
and AAPotentialValues which both utilize AAPointerInfo to determine the
value of a load. For now, this is restricted to loads of allocas and
internal globals. Through the use of AAPointerInfo and the "bins" we can
track struct members separately. The users also know that storing only
zeros (at unknown indices) will result in loading only 0 (from unknown
indices). Other than that, the users are flow and context insensitive
(for now).

To deal with the "bins" more easily, AAPointerInfo provides a
forallInterfearingAccesses that applies a callback on all accesses
that might interfere with a given load or store.

Differential Revision: https://reviews.llvm.org/D104432

Added: 
    

Modified: 
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
    llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
    llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
    llvm/test/Transforms/Attributor/IPConstantProp/global.ll
    llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
    llvm/test/Transforms/Attributor/heap_to_stack.ll
    llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
    llvm/test/Transforms/Attributor/internal-noalias.ll
    llvm/test/Transforms/Attributor/memory_locations.ll
    llvm/test/Transforms/Attributor/misc_crash.ll
    llvm/test/Transforms/Attributor/nodelete.ll
    llvm/test/Transforms/Attributor/norecurse.ll
    llvm/test/Transforms/Attributor/noreturn_sync.ll
    llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
    llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
    llvm/test/Transforms/Attributor/wrapper.ll
    llvm/test/Transforms/OpenMP/custom_state_machines.ll
    llvm/test/Transforms/OpenMP/parallel_region_merging.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index 501c2af35b429..3577f96e74610 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -1569,6 +1569,7 @@ struct Attributor {
   /// associated value and return true if \p Pred holds every time.
   bool checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                        const AbstractAttribute &QueryingAA, const Value &V,
+                       bool CheckBBLivenessOnly = false,
                        DepClassTy LivenessDepClass = DepClassTy::OPTIONAL);
 
   /// Emit a remark generically.
@@ -4374,6 +4375,136 @@ struct AAFunctionReachability
   virtual bool canReachUnknownCallee() const = 0;
 };
 
+/// An abstract interface for struct information.
+struct AAPointerInfo : public AbstractAttribute {
+  AAPointerInfo(const IRPosition &IRP) : AbstractAttribute(IRP) {}
+
+  enum AccessKind {
+    AK_READ = 1 << 0,
+    AK_WRITE = 1 << 1,
+    AK_READ_WRITE = AK_READ | AK_WRITE,
+  };
+
+  /// An access description.
+  struct Access {
+    Access(Instruction *I, Optional<Value *> Content, AccessKind Kind, Type *Ty)
+        : LocalI(I), RemoteI(I), Content(Content), Kind(Kind), Ty(Ty) {}
+    Access(Instruction *LocalI, Instruction *RemoteI, Optional<Value *> Content,
+           AccessKind Kind, Type *Ty)
+        : LocalI(LocalI), RemoteI(RemoteI), Content(Content), Kind(Kind),
+          Ty(Ty) {}
+    Access(const Access &Other)
+        : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
+          Kind(Other.Kind), Ty(Other.Ty) {}
+    Access(const Access &&Other)
+        : LocalI(Other.LocalI), RemoteI(Other.RemoteI), Content(Other.Content),
+          Kind(Other.Kind), Ty(Other.Ty) {}
+
+    Access &operator=(const Access &Other) {
+      LocalI = Other.LocalI;
+      RemoteI = Other.RemoteI;
+      Content = Other.Content;
+      Kind = Other.Kind;
+      return *this;
+    }
+    bool operator==(const Access &R) const {
+      return LocalI == R.LocalI && RemoteI == R.RemoteI &&
+             Content == R.Content && Kind == R.Kind;
+    }
+    bool operator!=(const Access &R) const { return !(*this == R); }
+
+    Access &operator&=(const Access &R) {
+      assert(RemoteI == R.RemoteI && "Expected same instruction!");
+      Content =
+          AA::combineOptionalValuesInAAValueLatice(Content, R.Content, Ty);
+      Kind = AccessKind(Kind | R.Kind);
+      return *this;
+    }
+
+    /// Return the access kind.
+    AccessKind getKind() const { return Kind; }
+
+    /// Return true if this is a read access.
+    bool isRead() const { return Kind & AK_READ; }
+
+    /// Return true if this is a write access.
+    bool isWrite() const { return Kind & AK_WRITE; }
+
+    /// Return the instruction that causes the access with respect to the local
+    /// scope of the associated attribute.
+    Instruction *getLocalInst() const { return LocalI; }
+
+    /// Return the actual instruction that causes the access.
+    Instruction *getRemoteInst() const { return RemoteI; }
+
+    /// Return true if the value written is not known yet.
+    bool isWrittenValueYetUndetermined() const { return !Content.hasValue(); }
+
+    /// Return true if the value written cannot be determined at all.
+    bool isWrittenValueUnknown() const {
+      return Content.hasValue() && !*Content;
+    }
+
+    /// Return the type associated with the access, if known.
+    Type *getType() const { return Ty; }
+
+    /// Return the value writen, if any. As long as
+    /// isWrittenValueYetUndetermined return true this function shall not be
+    /// called.
+    Value *getWrittenValue() const { return *Content; }
+
+    /// Return the written value which can be `llvm::null` if it is not yet
+    /// determined.
+    Optional<Value *> getContent() const { return Content; }
+
+  private:
+    /// The instruction responsible for the access with respect to the local
+    /// scope of the associated attribute.
+    Instruction *LocalI;
+
+    /// The instruction responsible for the access.
+    Instruction *RemoteI;
+
+    /// The value written, if any. `llvm::none` means "not known yet", `nullptr`
+    /// cannot be determined.
+    Optional<Value *> Content;
+
+    /// The access kind, e.g., READ, as bitset (could be more than one).
+    AccessKind Kind;
+
+    /// The type of the content, thus the type read/written, can be null if not
+    /// available.
+    Type *Ty;
+  };
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPointerInfo &createForPosition(const IRPosition &IRP, Attributor &A);
+
+  /// See AbstractAttribute::getName()
+  const std::string getName() const override { return "AAPointerInfo"; }
+
+  /// See AbstractAttribute::getIdAddr()
+  const char *getIdAddr() const override { return &ID; }
+
+  /// Call \p CB on all accesses that might interfere with \p LI and return true
+  /// if all such accesses were known and the callback returned true for all of
+  /// them, false otherwise.
+  virtual bool forallInterfearingAccesses(
+      LoadInst &LI, function_ref<bool(const Access &, bool)> CB) const = 0;
+  virtual bool forallInterfearingAccesses(
+      StoreInst &SI, function_ref<bool(const Access &, bool)> CB) const = 0;
+
+  /// This function should return true if the type of the \p AA is AAPointerInfo
+  static bool classof(const AbstractAttribute *AA) {
+    return (AA->getIdAddr() == &ID);
+  }
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
+raw_ostream &operator<<(raw_ostream &, const AAPointerInfo::Access &);
+
 /// Run options, used by the pass manager.
 enum AttributorRunOption {
   NONE = 0,

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index 1260f6c5293da..3ec75bf4a6dfc 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -155,6 +155,11 @@ static cl::opt<bool>
                    cl::desc("Print Attributor's internal call graph"),
                    cl::init(false));
 
+static cl::opt<bool> SimplifyAllLoads("attributor-simplify-all-loads",
+                                      cl::Hidden,
+                                      cl::desc("Try to simplify all loads."),
+                                      cl::init(true));
+
 /// Logic operators for the change status enum class.
 ///
 ///{
@@ -872,7 +877,9 @@ bool Attributor::isAssumedDead(const IRPosition &IRP,
 
 bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                                  const AbstractAttribute &QueryingAA,
-                                 const Value &V, DepClassTy LivenessDepClass) {
+                                 const Value &V,
+                                 bool CheckBBLivenessOnly,
+                                 DepClassTy LivenessDepClass) {
 
   // Check the trivial case first as it catches void values.
   if (V.use_empty())
@@ -902,7 +909,7 @@ bool Attributor::checkForAllUses(function_ref<bool(const Use &, bool &)> Pred,
                       << *U->getUser() << "\n");
     bool UsedAssumedInformation = false;
     if (isAssumedDead(*U, &QueryingAA, LivenessAA, UsedAssumedInformation,
-                      /* CheckBBLivenessOnly */ false, LivenessDepClass)) {
+                      CheckBBLivenessOnly, LivenessDepClass)) {
       LLVM_DEBUG(dbgs() << "[Attributor] Dead use, skip!\n");
       continue;
     }
@@ -2448,10 +2455,12 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
   assert(Success && "Expected the check call to be successful!");
 
   auto LoadStorePred = [&](Instruction &I) -> bool {
-    if (isa<LoadInst>(I))
+    if (isa<LoadInst>(I)) {
       getOrCreateAAFor<AAAlign>(
           IRPosition::value(*cast<LoadInst>(I).getPointerOperand()));
-    else
+      if (SimplifyAllLoads)
+        getOrCreateAAFor<AAValueSimplify>(IRPosition::value(I));
+    } else
       getOrCreateAAFor<AAAlign>(
           IRPosition::value(*cast<StoreInst>(I).getPointerOperand()));
     return true;
@@ -2565,6 +2574,13 @@ void AbstractAttribute::printWithDeps(raw_ostream &OS) const {
 
   OS << '\n';
 }
+
+raw_ostream &llvm::operator<<(raw_ostream &OS, const AAPointerInfo::Access &Acc) {
+  OS << " ["<< Acc.getKind() << "] " << *Acc.getRemoteInst();
+  if (Acc.getLocalInst() != Acc.getRemoteInst())
+    OS << " via " << *Acc.getLocalInst() << "\n";
+  return OS;
+}
 ///}
 
 /// ----------------------------------------------------------------------------

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 05d4c7d5c228e..5e52593d420f8 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -143,6 +143,7 @@ PIPE_OPERATOR(AAPotentialValues)
 PIPE_OPERATOR(AANoUndef)
 PIPE_OPERATOR(AACallEdges)
 PIPE_OPERATOR(AAFunctionReachability)
+PIPE_OPERATOR(AAPointerInfo)
 
 #undef PIPE_OPERATOR
 } // namespace llvm
@@ -768,6 +769,616 @@ static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
   }
 }
 
+/// ------------------------ PointerInfo ---------------------------------------
+
+namespace llvm {
+namespace AA {
+namespace PointerInfo {
+
+/// An access kind description as used by AAPointerInfo.
+struct OffsetAndSize;
+
+struct State;
+
+} // namespace PointerInfo
+} // namespace AA
+
+/// Helper for AA::PointerInfo::Acccess DenseMap/Set usage.
+template <>
+struct DenseMapInfo<AAPointerInfo::Access> : DenseMapInfo<Instruction *> {
+  using Access = AAPointerInfo::Access;
+  static inline Access getEmptyKey();
+  static inline Access getTombstoneKey();
+  static unsigned getHashValue(const Access &A);
+  static bool isEqual(const Access &LHS, const Access &RHS);
+};
+
+/// Helper that allows OffsetAndSize as a key in a DenseMap.
+template <>
+struct DenseMapInfo<AA::PointerInfo ::OffsetAndSize>
+    : DenseMapInfo<std::pair<int64_t, int64_t>> {};
+
+/// Helper for AA::PointerInfo::Acccess DenseMap/Set usage ignoring everythign
+/// but the instruction
+struct AccessAsInstructionInfo : DenseMapInfo<Instruction *> {
+  using Base = DenseMapInfo<Instruction *>;
+  using Access = AAPointerInfo::Access;
+  static inline Access getEmptyKey();
+  static inline Access getTombstoneKey();
+  static unsigned getHashValue(const Access &A);
+  static bool isEqual(const Access &LHS, const Access &RHS);
+};
+
+} // namespace llvm
+
+/// Helper to represent an access offset and size, with logic to deal with
+/// uncertainty and check for overlapping accesses.
+struct AA::PointerInfo::OffsetAndSize : public std::pair<int64_t, int64_t> {
+  using BaseTy = std::pair<int64_t, int64_t>;
+  OffsetAndSize(int64_t Offset, int64_t Size) : BaseTy(Offset, Size) {}
+  OffsetAndSize(const BaseTy &P) : BaseTy(P) {}
+  int64_t getOffset() const { return first; }
+  int64_t getSize() const { return second; }
+  static OffsetAndSize getUnknown() { return OffsetAndSize(Unknown, Unknown); }
+
+  /// Return true if this offset and size pair might describe an address that
+  /// overlaps with \p OAS.
+  bool mayOverlap(const OffsetAndSize &OAS) const {
+    // Any unknown value and we are giving up -> overlap.
+    if (OAS.getOffset() == OffsetAndSize::Unknown ||
+        OAS.getSize() == OffsetAndSize::Unknown ||
+        getOffset() == OffsetAndSize::Unknown ||
+        getSize() == OffsetAndSize::Unknown)
+      return true;
+
+    // Check if one offset point is in the other interval [offset, offset+size].
+    return OAS.getOffset() + OAS.getSize() > getOffset() &&
+           OAS.getOffset() < getOffset() + getSize();
+  }
+
+  /// Constant used to represent unknown offset or sizes.
+  static constexpr int64_t Unknown = 1 << 31;
+};
+
+/// Implementation of the DenseMapInfo.
+///
+///{
+inline llvm::AccessAsInstructionInfo::Access
+llvm::AccessAsInstructionInfo::getEmptyKey() {
+  return Access(Base::getEmptyKey(), nullptr, AAPointerInfo::AK_READ, nullptr);
+}
+inline llvm::AccessAsInstructionInfo::Access
+llvm::AccessAsInstructionInfo::getTombstoneKey() {
+  return Access(Base::getTombstoneKey(), nullptr, AAPointerInfo::AK_READ,
+                nullptr);
+}
+unsigned llvm::AccessAsInstructionInfo::getHashValue(
+    const llvm::AccessAsInstructionInfo::Access &A) {
+  return Base::getHashValue(A.getRemoteInst());
+}
+bool llvm::AccessAsInstructionInfo::isEqual(
+    const llvm::AccessAsInstructionInfo::Access &LHS,
+    const llvm::AccessAsInstructionInfo::Access &RHS) {
+  return LHS.getRemoteInst() == RHS.getRemoteInst();
+}
+inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
+llvm::DenseMapInfo<AAPointerInfo::Access>::getEmptyKey() {
+  return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_READ,
+                               nullptr);
+}
+inline llvm::DenseMapInfo<AAPointerInfo::Access>::Access
+llvm::DenseMapInfo<AAPointerInfo::Access>::getTombstoneKey() {
+  return AAPointerInfo::Access(nullptr, nullptr, AAPointerInfo::AK_WRITE,
+                               nullptr);
+}
+
+unsigned llvm::DenseMapInfo<AAPointerInfo::Access>::getHashValue(
+    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &A) {
+  return detail::combineHashValue(
+             DenseMapInfo<Instruction *>::getHashValue(A.getRemoteInst()),
+             (A.isWrittenValueYetUndetermined()
+                  ? ~0
+                  : DenseMapInfo<Value *>::getHashValue(A.getWrittenValue()))) +
+         A.getKind();
+}
+
+bool llvm::DenseMapInfo<AAPointerInfo::Access>::isEqual(
+    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &LHS,
+    const llvm::DenseMapInfo<AAPointerInfo::Access>::Access &RHS) {
+  return LHS == RHS;
+}
+///}
+
+/// A type to track pointer/struct usage and accesses for AAPointerInfo.
+struct AA::PointerInfo::State : public AbstractState {
+
+  /// Return the best possible representable state.
+  static State getBestState(const State &SIS) { return State(); }
+
+  /// Return the worst possible representable state.
+  static State getWorstState(const State &SIS) {
+    State R;
+    R.indicatePessimisticFixpoint();
+    return R;
+  }
+
+  State() {}
+  State(const State &SIS) : AccessBins(SIS.AccessBins) {}
+  State(State &&SIS) : AccessBins(std::move(SIS.AccessBins)) {}
+
+  const State &getAssumed() const { return *this; }
+
+  /// See AbstractState::isValidState().
+  bool isValidState() const override { return BS.isValidState(); }
+
+  /// See AbstractState::isAtFixpoint().
+  bool isAtFixpoint() const override { return BS.isAtFixpoint(); }
+
+  /// See AbstractState::indicateOptimisticFixpoint().
+  ChangeStatus indicateOptimisticFixpoint() override {
+    BS.indicateOptimisticFixpoint();
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractState::indicatePessimisticFixpoint().
+  ChangeStatus indicatePessimisticFixpoint() override {
+    BS.indicatePessimisticFixpoint();
+    return ChangeStatus::CHANGED;
+  }
+
+  State &operator=(const State &R) {
+    if (this == &R)
+      return *this;
+    BS = R.BS;
+    AccessBins = R.AccessBins;
+    return *this;
+  }
+
+  State &operator=(State &&R) {
+    if (this == &R)
+      return *this;
+    std::swap(BS, R.BS);
+    std::swap(AccessBins, R.AccessBins);
+    return *this;
+  }
+
+  bool operator==(const State &R) const {
+    if (BS != R.BS)
+      return false;
+    if (AccessBins.size() != R.AccessBins.size())
+      return false;
+    auto It = begin(), RIt = R.begin(), E = end();
+    while (It != E) {
+      if (It->getFirst() != RIt->getFirst())
+        return false;
+      auto &Accs = It->getSecond();
+      auto &RAccs = RIt->getSecond();
+      if (Accs.size() != RAccs.size())
+        return false;
+      auto AccIt = Accs.begin(), RAccIt = RAccs.begin(), AccE = Accs.end();
+      while (AccIt != AccE) {
+        if (*AccIt != *RAccIt)
+          return false;
+        ++AccIt;
+        ++RAccIt;
+      }
+      ++It;
+      ++RIt;
+    }
+    return true;
+  }
+  bool operator!=(const State &R) const { return !(*this == R); }
+
+  /// We store accesses in a set with the instruction as key.
+  using Accesses = DenseSet<AAPointerInfo::Access, AccessAsInstructionInfo>;
+
+  /// We store all accesses in bins denoted by their offset and size.
+  using AccessBinsTy = DenseMap<OffsetAndSize, Accesses>;
+
+  AccessBinsTy::const_iterator begin() const { return AccessBins.begin(); }
+  AccessBinsTy::const_iterator end() const { return AccessBins.end(); }
+
+protected:
+  /// The bins with all the accesses for the associated pointer.
+  DenseMap<OffsetAndSize, Accesses> AccessBins;
+
+  /// Add a new access to the state at offset \p Offset and with size \p Size.
+  /// The access is associated with \p I, writes \p Content (if anything), and
+  /// is of kind \p Kind.
+  /// \Returns CHANGED, if the state changed, UNCHANGED otherwise.
+  ChangeStatus addAccess(int64_t Offset, int64_t Size, Instruction &I,
+                         Optional<Value *> Content,
+                         AAPointerInfo::AccessKind Kind, Type *Ty,
+                         Instruction *RemoteI = nullptr,
+                         Accesses *BinPtr = nullptr) {
+    OffsetAndSize Key{Offset, Size};
+    Accesses &Bin = BinPtr ? *BinPtr : AccessBins[Key];
+    AAPointerInfo::Access Acc(&I, RemoteI ? RemoteI : &I, Content, Kind, Ty);
+    // Check if we have an access for this instruction in this bin, if not,
+    // simply add it.
+    auto It = Bin.find(Acc);
+    if (It == Bin.end()) {
+      Bin.insert(Acc);
+      return ChangeStatus::CHANGED;
+    }
+    // If the existing access is the same as then new one, nothing changed.
+    AAPointerInfo::Access Before = *It;
+    // The new one will be combined with the existing one.
+    *It &= Acc;
+    return *It == Before ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+  }
+
+  /// See AAPointerInfo::forallInterfearingAccesses.
+  bool forallInterfearingAccesses(
+      Instruction &I,
+      function_ref<bool(const AAPointerInfo::Access &, bool)> CB) const {
+    if (!isValidState())
+      return false;
+    // First find the offset and size of I.
+    OffsetAndSize OAS(-1, -1);
+    for (auto &It : AccessBins) {
+      for (auto &Access : It.getSecond()) {
+        if (Access.getRemoteInst() == &I) {
+          OAS = It.getFirst();
+          break;
+        }
+      }
+      if (OAS.getSize() != -1)
+        break;
+    }
+    if (OAS.getSize() == -1)
+      return true;
+
+    // Now that we have an offset and size, find all overlapping ones and use
+    // the callback on the accesses.
+    for (auto &It : AccessBins) {
+      OffsetAndSize ItOAS = It.getFirst();
+      if (!OAS.mayOverlap(ItOAS))
+        continue;
+      for (auto &Access : It.getSecond())
+        if (!CB(Access, OAS == ItOAS))
+          return false;
+    }
+    return true;
+  }
+
+private:
+  /// State to track fixpoint and validity.
+  BooleanState BS;
+};
+
+struct AAPointerInfoImpl
+    : public StateWrapper<AA::PointerInfo::State, AAPointerInfo> {
+  using BaseTy = StateWrapper<AA::PointerInfo::State, AAPointerInfo>;
+  AAPointerInfoImpl(const IRPosition &IRP, Attributor &A) : BaseTy(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override { AAPointerInfo::initialize(A); }
+
+  /// See AbstractAttribute::getAsStr().
+  const std::string getAsStr() const override {
+    return std::string("PointerInfo ") +
+           (isValidState() ? (std::string("#") +
+                              std::to_string(AccessBins.size()) + " bins")
+                           : "<invalid>");
+  }
+
+  /// See AbstractAttribute::manifest(...).
+  ChangeStatus manifest(Attributor &A) override {
+    return AAPointerInfo::manifest(A);
+  }
+
+  bool forallInterfearingAccesses(
+      LoadInst &LI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
+      const override {
+    return State::forallInterfearingAccesses(LI, CB);
+  }
+  bool forallInterfearingAccesses(
+      StoreInst &SI, function_ref<bool(const AAPointerInfo::Access &, bool)> CB)
+      const override {
+    return State::forallInterfearingAccesses(SI, CB);
+  }
+
+  ChangeStatus translateAndAddCalleeState(Attributor &A,
+                                          const AAPointerInfo &CalleeAA,
+                                          int64_t CallArgOffset, CallBase &CB) {
+    using namespace AA::PointerInfo;
+    if (!CalleeAA.getState().isValidState() || !isValidState())
+      return indicatePessimisticFixpoint();
+
+    const auto &CalleeImplAA = static_cast<const AAPointerInfoImpl &>(CalleeAA);
+    bool IsByval = CalleeImplAA.getAssociatedArgument()->hasByValAttr();
+
+    // Combine the accesses bin by bin.
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+    for (auto &It : CalleeImplAA.getState()) {
+      OffsetAndSize OAS = OffsetAndSize::getUnknown();
+      if (CallArgOffset != OffsetAndSize::Unknown)
+        OAS = OffsetAndSize(It.first.getOffset() + CallArgOffset,
+                            It.first.getSize());
+      Accesses &Bin = AccessBins[OAS];
+      for (const AAPointerInfo::Access &RAcc : It.second) {
+        if (IsByval && !RAcc.isRead())
+          continue;
+        bool UsedAssumedInformation = false;
+        Optional<Value *> Content = A.translateArgumentToCallSiteContent(
+            RAcc.getContent(), CB, *this, UsedAssumedInformation);
+        AccessKind AK =
+            AccessKind(RAcc.getKind() & (IsByval ? AccessKind::AK_READ
+                                                 : AccessKind::AK_READ_WRITE));
+        Changed =
+            Changed | addAccess(OAS.getOffset(), OAS.getSize(), CB, Content, AK,
+                                RAcc.getType(), RAcc.getRemoteInst(), &Bin);
+      }
+    }
+    return Changed;
+  }
+
+  /// Statistic tracking for all AAPointerInfo implementations.
+  /// See AbstractAttribute::trackStatistics().
+  void trackPointerInfoStatistics(const IRPosition &IRP) const {}
+};
+
+struct AAPointerInfoFloating : public AAPointerInfoImpl {
+  using AccessKind = AAPointerInfo::AccessKind;
+  AAPointerInfoFloating(const IRPosition &IRP, Attributor &A)
+      : AAPointerInfoImpl(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override { AAPointerInfoImpl::initialize(A); }
+
+  /// Deal with an access and signal if it was handled successfully.
+  bool handleAccess(Attributor &A, Instruction &I, Value &Ptr,
+                    Optional<Value *> Content, AccessKind Kind, int64_t Offset,
+                    ChangeStatus &Changed, Type *Ty,
+                    int64_t Size = AA::PointerInfo::OffsetAndSize::Unknown) {
+    using namespace AA::PointerInfo;
+    // No need to find a size if one is given or the offset is unknown.
+    if (Offset != OffsetAndSize::Unknown && Size == OffsetAndSize::Unknown &&
+        Ty) {
+      const DataLayout &DL = A.getDataLayout();
+      TypeSize AccessSize = DL.getTypeStoreSize(Ty);
+      if (!AccessSize.isScalable())
+        Size = AccessSize.getFixedSize();
+    }
+    Changed = Changed | addAccess(Offset, Size, I, Content, Kind, Ty);
+    return true;
+  };
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    using namespace AA::PointerInfo;
+    State S = getState();
+    ChangeStatus Changed = ChangeStatus::UNCHANGED;
+    Value &AssociatedValue = getAssociatedValue();
+    struct OffsetInfo {
+      int64_t Offset = 0;
+    };
+
+    const DataLayout &DL = A.getDataLayout();
+    DenseMap<Value *, OffsetInfo> OffsetInfoMap;
+    OffsetInfoMap[&AssociatedValue] = {};
+
+    auto HandlePassthroughUser = [&](Value *Usr, OffsetInfo &PtrOI,
+                                     bool &Follow) {
+      OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+      UsrOI = PtrOI;
+      Follow = true;
+      return true;
+    };
+
+    auto UsePred = [&](const Use &U, bool &Follow) -> bool {
+      Value *CurPtr = U.get();
+      User *Usr = U.getUser();
+      LLVM_DEBUG(dbgs() << "[AAPointerInfo] Analyze " << *CurPtr << " in "
+                        << *Usr << "\n");
+
+      OffsetInfo &PtrOI = OffsetInfoMap[CurPtr];
+
+      if (ConstantExpr *CE = dyn_cast<ConstantExpr>(Usr)) {
+        if (CE->isCast())
+          return HandlePassthroughUser(Usr, PtrOI, Follow);
+        if (CE->isCompare())
+          return true;
+        if (!CE->isGEPWithNoNotionalOverIndexing()) {
+          LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled constant user " << *CE
+                            << "\n");
+          return false;
+        }
+      }
+      if (auto *GEP = dyn_cast<GEPOperator>(Usr)) {
+        OffsetInfo &UsrOI = OffsetInfoMap[Usr];
+        UsrOI = PtrOI;
+
+        // TODO: Use range information.
+        if (PtrOI.Offset == OffsetAndSize::Unknown ||
+            !GEP->hasAllConstantIndices()) {
+          UsrOI.Offset = OffsetAndSize::Unknown;
+          Follow = true;
+          return true;
+        }
+
+        SmallVector<Value *, 8> Indices;
+        for (Use &Idx : llvm::make_range(GEP->idx_begin(), GEP->idx_end())) {
+          if (auto *CIdx = dyn_cast<ConstantInt>(Idx)) {
+            Indices.push_back(CIdx);
+            continue;
+          }
+
+          LLVM_DEBUG(dbgs() << "[AAPointerInfo] Non constant GEP index " << *GEP
+                            << " : " << *Idx << "\n");
+          return false;
+        }
+        UsrOI.Offset = PtrOI.Offset +
+                       DL.getIndexedOffsetInType(
+                           CurPtr->getType()->getPointerElementType(), Indices);
+        Follow = true;
+        return true;
+      }
+      if (isa<CastInst>(Usr) || isa<PHINode>(Usr) || isa<SelectInst>(Usr))
+        return HandlePassthroughUser(Usr, PtrOI, Follow);
+      if (auto *LoadI = dyn_cast<LoadInst>(Usr))
+        return handleAccess(A, *LoadI, *CurPtr, /* Content */ nullptr,
+                            AccessKind::AK_READ, PtrOI.Offset, Changed,
+                            LoadI->getType());
+      if (auto *StoreI = dyn_cast<StoreInst>(Usr)) {
+        if (StoreI->getValueOperand() == CurPtr) {
+          LLVM_DEBUG(dbgs() << "[AAPointerInfo] Escaping use in store "
+                            << *StoreI << "\n");
+          return false;
+        }
+        bool UsedAssumedInformation = false;
+        Optional<Value *> Content = A.getAssumedSimplified(
+            *StoreI->getValueOperand(), *this, UsedAssumedInformation);
+        return handleAccess(A, *StoreI, *CurPtr, Content, AccessKind::AK_WRITE,
+                            PtrOI.Offset, Changed,
+                            StoreI->getValueOperand()->getType());
+      }
+      if (auto *CB = dyn_cast<CallBase>(Usr)) {
+        if (CB->isLifetimeStartOrEnd())
+          return true;
+        if (CB->isArgOperand(&U)) {
+          unsigned ArgNo = CB->getArgOperandNo(&U);
+          const auto &CSArgPI = A.getAAFor<AAPointerInfo>(
+              *this, IRPosition::callsite_argument(*CB, ArgNo),
+              DepClassTy::REQUIRED);
+          Changed = translateAndAddCalleeState(A, CSArgPI, PtrOI.Offset, *CB) |
+                    Changed;
+          return true;
+        }
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] Call user not handled " << *CB
+                          << "\n");
+        // TODO: Allow some call uses
+        return false;
+      }
+
+      LLVM_DEBUG(dbgs() << "[AAPointerInfo] User not handled " << *Usr << "\n");
+      return false;
+    };
+    if (!A.checkForAllUses(UsePred, *this, AssociatedValue,
+                           /* CheckBBLivenessOnly */ true))
+      return indicatePessimisticFixpoint();
+
+    LLVM_DEBUG({
+      dbgs() << "Accesses by bin after update:\n";
+      for (auto &It : AccessBins) {
+        dbgs() << "[" << It.first.getOffset() << "-"
+               << It.first.getOffset() + It.first.getSize()
+               << "] : " << It.getSecond().size() << "\n";
+        for (auto &Acc : It.getSecond()) {
+          dbgs() << "     - " << Acc.getKind() << " - " << *Acc.getLocalInst()
+                 << "\n";
+          if (Acc.getLocalInst() != Acc.getRemoteInst())
+            dbgs() << "     -->                         "
+                   << *Acc.getRemoteInst() << "\n";
+          if (!Acc.isWrittenValueYetUndetermined())
+            dbgs() << "     - " << Acc.getWrittenValue() << "\n";
+        }
+      }
+    });
+
+    return Changed;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
+  }
+};
+
+struct AAPointerInfoReturned final : AAPointerInfoImpl {
+  AAPointerInfoReturned(const IRPosition &IRP, Attributor &A)
+      : AAPointerInfoImpl(IRP, A) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    return indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
+  }
+};
+
+struct AAPointerInfoArgument final : AAPointerInfoFloating {
+  AAPointerInfoArgument(const IRPosition &IRP, Attributor &A)
+      : AAPointerInfoFloating(IRP, A) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    AAPointerInfoFloating::initialize(A);
+    if (getAnchorScope()->isDeclaration())
+      indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
+  }
+};
+
+struct AAPointerInfoCallSiteArgument final : AAPointerInfoFloating {
+  AAPointerInfoCallSiteArgument(const IRPosition &IRP, Attributor &A)
+      : AAPointerInfoFloating(IRP, A) {}
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    using namespace AA::PointerInfo;
+    // We handle memory intrinsics explicitly, at least the first (=
+    // destination) and second (=source) arguments as we know how they are
+    // accessed.
+    if (auto *MI = dyn_cast_or_null<MemIntrinsic>(getCtxI())) {
+      ConstantInt *Length = dyn_cast<ConstantInt>(MI->getLength());
+      int64_t LengthVal = OffsetAndSize::Unknown;
+      if (Length)
+        LengthVal = Length->getSExtValue();
+      Value &Ptr = getAssociatedValue();
+      unsigned ArgNo = getIRPosition().getCallSiteArgNo();
+      ChangeStatus Changed;
+      if (ArgNo == 0) {
+        handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_WRITE, 0, Changed,
+                     nullptr, LengthVal);
+      } else if (ArgNo == 1) {
+        handleAccess(A, *MI, Ptr, nullptr, AccessKind::AK_READ, 0, Changed,
+                     nullptr, LengthVal);
+      } else {
+        LLVM_DEBUG(dbgs() << "[AAPointerInfo] Unhandled memory intrinsic "
+                          << *MI << "\n");
+        return indicatePessimisticFixpoint();
+      }
+      return Changed;
+    }
+
+    // TODO: Once we have call site specific value information we can provide
+    //       call site specific liveness information and then it makes
+    //       sense to specialize attributes for call sites arguments instead of
+    //       redirecting requests to the callee argument.
+    Argument *Arg = getAssociatedArgument();
+    if (!Arg)
+      return indicatePessimisticFixpoint();
+    const IRPosition &ArgPos = IRPosition::argument(*Arg);
+    auto &ArgAA =
+        A.getAAFor<AAPointerInfo>(*this, ArgPos, DepClassTy::REQUIRED);
+    return translateAndAddCalleeState(A, ArgAA, 0, *cast<CallBase>(getCtxI()));
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
+  }
+};
+
+struct AAPointerInfoCallSiteReturned final : AAPointerInfoFloating {
+  AAPointerInfoCallSiteReturned(const IRPosition &IRP, Attributor &A)
+      : AAPointerInfoFloating(IRP, A) {}
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    AAPointerInfoImpl::trackPointerInfoStatistics(getIRPosition());
+  }
+};
+
 /// -----------------------NoUnwind Function Attribute--------------------------
 
 struct AANoUnwindImpl : AANoUnwind {
@@ -2681,7 +3292,8 @@ struct AAIsDeadValueImpl : public AAIsDead {
     // chain of N dependent instructions to be considered live as soon as one is
     // without going through N update cycles. This is not required for
     // correctness.
-    return A.checkForAllUses(UsePred, *this, V, DepClassTy::REQUIRED);
+    return A.checkForAllUses(UsePred, *this, V, /* CheckBBLivenessOnly */ false,
+                             DepClassTy::REQUIRED);
   }
 
   /// Determine if \p I is assumed to be side-effect free.
@@ -4532,24 +5144,47 @@ struct AAValueSimplifyImpl : AAValueSimplify {
         // be OK. We do not try to optimize the latter.
         bool UsedAssumedInformation;
         if (!NullPointerIsDefined(L.getFunction(),
-                                  Ptr.getType()->getPointerAddressSpace())) {
-          const auto &SimplePtr =
-              A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation);
-          if (!SimplePtr.hasValue() || *SimplePtr == Obj)
-            continue;
-        }
+                                  Ptr.getType()->getPointerAddressSpace()) &&
+            A.getAssumedSimplified(Ptr, AA, UsedAssumedInformation) == Obj)
+          continue;
+        return false;
       }
-      // If the object is only loaded from by this one load, we can try to
-      // propagate the initializer.
-      // TODO: This is only to test the load simplification, will be replaced.
-      if (Obj->getNumUses() == 1 && *Obj->user_begin() == &L)
-        if (Value *InitialV = AA::getInitialValueForObj(*Obj, *L.getType()))
-          if (Union(*InitialV))
-            continue;
+      if (!isa<AllocaInst>(Obj) && !isa<GlobalVariable>(Obj))
+        return false;
+      Constant *InitialVal = AA::getInitialValueForObj(*Obj, *L.getType());
+      if (!InitialVal || !Union(*InitialVal))
+        return false;
 
-      return false;
+      LLVM_DEBUG(dbgs() << "Underlying object amenable to load-store "
+                           "propagation, checking accesses next.\n");
+
+      auto CheckAccess = [&](const AAPointerInfo::Access &Acc, bool IsExact) {
+        LLVM_DEBUG(dbgs() << " - visit access " << Acc << "\n");
+        if (!Acc.isWrite())
+          return true;
+        if (Acc.isWrittenValueYetUndetermined())
+          return true;
+        Value *Content = Acc.getWrittenValue();
+        if (!Content)
+          return false;
+        Value *CastedContent =
+            AA::getWithType(*Content, *AA.getAssociatedType());
+        if (!CastedContent)
+          return false;
+        if (IsExact)
+          return Union(*CastedContent);
+        if (auto *C = dyn_cast<Constant>(CastedContent))
+          if (C->isNullValue() || C->isAllOnesValue() || isa<UndefValue>(C))
+            return Union(*CastedContent);
+        return false;
+      };
+
+      auto &PI = A.getAAFor<AAPointerInfo>(AA, IRPosition::value(*Obj),
+                                           DepClassTy::REQUIRED);
+      if (!PI.forallInterfearingAccesses(L, CheckAccess))
+        return false;
     }
-    return Union(*UndefValue::get(AA.getAssociatedType()));
+    return true;
   }
 };
 
@@ -8779,6 +9414,7 @@ const char AAPotentialValues::ID = 0;
 const char AANoUndef::ID = 0;
 const char AACallEdges::ID = 0;
 const char AAFunctionReachability::ID = 0;
+const char AAPointerInfo::ID = 0;
 
 // Macro magic to create the static generator function for attributes that
 // follow the naming scheme.
@@ -8890,6 +9526,7 @@ CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueConstantRange)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPotentialValues)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoUndef)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPointerInfo)
 
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAValueSimplify)
 CREATE_ALL_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAIsDead)

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index 9b6ddfa54eabb..54ce838c52b5a 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -77,8 +77,7 @@ define i32 @f(i32 %x) {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]]) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[X]]) #[[ATTR1:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP1]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
index 036cb1fa2b25d..fa2bcd43656ae 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=13 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=17 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=17 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -112,19 +112,19 @@ define internal i32 @test(i32* %X, i64* %Y) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    [[Y_PRIV:%.*]] = alloca i64, align 8
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[Y_PRIV]], align 8
+; IS__CGSCC_NPM-NEXT:    store i64 1, i64* [[Y_PRIV]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = load i64, i64* [[Y_PRIV]], align 8
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = add i32 [[A]], 1
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = add i32 [[TMP0]], 1
 ; IS__CGSCC_NPM-NEXT:    [[D:%.*]] = add i64 [[B]], 1
 ; IS__CGSCC_NPM-NEXT:    [[COND:%.*]] = icmp sgt i64 [[D]], -1
 ; IS__CGSCC_NPM-NEXT:    br i1 [[COND]], label [[RETURN1:%.*]], label [[RETURN2:%.*]]
 ; IS__CGSCC_NPM:       Return1:
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[C]]
 ; IS__CGSCC_NPM:       Return2:
-; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
+; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP0]]
 ;
   %A = load i32, i32* %X
   %B = load i64, i64* %Y
@@ -171,12 +171,11 @@ define internal i32 @caller(i32* %A) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:    [[A_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 2, i32* [[A_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = alloca i64, align 8
 ; IS__CGSCC_NPM-NEXT:    store i64 1, i64* [[B]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[B]], align 8
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i64 undef) #[[ATTR1:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[C]]
 ;
   %B = alloca i64
@@ -216,8 +215,7 @@ define i32 @callercaller() {
 ; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 2, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @caller(i32 undef) #[[ATTR2:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[X]]
 ;
   %B = alloca i32

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll
index 1df604dc3795a..7879ee061aed7 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/array.ll
@@ -16,25 +16,30 @@ define void @caller() {
 ; IS________OPM-NEXT:    call void @callee(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[ARRAYDECAY]])
 ; IS________OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@caller() {
-; IS________NPM-NEXT:  entry:
-; IS________NPM-NEXT:    [[LEFT:%.*]] = alloca [3 x i32], align 4
-; IS________NPM-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[LEFT]], i64 0, i64 0
-; IS________NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYDECAY]] to [3 x i32]*
-; IS________NPM-NEXT:    [[DOTCAST:%.*]] = bitcast [3 x i32]* [[TMP0]] to i32*
-; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAST]], align 4
-; IS________NPM-NEXT:    [[DOT0:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i32 0
-; IS________NPM-NEXT:    [[TMP2:%.*]] = bitcast [3 x i32]* [[DOT0]] to i8*
-; IS________NPM-NEXT:    [[DOT0_B4:%.*]] = getelementptr i8, i8* [[TMP2]], i32 4
-; IS________NPM-NEXT:    [[DOT0_B4_CAST:%.*]] = bitcast i8* [[DOT0_B4]] to i32*
-; IS________NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOT0_B4_CAST]], align 4
-; IS________NPM-NEXT:    [[DOT01:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i32 0
-; IS________NPM-NEXT:    [[TMP4:%.*]] = bitcast [3 x i32]* [[DOT01]] to i8*
-; IS________NPM-NEXT:    [[DOT0_B8:%.*]] = getelementptr i8, i8* [[TMP4]], i32 8
-; IS________NPM-NEXT:    [[DOT0_B8_CAST:%.*]] = bitcast i8* [[DOT0_B8]] to i32*
-; IS________NPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOT0_B8_CAST]], align 4
-; IS________NPM-NEXT:    call void @callee(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]])
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[LEFT:%.*]] = alloca [3 x i32], align 4
+; IS__TUNIT_NPM-NEXT:    [[ARRAYDECAY:%.*]] = getelementptr inbounds [3 x i32], [3 x i32]* [[LEFT]], i64 0, i64 0
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[ARRAYDECAY]] to [3 x i32]*
+; IS__TUNIT_NPM-NEXT:    [[DOTCAST:%.*]] = bitcast [3 x i32]* [[TMP0]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DOTCAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[DOT0:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i32 0
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = bitcast [3 x i32]* [[DOT0]] to i8*
+; IS__TUNIT_NPM-NEXT:    [[DOT0_B4:%.*]] = getelementptr i8, i8* [[TMP2]], i32 4
+; IS__TUNIT_NPM-NEXT:    [[DOT0_B4_CAST:%.*]] = bitcast i8* [[DOT0_B4]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[DOT0_B4_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[DOT01:%.*]] = getelementptr [3 x i32], [3 x i32]* [[TMP0]], i32 0
+; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = bitcast [3 x i32]* [[DOT01]] to i8*
+; IS__TUNIT_NPM-NEXT:    [[DOT0_B8:%.*]] = getelementptr i8, i8* [[TMP4]], i32 8
+; IS__TUNIT_NPM-NEXT:    [[DOT0_B8_CAST:%.*]] = bitcast i8* [[DOT0_B8]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOT0_B8_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    call void @callee(i32 [[TMP1]], i32 [[TMP3]], i32 [[TMP5]])
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller() {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    call void @callee(i32 undef, i32 undef, i32 undef)
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
   %left = alloca [3 x i32], align 4
@@ -50,25 +55,45 @@ define internal void @callee(i32* noalias %arg) {
 ; IS________OPM-NEXT:    call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[ARG]])
 ; IS________OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@callee
-; IS________NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
-; IS________NPM-NEXT:  entry:
-; IS________NPM-NEXT:    [[ARG_PRIV:%.*]] = alloca [3 x i32], align 4
-; IS________NPM-NEXT:    [[ARG_PRIV_CAST:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
-; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[ARG_PRIV_CAST]], align 4
-; IS________NPM-NEXT:    [[ARG_PRIV_0:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
-; IS________NPM-NEXT:    [[TMP3:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_0]] to i8*
-; IS________NPM-NEXT:    [[ARG_PRIV_0_B4:%.*]] = getelementptr i8, i8* [[TMP3]], i32 4
-; IS________NPM-NEXT:    [[ARG_PRIV_0_B4_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B4]] to i32*
-; IS________NPM-NEXT:    store i32 [[TMP1]], i32* [[ARG_PRIV_0_B4_CAST]], align 4
-; IS________NPM-NEXT:    [[ARG_PRIV_01:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
-; IS________NPM-NEXT:    [[TMP4:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_01]] to i8*
-; IS________NPM-NEXT:    [[ARG_PRIV_0_B8:%.*]] = getelementptr i8, i8* [[TMP4]], i32 8
-; IS________NPM-NEXT:    [[ARG_PRIV_0_B8_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B8]] to i32*
-; IS________NPM-NEXT:    store i32 [[TMP2]], i32* [[ARG_PRIV_0_B8_CAST]], align 4
-; IS________NPM-NEXT:    [[TMP5:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
-; IS________NPM-NEXT:    call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[TMP5]])
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@callee
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV:%.*]] = alloca [3 x i32], align 4
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_CAST:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[ARG_PRIV_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_0:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_0]] to i8*
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_0_B4:%.*]] = getelementptr i8, i8* [[TMP3]], i32 4
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_0_B4_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B4]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[ARG_PRIV_0_B4_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_01:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
+; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_01]] to i8*
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_0_B8:%.*]] = getelementptr i8, i8* [[TMP4]], i32 8
+; IS__TUNIT_NPM-NEXT:    [[ARG_PRIV_0_B8_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B8]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[ARG_PRIV_0_B8_CAST]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP5:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
+; IS__TUNIT_NPM-NEXT:    call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[TMP5]])
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee
+; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]], i32 [[TMP2:%.*]]) {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV:%.*]] = alloca [3 x i32], align 4
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_CAST:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
+; IS__CGSCC_NPM-NEXT:    store i32 undef, i32* [[ARG_PRIV_CAST]], align 4
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_0:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
+; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_0]] to i8*
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_0_B4:%.*]] = getelementptr i8, i8* [[TMP3]], i32 4
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_0_B4_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B4]] to i32*
+; IS__CGSCC_NPM-NEXT:    store i32 undef, i32* [[ARG_PRIV_0_B4_CAST]], align 4
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_01:%.*]] = getelementptr [3 x i32], [3 x i32]* [[ARG_PRIV]], i32 0
+; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = bitcast [3 x i32]* [[ARG_PRIV_01]] to i8*
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_0_B8:%.*]] = getelementptr i8, i8* [[TMP4]], i32 8
+; IS__CGSCC_NPM-NEXT:    [[ARG_PRIV_0_B8_CAST:%.*]] = bitcast i8* [[ARG_PRIV_0_B8]] to i32*
+; IS__CGSCC_NPM-NEXT:    store i32 undef, i32* [[ARG_PRIV_0_B8_CAST]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP5:%.*]] = bitcast [3 x i32]* [[ARG_PRIV]] to i32*
+; IS__CGSCC_NPM-NEXT:    call void @use(i32* noalias nocapture noundef nonnull readonly align 4 dereferenceable(12) [[TMP5]])
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
 entry:
   call void @use(i32* %arg)

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
index e0cc670666434..26c55401a12a8 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -63,9 +63,9 @@ define internal i32 @f(%struct.ss* byval(%struct.ss) %b, i32* byval(i32) %X, i32
 ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 8
+; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[B_PRIV_CAST]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -141,12 +141,8 @@ define i32 @test(i32* %X) {
 ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; IS__CGSCC_NPM-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
-; IS__CGSCC_NPM-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 4
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]]) #[[ATTR2:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X]], align 4
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @f(i32 undef, i64 undef, i32 [[TMP0]]) #[[ATTR2:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[C]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
index f2508197d77eb..3ade9a6a91f9c 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=17 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=17 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-v64:64:64-v128:128:128"
@@ -40,10 +40,10 @@ define internal i32 @test(i32* %X, i32* %Y) {
 ; IS__CGSCC_NPM-NEXT:    [[Y_PRIV:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[Y_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[X_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = load i32, i32* [[Y_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = add i32 1, [[TMP1]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[C]]
 ;
   %A = load i32, i32* %X
@@ -85,12 +85,11 @@ define internal i32 @caller(i32* %B) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 2, i32* [[B_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[A]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 4
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]]) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[B_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32 undef, i32 [[TMP2]]) #[[ATTR1:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[C]]
 ;
   %A = alloca i32
@@ -130,8 +129,7 @@ define i32 @callercaller() {
 ; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 2, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]]) #[[ATTR2:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @caller(i32 undef) #[[ATTR2:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[X]]
 ;
   %B = alloca i32

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
index b707c8baa6d82..0ce0d2d2ffeb8 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
@@ -26,9 +26,9 @@ define internal void @f(%struct.ss* byval(%struct.ss)  %b, i32* byval(i32) %X) n
 ; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 8
+; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[B_PRIV_CAST]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
index f1d23f9e5ea22..62d541fb8735d 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -50,9 +50,9 @@ define internal i32 @f(%struct.ss* byval(%struct.ss)  %b) nounwind  {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 8
+; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[B_PRIV_CAST]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -110,9 +110,9 @@ define internal i32 @g(%struct.ss* byval(%struct.ss) align 32 %b) nounwind {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 32
+; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[B_PRIV_CAST]], align 32
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
@@ -152,15 +152,15 @@ define i32 @main() nounwind  {
 ; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__TUNIT_NPM-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; IS__TUNIT_NPM-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 8
-; IS__TUNIT_NPM-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 8
-; IS__TUNIT_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 32
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 32
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
+; IS__TUNIT_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32
+; IS__TUNIT_NPM-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32
 ; IS__TUNIT_NPM-NEXT:    [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR0]]
 ; IS__TUNIT_NPM-NEXT:    [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[A]]
@@ -188,16 +188,8 @@ define i32 @main() nounwind  {
 ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 32
 ; IS__CGSCC_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; IS__CGSCC_NPM-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 32
-; IS__CGSCC_NPM-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 8
-; IS__CGSCC_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]]) #[[ATTR1:[0-9]+]]
-; IS__CGSCC_NPM-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 32
-; IS__CGSCC_NPM-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 32
-; IS__CGSCC_NPM-NEXT:    [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]]) #[[ATTR1]]
+; IS__CGSCC_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 undef, i64 undef) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[C1:%.*]] = call i32 @g(i32 undef, i64 undef) #[[ATTR1]]
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[A]]
 ;

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
index de507064beb8e..ecb19f4a5a078 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -43,7 +43,7 @@ define internal i32 @callee(i1 %C, i32* %P) {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@callee
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    [[P_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[P_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 17, i32* [[P_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    br label [[F:%.*]]
 ; IS__CGSCC_NPM:       T:
 ; IS__CGSCC_NPM-NEXT:    unreachable
@@ -92,8 +92,7 @@ define i32 @foo() {
 ; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 17, i32* [[A]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 4
-; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @callee(i32 [[TMP1]]) #[[ATTR1:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = call i32 @callee(i32 undef) #[[ATTR1:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[X]]
 ;
   %A = alloca i32         ; <i32*> [#uses=2]

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index eb593ae7edbfc..4d0c2bd561a36 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -14,12 +14,18 @@ define void @caller() #0 {
 ; IS________OPM-NEXT:    call void @promote_i32_ptr(i32* noalias nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[X]]), !prof [[PROF0:![0-9]+]]
 ; IS________OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@caller() {
-; IS________NPM-NEXT:    [[X:%.*]] = alloca i32, align 4
-; IS________NPM-NEXT:    store i32 42, i32* [[X]], align 4
-; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
-; IS________NPM-NEXT:    call void @promote_i32_ptr(i32 [[TMP1]]), !prof [[PROF0:![0-9]+]]
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@caller() {
+; IS__TUNIT_NPM-NEXT:    [[X:%.*]] = alloca i32, align 4
+; IS__TUNIT_NPM-NEXT:    store i32 42, i32* [[X]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 4
+; IS__TUNIT_NPM-NEXT:    call void @promote_i32_ptr(i32 [[TMP1]]), !prof [[PROF0:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller() {
+; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = alloca i32, align 4
+; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[X]], align 4
+; IS__CGSCC_NPM-NEXT:    call void @promote_i32_ptr(i32 undef), !prof [[PROF0:![0-9]+]]
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
   %x = alloca i32
   store i32 42, i32* %x
@@ -34,13 +40,21 @@ define internal void @promote_i32_ptr(i32* %xp) {
 ; IS________OPM-NEXT:    call void @use_i32(i32 [[X]])
 ; IS________OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@promote_i32_ptr
-; IS________NPM-SAME: (i32 [[TMP0:%.*]]) {
-; IS________NPM-NEXT:    [[XP_PRIV:%.*]] = alloca i32, align 4
-; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[XP_PRIV]], align 4
-; IS________NPM-NEXT:    [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4
-; IS________NPM-NEXT:    call void @use_i32(i32 [[X]])
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@promote_i32_ptr
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]]) {
+; IS__TUNIT_NPM-NEXT:    [[XP_PRIV:%.*]] = alloca i32, align 4
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[XP_PRIV]], align 4
+; IS__TUNIT_NPM-NEXT:    [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4
+; IS__TUNIT_NPM-NEXT:    call void @use_i32(i32 [[X]])
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@promote_i32_ptr
+; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]]) {
+; IS__CGSCC_NPM-NEXT:    [[XP_PRIV:%.*]] = alloca i32, align 4
+; IS__CGSCC_NPM-NEXT:    store i32 42, i32* [[XP_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    call void @use_i32(i32 [[X]])
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
   %x = load i32, i32* %xp
   call void @use_i32(i32 %x)

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index 1e4f567c5c661..55ca43c117f52 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -8,23 +8,46 @@
 ; PR5038
 %struct.MYstr = type { i8, i32 }
 @mystr = internal global %struct.MYstr zeroinitializer ; <%struct.MYstr*> [#uses=3]
+
+declare void @use(i8)
 ;.
 ; CHECK: @[[MYSTR:[a-zA-Z0-9_$"\\.-]+]] = internal global [[STRUCT_MYSTR:%.*]] zeroinitializer
 ;.
 define internal void @vfu1(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind {
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu1
-; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull writeonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR0:[0-9]+]] {
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1
-; IS__CGSCC_OPM-NEXT:    store i32 99, i32* [[TMP0]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0
-; IS__CGSCC_OPM-NEXT:    store i8 97, i8* [[TMP1]], align 8
-; IS__CGSCC_OPM-NEXT:    br label [[RETURN:%.*]]
-; IS__CGSCC_OPM:       return:
-; IS__CGSCC_OPM-NEXT:    ret void
+; IS________OPM: Function Attrs: nounwind
+; IS________OPM-LABEL: define {{[^@]+}}@vfu1
+; IS________OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1
+; IS________OPM-NEXT:    store i32 99, i32* [[TMP0]], align 4
+; IS________OPM-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0
+; IS________OPM-NEXT:    store i8 97, i8* [[TMP1]], align 8
+; IS________OPM-NEXT:    [[L:%.*]] = load i8, i8* [[TMP1]], align 8
+; IS________OPM-NEXT:    call void @use(i8 [[L]])
+; IS________OPM-NEXT:    br label [[RETURN:%.*]]
+; IS________OPM:       return:
+; IS________OPM-NEXT:    ret void
 ;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__TUNIT_NPM: Function Attrs: nounwind
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@vfu1
+; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; IS__TUNIT_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 1
+; IS__TUNIT_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i32 99, i32* [[TMP2]], align 4
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
+; IS__TUNIT_NPM-NEXT:    store i8 97, i8* [[TMP3]], align 8
+; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i8, i8* [[TMP3]], align 8
+; IS__TUNIT_NPM-NEXT:    call void @use(i8 [[L]])
+; IS__TUNIT_NPM-NEXT:    br label [[RETURN:%.*]]
+; IS__TUNIT_NPM:       return:
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM: Function Attrs: nounwind
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@vfu1
 ; IS__CGSCC_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
@@ -37,6 +60,8 @@ define internal void @vfu1(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounw
 ; IS__CGSCC_NPM-NEXT:    store i32 99, i32* [[TMP2]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    store i8 97, i8* [[TMP3]], align 8
+; IS__CGSCC_NPM-NEXT:    [[L:%.*]] = load i8, i8* [[TMP3]], align 8
+; IS__CGSCC_NPM-NEXT:    call void @use(i8 [[L]])
 ; IS__CGSCC_NPM-NEXT:    br label [[RETURN:%.*]]
 ; IS__CGSCC_NPM:       return:
 ; IS__CGSCC_NPM-NEXT:    ret void
@@ -46,6 +71,8 @@ entry:
   store i32 99, i32* %0, align 4
   %1 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 0 ; <i8*> [#uses=1]
   store i8 97, i8* %1, align 4
+  %l = load i8, i8* %1
+  call void @use(i8 %l)
   br label %return
 
 return:                                           ; preds = %entry
@@ -53,17 +80,6 @@ return:                                           ; preds = %entry
 }
 
 define internal i32 @vfu2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@vfu2
-; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; IS__TUNIT____-NEXT:    [[TMP2:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
-; IS__TUNIT____-NEXT:    [[TMP3:%.*]] = zext i8 [[TMP2]] to i32
-; IS__TUNIT____-NEXT:    [[TMP4:%.*]] = add i32 [[TMP3]], [[TMP1]]
-; IS__TUNIT____-NEXT:    ret i32 [[TMP4]]
-;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@vfu2
 ; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] {
@@ -86,19 +102,42 @@ entry:
 }
 
 define i32 @unions() nounwind {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
-; IS__TUNIT____-LABEL: define {{[^@]+}}@unions
-; IS__TUNIT____-SAME: () #[[ATTR1:[0-9]+]] {
-; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[RESULT:%.*]] = call i32 @vfu2() #[[ATTR0]]
-; IS__TUNIT____-NEXT:    ret i32 [[RESULT]]
+; IS__TUNIT_OPM: Function Attrs: nounwind
+; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions
+; IS__TUNIT_OPM-SAME: () #[[ATTR0]] {
+; IS__TUNIT_OPM-NEXT:  entry:
+; IS__TUNIT_OPM-NEXT:    call void @vfu1(%struct.MYstr* nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    ret i32 0
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
-; IS__CGSCC____-LABEL: define {{[^@]+}}@unions
-; IS__CGSCC____-SAME: () #[[ATTR1]] {
-; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[RESULT:%.*]] = call i32 @vfu2() #[[ATTR2:[0-9]+]]
-; IS__CGSCC____-NEXT:    ret i32 [[RESULT]]
+; IS__TUNIT_NPM: Function Attrs: nounwind
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions
+; IS__TUNIT_NPM-SAME: () #[[ATTR0]] {
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
+; IS__TUNIT_NPM-NEXT:    call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    ret i32 0
+;
+; IS__CGSCC_OPM: Function Attrs: nounwind
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@unions
+; IS__CGSCC_OPM-SAME: () #[[ATTR0]] {
+; IS__CGSCC_OPM-NEXT:  entry:
+; IS__CGSCC_OPM-NEXT:    call void @vfu1(%struct.MYstr* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
+; IS__CGSCC_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2() #[[ATTR0]]
+; IS__CGSCC_OPM-NEXT:    ret i32 [[RESULT]]
+;
+; IS__CGSCC_NPM: Function Attrs: nounwind
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@unions
+; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
+; IS__CGSCC_NPM-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
+; IS__CGSCC_NPM-NEXT:    call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__CGSCC_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2() #[[ATTR0]]
+; IS__CGSCC_NPM-NEXT:    ret i32 [[RESULT]]
 ;
 entry:
   call void @vfu1(%struct.MYstr* byval(%struct.MYstr) align 4 @mystr) nounwind
@@ -109,7 +148,7 @@ entry:
 define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nounwind readonly {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1
 ; IS__TUNIT_OPM-NEXT:    store i32 99, i32* [[Z]], align 4
@@ -123,7 +162,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nou
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
+; IS__TUNIT_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR1:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
@@ -142,7 +181,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nou
 ;
 ; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR0]] {
+; IS__CGSCC_OPM-SAME: (%struct.MYstr* noalias nocapture nofree noundef nonnull byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) [[U:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1
 ; IS__CGSCC_OPM-NEXT:    store i32 99, i32* [[Z]], align 4
@@ -156,13 +195,13 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval(%struct.MYstr) align 4 %u) nou
 ;
 ; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@vfu2_v2
-; IS__CGSCC_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR0]] {
+; IS__CGSCC_NPM-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
-; IS__CGSCC_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]], align 8
+; IS__CGSCC_NPM-NEXT:    store i8 0, i8* [[U_PRIV_CAST]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[U_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT:    store i32 99, i32* [[Z]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
@@ -186,39 +225,44 @@ entry:
 }
 
 define i32 @unions_v2() nounwind {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_OPM: Function Attrs: nounwind
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@unions_v2
-; IS__TUNIT_OPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR0]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR2]]
+; IS__TUNIT_OPM-NEXT:    call void @vfu1(%struct.MYstr* nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
+; IS__TUNIT_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR]]) align 8 dereferenceable(8) @mystr) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[RESULT]]
 ;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT_NPM: Function Attrs: nounwind
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@unions_v2
-; IS__TUNIT_NPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR0]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
 ; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
 ; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_1]], align 8
-; IS__TUNIT_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR2]]
+; IS__TUNIT_NPM-NEXT:    call void @vfu1(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR0]]
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 8
+; IS__TUNIT_NPM-NEXT:    [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
+; IS__TUNIT_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]]) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[RESULT]]
 ;
-; IS__CGSCC_OPM: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
+; IS__CGSCC_OPM: Function Attrs: nounwind
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@unions_v2
 ; IS__CGSCC_OPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree noundef nonnull readnone byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR3:[0-9]+]]
+; IS__CGSCC_OPM-NEXT:    call void @vfu1(%struct.MYstr* noalias nocapture nofree noundef nonnull readonly byval([[STRUCT_MYSTR:%.*]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
+; IS__CGSCC_OPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* noalias nocapture nofree noundef nonnull readnone byval([[STRUCT_MYSTR]]) align 8 dereferenceable(8) @mystr) #[[ATTR0]]
 ; IS__CGSCC_OPM-NEXT:    ret i32 [[RESULT]]
 ;
-; IS__CGSCC_NPM: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC_NPM: Function Attrs: nounwind
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@unions_v2
-; IS__CGSCC_NPM-SAME: () #[[ATTR1]] {
+; IS__CGSCC_NPM-SAME: () #[[ATTR0]] {
 ; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[TMP0:%.*]] = load i8, i8* getelementptr inbounds ([[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 0), align 8
-; IS__CGSCC_NPM-NEXT:    [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 8
-; IS__CGSCC_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP0]], i32 [[TMP1]]) #[[ATTR3:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    call void @vfu1(i8 noundef 0, i32 noundef 0) #[[ATTR0]]
+; IS__CGSCC_NPM-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 undef, i32 undef) #[[ATTR0]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[RESULT]]
 ;
 entry:
@@ -227,12 +271,11 @@ entry:
   ret i32 %result
 }
 ;.
-; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR0:[0-9]+]] = { nounwind }
+; IS__TUNIT____: attributes #[[ATTR1:[0-9]+]] = { nofree nosync nounwind readnone willreturn }
+; IS__TUNIT____: attributes #[[ATTR2:[0-9]+]] = { nounwind readonly }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR0:[0-9]+]] = { nounwind }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readonly willreturn }
-; IS__CGSCC____: attributes #[[ATTR2]] = { nounwind readonly willreturn }
-; IS__CGSCC____: attributes #[[ATTR3:[0-9]+]] = { nounwind readnone willreturn }
+; IS__CGSCC____: attributes #[[ATTR2:[0-9]+]] = { nofree norecurse nosync nounwind readnone willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
index 44975f33815de..a51e0cd493d0e 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR26044.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=7 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=9 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll
index 4b808cd06d29f..1f4da31357d2a 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/global.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/global.ll
@@ -10,29 +10,23 @@
 ; CHECK: @[[_ZL6TEST1G:[a-zA-Z0-9_$"\\.-]+]] = internal global i32 42, align 4
 ;.
 define void @_Z7test1f1v() nounwind {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@_Z7test1f1v
 ; IS__TUNIT____-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[TMP:%.*]] = load i32, i32* @_ZL6test1g, align 4
-; IS__TUNIT____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP]], 0
-; IS__TUNIT____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; IS__TUNIT____-NEXT:    br label [[IF_END:%.*]]
 ; IS__TUNIT____:       if.then:
-; IS__TUNIT____-NEXT:    store i32 0, i32* @_ZL6test1g, align 4
-; IS__TUNIT____-NEXT:    br label [[IF_END]]
+; IS__TUNIT____-NEXT:    unreachable
 ; IS__TUNIT____:       if.end:
 ; IS__TUNIT____-NEXT:    ret void
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@_Z7test1f1v
 ; IS__CGSCC____-SAME: () #[[ATTR0:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[TMP:%.*]] = load i32, i32* @_ZL6test1g, align 4
-; IS__CGSCC____-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP]], 0
-; IS__CGSCC____-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
+; IS__CGSCC____-NEXT:    br label [[IF_END:%.*]]
 ; IS__CGSCC____:       if.then:
-; IS__CGSCC____-NEXT:    store i32 0, i32* @_ZL6test1g, align 4
-; IS__CGSCC____-NEXT:    br label [[IF_END]]
+; IS__CGSCC____-NEXT:    unreachable
 ; IS__CGSCC____:       if.end:
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -50,28 +44,24 @@ if.end:                                           ; preds = %if.then, %entry
 }
 
 define i32 @_Z7test1f2v() nounwind {
-; IS__TUNIT____: Function Attrs: nofree nosync nounwind readonly willreturn
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@_Z7test1f2v
-; IS__TUNIT____-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__TUNIT____-SAME: () #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[TMP:%.*]] = load i32, i32* @_ZL6test1g, align 4
-; IS__TUNIT____-NEXT:    ret i32 [[TMP]]
+; IS__TUNIT____-NEXT:    ret i32 42
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@_Z7test1f2v
-; IS__CGSCC____-SAME: () #[[ATTR1:[0-9]+]] {
+; IS__CGSCC____-SAME: () #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[TMP:%.*]] = load i32, i32* @_ZL6test1g, align 4
-; IS__CGSCC____-NEXT:    ret i32 [[TMP]]
+; IS__CGSCC____-NEXT:    ret i32 42
 ;
 entry:
   %tmp = load i32, i32* @_ZL6test1g, align 4
   ret i32 %tmp
 }
 ;.
-; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind willreturn }
-; IS__TUNIT____: attributes #[[ATTR1]] = { nofree nosync nounwind readonly willreturn }
+; IS__TUNIT____: attributes #[[ATTR0]] = { nofree nosync nounwind readnone willreturn }
 ;.
-; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR1]] = { nofree norecurse nosync nounwind readonly willreturn }
+; IS__CGSCC____: attributes #[[ATTR0]] = { nofree norecurse nosync nounwind readnone willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 7730bce35ba36..19bf0ecaabae2 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=6 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=15 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 ;

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack.ll b/llvm/test/Transforms/Attributor/heap_to_stack.ll
index fb4c841ada29b..173bf44fc283d 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
index cb74ed43cd731..1678f4deff939 100644
--- a/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
+++ b/llvm/test/Transforms/Attributor/heap_to_stack_gpu.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=5 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 

diff  --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll
index ba6153fe2b829..913812cc5f6d3 100644
--- a/llvm/test/Transforms/Attributor/internal-noalias.ll
+++ b/llvm/test/Transforms/Attributor/internal-noalias.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=12 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=19 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=19 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -185,9 +185,9 @@ define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 {
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@noalias_args_argmem_ro
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) #[[ATTR2:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[B_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 5, i32* [[B_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[A_PRIV:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 5, i32* [[A_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[T0:%.*]] = load i32, i32* [[A_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[T1:%.*]] = load i32, i32* [[B_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[ADD:%.*]] = add nsw i32 [[T0]], [[T1]]
@@ -231,9 +231,7 @@ define i32 @visible_local_2() {
 ; IS__CGSCC_NPM-SAME: () #[[ATTR3:[0-9]+]] {
 ; IS__CGSCC_NPM-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; IS__CGSCC_NPM-NEXT:    store i32 5, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[B]], align 4
-; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 [[TMP1]], i32 [[TMP2]]) #[[ATTR6:[0-9]+]]
+; IS__CGSCC_NPM-NEXT:    [[CALL:%.*]] = call i32 @noalias_args_argmem_ro(i32 undef, i32 undef) #[[ATTR6:[0-9]+]]
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[CALL]]
 ;
   %B = alloca i32, align 4

diff  --git a/llvm/test/Transforms/Attributor/memory_locations.ll b/llvm/test/Transforms/Attributor/memory_locations.ll
index 60cf10019c164..4cd483bbbe392 100644
--- a/llvm/test/Transforms/Attributor/memory_locations.ll
+++ b/llvm/test/Transforms/Attributor/memory_locations.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=10 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=14 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
@@ -533,28 +533,26 @@ define void @writeonly_global_via_arg_internal() {
 }
 
 define i8 @recursive_not_readnone(i8* %ptr, i1 %c) {
-; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone
 ; IS__TUNIT____-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__TUNIT____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT____:       t:
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10:[0-9]+]]
-; IS__TUNIT____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__TUNIT____-NEXT:    ret i8 [[R]]
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10:[0-9]+]], !range [[RNG0:![0-9]+]]
+; IS__TUNIT____-NEXT:    ret i8 1
 ; IS__TUNIT____:       f:
 ; IS__TUNIT____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__TUNIT____-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone
 ; IS__CGSCC____-SAME: (i8* nocapture nofree writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__CGSCC____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC____:       t:
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11:[0-9]+]]
-; IS__CGSCC____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__CGSCC____-NEXT:    ret i8 [[R]]
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11:[0-9]+]], !range [[RNG0:![0-9]+]]
+; IS__CGSCC____-NEXT:    ret i8 1
 ; IS__CGSCC____:       f:
 ; IS__CGSCC____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__CGSCC____-NEXT:    ret i8 0
@@ -571,28 +569,26 @@ f:
 }
 
 define internal i8 @recursive_not_readnone_internal(i8* %ptr, i1 %c) {
-; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal
 ; IS__TUNIT____-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__TUNIT____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__TUNIT____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT____:       t:
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10]]
-; IS__TUNIT____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__TUNIT____-NEXT:    ret i8 [[R]]
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    ret i8 1
 ; IS__TUNIT____:       f:
 ; IS__TUNIT____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__TUNIT____-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal
 ; IS__CGSCC____-SAME: (i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__CGSCC____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__CGSCC____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC____:       t:
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11]]
-; IS__CGSCC____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__CGSCC____-NEXT:    ret i8 [[R]]
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    ret i8 1
 ; IS__CGSCC____:       f:
 ; IS__CGSCC____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__CGSCC____-NEXT:    ret i8 0
@@ -613,14 +609,14 @@ define i8 @readnone_caller(i1 %c) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@readnone_caller
 ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] {
 ; IS__TUNIT____-NEXT:    [[A:%.*]] = alloca i8, align 1
-; IS__TUNIT____-NEXT:    [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR10]]
+; IS__TUNIT____-NEXT:    [[R:%.*]] = call noundef i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR10]], !range [[RNG0]]
 ; IS__TUNIT____-NEXT:    ret i8 [[R]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@readnone_caller
 ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR9:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    [[A:%.*]] = alloca i8, align 1
-; IS__CGSCC____-NEXT:    [[R:%.*]] = call i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR12:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[R:%.*]] = call noundef i8 @recursive_not_readnone_internal(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[A]], i1 [[C]]) #[[ATTR12:[0-9]+]], !range [[RNG0]]
 ; IS__CGSCC____-NEXT:    ret i8 [[R]]
 ;
   %a = alloca i8
@@ -629,28 +625,26 @@ define i8 @readnone_caller(i1 %c) {
 }
 
 define internal i8 @recursive_not_readnone_internal2(i8* %ptr, i1 %c) {
-; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__TUNIT____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal2
 ; IS__TUNIT____-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__TUNIT____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__TUNIT____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__TUNIT____:       t:
-; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10]]
-; IS__TUNIT____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__TUNIT____-NEXT:    ret i8 [[R]]
+; IS__TUNIT____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR10]], !range [[RNG0]]
+; IS__TUNIT____-NEXT:    ret i8 1
 ; IS__TUNIT____:       f:
 ; IS__TUNIT____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__TUNIT____-NEXT:    ret i8 0
 ;
-; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind
+; IS__CGSCC____: Function Attrs: argmemonly nofree nosync nounwind writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@recursive_not_readnone_internal2
 ; IS__CGSCC____-SAME: (i8* nocapture nofree nonnull writeonly [[PTR:%.*]], i1 [[C:%.*]]) #[[ATTR8]] {
 ; IS__CGSCC____-NEXT:    [[ALLOC:%.*]] = alloca i8, align 1
 ; IS__CGSCC____-NEXT:    br i1 [[C]], label [[T:%.*]], label [[F:%.*]]
 ; IS__CGSCC____:       t:
-; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11]]
-; IS__CGSCC____-NEXT:    [[R:%.*]] = load i8, i8* [[ALLOC]], align 1
-; IS__CGSCC____-NEXT:    ret i8 [[R]]
+; IS__CGSCC____-NEXT:    [[TMP1:%.*]] = call i8 @recursive_not_readnone_internal2(i8* noalias nocapture nofree noundef nonnull writeonly dereferenceable(1) [[ALLOC]], i1 noundef false) #[[ATTR11]], !range [[RNG0]]
+; IS__CGSCC____-NEXT:    ret i8 1
 ; IS__CGSCC____:       f:
 ; IS__CGSCC____-NEXT:    store i8 1, i8* [[PTR]], align 1
 ; IS__CGSCC____-NEXT:    ret i8 0
@@ -670,13 +664,13 @@ define i8 @readnone_caller2(i1 %c) {
 ; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@readnone_caller2
 ; IS__TUNIT____-SAME: (i1 [[C:%.*]]) #[[ATTR9]] {
-; IS__TUNIT____-NEXT:    [[R:%.*]] = call i8 @recursive_not_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR10]]
+; IS__TUNIT____-NEXT:    [[R:%.*]] = call noundef i8 @recursive_not_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR10]], !range [[RNG0]]
 ; IS__TUNIT____-NEXT:    ret i8 [[R]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind readnone
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@readnone_caller2
 ; IS__CGSCC____-SAME: (i1 [[C:%.*]]) #[[ATTR9]] {
-; IS__CGSCC____-NEXT:    [[R:%.*]] = call i8 @recursive_not_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR12]]
+; IS__CGSCC____-NEXT:    [[R:%.*]] = call noundef i8 @recursive_not_readnone_internal2(i8* undef, i1 [[C]]) #[[ATTR12]], !range [[RNG0]]
 ; IS__CGSCC____-NEXT:    ret i8 [[R]]
 ;
   %r = call i8 @recursive_not_readnone_internal2(i8* undef, i1 %c)
@@ -725,9 +719,9 @@ define void @argmemonky_caller() {
 ; IS__TUNIT____: attributes #[[ATTR5]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT____: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly }
 ; IS__TUNIT____: attributes #[[ATTR7]] = { argmemonly nofree nosync nounwind willreturn writeonly }
-; IS__TUNIT____: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind }
+; IS__TUNIT____: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind writeonly }
 ; IS__TUNIT____: attributes #[[ATTR9]] = { nofree nosync nounwind readnone }
-; IS__TUNIT____: attributes #[[ATTR10]] = { nofree nosync nounwind }
+; IS__TUNIT____: attributes #[[ATTR10]] = { nofree nosync nounwind writeonly }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0]] = { inaccessiblememonly }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { inaccessiblemem_or_argmemonly }
@@ -737,9 +731,11 @@ define void @argmemonky_caller() {
 ; IS__CGSCC____: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC____: attributes #[[ATTR7]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind }
+; IS__CGSCC____: attributes #[[ATTR8]] = { argmemonly nofree nosync nounwind writeonly }
 ; IS__CGSCC____: attributes #[[ATTR9]] = { nofree nosync nounwind readnone }
 ; IS__CGSCC____: attributes #[[ATTR10]] = { nounwind willreturn writeonly }
-; IS__CGSCC____: attributes #[[ATTR11]] = { nofree nosync nounwind }
-; IS__CGSCC____: attributes #[[ATTR12]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR11]] = { nofree nosync nounwind writeonly }
+; IS__CGSCC____: attributes #[[ATTR12]] = { nounwind writeonly }
+;.
+; CHECK: [[META0:![0-9]+]] = !{i8 0, i8 2}
 ;.

diff  --git a/llvm/test/Transforms/Attributor/misc_crash.ll b/llvm/test/Transforms/Attributor/misc_crash.ll
index c89a31c8ff2ae..2b28b822dde98 100644
--- a/llvm/test/Transforms/Attributor/misc_crash.ll
+++ b/llvm/test/Transforms/Attributor/misc_crash.ll
@@ -94,8 +94,7 @@ define internal void @func5(i32 %0) {
 ; CHECK-NEXT:    br label [[BLOCK:%.*]]
 ; CHECK:       block:
 ; CHECK-NEXT:    store i8* blockaddress(@func5, [[BLOCK]]), i8** [[TMP]], align 8
-; CHECK-NEXT:    [[ADDR:%.*]] = load i8*, i8** [[TMP]], align 8
-; CHECK-NEXT:    call void @func6(i8* [[ADDR]])
+; CHECK-NEXT:    call void @func6(i8* blockaddress(@func5, [[BLOCK]]))
 ; CHECK-NEXT:    ret void
 ;
   %tmp = alloca i8*

diff  --git a/llvm/test/Transforms/Attributor/nodelete.ll b/llvm/test/Transforms/Attributor/nodelete.ll
index e3457357940d8..b4ad6191a2c6b 100644
--- a/llvm/test/Transforms/Attributor/nodelete.ll
+++ b/llvm/test/Transforms/Attributor/nodelete.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=8 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -47,7 +47,7 @@ entry:
 define internal void @f3(%"b"* %this) align 2 {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@f3
-; IS__CGSCC____-SAME: (%b* noalias nocapture nofree readnone [[THIS:%.*]]) #[[ATTR0]] align 2 {
+; IS__CGSCC____-SAME: (%b* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] align 2 {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[THIS_ADDR:%.*]] = alloca %b*, align 8
 ; IS__CGSCC____-NEXT:    store %b* [[THIS]], %b** [[THIS_ADDR]], align 8
@@ -64,7 +64,7 @@ entry:
 define internal i1 @f4(%"b"* %this) align 2 {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@f4
-; IS__CGSCC____-SAME: (%b* noalias nocapture nofree readnone [[THIS:%.*]]) #[[ATTR0]] align 2 {
+; IS__CGSCC____-SAME: (%b* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] align 2 {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[THIS_ADDR:%.*]] = alloca %b*, align 8
 ; IS__CGSCC____-NEXT:    store %b* [[THIS]], %b** [[THIS_ADDR]], align 8
@@ -81,7 +81,7 @@ entry:
 define internal %"a"* @f5(%"b"* %this) align 2 {
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@f5
-; IS__CGSCC____-SAME: (%b* noalias nocapture nofree readnone [[THIS:%.*]]) #[[ATTR0]] align 2 {
+; IS__CGSCC____-SAME: (%b* noalias nocapture nofree noundef nonnull readnone align 8 dereferenceable(8) [[THIS:%.*]]) #[[ATTR0]] align 2 {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[THIS_ADDR:%.*]] = alloca %b*, align 8
 ; IS__CGSCC____-NEXT:    store %b* [[THIS]], %b** [[THIS_ADDR]], align 8

diff  --git a/llvm/test/Transforms/Attributor/norecurse.ll b/llvm/test/Transforms/Attributor/norecurse.ll
index 316ed29e2ca9f..a8c4c7c4bf261 100644
--- a/llvm/test/Transforms/Attributor/norecurse.ll
+++ b/llvm/test/Transforms/Attributor/norecurse.ll
@@ -227,8 +227,7 @@ define void @f(i32 %x)  {
 ; NOT_CGSCC_OPM-NEXT:  entry:
 ; NOT_CGSCC_OPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
 ; NOT_CGSCC_OPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; NOT_CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
-; NOT_CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; NOT_CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; NOT_CGSCC_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; NOT_CGSCC_OPM:       if.then:
 ; NOT_CGSCC_OPM-NEXT:    call void @g() #[[ATTR8:[0-9]+]]
@@ -242,8 +241,7 @@ define void @f(i32 %x)  {
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[X_ADDR:%.*]] = alloca i32, align 4
 ; IS__CGSCC_OPM-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[TMP0]], 0
+; IS__CGSCC_OPM-NEXT:    [[TOBOOL:%.*]] = icmp ne i32 [[X]], 0
 ; IS__CGSCC_OPM-NEXT:    br i1 [[TOBOOL]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; IS__CGSCC_OPM:       if.then:
 ; IS__CGSCC_OPM-NEXT:    call void @g() #[[ATTR9:[0-9]+]]

diff  --git a/llvm/test/Transforms/Attributor/noreturn_sync.ll b/llvm/test/Transforms/Attributor/noreturn_sync.ll
index aab5de48b96b9..6d1560484c3e4 100644
--- a/llvm/test/Transforms/Attributor/noreturn_sync.ll
+++ b/llvm/test/Transforms/Attributor/noreturn_sync.ll
@@ -1,5 +1,5 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=3 -S < %s | FileCheck %s
+; RUN: opt -attributor  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=4 -S < %s | FileCheck %s
 ;
 ; This file is the same as noreturn_async.ll but with a personality which
 ; indicates that the exception handler *cannot* catch asynchronous exceptions.

diff  --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
index 989aef0ba3baa..811eb0659955f 100644
--- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
+++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
@@ -1,6 +1,6 @@
 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --check-attributes --check-globals
-; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
-; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=16 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
+; RUN: opt -attributor -enable-new-pm=0 -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=22 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_NPM,NOT_CGSCC_OPM,NOT_TUNIT_NPM,IS__TUNIT____,IS________OPM,IS__TUNIT_OPM
+; RUN: opt -aa-pipeline=basic-aa -passes=attributor -attributor-manifest-internal  -attributor-max-iterations-verify -attributor-annotate-decl-cs -attributor-max-iterations=22 -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_CGSCC_OPM,NOT_CGSCC_NPM,NOT_TUNIT_OPM,IS__TUNIT____,IS________NPM,IS__TUNIT_NPM
 ; RUN: opt -attributor-cgscc -enable-new-pm=0 -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_NPM,IS__CGSCC____,IS________OPM,IS__CGSCC_OPM
 ; RUN: opt -aa-pipeline=basic-aa -passes=attributor-cgscc -attributor-manifest-internal  -attributor-annotate-decl-cs -S < %s | FileCheck %s --check-prefixes=CHECK,NOT_TUNIT_NPM,NOT_TUNIT_OPM,NOT_CGSCC_OPM,IS__CGSCC____,IS________NPM,IS__CGSCC_NPM
 
@@ -41,7 +41,7 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:  entry:
 ; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree [[W0]]) #[[ATTR2:[0-9]+]]
 ; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
-; IS__TUNIT____-NEXT:    [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3:[0-9]+]]
+; IS__TUNIT____-NEXT:    [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    ret i32* [[W0]]
 ;
@@ -106,8 +106,8 @@ define internal i32* @internal_ret0_nw(i32* %n0, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @internal_ret1_rrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
-; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4:[0-9]+]]
-; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]]
+; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
+; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
 ; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
@@ -189,8 +189,8 @@ define internal i32* @internal_ret1_rrw(i32* %r0, i32* %r1, i32* %w0) {
 ; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL5:%.*]] = call i32* @external_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
-; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]]
-; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]]
+; IS__CGSCC____-NEXT:    [[CALL6:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[R1]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
+; IS__CGSCC____-NEXT:    [[CALL7:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
 ; IS__CGSCC____-NEXT:    [[CALL8:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[R1]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
@@ -311,7 +311,7 @@ define internal i32* @internal_ret1_rw(i32* %r0, i32* %w0) {
 ; IS__CGSCC____-NEXT:    store i32 [[TMP1]], i32* [[W0]], align 4
 ; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret0_nw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i32* @internal_ret0_nw(i32* nofree nonnull align 4 dereferenceable(4) [[W0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
-; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR4]]
+; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[R0]], i32* nofree nonnull writeonly align 4 dereferenceable(4) "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
 ; IS__CGSCC____-NEXT:    [[CALL4:%.*]] = call i32* @external_ret2_nrw(i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree noundef nonnull align 4 dereferenceable(4) [[R0]], i32* nofree nonnull align 4 dereferenceable(4) [[W0]]) #[[ATTR2]]
 ; IS__CGSCC____-NEXT:    br label [[RETURN]]
 ; IS__CGSCC____:       return:
@@ -346,7 +346,7 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-LABEL: define {{[^@]+}}@external_source_ret2_nrw
 ; IS__TUNIT____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0]] {
 ; IS__TUNIT____-NEXT:  entry:
-; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
+; IS__TUNIT____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3:[0-9]+]]
 ; IS__TUNIT____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR2]]
 ; IS__TUNIT____-NEXT:    ret i32* [[W0]]
 ;
@@ -354,8 +354,8 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw
 ; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]]) #[[ATTR0]] {
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR3]]
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR4]]
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly "no-capture-maybe-returned" [[W0]]) #[[ATTR4:[0-9]+]]
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]]) #[[ATTR3]]
 ; IS__CGSCC____-NEXT:    ret i32* [[W0]]
 ;
 entry:
@@ -375,6 +375,6 @@ entry:
 ; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree nosync nounwind }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { argmemonly nofree norecurse nosync nounwind willreturn }
 ; IS__CGSCC____: attributes #[[ATTR2]] = { nofree nosync nounwind }
-; IS__CGSCC____: attributes #[[ATTR3]] = { nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR3]] = { nounwind }
+; IS__CGSCC____: attributes #[[ATTR4]] = { nounwind willreturn }
 ;.

diff  --git a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
index f700a1202582a..295f72d99a5e4 100644
--- a/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify-pointer-info.ll
@@ -111,43 +111,27 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR6:[0-9]+]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR5:[0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR6]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__TUNIT_OPM-NEXT:    [[I4:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__TUNIT_OPM-NEXT:    store float [[I4]], float* [[F12]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_OPM-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
-; IS__TUNIT_OPM-NEXT:    [[I5:%.*]] = load float, float* [[F23]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00
+; IS__TUNIT_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__TUNIT_OPM-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__TUNIT_OPM-NEXT:    store float [[MUL]], float* [[F24]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_OPM-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
-; IS__TUNIT_OPM-NEXT:    [[I6:%.*]] = load float, float* [[F35]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_OPM-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__TUNIT_OPM-NEXT:    [[I7:%.*]] = load float, float* [[F16]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = fadd float [[I6]], [[I7]]
+; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__TUNIT_OPM-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    store float [[ADD]], float* [[F37]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_OPM-NEXT:    [[I18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    [[I8:%.*]] = load i32, i32* [[I18]], align 4, !tbaa [[TBAA12:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    store i32 [[I8]], i32* [[I19]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_OPM-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    [[I9:%.*]] = load i32, i32* [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I9]], 1
+; IS__TUNIT_OPM-NEXT:    store i32 1, i32* [[I19]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[MUL11:%.*]] = shl nsw i32 2, 1
 ; IS__TUNIT_OPM-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_OPM-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    [[I10:%.*]] = load i32, i32* [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]]
-; IS__TUNIT_OPM-NEXT:    [[I114:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    [[I11:%.*]] = load i32, i32* [[I114]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_OPM-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]]
+; IS__TUNIT_OPM-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; IS__TUNIT_OPM-NEXT:    [[ADD15:%.*]] = add nsw i32 3, 1
 ; IS__TUNIT_OPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14]]
+; IS__TUNIT_OPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__TUNIT_OPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
 ; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR7]]
 ; IS__TUNIT_OPM-NEXT:    ret void
@@ -166,43 +150,27 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__TUNIT_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR5:[0-9]+]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR4:[0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__TUNIT_NPM-NEXT:    [[I4:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__TUNIT_NPM-NEXT:    store float [[I4]], float* [[F12]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_NPM-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
-; IS__TUNIT_NPM-NEXT:    [[I5:%.*]] = load float, float* [[F23]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00
+; IS__TUNIT_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__TUNIT_NPM-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__TUNIT_NPM-NEXT:    store float [[MUL]], float* [[F24]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_NPM-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
-; IS__TUNIT_NPM-NEXT:    [[I6:%.*]] = load float, float* [[F35]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_NPM-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__TUNIT_NPM-NEXT:    [[I7:%.*]] = load float, float* [[F16]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_NPM-NEXT:    [[ADD:%.*]] = fadd float [[I6]], [[I7]]
+; IS__TUNIT_NPM-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__TUNIT_NPM-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    store float [[ADD]], float* [[F37]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_NPM-NEXT:    [[I18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    [[I8:%.*]] = load i32, i32* [[I18]], align 4, !tbaa [[TBAA12:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    store i32 [[I8]], i32* [[I19]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_NPM-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[I9:%.*]] = load i32, i32* [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]]
-; IS__TUNIT_NPM-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I9]], 1
+; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[I19]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[MUL11:%.*]] = shl nsw i32 2, 1
 ; IS__TUNIT_NPM-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_NPM-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    [[I10:%.*]] = load i32, i32* [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]]
-; IS__TUNIT_NPM-NEXT:    [[I114:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    [[I11:%.*]] = load i32, i32* [[I114]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_NPM-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; IS__TUNIT_NPM-NEXT:    [[ADD15:%.*]] = add nsw i32 3, 1
 ; IS__TUNIT_NPM-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14]]
+; IS__TUNIT_NPM-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__TUNIT_NPM-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
 ; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR6]]
 ; IS__TUNIT_NPM-NEXT:    ret void
@@ -226,38 +194,22 @@ define void @local_alloca_simplifiable_1(%struct.S* noalias sret(%struct.S) alig
 ; IS__CGSCC____-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR9]]
 ; IS__CGSCC____-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
 ; IS__CGSCC____-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR9]]
-; IS__CGSCC____-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__CGSCC____-NEXT:    [[I4:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC____-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__CGSCC____-NEXT:    store float [[I4]], float* [[F12]], align 4, !tbaa [[TBAA7]]
-; IS__CGSCC____-NEXT:    [[F23:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 4
-; IS__CGSCC____-NEXT:    [[I5:%.*]] = load float, float* [[F23]], align 4, !tbaa [[TBAA10]]
-; IS__CGSCC____-NEXT:    [[MUL:%.*]] = fmul float [[I5]], 2.000000e+00
+; IS__CGSCC____-NEXT:    store float 0x3FF19999A0000000, float* [[F12]], align 4, !tbaa [[TBAA7]]
+; IS__CGSCC____-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__CGSCC____-NEXT:    [[F24:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__CGSCC____-NEXT:    store float [[MUL]], float* [[F24]], align 4, !tbaa [[TBAA10]]
-; IS__CGSCC____-NEXT:    [[F35:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
-; IS__CGSCC____-NEXT:    [[I6:%.*]] = load float, float* [[F35]], align 4, !tbaa [[TBAA11]]
-; IS__CGSCC____-NEXT:    [[F16:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
-; IS__CGSCC____-NEXT:    [[I7:%.*]] = load float, float* [[F16]], align 4, !tbaa [[TBAA7]]
-; IS__CGSCC____-NEXT:    [[ADD:%.*]] = fadd float [[I6]], [[I7]]
+; IS__CGSCC____-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__CGSCC____-NEXT:    [[F37:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__CGSCC____-NEXT:    store float [[ADD]], float* [[F37]], align 4, !tbaa [[TBAA11]]
-; IS__CGSCC____-NEXT:    [[I18:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC____-NEXT:    [[I8:%.*]] = load i32, i32* [[I18]], align 4, !tbaa [[TBAA12:![0-9]+]]
 ; IS__CGSCC____-NEXT:    [[I19:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__CGSCC____-NEXT:    store i32 [[I8]], i32* [[I19]], align 4, !tbaa [[TBAA12]]
-; IS__CGSCC____-NEXT:    [[I210:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__CGSCC____-NEXT:    [[I9:%.*]] = load i32, i32* [[I210]], align 4, !tbaa [[TBAA13:![0-9]+]]
-; IS__CGSCC____-NEXT:    [[MUL11:%.*]] = shl nsw i32 [[I9]], 1
+; IS__CGSCC____-NEXT:    store i32 1, i32* [[I19]], align 4, !tbaa [[TBAA12:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[MUL11:%.*]] = shl nsw i32 2, 1
 ; IS__CGSCC____-NEXT:    [[I212:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
-; IS__CGSCC____-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13]]
-; IS__CGSCC____-NEXT:    [[I313:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__CGSCC____-NEXT:    [[I10:%.*]] = load i32, i32* [[I313]], align 4, !tbaa [[TBAA14:![0-9]+]]
-; IS__CGSCC____-NEXT:    [[I114:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__CGSCC____-NEXT:    [[I11:%.*]] = load i32, i32* [[I114]], align 4, !tbaa [[TBAA12]]
-; IS__CGSCC____-NEXT:    [[ADD15:%.*]] = add nsw i32 [[I10]], [[I11]]
+; IS__CGSCC____-NEXT:    store i32 [[MUL11]], i32* [[I212]], align 4, !tbaa [[TBAA13:![0-9]+]]
+; IS__CGSCC____-NEXT:    [[ADD15:%.*]] = add nsw i32 3, 1
 ; IS__CGSCC____-NEXT:    [[I316:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
-; IS__CGSCC____-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14]]
+; IS__CGSCC____-NEXT:    store i32 [[ADD15]], i32* [[I316]], align 4, !tbaa [[TBAA14:![0-9]+]]
 ; IS__CGSCC____-NEXT:    [[I12:%.*]] = bitcast %struct.S* [[S]] to i8*
 ; IS__CGSCC____-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 24, i8* nocapture nofree noundef nonnull align 4 dereferenceable(24) [[I12]]) #[[ATTR8]]
 ; IS__CGSCC____-NEXT:    ret void
@@ -407,10 +359,8 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_OPM:       for.cond.cleanup30:
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_END38:%.*]]
 ; IS__TUNIT_OPM:       for.body31:
-; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    [[I23:%.*]] = load i8, i8* [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_OPM-NEXT:    store i8 [[I23]], i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
+; IS__TUNIT_OPM-NEXT:    store i8 0, i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_OPM-NEXT:    br label [[FOR_INC36]]
 ; IS__TUNIT_OPM:       for.inc36:
 ; IS__TUNIT_OPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
@@ -483,7 +433,7 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX25]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX26:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 500
 ; IS__TUNIT_NPM-NEXT:    [[I22:%.*]] = bitcast i8* [[ARRAYIDX26]] to i32*
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(524) [[I22]], i32 noundef 0) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND28:%.*]]
 ; IS__TUNIT_NPM:       for.cond28:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC36:%.*]] ], [ 0, [[FOR_END24]] ]
@@ -492,10 +442,8 @@ define void @local_alloca_simplifiable_2() {
 ; IS__TUNIT_NPM:       for.cond.cleanup30:
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_END38:%.*]]
 ; IS__TUNIT_NPM:       for.body31:
-; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX33:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* [[BYTES]], i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    [[I23:%.*]] = load i8, i8* [[ARRAYIDX33]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX35:%.*]] = getelementptr inbounds [1024 x i8], [1024 x i8]* @globalBytes, i64 0, i64 [[INDVARS_IV12]]
-; IS__TUNIT_NPM-NEXT:    store i8 [[I23]], i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
+; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX35]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_INC36]]
 ; IS__TUNIT_NPM:       for.inc36:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV_NEXT13]] = add nuw nsw i64 [[INDVARS_IV12]], 1
@@ -856,11 +804,9 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_OPM:       cond.false:
 ; IS__TUNIT_OPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_OPM:       cond.end:
-; IS__TUNIT_OPM-NEXT:    [[COND:%.*]] = phi i32* [ @GI1, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
-; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
 ; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR7]]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[I1]]
+; IS__TUNIT_OPM-NEXT:    ret i32 5
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
@@ -878,11 +824,9 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_NPM:       cond.false:
 ; IS__TUNIT_NPM-NEXT:    br label [[COND_END]]
 ; IS__TUNIT_NPM:       cond.end:
-; IS__TUNIT_NPM-NEXT:    [[COND:%.*]] = phi i32* [ @GI1, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
-; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
 ; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR6]]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[I1]]
+; IS__TUNIT_NPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@multi_obj_simplifiable_1
@@ -900,11 +844,9 @@ define i32 @multi_obj_simplifiable_1(i32 %cnd) {
 ; IS__CGSCC____:       cond.false:
 ; IS__CGSCC____-NEXT:    br label [[COND_END]]
 ; IS__CGSCC____:       cond.end:
-; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32* [ @GI1, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
-; IS__CGSCC____-NEXT:    [[I1:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    [[I2:%.*]] = bitcast i32* [[L]] to i8*
 ; IS__CGSCC____-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I2]]) #[[ATTR8]]
-; IS__CGSCC____-NEXT:    ret i32 [[I1]]
+; IS__CGSCC____-NEXT:    ret i32 5
 ;
 entry:
   %L = alloca i32, align 4
@@ -955,9 +897,8 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_OPM-NEXT:    [[COND:%.*]] = phi i32* [ @GI2, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
 ; IS__TUNIT_OPM-NEXT:    store i32 5, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_OPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR7]]
-; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
+; IS__TUNIT_OPM-NEXT:    ret i32 5
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
@@ -976,9 +917,8 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__TUNIT_NPM-NEXT:    [[COND:%.*]] = phi i32* [ @GI2, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
 ; IS__TUNIT_NPM-NEXT:    store i32 5, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_NPM-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR6]]
-; IS__TUNIT_NPM-NEXT:    ret i32 [[L]]
+; IS__TUNIT_NPM-NEXT:    ret i32 5
 ;
 ; IS__CGSCC____: Function Attrs: nofree nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@multi_obj_simplifiable_2
@@ -997,9 +937,8 @@ define i32 @multi_obj_simplifiable_2(i32 %cnd) {
 ; IS__CGSCC____-NEXT:    [[COND:%.*]] = phi i32* [ @GI2, [[COND_TRUE]] ], [ [[L]], [[COND_FALSE]] ]
 ; IS__CGSCC____-NEXT:    store i32 5, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    [[I1:%.*]] = bitcast i32* [[L]] to i8*
-; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* [[COND]], align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    call void @llvm.lifetime.end.p0i8(i64 noundef 4, i8* nocapture nofree noundef nonnull align 4 dereferenceable(4) [[I1]]) #[[ATTR8]]
-; IS__CGSCC____-NEXT:    ret i32 [[L]]
+; IS__CGSCC____-NEXT:    ret i32 5
 ;
 entry:
   %L = alloca i32, align 4
@@ -1042,79 +981,63 @@ cond.end:                                         ; preds = %cond.false, %cond.t
 ;    }
 ;
 define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) align 4 %agg.result) {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
-; IS__TUNIT_OPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR4]] {
+; IS__TUNIT_OPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR5]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    store float 0x3FF19999A0000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_OPM-NEXT:    store float 0x40019999A0000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
 ; IS__TUNIT_OPM-NEXT:    store float 0x400A666660000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR6]]
-; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR5]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR5]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__TUNIT_OPM-NEXT:    store float [[I]], float* [[F1]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_OPM-NEXT:    [[I4:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
+; IS__TUNIT_OPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_OPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__TUNIT_OPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__TUNIT_OPM-NEXT:    store float [[MUL]], float* [[F2]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_OPM-NEXT:    [[I5:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_OPM-NEXT:    [[I6:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
+; IS__TUNIT_OPM-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__TUNIT_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    store float [[ADD]], float* [[F3]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_OPM-NEXT:    [[I7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    store i32 [[I7]], i32* [[I1]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_OPM-NEXT:    [[I8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_OPM-NEXT:    [[MUL1:%.*]] = shl nsw i32 [[I8]], 1
+; IS__TUNIT_OPM-NEXT:    store i32 1, i32* [[I1]], align 4, !tbaa [[TBAA12]]
+; IS__TUNIT_OPM-NEXT:    [[MUL1:%.*]] = shl nsw i32 2, 1
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
 ; IS__TUNIT_OPM-NEXT:    store i32 [[MUL1]], i32* [[I2]], align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_OPM-NEXT:    [[I9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]]
-; IS__TUNIT_OPM-NEXT:    [[I10:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_OPM-NEXT:    [[ADD2:%.*]] = add nsw i32 [[I9]], [[I10]]
+; IS__TUNIT_OPM-NEXT:    [[ADD2:%.*]] = add nsw i32 3, 1
 ; IS__TUNIT_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__TUNIT_OPM-NEXT:    store i32 [[ADD2]], i32* [[I3]], align 4, !tbaa [[TBAA14]]
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@static_global_simplifiable_1
-; IS__TUNIT_NPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR2]] {
+; IS__TUNIT_NPM-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR4]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    store float 0x3FF19999A0000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_NPM-NEXT:    store float 0x40019999A0000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
 ; IS__TUNIT_NPM-NEXT:    store float 0x400A666660000000, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR5]]
-; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR4]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR4]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__TUNIT_NPM-NEXT:    store float [[I]], float* [[F1]], align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_NPM-NEXT:    [[I4:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
+; IS__TUNIT_NPM-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
+; IS__TUNIT_NPM-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__TUNIT_NPM-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__TUNIT_NPM-NEXT:    store float [[MUL]], float* [[F2]], align 4, !tbaa [[TBAA10]]
-; IS__TUNIT_NPM-NEXT:    [[I5:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_NPM-NEXT:    [[I6:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
-; IS__TUNIT_NPM-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
+; IS__TUNIT_NPM-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__TUNIT_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    store float [[ADD]], float* [[F3]], align 4, !tbaa [[TBAA11]]
-; IS__TUNIT_NPM-NEXT:    [[I7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    store i32 [[I7]], i32* [[I1]], align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_NPM-NEXT:    [[I8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_NPM-NEXT:    [[MUL1:%.*]] = shl nsw i32 [[I8]], 1
+; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[I1]], align 4, !tbaa [[TBAA12]]
+; IS__TUNIT_NPM-NEXT:    [[MUL1:%.*]] = shl nsw i32 2, 1
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
 ; IS__TUNIT_NPM-NEXT:    store i32 [[MUL1]], i32* [[I2]], align 4, !tbaa [[TBAA13]]
-; IS__TUNIT_NPM-NEXT:    [[I9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]]
-; IS__TUNIT_NPM-NEXT:    [[I10:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
-; IS__TUNIT_NPM-NEXT:    [[ADD2:%.*]] = add nsw i32 [[I9]], [[I10]]
+; IS__TUNIT_NPM-NEXT:    [[ADD2:%.*]] = add nsw i32 3, 1
 ; IS__TUNIT_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__TUNIT_NPM-NEXT:    store i32 [[ADD2]], i32* [[I3]], align 4, !tbaa [[TBAA14]]
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@static_global_simplifiable_1
 ; IS__CGSCC____-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]]) #[[ATTR4:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
@@ -1124,28 +1047,20 @@ define void @static_global_simplifiable_1(%struct.S* noalias sret(%struct.S) ali
 ; IS__CGSCC____-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(24) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i32 0, i32 0), i32 noundef 1) #[[ATTR9]]
 ; IS__CGSCC____-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(20) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), i32 noundef 2) #[[ATTR9]]
 ; IS__CGSCC____-NEXT:    call void @write_arg(i32* nocapture nofree noundef nonnull writeonly align 4 dereferenceable(16) getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), i32 noundef 3) #[[ATTR9]]
-; IS__CGSCC____-NEXT:    [[I:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
 ; IS__CGSCC____-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
-; IS__CGSCC____-NEXT:    store float [[I]], float* [[F1]], align 4, !tbaa [[TBAA7]]
-; IS__CGSCC____-NEXT:    [[I4:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 4), align 4, !tbaa [[TBAA10]]
-; IS__CGSCC____-NEXT:    [[MUL:%.*]] = fmul float [[I4]], 2.000000e+00
+; IS__CGSCC____-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
+; IS__CGSCC____-NEXT:    [[MUL:%.*]] = fmul float 0x40019999A0000000, 2.000000e+00
 ; IS__CGSCC____-NEXT:    [[F2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 4
 ; IS__CGSCC____-NEXT:    store float [[MUL]], float* [[F2]], align 4, !tbaa [[TBAA10]]
-; IS__CGSCC____-NEXT:    [[I5:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 5), align 4, !tbaa [[TBAA11]]
-; IS__CGSCC____-NEXT:    [[I6:%.*]] = load float, float* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 3), align 4, !tbaa [[TBAA7]]
-; IS__CGSCC____-NEXT:    [[ADD:%.*]] = fadd float [[I5]], [[I6]]
+; IS__CGSCC____-NEXT:    [[ADD:%.*]] = fadd float 0x400A666660000000, 0x3FF19999A0000000
 ; IS__CGSCC____-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 5
 ; IS__CGSCC____-NEXT:    store float [[ADD]], float* [[F3]], align 4, !tbaa [[TBAA11]]
-; IS__CGSCC____-NEXT:    [[I7:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
 ; IS__CGSCC____-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 0
-; IS__CGSCC____-NEXT:    store i32 [[I7]], i32* [[I1]], align 4, !tbaa [[TBAA12]]
-; IS__CGSCC____-NEXT:    [[I8:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 1), align 4, !tbaa [[TBAA13]]
-; IS__CGSCC____-NEXT:    [[MUL1:%.*]] = shl nsw i32 [[I8]], 1
+; IS__CGSCC____-NEXT:    store i32 1, i32* [[I1]], align 4, !tbaa [[TBAA12]]
+; IS__CGSCC____-NEXT:    [[MUL1:%.*]] = shl nsw i32 2, 1
 ; IS__CGSCC____-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 1
 ; IS__CGSCC____-NEXT:    store i32 [[MUL1]], i32* [[I2]], align 4, !tbaa [[TBAA13]]
-; IS__CGSCC____-NEXT:    [[I9:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 2), align 4, !tbaa [[TBAA14]]
-; IS__CGSCC____-NEXT:    [[I10:%.*]] = load i32, i32* getelementptr inbounds ([[STRUCT_S]], %struct.S* @Gs1, i64 0, i32 0), align 4, !tbaa [[TBAA12]]
-; IS__CGSCC____-NEXT:    [[ADD2:%.*]] = add nsw i32 [[I9]], [[I10]]
+; IS__CGSCC____-NEXT:    [[ADD2:%.*]] = add nsw i32 3, 1
 ; IS__CGSCC____-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 2
 ; IS__CGSCC____-NEXT:    store i32 [[ADD2]], i32* [[I3]], align 4, !tbaa [[TBAA14]]
 ; IS__CGSCC____-NEXT:    ret void
@@ -1330,7 +1245,7 @@ define void @static_global_simplifiable_2() {
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND13]], !llvm.loop [[LOOP26:![0-9]+]]
 ; IS__TUNIT_NPM:       for.end23:
 ; IS__TUNIT_NPM-NEXT:    store i8 0, i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 1023), align 1, !tbaa [[TBAA15]]
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree writeonly align 4 dereferenceable_or_null(524) bitcast (i8* getelementptr inbounds ([1024 x i8], [1024 x i8]* @GBytes, i64 0, i64 500) to i32*), i32 noundef 0) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND25:%.*]]
 ; IS__TUNIT_NPM:       for.cond25:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC33:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -1538,7 +1453,7 @@ define i32 @static_global_simplifiable_3() {
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@static_global_simplifiable_3
-; IS__CGSCC____-SAME: () #[[ATTR4]] {
+; IS__CGSCC____-SAME: () #[[ATTR5:[0-9]+]] {
 ; IS__CGSCC____-NEXT:    store i32 1, i32* @Flag3, align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    [[I:%.*]] = load i32, i32* @Flag3, align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    ret i32 [[I]]
@@ -1577,11 +1492,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__TUNIT_OPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_OPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__TUNIT_OPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR6]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR6]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR6]]
+; IS__TUNIT_OPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR5]]
 ; IS__TUNIT_OPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_OPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1627,11 +1542,11 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ; IS__TUNIT_NPM-NEXT:    [[F3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 5
 ; IS__TUNIT_NPM-NEXT:    store float 0x400A666660000000, float* [[F3]], align 4, !tbaa [[TBAA11]]
 ; IS__TUNIT_NPM-NEXT:    [[I1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 0
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(24) [[I1]], i32 noundef 1) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[I2:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 1
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(20) [[I2]], i32 noundef 2) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[I3:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 2
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 8 dereferenceable(16) [[I3]], i32 noundef 3) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    [[F11:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = load float, float* [[F11]], align 4, !tbaa [[TBAA7]]
 ; IS__TUNIT_NPM-NEXT:    [[F12:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[AGG_RESULT]], i64 0, i32 3
@@ -1668,7 +1583,7 @@ define void @noalias_arg_simplifiable_1(%struct.S* noalias sret(%struct.S) align
 ;
 ; IS__CGSCC____: Function Attrs: argmemonly nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@noalias_arg_simplifiable_1
-; IS__CGSCC____-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR5:[0-9]+]] {
+; IS__CGSCC____-SAME: (%struct.S* noalias nocapture nofree nonnull writeonly sret([[STRUCT_S:%.*]]) align 4 dereferenceable(24) [[AGG_RESULT:%.*]], %struct.S* noalias nocapture nofree nonnull byval([[STRUCT_S]]) align 8 dereferenceable(24) [[S:%.*]]) #[[ATTR6:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[F1:%.*]] = getelementptr inbounds [[STRUCT_S]], %struct.S* [[S]], i64 0, i32 3
 ; IS__CGSCC____-NEXT:    store float 0x3FF19999A0000000, float* [[F1]], align 4, !tbaa [[TBAA7]]
@@ -1919,7 +1834,7 @@ define void @noalias_arg_simplifiable_2(i8* %Bytes) {
 ; IS__TUNIT_NPM-NEXT:    store i8 0, i8* [[ARRAYIDX24]], align 1, !tbaa [[TBAA15]]
 ; IS__TUNIT_NPM-NEXT:    [[ARRAYIDX25:%.*]] = getelementptr inbounds i8, i8* [[BYTES]], i64 500
 ; IS__TUNIT_NPM-NEXT:    [[I21:%.*]] = bitcast i8* [[ARRAYIDX25]] to i32*
-; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR5]]
+; IS__TUNIT_NPM-NEXT:    call void @write_arg(i32* nocapture nofree nonnull writeonly align 4 [[I21]], i32 noundef 0) #[[ATTR4]]
 ; IS__TUNIT_NPM-NEXT:    br label [[FOR_COND27:%.*]]
 ; IS__TUNIT_NPM:       for.cond27:
 ; IS__TUNIT_NPM-NEXT:    [[INDVARS_IV12:%.*]] = phi i64 [ [[INDVARS_IV_NEXT13:%.*]], [[FOR_INC35:%.*]] ], [ 0, [[FOR_END23]] ]
@@ -2368,21 +2283,21 @@ declare void @escape(i8*)
 define i32 @global_not_simplifiable_1(i32 %cnd) {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; IS__TUNIT_OPM-SAME: (i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
+; IS__TUNIT_OPM-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
 ; IS__TUNIT_OPM-NEXT:  entry:
 ; IS__TUNIT_OPM-NEXT:    [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[I]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; IS__TUNIT_NPM-SAME: (i32 [[CND:%.*]]) #[[ATTR4:[0-9]+]] {
+; IS__TUNIT_NPM-SAME: (i32 [[CND:%.*]]) #[[ATTR5:[0-9]+]] {
 ; IS__TUNIT_NPM-NEXT:  entry:
 ; IS__TUNIT_NPM-NEXT:    [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[I]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@global_not_simplifiable_1
-; IS__CGSCC____-SAME: (i32 [[CND:%.*]]) #[[ATTR6:[0-9]+]] {
+; IS__CGSCC____-SAME: (i32 [[CND:%.*]]) #[[ATTR7:[0-9]+]] {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[I:%.*]] = load i32, i32* @Flag0, align 4, !tbaa [[TBAA3]]
 ; IS__CGSCC____-NEXT:    ret i32 [[I]]
@@ -2405,7 +2320,6 @@ define i32 @static_global_not_simplifiable_1(i32 %cnd) {
 ; CHECK-LABEL: define {{[^@]+}}@static_global_not_simplifiable_1
 ; CHECK-SAME: (i32 [[CND:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[I:%.*]] = load i32, i32* @Flag1, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    call void @sync()
 ; CHECK-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[CND]], 0
 ; CHECK-NEXT:    br i1 [[TOBOOL_NOT]], label [[IF_END:%.*]], label [[IF_THEN:%.*]]
@@ -2413,7 +2327,7 @@ define i32 @static_global_not_simplifiable_1(i32 %cnd) {
 ; CHECK-NEXT:    store i32 1, i32* @Flag1, align 4, !tbaa [[TBAA3]]
 ; CHECK-NEXT:    br label [[IF_END]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    ret i32 [[I]]
+; CHECK-NEXT:    ret i32 1
 ;
 entry:
   %i = load i32, i32* @Flag1, align 4, !tbaa !3
@@ -2486,7 +2400,7 @@ define i32 @write_read_global() {
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_read_global
-; IS__CGSCC____-SAME: () #[[ATTR4]] {
+; IS__CGSCC____-SAME: () #[[ATTR5]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gint1, align 4
 ; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gint1, align 4
 ; IS__CGSCC____-NEXT:    ret i32 [[L]]
@@ -2498,19 +2412,19 @@ define i32 @write_read_global() {
 define void @write_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@write_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 7, i32* @Gint2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@write_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 7, i32* @Gint2, align 4
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_global
-; IS__CGSCC____-SAME: () #[[ATTR7:[0-9]+]] {
+; IS__CGSCC____-SAME: () #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gint2, align 4
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -2520,19 +2434,19 @@ define void @write_global() {
 define i32 @read_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@read_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gint2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@read_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i32, i32* @Gint2, align 4
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[L]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@read_global
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
 ; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gint2, align 4
 ; IS__CGSCC____-NEXT:    ret i32 [[L]]
 ;
@@ -2557,7 +2471,7 @@ define i32 @write_read_static_global() {
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_read_static_global
-; IS__CGSCC____-SAME: () #[[ATTR4]] {
+; IS__CGSCC____-SAME: () #[[ATTR5]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gstatic_int1, align 4
 ; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_int1, align 4
 ; IS__CGSCC____-NEXT:    ret i32 [[L]]
@@ -2569,19 +2483,19 @@ define i32 @write_read_static_global() {
 define void @write_static_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@write_static_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 7, i32* @Gstatic_int2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@write_static_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 7, i32* @Gstatic_int2, align 4
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_static_global
-; IS__CGSCC____-SAME: () #[[ATTR7]] {
+; IS__CGSCC____-SAME: () #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gstatic_int2, align 4
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -2591,19 +2505,19 @@ define void @write_static_global() {
 define i32 @read_static_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@read_static_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
 ; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readonly willreturn
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@read_static_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[L]]
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@read_static_global
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
+; IS__CGSCC____-SAME: () #[[ATTR7]] {
 ; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_int2, align 4
 ; IS__CGSCC____-NEXT:    ret i32 [[L]]
 ;
@@ -2611,26 +2525,23 @@ define i32 @read_static_global() {
   ret i32 %l
 }
 define i32 @write_read_static_undef_global() {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@write_read_static_undef_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR4]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 7, i32* @Gstatic_undef_int1, align 4
-; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int1, align 4
-; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
+; IS__TUNIT_OPM-NEXT:    ret i32 7
 ;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn
+; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@write_read_static_undef_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR2]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 7, i32* @Gstatic_undef_int1, align 4
-; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int1, align 4
-; IS__TUNIT_NPM-NEXT:    ret i32 [[L]]
+; IS__TUNIT_NPM-NEXT:    ret i32 7
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_read_static_undef_global
 ; IS__CGSCC____-SAME: () #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gstatic_undef_int1, align 4
-; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int1, align 4
-; IS__CGSCC____-NEXT:    ret i32 [[L]]
+; IS__CGSCC____-NEXT:    ret i32 7
 ;
   store i32 7, i32* @Gstatic_undef_int1
   %l = load i32, i32* @Gstatic_undef_int1
@@ -2639,19 +2550,19 @@ define i32 @write_read_static_undef_global() {
 define void @write_static_undef_global() {
 ; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@write_static_undef_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR6]] {
+; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
 ; IS__TUNIT_OPM-NEXT:    store i32 7, i32* @Gstatic_undef_int2, align 4
 ; IS__TUNIT_OPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind willreturn writeonly
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@write_static_undef_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR5]] {
+; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
 ; IS__TUNIT_NPM-NEXT:    store i32 7, i32* @Gstatic_undef_int2, align 4
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind willreturn writeonly
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@write_static_undef_global
-; IS__CGSCC____-SAME: () #[[ATTR7]] {
+; IS__CGSCC____-SAME: () #[[ATTR4]] {
 ; IS__CGSCC____-NEXT:    store i32 7, i32* @Gstatic_undef_int2, align 4
 ; IS__CGSCC____-NEXT:    ret void
 ;
@@ -2659,23 +2570,15 @@ define void @write_static_undef_global() {
   ret void
 }
 define i32 @read_static_undef_global() {
-; IS__TUNIT_OPM: Function Attrs: nofree nosync nounwind readonly willreturn
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@read_static_undef_global
-; IS__TUNIT_OPM-SAME: () #[[ATTR5]] {
-; IS__TUNIT_OPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int2, align 4
-; IS__TUNIT_OPM-NEXT:    ret i32 [[L]]
-;
-; IS__TUNIT_NPM: Function Attrs: nofree nosync nounwind readonly willreturn
-; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@read_static_undef_global
-; IS__TUNIT_NPM-SAME: () #[[ATTR4]] {
-; IS__TUNIT_NPM-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int2, align 4
-; IS__TUNIT_NPM-NEXT:    ret i32 [[L]]
+; IS__TUNIT____: Function Attrs: nofree nosync nounwind readnone willreturn
+; IS__TUNIT____-LABEL: define {{[^@]+}}@read_static_undef_global
+; IS__TUNIT____-SAME: () #[[ATTR3]] {
+; IS__TUNIT____-NEXT:    ret i32 7
 ;
-; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readonly willreturn
+; IS__CGSCC____: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@read_static_undef_global
-; IS__CGSCC____-SAME: () #[[ATTR6]] {
-; IS__CGSCC____-NEXT:    [[L:%.*]] = load i32, i32* @Gstatic_undef_int2, align 4
-; IS__CGSCC____-NEXT:    ret i32 [[L]]
+; IS__CGSCC____-SAME: () #[[ATTR2]] {
+; IS__CGSCC____-NEXT:    ret i32 7
 ;
   %l = load i32, i32* @Gstatic_undef_int2
   ret i32 %l
@@ -2738,8 +2641,8 @@ define i32 @single_read_of_static_global() {
 ; IS__TUNIT_OPM: attributes #[[ATTR2]] = { nofree nosync nounwind }
 ; IS__TUNIT_OPM: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_OPM: attributes #[[ATTR6]] = { nofree nosync nounwind readonly willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR7]] = { willreturn }
 ; IS__TUNIT_OPM: attributes #[[ATTR8]] = { nofree nosync nounwind writeonly }
 ;.
@@ -2747,18 +2650,18 @@ define i32 @single_read_of_static_global() {
 ; IS__TUNIT_NPM: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR2]] = { nofree nosync nounwind willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR3]] = { nofree nosync nounwind readnone willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind readonly willreturn }
-; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR4]] = { nofree nosync nounwind willreturn writeonly }
+; IS__TUNIT_NPM: attributes #[[ATTR5]] = { nofree nosync nounwind readonly willreturn }
 ; IS__TUNIT_NPM: attributes #[[ATTR6]] = { willreturn }
 ;.
 ; IS__CGSCC____: attributes #[[ATTR0]] = { argmemonly nofree norecurse nosync nounwind willreturn writeonly }
 ; IS__CGSCC____: attributes #[[ATTR1]] = { argmemonly nofree nosync nounwind willreturn }
 ; IS__CGSCC____: attributes #[[ATTR2]] = { nofree norecurse nosync nounwind readnone willreturn }
 ; IS__CGSCC____: attributes #[[ATTR3]] = { nofree nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR5]] = { argmemonly nofree norecurse nosync nounwind willreturn }
-; IS__CGSCC____: attributes #[[ATTR6]] = { nofree norecurse nosync nounwind readonly willreturn }
-; IS__CGSCC____: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR4]] = { nofree norecurse nosync nounwind willreturn writeonly }
+; IS__CGSCC____: attributes #[[ATTR5]] = { nofree norecurse nosync nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR6]] = { argmemonly nofree norecurse nosync nounwind willreturn }
+; IS__CGSCC____: attributes #[[ATTR7]] = { nofree norecurse nosync nounwind readonly willreturn }
 ; IS__CGSCC____: attributes #[[ATTR8]] = { willreturn }
 ; IS__CGSCC____: attributes #[[ATTR9]] = { nounwind willreturn writeonly }
 ; IS__CGSCC____: attributes #[[ATTR10]] = { nounwind writeonly }

diff  --git a/llvm/test/Transforms/Attributor/wrapper.ll b/llvm/test/Transforms/Attributor/wrapper.ll
index d48ab54aef720..a023220d3529a 100644
--- a/llvm/test/Transforms/Attributor/wrapper.ll
+++ b/llvm/test/Transforms/Attributor/wrapper.ll
@@ -9,11 +9,10 @@
 ;
 ; Check the original function, which is wrapped and becomes anonymous
 ; CHECK: Function Attrs: nofree norecurse nosync nounwind readnone willreturn
-; CHECK: define internal i32 @0()
+; CHECK: define internal noundef i32 @0()
 ; CHECK: %a = alloca i32
 ; CHECK: store i32 1, i32* %a
-; CHECK: %b = load i32, i32* %a
-; CHECK: ret i32 %b
+; CHECK: ret i32 1
 define linkonce i32 @inner1() {
 entry:
   %a = alloca i32

diff  --git a/llvm/test/Transforms/OpenMP/custom_state_machines.ll b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
index 9195b87fb8e82..defadceeafec7 100644
--- a/llvm/test/Transforms/OpenMP/custom_state_machines.ll
+++ b/llvm/test/Transforms/OpenMP/custom_state_machines.ll
@@ -1043,13 +1043,12 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
 ; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 ; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__2 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__2_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
 ; CHECK-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__3 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__3_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -1180,10 +1179,9 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
 ; CHECK-NEXT:    call void @simple_state_machine_interprocedural_before.internalized() #[[ATTR7]]
 ; CHECK-NEXT:    call void @no_parallel_region_in_here.internalized() #[[ATTR7]]
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__5 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__5_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
 ; CHECK-NEXT:    call void @simple_state_machine_interprocedural_after.internalized() #[[ATTR7]]
 ; CHECK-NEXT:    ret void
 ;
@@ -1326,13 +1324,12 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
 ; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 ; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = call i32 @unknown() #[[ATTR8]]
-; CHECK-NEXT:    [[TMP4:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP4]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__7 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__7_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = call i32 @unknown() #[[ATTR8]]
+; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__8 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__8_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -1456,13 +1453,12 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
 ; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 ; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__10 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__10_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
 ; CHECK-NEXT:    call void @unknown_no_openmp() #[[ATTR9:[0-9]+]]
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__11 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__11_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -1552,12 +1548,11 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    [[CAPTURED_VARS_ADDRS1:%.*]] = alloca [0 x i8*], align 8
 ; CHECK-NEXT:    store i32* [[DOTGLOBAL_TID_]], i32** [[DOTGLOBAL_TID__ADDR]], align 8
 ; CHECK-NEXT:    store i32* [[DOTBOUND_TID_]], i32** [[DOTBOUND_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[DOTGLOBAL_TID__ADDR]], align 8
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
-; CHECK-NEXT:    [[TMP3:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
-; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP1]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP3]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; CHECK-NEXT:    [[TMP1:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__13 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__13_wrapper to i8*), i8** noundef [[TMP1]], i64 noundef 0)
+; CHECK-NEXT:    [[TMP2:%.*]] = bitcast [0 x i8*]* [[CAPTURED_VARS_ADDRS1]] to i8**
+; CHECK-NEXT:    call void @__kmpc_parallel_51(%struct.ident_t* noundef @[[GLOB1]], i32 [[TMP0]], i32 noundef 1, i32 noundef -1, i32 noundef -1, i8* noundef bitcast (void (i32*, i32*)* @__omp_outlined__14 to i8*), i8* noundef bitcast (void (i16, i32)* @__omp_outlined__14_wrapper to i8*), i8** noundef [[TMP2]], i64 noundef 0)
 ; CHECK-NEXT:    ret void
 ;
 ;
@@ -1684,14 +1679,12 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[TMP0]], 0
+; CHECK-NEXT:    [[CMP:%.*]] = icmp eq i32 [[A]], 0
 ; CHECK-NEXT:    br i1 [[CMP]], label [[IF_THEN:%.*]], label [[IF_END:%.*]]
 ; CHECK:       if.then:
 ; CHECK-NEXT:    br label [[RETURN:%.*]]
 ; CHECK:       if.end:
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
 ; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
 ; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
 ; CHECK-NEXT:    br label [[RETURN]]
@@ -1712,9 +1705,9 @@ attributes #10 = { convergent nounwind readonly willreturn }
 ; CHECK-NEXT:    br label [[RETURN:%.*]]
 ; CHECK:       if.end:
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[TMP1]], 1
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR7]]
-; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR7]]
+; CHECK-NEXT:    [[SUB:%.*]] = sub nsw i32 [[A]], 1
+; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after.internalized(i32 [[SUB]]) #[[ATTR8]]
+; CHECK-NEXT:    call void @simple_state_machine_interprocedural_nested_recursive_after_after.internalized() #[[ATTR8]]
 ; CHECK-NEXT:    br label [[RETURN]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
index e9ebe61e58186..0424c48bb8e07 100644
--- a/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
+++ b/llvm/test/Transforms/OpenMP/parallel_region_merging.ll
@@ -4688,6 +4688,7 @@ entry:
 ; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 4
 ; CHECK-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK-NEXT:    ret void
+
 ; CHECK1-LABEL: define {{[^@]+}}@merge
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
@@ -4917,12 +4918,14 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK1-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK1-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -4933,12 +4936,13 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -4949,9 +4953,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -4968,8 +4972,7 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
-; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK1-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000
 ; CHECK1-NEXT:    store float [[ADD]], float* [[P]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5172,15 +5175,17 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK1-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
 ; CHECK1-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK1-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5193,12 +5198,13 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5209,9 +5215,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5228,9 +5234,8 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK1-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK1-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
 ; CHECK1-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5335,14 +5340,16 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK1-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5353,12 +5360,13 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5369,9 +5377,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5388,8 +5396,7 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
-; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK1-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK1-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
 ; CHECK1-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@@ -5502,14 +5509,16 @@ entry:
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK1-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK1-NEXT:  entry:
+; CHECK1-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK1-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK1:       omp_parallel:
-; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK1-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK1:       omp.par.outlined.exit:
 ; CHECK1-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -5522,12 +5531,13 @@ entry:
 ;
 ;
 ; CHECK1-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK1-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK1-NEXT:  omp.par.entry:
 ; CHECK1-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK1-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK1-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK1-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK1:       omp.par.outlined.exit.exitStub:
 ; CHECK1-NEXT:    ret void
@@ -5538,9 +5548,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK1-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK1-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK1-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK1-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK1-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK1:       omp_region.end:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -5550,9 +5560,9 @@ entry:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK1-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK1-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK1-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK1-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK1-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK1-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK1:       omp_region.end4:
 ; CHECK1-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK1-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@@ -5569,9 +5579,8 @@ entry:
 ; CHECK1:       omp_region.body5:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK1:       seq.par.merged2:
-; CHECK1-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK1-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]]
 ; CHECK1-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split.split.split:
@@ -5582,8 +5591,7 @@ entry:
 ; CHECK1:       omp_region.body:
 ; CHECK1-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK1:       seq.par.merged:
-; CHECK1-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK1-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
 ; CHECK1-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK1-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK1:       omp.par.merged.split:
@@ -5698,8 +5706,7 @@ entry:
 ; CHECK1-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK1-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK1-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK1-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK1-NEXT:    ret void
@@ -6028,12 +6035,14 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float
 ; CHECK2-SAME: (float [[F:%.*]], float* nocapture nofree writeonly [[P:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[F_RELOADED:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    [[F_ADDR:%.*]] = alloca float, align 4
 ; CHECK2-NEXT:    store float [[F]], float* [[F_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    store float [[F]], float* [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_ADDR]], float* [[P]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, float*, float*, float*)* @merge_seq_float..omp_par to void (i32*, i32*, ...)*), float* [[F_RELOADED]], float* [[F_ADDR]], float* [[P]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6044,12 +6053,13 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_float..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], float* [[F_RELOADED:%.*]], float* [[F_ADDR:%.*]], float* [[P:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load float, float* [[F_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6060,9 +6070,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6079,8 +6089,7 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load float, float* [[F_ADDR]], align 4
-; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP3]], 0x40091EB860000000
+; CHECK2-NEXT:    [[ADD:%.*]] = fadd float [[TMP1]], 0x40091EB860000000
 ; CHECK2-NEXT:    store float [[ADD]], float* [[P]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -6283,15 +6292,17 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use
 ; CHECK2-SAME: (i32 [[A:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[B:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
 ; CHECK2-NEXT:    [[LT_CAST3:%.*]] = bitcast i32* [[B]] to i8*
 ; CHECK2-NEXT:    call void @llvm.lifetime.start.p0i8(i64 -1, i8* [[LT_CAST3]])
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[B]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_seq_par_use..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[B]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6304,12 +6315,13 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_seq_par_use..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[B:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6320,9 +6332,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6339,9 +6351,8 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = bitcast i32* [[B]] to i8*
-; CHECK2-NEXT:    [[TMP4:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP4]], 1
+; CHECK2-NEXT:    [[TMP4:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
 ; CHECK2-NEXT:    store i32 [[ADD]], i32* [[B]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -6446,14 +6457,16 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq
 ; CHECK2-SAME: (i32 [[CANCEL1:%.*]], i32 [[CANCEL2:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[CANCEL1_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL1_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[CANCEL2_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[CANCEL2]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[CANCEL1]], i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 2, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_cancellable_regions_seq..omp_par to void (i32*, i32*, ...)*), i32* [[CANCEL1_RELOADED]], i32* [[CANCEL1_ADDR]], i32* [[CANCEL2_ADDR]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6464,12 +6477,13 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_cancellable_regions_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[CANCEL1_RELOADED:%.*]], i32* [[CANCEL1_ADDR:%.*]], i32* [[CANCEL2_ADDR:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[CANCEL1_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6480,9 +6494,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6499,8 +6513,7 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP3:%.*]] = load i32, i32* [[CANCEL1_ADDR]], align 4
-; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP3]], 0
+; CHECK2-NEXT:    [[TOBOOL_NOT:%.*]] = icmp eq i32 [[TMP1]], 0
 ; CHECK2-NEXT:    [[LNOT_EXT:%.*]] = zext i1 [[TOBOOL_NOT]] to i32
 ; CHECK2-NEXT:    store i32 [[LNOT_EXT]], i32* [[CANCEL2_ADDR]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
@@ -6613,14 +6626,16 @@ entry:
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq
 ; CHECK2-SAME: (i32 [[A:%.*]], i32 [[B:%.*]]) local_unnamed_addr {
 ; CHECK2-NEXT:  entry:
+; CHECK2-NEXT:    [[A_RELOADED:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD1_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_ALLOC:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[A_ADDR:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM7:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
+; CHECK2-NEXT:    store i32 [[A]], i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PARALLEL:%.*]]
 ; CHECK2:       omp_parallel:
-; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 3, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
+; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* @[[GLOB1]], i32 4, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i32*, i32*, i32*)* @merge_3_seq..omp_par to void (i32*, i32*, ...)*), i32* [[A_RELOADED]], i32* [[A_ADDR]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]])
 ; CHECK2-NEXT:    br label [[OMP_PAR_OUTLINED_EXIT:%.*]]
 ; CHECK2:       omp.par.outlined.exit:
 ; CHECK2-NEXT:    br label [[OMP_PAR_EXIT_SPLIT:%.*]]
@@ -6633,12 +6648,13 @@ entry:
 ;
 ;
 ; CHECK2-LABEL: define {{[^@]+}}@merge_3_seq..omp_par
-; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
+; CHECK2-SAME: (i32* noalias [[TID_ADDR:%.*]], i32* noalias [[ZERO_ADDR:%.*]], i32* [[A_RELOADED:%.*]], i32* [[A_ADDR:%.*]], i32* [[ADD_SEQ_OUTPUT_ALLOC:%.*]], i32* [[ADD1_SEQ_OUTPUT_ALLOC:%.*]]) #[[ATTR0]] {
 ; CHECK2-NEXT:  omp.par.entry:
 ; CHECK2-NEXT:    [[TID_ADDR_LOCAL:%.*]] = alloca i32, align 4
 ; CHECK2-NEXT:    [[TMP0:%.*]] = load i32, i32* [[TID_ADDR]], align 4
 ; CHECK2-NEXT:    store i32 [[TMP0]], i32* [[TID_ADDR_LOCAL]], align 4
 ; CHECK2-NEXT:    [[TID:%.*]] = load i32, i32* [[TID_ADDR_LOCAL]], align 4
+; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_RELOADED]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_REGION:%.*]]
 ; CHECK2:       omp.par.outlined.exit.exitStub:
 ; CHECK2-NEXT:    ret void
@@ -6649,9 +6665,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM2:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM2]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
-; CHECK2-NEXT:    [[TMP2:%.*]] = icmp ne i32 [[TMP1]], 0
-; CHECK2-NEXT:    br i1 [[TMP2]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
+; CHECK2-NEXT:    [[TMP2:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM]])
+; CHECK2-NEXT:    [[TMP3:%.*]] = icmp ne i32 [[TMP2]], 0
+; CHECK2-NEXT:    br i1 [[TMP3]], label [[OMP_REGION_BODY:%.*]], label [[OMP_REGION_END:%.*]]
 ; CHECK2:       omp_region.end:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM1:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM1]])
@@ -6661,9 +6677,9 @@ entry:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM4:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM4]])
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM3:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
-; CHECK2-NEXT:    [[TMP3:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
-; CHECK2-NEXT:    [[TMP4:%.*]] = icmp ne i32 [[TMP3]], 0
-; CHECK2-NEXT:    br i1 [[TMP4]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
+; CHECK2-NEXT:    [[TMP4:%.*]] = call i32 @__kmpc_master(%struct.ident_t* @[[GLOB1]], i32 [[OMP_GLOBAL_THREAD_NUM3]])
+; CHECK2-NEXT:    [[TMP5:%.*]] = icmp ne i32 [[TMP4]], 0
+; CHECK2-NEXT:    br i1 [[TMP5]], label [[OMP_REGION_BODY5:%.*]], label [[OMP_REGION_END4:%.*]]
 ; CHECK2:       omp_region.end4:
 ; CHECK2-NEXT:    [[OMP_GLOBAL_THREAD_NUM6:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* @[[GLOB1]])
 ; CHECK2-NEXT:    call void @__kmpc_barrier(%struct.ident_t* @[[GLOB2]], i32 [[OMP_GLOBAL_THREAD_NUM6]])
@@ -6680,9 +6696,8 @@ entry:
 ; CHECK2:       omp_region.body5:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED2:%.*]]
 ; CHECK2:       seq.par.merged2:
-; CHECK2-NEXT:    [[TMP5:%.*]] = load i32, i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    [[ADD_SEQ_OUTPUT_LOAD:%.*]] = load i32, i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
-; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP5]]
+; CHECK2-NEXT:    [[ADD1:%.*]] = add nsw i32 [[ADD_SEQ_OUTPUT_LOAD]], [[TMP1]]
 ; CHECK2-NEXT:    store i32 [[ADD1]], i32* [[ADD1_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT_SPLIT_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split.split.split:
@@ -6693,8 +6708,7 @@ entry:
 ; CHECK2:       omp_region.body:
 ; CHECK2-NEXT:    br label [[SEQ_PAR_MERGED:%.*]]
 ; CHECK2:       seq.par.merged:
-; CHECK2-NEXT:    [[TMP6:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP6]], 1
+; CHECK2-NEXT:    [[ADD:%.*]] = add nsw i32 [[TMP1]], 1
 ; CHECK2-NEXT:    store i32 [[ADD]], i32* [[ADD_SEQ_OUTPUT_ALLOC]], align 4
 ; CHECK2-NEXT:    br label [[OMP_PAR_MERGED_SPLIT:%.*]]
 ; CHECK2:       omp.par.merged.split:
@@ -6809,8 +6823,7 @@ entry:
 ; CHECK2-NEXT:    [[TMP0:%.*]] = call i32 @__kmpc_global_thread_num(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]])
 ; CHECK2-NEXT:    store i32 [[A]], i32* [[A_ADDR]], align 4
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..34 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
-; CHECK2-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A_ADDR]], align 4
-; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[TMP1]])
+; CHECK2-NEXT:    call void @__kmpc_push_num_threads(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 [[TMP0]], i32 [[A]])
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..35 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    call void (%struct.ident_t*, i32, void (i32*, i32*, ...)*, ...) @__kmpc_fork_call(%struct.ident_t* noundef nonnull align 8 dereferenceable(24) @[[GLOB1]], i32 noundef 1, void (i32*, i32*, ...)* noundef bitcast (void (i32*, i32*, i32*)* @.omp_outlined..36 to void (i32*, i32*, ...)*), i32* nocapture nofree noundef nonnull readonly align 4 dereferenceable(4) [[A_ADDR]])
 ; CHECK2-NEXT:    ret void
@@ -6909,4 +6922,3 @@ entry:
 ; CHECK2-NEXT:    call void @use(i32 [[TMP0]])
 ; CHECK2-NEXT:    ret void
 ;
-


        


More information about the llvm-commits mailing list