[llvm] 231026a - [Attributor] Initialize "value attributes" w/ must-be-executed-context info

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Sun May 3 19:44:36 PDT 2020


Author: Johannes Doerfert
Date: 2020-05-03T21:41:22-05:00
New Revision: 231026a508eacad3a9bab98954194287b8fcd412

URL: https://github.com/llvm/llvm-project/commit/231026a508eacad3a9bab98954194287b8fcd412
DIFF: https://github.com/llvm/llvm-project/commit/231026a508eacad3a9bab98954194287b8fcd412.diff

LOG: [Attributor] Initialize "value attributes" w/ must-be-executed-context info

Attributes that only depend on the value (=bit pattern) can be
initialized from uses in the must-be-executed-context (MBEC). We did use
`AAComposeTwoGenericDeduction` and `AAFromMustBeExecutedContext` before
to do this for some positions of these attributes but not for all. This
was fairly complicated and also problematic as we did run it in every
`updateImpl` call even though we only use known information. The new
implementation removes `AAComposeTwoGenericDeduction`* and
`AAFromMustBeExecutedContext` in favor of a simple interface
`AddInformation::fromMBEContext(...)` (`followUsesInMBEC(...)` in the diff below) which we call from the
`initialize` methods of the "value attribute" `Impl` classes, e.g.
`AANonNullImpl::initialize`.

There can be two types of test changes:
  1) Artifacts where we miss some information that was known before a
     global fixpoint was reached and therefore available in an update
     but not at the beginning.
  2) Deduction for values we did not derive via the MBEC before or which
     were not found as the `AAFromMustBeExecutedContext::updateImpl` was
     never invoked.

* An improved version of AAComposeTwoGenericDeduction can be found in
  D78718. Once we find a new use case that implementation will be able
  to handle "generic" AAs better.

---

Single run of the Attributor module and then CGSCC pass (oldPM)
for SPASS/clause.c (~10k LLVM-IR loc):

Before:
```
calls to allocation functions: 468428 (328952/s)
temporary memory allocations: 77480 (54410/s)
peak heap memory consumption: 32.71MB
peak RSS (including heaptrack overhead): 122.46MB
total memory leaked: 269.10KB
```

After:
```
calls to allocation functions: 554720 (351310/s)
temporary memory allocations: 101650 (64376/s)
peak heap memory consumption: 28.46MB
peak RSS (including heaptrack overhead): 116.75MB
total memory leaked: 269.10KB
```

Difference:
```
calls to allocation functions: 86292 (556722/s)
temporary memory allocations: 24170 (155935/s)
peak heap memory consumption: -4.25MB
peak RSS (including heaptrack overhead): 0B
total memory leaked: 0B
```

Reviewed By: uenoku

Differential Revision: https://reviews.llvm.org/D78719

Added: 
    

Modified: 
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
    llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
    llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
    llvm/test/Transforms/Attributor/callbacks.ll
    llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
    llvm/test/Transforms/Attributor/value-simplify.ll

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index b9adc0e981cd..b7be4f73336d 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -400,28 +400,6 @@ static void clampReturnedValueStates(Attributor &A, const AAType &QueryingAA,
     S ^= *T;
 }
 
-/// Helper class to compose two generic deduction
-template <typename AAType, typename BaseType, typename StateType,
-          template <typename...> class F, template <typename...> class G>
-struct AAComposeTwoGenericDeduction
-    : public F<AAType, G<AAType, BaseType, StateType>, StateType> {
-  AAComposeTwoGenericDeduction(const IRPosition &IRP, Attributor &A)
-      : F<AAType, G<AAType, BaseType, StateType>, StateType>(IRP, A) {}
-
-  void initialize(Attributor &A) override {
-    F<AAType, G<AAType, BaseType, StateType>, StateType>::initialize(A);
-    G<AAType, BaseType, StateType>::initialize(A);
-  }
-
-  /// See AbstractAttribute::updateImpl(...).
-  ChangeStatus updateImpl(Attributor &A) override {
-    ChangeStatus ChangedF =
-        F<AAType, G<AAType, BaseType, StateType>, StateType>::updateImpl(A);
-    ChangeStatus ChangedG = G<AAType, BaseType, StateType>::updateImpl(A);
-    return ChangedF | ChangedG;
-  }
-};
-
 /// Helper class for generic deduction: return value -> returned position.
 template <typename AAType, typename BaseType,
           typename StateType = typename BaseType::StateType>
@@ -530,151 +508,116 @@ struct AACallSiteReturnedFromReturned : public BaseType {
   }
 };
 
-/// Helper class for generic deduction using must-be-executed-context
-/// BaseType class is required to have `followUse` method.
-
-/// bool followUse(Attributor &A, const Use *U, const Instruction *I)
-/// U - Underlying use.
-/// I - The user of the \p U.
-/// `followUse` returns true if the value should be tracked transitively.
-
-template <typename AAType, typename BaseType,
-          typename StateType = typename AAType::StateType>
-struct AAFromMustBeExecutedContext : public BaseType {
-  AAFromMustBeExecutedContext(const IRPosition &IRP, Attributor &A)
-      : BaseType(IRP, A) {}
-
-  void initialize(Attributor &A) override {
-    BaseType::initialize(A);
-    const IRPosition &IRP = this->getIRPosition();
-    Instruction *CtxI = IRP.getCtxI();
-
-    if (!CtxI)
-      return;
-
-    for (const Use &U : IRP.getAssociatedValue().uses())
-      Uses.insert(&U);
-  }
-
-  /// Helper function to accumulate uses.
-  void followUsesInContext(Attributor &A,
-                           MustBeExecutedContextExplorer &Explorer,
-                           const Instruction *CtxI,
-                           SetVector<const Use *> &Uses, StateType &State) {
-    auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
-    for (unsigned u = 0; u < Uses.size(); ++u) {
-      const Use *U = Uses[u];
-      if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
-        bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
-        if (Found && BaseType::followUse(A, U, UserI, State))
-          for (const Use &Us : UserI->uses())
-            Uses.insert(&Us);
-      }
+/// Helper function to accumulate uses.
+template <class AAType, typename StateType = typename AAType::StateType>
+static void followUsesInContext(AAType &AA, Attributor &A,
+                                MustBeExecutedContextExplorer &Explorer,
+                                const Instruction *CtxI,
+                                SetVector<const Use *> &Uses,
+                                StateType &State) {
+  auto EIt = Explorer.begin(CtxI), EEnd = Explorer.end(CtxI);
+  for (unsigned u = 0; u < Uses.size(); ++u) {
+    const Use *U = Uses[u];
+    if (const Instruction *UserI = dyn_cast<Instruction>(U->getUser())) {
+      bool Found = Explorer.findInContextOf(UserI, EIt, EEnd);
+      if (Found && AA.followUseInMBEC(A, U, UserI, State))
+        for (const Use &Us : UserI->uses())
+          Uses.insert(&Us);
     }
   }
+}
 
-  /// See AbstractAttribute::updateImpl(...).
-  ChangeStatus updateImpl(Attributor &A) override {
-    auto BeforeState = this->getState();
-    auto &S = this->getState();
-    Instruction *CtxI = this->getIRPosition().getCtxI();
-    if (!CtxI)
-      return ChangeStatus::UNCHANGED;
+/// Use the must-be-executed-context around \p I to add information into \p S.
+/// The AAType class is required to have `followUseInMBEC` method with the
+/// following signature and behaviour:
+///
+/// bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I)
+/// U - Underlying use.
+/// I - The user of the \p U.
+/// Returns true if the value should be tracked transitively.
+///
+template <class AAType, typename StateType = typename AAType::StateType>
+static void followUsesInMBEC(AAType &AA, Attributor &A, StateType &S,
+                            Instruction &CtxI) {
 
-    MustBeExecutedContextExplorer &Explorer =
-        A.getInfoCache().getMustBeExecutedContextExplorer();
+  // Container for (transitive) uses of the associated value.
+  SetVector<const Use *> Uses;
+  for (const Use &U : AA.getIRPosition().getAssociatedValue().uses())
+    Uses.insert(&U);
 
-    followUsesInContext(A, Explorer, CtxI, Uses, S);
+  MustBeExecutedContextExplorer &Explorer =
+      A.getInfoCache().getMustBeExecutedContextExplorer();
 
-    if (this->isAtFixpoint())
-      return ChangeStatus::CHANGED;
+  followUsesInContext<AAType>(AA, A, Explorer, &CtxI, Uses, S);
 
-    SmallVector<const BranchInst *, 4> BrInsts;
-    auto Pred = [&](const Instruction *I) {
-      if (const BranchInst *Br = dyn_cast<BranchInst>(I))
-        if (Br->isConditional())
-          BrInsts.push_back(Br);
-      return true;
-    };
+  if (S.isAtFixpoint())
+    return;
 
-    // Here, accumulate conditional branch instructions in the context. We
-    // explore the child paths and collect the known states. The disjunction of
-    // those states can be merged to its own state. Let ParentState_i be a state
-    // to indicate the known information for an i-th branch instruction in the
-    // context. ChildStates are created for its successors respectively.
-    //
-    // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1}
-    // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2}
-    //      ...
-    // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m}
-    //
-    // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m
-    //
-    // FIXME: Currently, recursive branches are not handled. For example, we
-    // can't deduce that ptr must be dereferenced in below function.
-    //
-    // void f(int a, int c, int *ptr) {
-    //    if(a)
-    //      if (b) {
-    //        *ptr = 0;
-    //      } else {
-    //        *ptr = 1;
-    //      }
-    //    else {
-    //      if (b) {
-    //        *ptr = 0;
-    //      } else {
-    //        *ptr = 1;
-    //      }
-    //    }
-    // }
-
-    Explorer.checkForAllContext(CtxI, Pred);
-    for (const BranchInst *Br : BrInsts) {
-      StateType ParentState;
-
-      // The known state of the parent state is a conjunction of children's
-      // known states so it is initialized with a best state.
-      ParentState.indicateOptimisticFixpoint();
-
-      for (const BasicBlock *BB : Br->successors()) {
-        StateType ChildState;
-
-        size_t BeforeSize = Uses.size();
-        followUsesInContext(A, Explorer, &BB->front(), Uses, ChildState);
-
-        // Erase uses which only appear in the child.
-        for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
-          It = Uses.erase(It);
-
-        ParentState &= ChildState;
-      }
+  SmallVector<const BranchInst *, 4> BrInsts;
+  auto Pred = [&](const Instruction *I) {
+    if (const BranchInst *Br = dyn_cast<BranchInst>(I))
+      if (Br->isConditional())
+        BrInsts.push_back(Br);
+    return true;
+  };
 
-      // Use only known state.
-      S += ParentState;
+  // Here, accumulate conditional branch instructions in the context. We
+  // explore the child paths and collect the known states. The disjunction of
+  // those states can be merged to its own state. Let ParentState_i be a state
+  // to indicate the known information for an i-th branch instruction in the
+  // context. ChildStates are created for its successors respectively.
+  //
+  // ParentS_1 = ChildS_{1, 1} /\ ChildS_{1, 2} /\ ... /\ ChildS_{1, n_1}
+  // ParentS_2 = ChildS_{2, 1} /\ ChildS_{2, 2} /\ ... /\ ChildS_{2, n_2}
+  //      ...
+  // ParentS_m = ChildS_{m, 1} /\ ChildS_{m, 2} /\ ... /\ ChildS_{m, n_m}
+  //
+  // Known State |= ParentS_1 \/ ParentS_2 \/... \/ ParentS_m
+  //
+  // FIXME: Currently, recursive branches are not handled. For example, we
+  // can't deduce that ptr must be dereferenced in below function.
+  //
+  // void f(int a, int c, int *ptr) {
+  //    if(a)
+  //      if (b) {
+  //        *ptr = 0;
+  //      } else {
+  //        *ptr = 1;
+  //      }
+  //    else {
+  //      if (b) {
+  //        *ptr = 0;
+  //      } else {
+  //        *ptr = 1;
+  //      }
+  //    }
+  // }
+
+  Explorer.checkForAllContext(&CtxI, Pred);
+  for (const BranchInst *Br : BrInsts) {
+    StateType ParentState;
+
+    // The known state of the parent state is a conjunction of children's
+    // known states so it is initialized with a best state.
+    ParentState.indicateOptimisticFixpoint();
+
+    for (const BasicBlock *BB : Br->successors()) {
+      StateType ChildState;
+
+      size_t BeforeSize = Uses.size();
+      followUsesInContext(AA, A, Explorer, &BB->front(), Uses, ChildState);
+
+      // Erase uses which only appear in the child.
+      for (auto It = Uses.begin() + BeforeSize; It != Uses.end();)
+        It = Uses.erase(It);
+
+      ParentState &= ChildState;
     }
 
-    return BeforeState == S ? ChangeStatus::UNCHANGED : ChangeStatus::CHANGED;
+    // Use only known state.
+    S += ParentState;
   }
-
-private:
-  /// Container for (transitive) uses of the associated value.
-  SetVector<const Use *> Uses;
-};
-
-template <typename AAType, typename BaseType,
-          typename StateType = typename AAType::StateType>
-using AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext =
-    AAComposeTwoGenericDeduction<AAType, BaseType, StateType,
-                                 AAFromMustBeExecutedContext,
-                                 AAArgumentFromCallSiteArguments>;
-
-template <typename AAType, typename BaseType,
-          typename StateType = typename AAType::StateType>
-using AACallSiteReturnedFromReturnedAndMustBeExecutedContext =
-    AAComposeTwoGenericDeduction<AAType, BaseType, StateType,
-                                 AAFromMustBeExecutedContext,
-                                 AACallSiteReturnedFromReturned>;
+}
 
 /// -----------------------NoUnwind Function Attribute--------------------------
 
@@ -1665,11 +1608,15 @@ struct AANonNullImpl : AANonNull {
       indicatePessimisticFixpoint();
     else
       AANonNull::initialize(A);
+
+    if (!getState().isAtFixpoint())
+      if (Instruction *CtxI = getCtxI())
+        followUsesInMBEC(*this, A, getState(), *CtxI);
   }
 
-  /// See AAFromMustBeExecutedContext
-  bool followUse(Attributor &A, const Use *U, const Instruction *I,
-                 AANonNull::StateType &State) {
+  /// See followUsesInMBEC
+  bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+                       AANonNull::StateType &State) {
     bool IsNonNull = false;
     bool TrackUse = false;
     getKnownNonNullAndDerefBytesForUse(A, *this, getAssociatedValue(), U, I,
@@ -1689,22 +1636,17 @@ struct AANonNullImpl : AANonNull {
 };
 
 /// NonNull attribute for a floating value.
-struct AANonNullFloating
-    : AAFromMustBeExecutedContext<AANonNull, AANonNullImpl> {
-  using Base = AAFromMustBeExecutedContext<AANonNull, AANonNullImpl>;
-  AANonNullFloating(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+struct AANonNullFloating : public AANonNullImpl {
+  AANonNullFloating(const IRPosition &IRP, Attributor &A)
+      : AANonNullImpl(IRP, A) {}
 
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
-    ChangeStatus Change = Base::updateImpl(A);
-    if (isKnownNonNull())
-      return Change;
-
     if (!NullIsDefined) {
       const auto &DerefAA =
           A.getAAFor<AADereferenceable>(*this, getIRPosition());
       if (DerefAA.getAssumedDereferenceableBytes())
-        return Change;
+        return ChangeStatus::UNCHANGED;
     }
 
     const DataLayout &DL = A.getDataLayout();
@@ -1756,12 +1698,9 @@ struct AANonNullReturned final
 
 /// NonNull attribute for function argument.
 struct AANonNullArgument final
-    : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
-                                                              AANonNullImpl> {
+    : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl> {
   AANonNullArgument(const IRPosition &IRP, Attributor &A)
-      : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AANonNull,
-                                                                AANonNullImpl>(
-            IRP, A) {}
+      : AAArgumentFromCallSiteArguments<AANonNull, AANonNullImpl>(IRP, A) {}
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override { STATS_DECLTRACK_ARG_ATTR(nonnull) }
@@ -1777,12 +1716,9 @@ struct AANonNullCallSiteArgument final : AANonNullFloating {
 
 /// NonNull attribute for a call site return position.
 struct AANonNullCallSiteReturned final
-    : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
-                                                             AANonNullImpl> {
+    : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl> {
   AANonNullCallSiteReturned(const IRPosition &IRP, Attributor &A)
-      : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AANonNull,
-                                                               AANonNullImpl>(
-            IRP, A) {}
+      : AACallSiteReturnedFromReturned<AANonNull, AANonNullImpl>(IRP, A) {}
 
   /// See AbstractAttribute::trackStatistics()
   void trackStatistics() const override { STATS_DECLTRACK_CSRET_ATTR(nonnull) }
@@ -3270,6 +3206,7 @@ struct AADereferenceableImpl : AADereferenceable {
       : AADereferenceable(IRP, A) {}
   using StateType = DerefState;
 
+  /// See AbstractAttribute::initialize(...).
   void initialize(Attributor &A) override {
     SmallVector<Attribute, 4> Attrs;
     getAttrs({Attribute::Dereferenceable, Attribute::DereferenceableOrNull},
@@ -3283,8 +3220,13 @@ struct AADereferenceableImpl : AADereferenceable {
     const IRPosition &IRP = this->getIRPosition();
     bool IsFnInterface = IRP.isFnInterfaceKind();
     Function *FnScope = IRP.getAnchorScope();
-    if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope)))
+    if (IsFnInterface && (!FnScope || !A.isFunctionIPOAmendable(*FnScope))) {
       indicatePessimisticFixpoint();
+      return;
+    }
+
+    if (Instruction *CtxI = getCtxI())
+      followUsesInMBEC(*this, A, getState(), *CtxI);
   }
 
   /// See AbstractAttribute::getState()
@@ -3314,9 +3256,9 @@ struct AADereferenceableImpl : AADereferenceable {
     return;
   }
 
-  /// See AAFromMustBeExecutedContext
-  bool followUse(Attributor &A, const Use *U, const Instruction *I,
-                 AADereferenceable::StateType &State) {
+  /// See followUsesInMBEC
+  bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+                       AADereferenceable::StateType &State) {
     bool IsNonNull = false;
     bool TrackUse = false;
     int64_t DerefBytes = getKnownNonNullAndDerefBytesForUse(
@@ -3361,17 +3303,12 @@ struct AADereferenceableImpl : AADereferenceable {
 };
 
 /// Dereferenceable attribute for a floating value.
-struct AADereferenceableFloating
-    : AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl> {
-  using Base =
-      AAFromMustBeExecutedContext<AADereferenceable, AADereferenceableImpl>;
+struct AADereferenceableFloating : AADereferenceableImpl {
   AADereferenceableFloating(const IRPosition &IRP, Attributor &A)
-      : Base(IRP, A) {}
+      : AADereferenceableImpl(IRP, A) {}
 
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
-    ChangeStatus Change = Base::updateImpl(A);
-
     const DataLayout &DL = A.getDataLayout();
 
     auto VisitValueCB = [&](Value &V, const Instruction *, DerefState &T,
@@ -3433,7 +3370,7 @@ struct AADereferenceableFloating
             A, getIRPosition(), *this, T, VisitValueCB, getCtxI()))
       return indicatePessimisticFixpoint();
 
-    return Change | clampStateAndIndicateChange(getState(), T);
+    return clampStateAndIndicateChange(getState(), T);
   }
 
   /// See AbstractAttribute::trackStatistics()
@@ -3457,10 +3394,10 @@ struct AADereferenceableReturned final
 
 /// Dereferenceable attribute for an argument
 struct AADereferenceableArgument final
-    : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
-          AADereferenceable, AADereferenceableImpl> {
-  using Base = AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<
-      AADereferenceable, AADereferenceableImpl>;
+    : AAArgumentFromCallSiteArguments<AADereferenceable,
+                                      AADereferenceableImpl> {
+  using Base =
+      AAArgumentFromCallSiteArguments<AADereferenceable, AADereferenceableImpl>;
   AADereferenceableArgument(const IRPosition &IRP, Attributor &A)
       : Base(IRP, A) {}
 
@@ -3483,10 +3420,9 @@ struct AADereferenceableCallSiteArgument final : AADereferenceableFloating {
 
 /// Dereferenceable attribute deduction for a call site return value.
 struct AADereferenceableCallSiteReturned final
-    : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
-          AADereferenceable, AADereferenceableImpl> {
-  using Base = AACallSiteReturnedFromReturnedAndMustBeExecutedContext<
-      AADereferenceable, AADereferenceableImpl>;
+    : AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl> {
+  using Base =
+      AACallSiteReturnedFromReturned<AADereferenceable, AADereferenceableImpl>;
   AADereferenceableCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : Base(IRP, A) {}
 
@@ -3592,8 +3528,13 @@ struct AAAlignImpl : AAAlign {
 
     if (getIRPosition().isFnInterfaceKind() &&
         (!getAnchorScope() ||
-         !A.isFunctionIPOAmendable(*getAssociatedFunction())))
+         !A.isFunctionIPOAmendable(*getAssociatedFunction()))) {
       indicatePessimisticFixpoint();
+      return;
+    }
+
+    if (Instruction *CtxI = getCtxI())
+      followUsesInMBEC(*this, A, getState(), *CtxI);
   }
 
   /// See AbstractAttribute::manifest(...).
@@ -3643,9 +3584,10 @@ struct AAAlignImpl : AAAlign {
       Attrs.emplace_back(
           Attribute::getWithAlignment(Ctx, Align(getAssumedAlign())));
   }
-  /// See AAFromMustBeExecutedContext
-  bool followUse(Attributor &A, const Use *U, const Instruction *I,
-                 AAAlign::StateType &State) {
+
+  /// See followUsesInMBEC
+  bool followUseInMBEC(Attributor &A, const Use *U, const Instruction *I,
+                       AAAlign::StateType &State) {
     bool TrackUse = false;
 
     unsigned int KnownAlign =
@@ -3664,14 +3606,11 @@ struct AAAlignImpl : AAAlign {
 };
 
 /// Align attribute for a floating value.
-struct AAAlignFloating : AAFromMustBeExecutedContext<AAAlign, AAAlignImpl> {
-  using Base = AAFromMustBeExecutedContext<AAAlign, AAAlignImpl>;
-  AAAlignFloating(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
+struct AAAlignFloating : AAAlignImpl {
+  AAAlignFloating(const IRPosition &IRP, Attributor &A) : AAAlignImpl(IRP, A) {}
 
   /// See AbstractAttribute::updateImpl(...).
   ChangeStatus updateImpl(Attributor &A) override {
-    Base::updateImpl(A);
-
     const DataLayout &DL = A.getDataLayout();
 
     auto VisitValueCB = [&](Value &V, const Instruction *,
@@ -3717,11 +3656,8 @@ struct AAAlignReturned final
 
 /// Align attribute for function argument.
 struct AAAlignArgument final
-    : AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
-                                                              AAAlignImpl> {
-  using Base =
-      AAArgumentFromCallSiteArgumentsAndMustBeExecutedContext<AAAlign,
-                                                              AAAlignImpl>;
+    : AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl> {
+  using Base = AAArgumentFromCallSiteArguments<AAAlign, AAAlignImpl>;
   AAAlignArgument(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {}
 
   /// See AbstractAttribute::manifest(...).
@@ -3777,11 +3713,8 @@ struct AAAlignCallSiteArgument final : AAAlignFloating {
 
 /// Align attribute deduction for a call site return value.
 struct AAAlignCallSiteReturned final
-    : AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
-                                                             AAAlignImpl> {
-  using Base =
-      AACallSiteReturnedFromReturnedAndMustBeExecutedContext<AAAlign,
-                                                             AAAlignImpl>;
+    : AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl> {
+  using Base = AACallSiteReturnedFromReturned<AAAlign, AAAlignImpl>;
   AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A)
       : Base(IRP, A) {}
 

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
index d6a4de9ac1d1..3f440b79c0bd 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -41,12 +41,12 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[A]]
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]])
+; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[X:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__CGSCC_OPM-NEXT:    store i32 0, i32* [[X]], align 4
 ; IS__CGSCC_OPM-NEXT:    [[L:%.*]] = load i32, i32* [[X]], align 4
 ; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = add i32 [[L]], [[TMP2]]
@@ -56,16 +56,16 @@ define internal i32 @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[L:%.*]] = load i32, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = add i32 [[L]], [[TMP2]]

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
index cf10658bb7ef..1fe0fe6044ed 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
@@ -8,12 +8,12 @@
 
 define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]])
+; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval align 4 dereferenceable(4) [[X:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__CGSCC_OPM-NEXT:    store i32 0, i32* [[X]], align 4
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
@@ -21,16 +21,16 @@ define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[X_PRIV:%.*]] = alloca i32
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[X_PRIV]], align 4
 ; IS__CGSCC_NPM-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
index ff9f282cd0de..b36744677d21 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -9,14 +9,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 %struct.ss = type { i32, i64 }
 
 define internal i32 @f(%struct.ss* byval  %b) nounwind  {
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@f
-; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]])
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
-; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__TUNIT_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
-; IS__TUNIT_OPM-NEXT:    ret i32 [[TMP1]]
+; IS________OPM-LABEL: define {{[^@]+}}@f
+; IS________OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]])
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
+; IS________OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS________OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
+; IS________OPM-NEXT:    ret i32 [[TMP1]]
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@f
 ; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
@@ -32,27 +32,18 @@ define internal i32 @f(%struct.ss* byval  %b) nounwind  {
 ; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP1]]
 ;
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@f
-; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 4 dereferenceable(4) [[B:%.*]])
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
-; IS__CGSCC_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
-; IS__CGSCC_OPM-NEXT:    ret i32 [[TMP1]]
-;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@f
 ; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
 ; IS__CGSCC_NPM-NEXT:  entry:
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 4
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
 ; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP1]]
 ;
 entry:
@@ -65,37 +56,42 @@ entry:
 
 
 define internal i32 @g(%struct.ss* byval align 32 %b) nounwind {
-; IS__TUNIT_OPM-LABEL: define {{[^@]+}}@g
-; IS__TUNIT_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]])
-; IS__TUNIT_OPM-NEXT:  entry:
-; IS__TUNIT_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__TUNIT_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
-; IS__TUNIT_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__TUNIT_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
-; IS__TUNIT_OPM-NEXT:    ret i32 [[TMP2]]
+; IS________OPM-LABEL: define {{[^@]+}}@g
+; IS________OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]])
+; IS________OPM-NEXT:  entry:
+; IS________OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; IS________OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
+; IS________OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS________OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
+; IS________OPM-NEXT:    ret i32 [[TMP2]]
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@g
-; IS________NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
-; IS________NPM-NEXT:  entry:
-; IS________NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
-; IS________NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
-; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
-; IS________NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
-; IS________NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
-; IS________NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
-; IS________NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
-; IS________NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS________NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
-; IS________NPM-NEXT:    ret i32 [[TMP2]]
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@g
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
+; IS__TUNIT_NPM-NEXT:  entry:
+; IS__TUNIT_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; IS__TUNIT_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; IS__TUNIT_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
+; IS__TUNIT_NPM-NEXT:    ret i32 [[TMP2]]
 ;
-; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@g
-; IS__CGSCC_OPM-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(4) [[B:%.*]])
-; IS__CGSCC_OPM-NEXT:  entry:
-; IS__CGSCC_OPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
-; IS__CGSCC_OPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
-; IS__CGSCC_OPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
-; IS__CGSCC_OPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
-; IS__CGSCC_OPM-NEXT:    ret i32 [[TMP2]]
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@g
+; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
+; IS__CGSCC_NPM-NEXT:  entry:
+; IS__CGSCC_NPM-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]], align 4
+; IS__CGSCC_NPM-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; IS__CGSCC_NPM-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
+; IS__CGSCC_NPM-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
+; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
+; IS__CGSCC_NPM-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
   %tmp = getelementptr %struct.ss, %struct.ss* %b, i32 0, i32 0
@@ -126,15 +122,15 @@ define i32 @main() nounwind  {
 ; IS__TUNIT_NPM-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; IS__TUNIT_NPM-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; IS__TUNIT_NPM-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; IS__TUNIT_NPM-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST1]], align 1
-; IS__TUNIT_NPM-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_12]], align 1
-; IS__TUNIT_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]])
 ; IS__TUNIT_NPM-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
-; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST]], align 1
+; IS__TUNIT_NPM-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
 ; IS__TUNIT_NPM-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
-; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_1]], align 1
+; IS__TUNIT_NPM-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; IS__TUNIT_NPM-NEXT:    [[C0:%.*]] = call i32 @f(i32 [[TMP0]], i64 [[TMP1]])
+; IS__TUNIT_NPM-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; IS__TUNIT_NPM-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 1
+; IS__TUNIT_NPM-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1
 ; IS__TUNIT_NPM-NEXT:    [[C1:%.*]] = call i32 @g(i32 [[TMP2]], i64 [[TMP3]])
 ; IS__TUNIT_NPM-NEXT:    [[A:%.*]] = add i32 [[C0]], [[C1]]
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[A]]

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
index 714d9f8df78b..416169dd7145 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -28,12 +28,12 @@ define internal i32 @callee(i1 %C, i32* %P) {
 ; IS__TUNIT_NPM-NEXT:    ret i32 [[X]]
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@callee
-; IS__CGSCC____-SAME: (i32* nocapture nofree readonly [[P:%.*]])
+; IS__CGSCC____-SAME: (i32* nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
 ; IS__CGSCC____-NEXT:    br label [[F:%.*]]
 ; IS__CGSCC____:       T:
 ; IS__CGSCC____-NEXT:    unreachable
 ; IS__CGSCC____:       F:
-; IS__CGSCC____-NEXT:    [[X:%.*]] = load i32, i32* [[P]]
+; IS__CGSCC____-NEXT:    [[X:%.*]] = load i32, i32* [[P]], align 4
 ; IS__CGSCC____-NEXT:    ret i32 [[X]]
 ;
   br i1 %C, label %T, label %F

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
index 37cb25bce5da..885102f1d314 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
@@ -77,9 +77,9 @@ define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]]
 ; IS__CGSCC____-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
-; IS__CGSCC____-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]]
+; IS__CGSCC____-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]], align 4
 ; IS__CGSCC____-NEXT:    [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
-; IS__CGSCC____-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]]
+; IS__CGSCC____-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]], align 8
 ; IS__CGSCC____-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
 ; IS__CGSCC____-NEXT:    br label [[LOOP:%.*]]
 ; IS__CGSCC____:       loop:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
index 71a1f3d02ff9..48259aa42533 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/live_called_from_dead_2.ll
@@ -20,14 +20,23 @@ define internal i32 @test(i32* %X, i32* %Y) {
 ; IS__TUNIT____:       dead:
 ; IS__TUNIT____-NEXT:    unreachable
 ;
-; IS__CGSCC____-LABEL: define {{[^@]+}}@test
-; IS__CGSCC____-SAME: (i32* nocapture nofree writeonly [[X:%.*]])
-; IS__CGSCC____-NEXT:    br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]]
-; IS__CGSCC____:       live:
-; IS__CGSCC____-NEXT:    store i32 0, i32* [[X]]
-; IS__CGSCC____-NEXT:    ret i32 undef
-; IS__CGSCC____:       dead:
-; IS__CGSCC____-NEXT:    unreachable
+; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@test
+; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly align 4 [[X:%.*]])
+; IS__CGSCC_OPM-NEXT:    br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]]
+; IS__CGSCC_OPM:       live:
+; IS__CGSCC_OPM-NEXT:    store i32 0, i32* [[X]], align 4
+; IS__CGSCC_OPM-NEXT:    ret i32 undef
+; IS__CGSCC_OPM:       dead:
+; IS__CGSCC_OPM-NEXT:    unreachable
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@test
+; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[X:%.*]])
+; IS__CGSCC_NPM-NEXT:    br i1 true, label [[LIVE:%.*]], label [[DEAD:%.*]]
+; IS__CGSCC_NPM:       live:
+; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[X]], align 4
+; IS__CGSCC_NPM-NEXT:    ret i32 undef
+; IS__CGSCC_NPM:       dead:
+; IS__CGSCC_NPM-NEXT:    unreachable
 ;
   br i1 true, label %live, label %dead
 live:
@@ -48,17 +57,17 @@ define internal i32 @caller(i32* %B) {
 ; IS__TUNIT____-NEXT:    ret i32 undef
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC_OPM-SAME: (i32* nocapture nofree writeonly [[B:%.*]])
+; IS__CGSCC_OPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]])
 ; IS__CGSCC_OPM-NEXT:    [[A:%.*]] = alloca i32
 ; IS__CGSCC_OPM-NEXT:    store i32 1, i32* [[A]], align 4
-; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call i32 @test(i32* nocapture nofree writeonly [[B]])
+; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]])
 ; IS__CGSCC_OPM-NEXT:    ret i32 0
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@caller
-; IS__CGSCC_NPM-SAME: (i32* nocapture nofree writeonly [[B:%.*]])
+; IS__CGSCC_NPM-SAME: (i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B:%.*]])
 ; IS__CGSCC_NPM-NEXT:    [[A:%.*]] = alloca i32
 ; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[A]], align 4
-; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32* nocapture nofree writeonly [[B]])
+; IS__CGSCC_NPM-NEXT:    [[C:%.*]] = call i32 @test(i32* nocapture nofree nonnull writeonly align 4 dereferenceable(4) [[B]])
 ; IS__CGSCC_NPM-NEXT:    ret i32 undef
 ;
   %A = alloca i32

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll
index a5e0140977fe..16038889314a 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/musttail.ll
@@ -73,7 +73,7 @@ define i32 @caller2(%T* %g) {
 ; IS__TUNIT____-NEXT:    ret i32 0
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller2
-; IS__CGSCC____-SAME: (%T* nocapture nofree readonly [[G:%.*]])
+; IS__CGSCC____-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]])
 ; IS__CGSCC____-NEXT:    [[V:%.*]] = call i32 @test2(%T* nocapture nofree readonly [[G]], i32 0)
 ; IS__CGSCC____-NEXT:    ret i32 [[V]]
 ;
@@ -124,7 +124,7 @@ define i32 @caller2b(%T* %g) {
 ; IS__TUNIT____-NEXT:    ret i32 0
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@caller2b
-; IS__CGSCC____-SAME: (%T* nocapture nofree readonly [[G:%.*]])
+; IS__CGSCC____-SAME: (%T* nocapture nofree readonly align 4 [[G:%.*]])
 ; IS__CGSCC____-NEXT:    [[V:%.*]] = call i32 @test2b(%T* nocapture nofree readonly [[G]], i32 0)
 ; IS__CGSCC____-NEXT:    ret i32 [[V]]
 ;

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
index c1388072f5a5..2797389a442b 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
@@ -17,15 +17,25 @@ define internal void @bar(%pair* byval %Data) {
 ; IS________OPM-NEXT:    [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]])
 ; IS________OPM-NEXT:    ret void
 ;
-; IS________NPM-LABEL: define {{[^@]+}}@bar
-; IS________NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
-; IS________NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
-; IS________NPM-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
-; IS________NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
-; IS________NPM-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
-; IS________NPM-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
-; IS________NPM-NEXT:    [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]])
-; IS________NPM-NEXT:    ret void
+; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@bar
+; IS__TUNIT_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
+; IS__TUNIT_NPM-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
+; IS__TUNIT_NPM-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
+; IS__TUNIT_NPM-NEXT:    [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]])
+; IS__TUNIT_NPM-NEXT:    ret void
+;
+; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@bar
+; IS__CGSCC_NPM-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]], align 4
+; IS__CGSCC_NPM-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]], align 4
+; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = call i8* @foo(%pair* nonnull align 4 dereferenceable(8) [[DATA_PRIV]])
+; IS__CGSCC_NPM-NEXT:    ret void
 ;
   tail call i8* @foo(%pair* %Data)
   ret void

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index 51371d0088ca..0a743608de62 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -27,7 +27,7 @@ define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT:    store i32 99, i32* [[TMP2]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
@@ -173,7 +173,7 @@ define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
 ; IS__CGSCC_NPM-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
 ; IS__CGSCC_NPM-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
-; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; IS__CGSCC_NPM-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
 ; IS__CGSCC_NPM-NEXT:    store i32 99, i32* [[Z]], align 4
 ; IS__CGSCC_NPM-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
index 778ef542fe3d..69601edd550e 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/openmp_parallel_for.ll
@@ -83,66 +83,66 @@ entry:
 }
 
 define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.bound_tid., i32* dereferenceable(4) %N, float* dereferenceable(4) %p, i64 %q) {
-; IS________OPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; IS________OPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
-; IS________OPM-NEXT:  entry:
-; IS________OPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
-; IS________OPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
-; IS________OPM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
-; IS________OPM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-; IS________OPM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-; IS________OPM-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
-; IS________OPM-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
-; IS________OPM-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
-; IS________OPM-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
-; IS________OPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
-; IS________OPM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-; IS________OPM:       omp.precond.then:
-; IS________OPM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-; IS________OPM-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
-; IS________OPM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
-; IS________OPM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-; IS________OPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS________OPM-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
-; IS________OPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS________OPM-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
-; IS________OPM-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-; IS________OPM:       cond.true:
-; IS________OPM-NEXT:    br label [[COND_END:%.*]]
-; IS________OPM:       cond.false:
-; IS________OPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS________OPM-NEXT:    br label [[COND_END]]
-; IS________OPM:       cond.end:
-; IS________OPM-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
-; IS________OPM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
-; IS________OPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
-; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
-; IS________OPM:       omp.inner.for.cond:
-; IS________OPM-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
-; IS________OPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS________OPM-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
-; IS________OPM-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
-; IS________OPM:       omp.inner.for.cond.cleanup:
-; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
-; IS________OPM:       omp.inner.for.body:
-; IS________OPM-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
-; IS________OPM-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
-; IS________OPM-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
-; IS________OPM-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
-; IS________OPM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
-; IS________OPM:       omp.body.continue:
-; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_INC]]
-; IS________OPM:       omp.inner.for.inc:
-; IS________OPM-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
-; IS________OPM-NEXT:    br label [[OMP_INNER_FOR_COND]]
-; IS________OPM:       omp.inner.for.end:
-; IS________OPM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
-; IS________OPM:       omp.loop.exit:
-; IS________OPM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS________OPM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]])
-; IS________OPM-NEXT:    br label [[OMP_PRECOND_END]]
-; IS________OPM:       omp.precond.end:
-; IS________OPM-NEXT:    ret void
+; NOT_TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined.
+; NOT_TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
+; NOT_TUNIT_NPM-NEXT:  entry:
+; NOT_TUNIT_NPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
+; NOT_TUNIT_NPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
+; NOT_TUNIT_NPM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
+; NOT_TUNIT_NPM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
+; NOT_TUNIT_NPM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
+; NOT_TUNIT_NPM-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
+; NOT_TUNIT_NPM-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
+; NOT_TUNIT_NPM-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
+; NOT_TUNIT_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
+; NOT_TUNIT_NPM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
+; NOT_TUNIT_NPM:       omp.precond.then:
+; NOT_TUNIT_NPM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
+; NOT_TUNIT_NPM-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
+; NOT_TUNIT_NPM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
+; NOT_TUNIT_NPM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; NOT_TUNIT_NPM-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
+; NOT_TUNIT_NPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
+; NOT_TUNIT_NPM-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
+; NOT_TUNIT_NPM:       cond.true:
+; NOT_TUNIT_NPM-NEXT:    br label [[COND_END:%.*]]
+; NOT_TUNIT_NPM:       cond.false:
+; NOT_TUNIT_NPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; NOT_TUNIT_NPM-NEXT:    br label [[COND_END]]
+; NOT_TUNIT_NPM:       cond.end:
+; NOT_TUNIT_NPM-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
+; NOT_TUNIT_NPM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
+; NOT_TUNIT_NPM:       omp.inner.for.cond:
+; NOT_TUNIT_NPM-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
+; NOT_TUNIT_NPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
+; NOT_TUNIT_NPM-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
+; NOT_TUNIT_NPM:       omp.inner.for.cond.cleanup:
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
+; NOT_TUNIT_NPM:       omp.inner.for.body:
+; NOT_TUNIT_NPM-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
+; NOT_TUNIT_NPM-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
+; NOT_TUNIT_NPM-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
+; NOT_TUNIT_NPM-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
+; NOT_TUNIT_NPM:       omp.body.continue:
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_INNER_FOR_INC]]
+; NOT_TUNIT_NPM:       omp.inner.for.inc:
+; NOT_TUNIT_NPM-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_INNER_FOR_COND]]
+; NOT_TUNIT_NPM:       omp.inner.for.end:
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
+; NOT_TUNIT_NPM:       omp.loop.exit:
+; NOT_TUNIT_NPM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
+; NOT_TUNIT_NPM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]])
+; NOT_TUNIT_NPM-NEXT:    br label [[OMP_PRECOND_END]]
+; NOT_TUNIT_NPM:       omp.precond.end:
+; NOT_TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__TUNIT_NPM-LABEL: define {{[^@]+}}@.omp_outlined.
 ; IS__TUNIT_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
@@ -205,67 +205,6 @@ define internal void @.omp_outlined.(i32* noalias %.global_tid., i32* noalias %.
 ; IS__TUNIT_NPM:       omp.precond.end:
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
-; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@.omp_outlined.
-; IS__CGSCC_NPM-SAME: (i32* noalias nocapture readonly [[DOTGLOBAL_TID_:%.*]], i32* noalias nocapture nofree readnone [[DOTBOUND_TID_:%.*]], i32* nocapture nonnull readonly align 4 dereferenceable(4) [[N:%.*]], float* nocapture nonnull readonly dereferenceable(4) [[P:%.*]], i64 [[Q:%.*]])
-; IS__CGSCC_NPM-NEXT:  entry:
-; IS__CGSCC_NPM-NEXT:    [[Q_ADDR:%.*]] = alloca i64, align 8
-; IS__CGSCC_NPM-NEXT:    [[DOTOMP_LB:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    [[DOTOMP_UB:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    [[DOTOMP_STRIDE:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    [[DOTOMP_IS_LAST:%.*]] = alloca i32, align 4
-; IS__CGSCC_NPM-NEXT:    store i64 4617315517961601024, i64* [[Q_ADDR]], align 8
-; IS__CGSCC_NPM-NEXT:    [[CONV:%.*]] = bitcast i64* [[Q_ADDR]] to double*
-; IS__CGSCC_NPM-NEXT:    [[TMP:%.*]] = load i32, i32* [[N]], align 4
-; IS__CGSCC_NPM-NEXT:    [[SUB3:%.*]] = add nsw i32 [[TMP]], -3
-; IS__CGSCC_NPM-NEXT:    [[CMP:%.*]] = icmp sgt i32 [[TMP]], 2
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP]], label [[OMP_PRECOND_THEN:%.*]], label [[OMP_PRECOND_END:%.*]]
-; IS__CGSCC_NPM:       omp.precond.then:
-; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[DOTOMP_LB]], align 4
-; IS__CGSCC_NPM-NEXT:    store i32 [[SUB3]], i32* [[DOTOMP_UB]], align 4
-; IS__CGSCC_NPM-NEXT:    store i32 1, i32* [[DOTOMP_STRIDE]], align 4
-; IS__CGSCC_NPM-NEXT:    store i32 0, i32* [[DOTOMP_IS_LAST]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP5:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS__CGSCC_NPM-NEXT:    call void @__kmpc_for_static_init_4(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP5]], i32 34, i32* nonnull align 4 dereferenceable(4) [[DOTOMP_IS_LAST]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_LB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_UB]], i32* nonnull align 4 dereferenceable(4) [[DOTOMP_STRIDE]], i32 1, i32 1)
-; IS__CGSCC_NPM-NEXT:    [[TMP6:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS__CGSCC_NPM-NEXT:    [[CMP6:%.*]] = icmp sgt i32 [[TMP6]], [[SUB3]]
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP6]], label [[COND_TRUE:%.*]], label [[COND_FALSE:%.*]]
-; IS__CGSCC_NPM:       cond.true:
-; IS__CGSCC_NPM-NEXT:    br label [[COND_END:%.*]]
-; IS__CGSCC_NPM:       cond.false:
-; IS__CGSCC_NPM-NEXT:    [[TMP7:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS__CGSCC_NPM-NEXT:    br label [[COND_END]]
-; IS__CGSCC_NPM:       cond.end:
-; IS__CGSCC_NPM-NEXT:    [[COND:%.*]] = phi i32 [ [[SUB3]], [[COND_TRUE]] ], [ [[TMP7]], [[COND_FALSE]] ]
-; IS__CGSCC_NPM-NEXT:    store i32 [[COND]], i32* [[DOTOMP_UB]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP8:%.*]] = load i32, i32* [[DOTOMP_LB]], align 4
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_INNER_FOR_COND:%.*]]
-; IS__CGSCC_NPM:       omp.inner.for.cond:
-; IS__CGSCC_NPM-NEXT:    [[DOTOMP_IV_0:%.*]] = phi i32 [ [[TMP8]], [[COND_END]] ], [ [[ADD11:%.*]], [[OMP_INNER_FOR_INC:%.*]] ]
-; IS__CGSCC_NPM-NEXT:    [[TMP9:%.*]] = load i32, i32* [[DOTOMP_UB]], align 4
-; IS__CGSCC_NPM-NEXT:    [[CMP8:%.*]] = icmp sgt i32 [[DOTOMP_IV_0]], [[TMP9]]
-; IS__CGSCC_NPM-NEXT:    br i1 [[CMP8]], label [[OMP_INNER_FOR_COND_CLEANUP:%.*]], label [[OMP_INNER_FOR_BODY:%.*]]
-; IS__CGSCC_NPM:       omp.inner.for.cond.cleanup:
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_INNER_FOR_END:%.*]]
-; IS__CGSCC_NPM:       omp.inner.for.body:
-; IS__CGSCC_NPM-NEXT:    [[ADD10:%.*]] = add nsw i32 [[DOTOMP_IV_0]], 2
-; IS__CGSCC_NPM-NEXT:    [[TMP10:%.*]] = load float, float* [[P]], align 4
-; IS__CGSCC_NPM-NEXT:    [[TMP11:%.*]] = load double, double* [[CONV]], align 8
-; IS__CGSCC_NPM-NEXT:    call void @bar(i32 [[ADD10]], float [[TMP10]], double [[TMP11]])
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_BODY_CONTINUE:%.*]]
-; IS__CGSCC_NPM:       omp.body.continue:
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_INNER_FOR_INC]]
-; IS__CGSCC_NPM:       omp.inner.for.inc:
-; IS__CGSCC_NPM-NEXT:    [[ADD11]] = add nsw i32 [[DOTOMP_IV_0]], 1
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_INNER_FOR_COND]]
-; IS__CGSCC_NPM:       omp.inner.for.end:
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_LOOP_EXIT:%.*]]
-; IS__CGSCC_NPM:       omp.loop.exit:
-; IS__CGSCC_NPM-NEXT:    [[TMP12:%.*]] = load i32, i32* [[DOTGLOBAL_TID_]], align 4
-; IS__CGSCC_NPM-NEXT:    call void @__kmpc_for_static_fini(%struct.ident_t* nonnull align 8 dereferenceable(24) @0, i32 [[TMP12]])
-; IS__CGSCC_NPM-NEXT:    br label [[OMP_PRECOND_END]]
-; IS__CGSCC_NPM:       omp.precond.end:
-; IS__CGSCC_NPM-NEXT:    ret void
-;
 entry:
   %q.addr = alloca i64, align 8
   %.omp.lb = alloca i32, align 4

diff  --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index e15a800b9759..89f16a5acab6 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -41,7 +41,7 @@ define void @t0_caller(i32* %a) {
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t0_caller
-; IS__CGSCC_OPM-SAME: (i32* [[A:%.*]])
+; IS__CGSCC_OPM-SAME: (i32* align 256 [[A:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[B:%.*]] = alloca i32, align 32
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = alloca i32*, align 64
@@ -49,7 +49,7 @@ define void @t0_caller(i32* %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t0_callback_broker(i32* noalias nocapture align 536870912 null, i32* nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t0_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t0_caller
@@ -149,7 +149,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t1_caller
-; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]])
+; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[B:%.*]] = alloca i32, align 32
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = alloca i32*, align 64
@@ -157,7 +157,7 @@ define void @t1_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t1_caller
@@ -256,7 +256,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t2_caller
-; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]])
+; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[B:%.*]] = alloca i32, align 32
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = alloca i32*, align 64
@@ -264,7 +264,7 @@ define void @t2_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t2_caller
@@ -367,7 +367,7 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__TUNIT_NPM-NEXT:    ret void
 ;
 ; IS__CGSCC_OPM-LABEL: define {{[^@]+}}@t3_caller
-; IS__CGSCC_OPM-SAME: (i32* noalias [[A:%.*]])
+; IS__CGSCC_OPM-SAME: (i32* noalias align 256 [[A:%.*]])
 ; IS__CGSCC_OPM-NEXT:  entry:
 ; IS__CGSCC_OPM-NEXT:    [[B:%.*]] = alloca i32, align 32
 ; IS__CGSCC_OPM-NEXT:    [[C:%.*]] = alloca i32*, align 64
@@ -375,8 +375,8 @@ define void @t3_caller(i32* noalias %a) {
 ; IS__CGSCC_OPM-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
 ; IS__CGSCC_OPM-NEXT:    store i32 42, i32* [[B]], align 32
 ; IS__CGSCC_OPM-NEXT:    store i32* [[B]], i32** [[C]], align 64
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
-; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
+; IS__CGSCC_OPM-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t3_callback_broker(i32* noalias nocapture align 536870912 null, i32* nocapture nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nocapture bitcast (void (i32*, i32*, i32*, i64, i32**)* @t3_callback_callee to void (i32*, i32*, ...)*), i32* align 256 [[A]], i64 99, i32** nonnull align 64 dereferenceable(8) [[C]])
 ; IS__CGSCC_OPM-NEXT:    ret void
 ;
 ; IS__CGSCC_NPM-LABEL: define {{[^@]+}}@t3_caller

diff  --git a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
index ef775b7b6468..1ecee461fe5d 100644
--- a/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
+++ b/llvm/test/Transforms/Attributor/read_write_returned_arguments_scc.ll
@@ -46,10 +46,10 @@ define i32* @external_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:    ret i32* [[CALL3]]
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_ret2_nrw
-; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]])
+; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree align 4 [[R0:%.*]], i32* nofree returned [[W0:%.*]])
 ; IS__CGSCC____-NEXT:  entry:
 ; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @internal_ret0_nw(i32* nofree [[N0]], i32* nofree [[W0]])
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree [[R0]], i32* nofree [[W0]])
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @internal_ret1_rrw(i32* nofree align 4 [[R0]], i32* nofree align 4 [[R0]], i32* nofree [[W0]])
 ; IS__CGSCC____-NEXT:    [[CALL2:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly align 4 [[R0]], i32* nofree writeonly [[W0]])
 ; IS__CGSCC____-NEXT:    [[CALL3:%.*]] = call i32* @internal_ret1_rw(i32* nofree align 4 [[R0]], i32* nofree [[W0]])
 ; IS__CGSCC____-NEXT:    ret i32* [[CALL3]]
@@ -333,10 +333,10 @@ define i32* @external_source_ret2_nrw(i32* %n0, i32* %r0, i32* %w0) {
 ; IS__TUNIT____-NEXT:    ret i32* [[CALL1]]
 ;
 ; IS__CGSCC____-LABEL: define {{[^@]+}}@external_source_ret2_nrw
-; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree [[R0:%.*]], i32* nofree returned [[W0:%.*]])
+; IS__CGSCC____-SAME: (i32* nofree [[N0:%.*]], i32* nofree align 4 [[R0:%.*]], i32* nofree returned [[W0:%.*]])
 ; IS__CGSCC____-NEXT:  entry:
-; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly [[R0]], i32* nofree writeonly [[W0]])
-; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree [[R0]], i32* nofree [[W0]])
+; IS__CGSCC____-NEXT:    [[CALL:%.*]] = call i32* @external_sink_ret2_nrw(i32* nofree [[N0]], i32* nocapture nofree readonly align 4 [[R0]], i32* nofree writeonly [[W0]])
+; IS__CGSCC____-NEXT:    [[CALL1:%.*]] = call i32* @external_ret2_nrw(i32* nofree [[N0]], i32* nofree align 4 [[R0]], i32* nofree [[W0]])
 ; IS__CGSCC____-NEXT:    ret i32* [[CALL1]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index f485029a833f..ad6e048353fa 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -348,7 +348,7 @@ define internal void @test_byval(%struct.X* byval %a) {
 ; IS__CGSCC_NPM-SAME: (i8* nocapture nofree readnone [[TMP0:%.*]])
 ; IS__CGSCC_NPM-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_X:%.*]]
 ; IS__CGSCC_NPM-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.X* [[A_PRIV]] to i8**
-; IS__CGSCC_NPM-NEXT:    store i8* [[TMP0]], i8** [[A_PRIV_CAST]]
+; IS__CGSCC_NPM-NEXT:    store i8* [[TMP0]], i8** [[A_PRIV_CAST]], align 8
 ; IS__CGSCC_NPM-NEXT:    [[G0:%.*]] = getelementptr [[STRUCT_X]], %struct.X* [[A_PRIV]], i32 0, i32 0
 ; IS__CGSCC_NPM-NEXT:    store i8* null, i8** [[G0]], align 8
 ; IS__CGSCC_NPM-NEXT:    ret void


        


More information about the llvm-commits mailing list