[llvm] 89c2e73 - [Attributor] Pointer privatization attribute (argument promotion)

Johannes Doerfert via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 29 19:32:47 PST 2020


Author: Johannes Doerfert
Date: 2020-01-29T21:31:04-06:00
New Revision: 89c2e733e80ee35921ad4aff8058c3ff9d933c54

URL: https://github.com/llvm/llvm-project/commit/89c2e733e80ee35921ad4aff8058c3ff9d933c54
DIFF: https://github.com/llvm/llvm-project/commit/89c2e733e80ee35921ad4aff8058c3ff9d933c54.diff

LOG: [Attributor] Pointer privatization attribute (argument promotion)

A pointer is privatizeable if it can be replaced by a new, private one.
Privatizing pointer reduces the use count, interaction between unrelated
code parts. This is a first step towards replacing argument promotion.
While we can already handle recursion (unlike argument promotion!) we
are restricted to stack allocations for now because we do not analyze
the uses in the callee.

Reviewed By: uenoku

Differential Revision: https://reviews.llvm.org/D68852

Added: 
    llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll

Modified: 
    llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
    llvm/include/llvm/Transforms/IPO/Attributor.h
    llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
    llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
    llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
    llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
    llvm/test/Transforms/Attributor/callbacks.ll
    llvm/test/Transforms/Attributor/internal-noalias.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
index c8afb7bdcd65..6d6cb58abdbb 100644
--- a/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
+++ b/llvm/include/llvm/Transforms/IPO/ArgumentPromotion.h
@@ -14,6 +14,7 @@
 #include "llvm/IR/PassManager.h"
 
 namespace llvm {
+class TargetTransformInfo;
 
 /// Argument promotion pass.
 ///
@@ -26,6 +27,17 @@ class ArgumentPromotionPass : public PassInfoMixin<ArgumentPromotionPass> {
 public:
   ArgumentPromotionPass(unsigned MaxElements = 3u) : MaxElements(MaxElements) {}
 
+  /// Check if callers and the callee \p F agree how promoted arguments would be
+  /// passed. The ones that they do not agree on are eliminated from the sets but
+  /// the return value has to be observed as well.
+  static bool areFunctionArgsABICompatible(
+      const Function &F, const TargetTransformInfo &TTI,
+      SmallPtrSetImpl<Argument *> &ArgsToPromote,
+      SmallPtrSetImpl<Argument *> &ByValArgsToTransform);
+
+  /// Checks if a type could have padding bytes.
+  static bool isDenselyPacked(Type *type, const DataLayout &DL);
+
   PreservedAnalyses run(LazyCallGraph::SCC &C, CGSCCAnalysisManager &AM,
                         LazyCallGraph &CG, CGSCCUpdateResult &UR);
 };

diff  --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h
index a8e83673f196..d394624c0c5b 100644
--- a/llvm/include/llvm/Transforms/IPO/Attributor.h
+++ b/llvm/include/llvm/Transforms/IPO/Attributor.h
@@ -104,6 +104,7 @@
 #include "llvm/Analysis/CallGraph.h"
 #include "llvm/Analysis/MustExecute.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
+#include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/PassManager.h"
@@ -951,6 +952,16 @@ struct Attributor {
     friend struct Attributor;
   };
 
+  /// Check if we can rewrite a function signature.
+  ///
+  /// The argument \p Arg is replaced with new ones defined by the number,
+  /// order, and types in \p ReplacementTypes.
+  ///
+  /// \returns True, if the replacement can be registered, via
+  /// registerFunctionSignatureRewrite, false otherwise.
+  bool isValidFunctionSignatureRewrite(Argument &Arg,
+                                       ArrayRef<Type *> ReplacementTypes);
+
   /// Register a rewrite for a function signature.
   ///
   /// The argument \p Arg is replaced with new ones defined by the number,
@@ -2402,6 +2413,45 @@ struct AAHeapToStack : public StateWrapper<BooleanState, AbstractAttribute>,
   static const char ID;
 };
 
+/// An abstract interface for privatizability.
+///
+/// A pointer is privatizable if it can be replaced by a new, private one.
+/// Privatizing pointer reduces the use count, interaction between unrelated
+/// code parts.
+///
+/// In order for a pointer to be privatizable its value cannot be observed
+/// (=nocapture), it is (for now) not written (=readonly & noalias), we know
+/// what values are necessary to make the private copy look like the original
+/// one, and the values we need can be loaded (=dereferenceable).
+struct AAPrivatizablePtr : public StateWrapper<BooleanState, AbstractAttribute>,
+                           public IRPosition {
+  AAPrivatizablePtr(const IRPosition &IRP) : IRPosition(IRP) {}
+
+  /// Returns true if pointer privatization is assumed to be possible.
+  bool isAssumedPrivatizablePtr() const { return getAssumed(); }
+
+  /// Returns true if pointer privatization is known to be possible.
+  bool isKnownPrivatizablePtr() const { return getKnown(); }
+
+  /// Return the type we can choose for a private copy of the underlying
+  /// value. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> getPrivatizableType() const = 0;
+
+  /// Return an IR position, see struct IRPosition.
+  ///
+  ///{
+  IRPosition &getIRPosition() { return *this; }
+  const IRPosition &getIRPosition() const { return *this; }
+  ///}
+
+  /// Create an abstract attribute view for the position \p IRP.
+  static AAPrivatizablePtr &createForPosition(const IRPosition &IRP,
+                                              Attributor &A);
+
+  /// Unique ID (due to the unique address)
+  static const char ID;
+};
+
 /// An abstract interface for all memory related attributes.
 struct AAMemoryBehavior
     : public IRAttribute<

diff  --git a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
index c9c8d0a749ab..3be31bcd172b 100644
--- a/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
+++ b/llvm/lib/Transforms/IPO/ArgumentPromotion.cpp
@@ -774,8 +774,7 @@ static bool isSafeToPromoteArgument(Argument *Arg, Type *ByValTy, AAResults &AAR
   return true;
 }
 
-/// Checks if a type could have padding bytes.
-static bool isDenselyPacked(Type *type, const DataLayout &DL) {
+bool ArgumentPromotionPass::isDenselyPacked(Type *type, const DataLayout &DL) {
   // There is no size information, so be conservative.
   if (!type->isSized())
     return false;
@@ -844,12 +843,14 @@ static bool canPaddingBeAccessed(Argument *arg) {
   return false;
 }
 
-static bool areFunctionArgsABICompatible(
+bool ArgumentPromotionPass::areFunctionArgsABICompatible(
     const Function &F, const TargetTransformInfo &TTI,
     SmallPtrSetImpl<Argument *> &ArgsToPromote,
     SmallPtrSetImpl<Argument *> &ByValArgsToTransform) {
   for (const Use &U : F.uses()) {
     CallSite CS(U.getUser());
+    if (!CS)
+      return false;
     const Function *Caller = CS.getCaller();
     const Function *Callee = CS.getCalledFunction();
     if (!TTI.areFunctionArgsABICompatible(Caller, Callee, ArgsToPromote) ||
@@ -951,9 +952,9 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
     // If this is a byval argument, and if the aggregate type is small, just
     // pass the elements, which is always safe, if the passed value is densely
     // packed or if we can prove the padding bytes are never accessed.
-    bool isSafeToPromote =
-        PtrArg->hasByValAttr() &&
-        (isDenselyPacked(AgTy, DL) || !canPaddingBeAccessed(PtrArg));
+    bool isSafeToPromote = PtrArg->hasByValAttr() &&
+                           (ArgumentPromotionPass::isDenselyPacked(AgTy, DL) ||
+                            !canPaddingBeAccessed(PtrArg));
     if (isSafeToPromote) {
       if (StructType *STy = dyn_cast<StructType>(AgTy)) {
         if (MaxElements > 0 && STy->getNumElements() > MaxElements) {
@@ -1011,8 +1012,8 @@ promoteArguments(Function *F, function_ref<AAResults &(Function &F)> AARGetter,
   if (ArgsToPromote.empty() && ByValArgsToTransform.empty())
     return nullptr;
 
-  if (!areFunctionArgsABICompatible(*F, TTI, ArgsToPromote,
-                                    ByValArgsToTransform))
+  if (!ArgumentPromotionPass::areFunctionArgsABICompatible(
+          *F, TTI, ArgsToPromote, ByValArgsToTransform))
     return nullptr;
 
   return doPromotion(F, ArgsToPromote, ByValArgsToTransform, ReplaceCallSite);

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index c055e18902c6..b23df5499b7a 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -31,13 +31,16 @@
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Attributes.h"
 #include "llvm/IR/CFG.h"
+#include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
 #include "llvm/IR/IntrinsicInst.h"
 #include "llvm/IR/Verifier.h"
 #include "llvm/InitializePasses.h"
+#include "llvm/IR/NoFolder.h"
 #include "llvm/Support/CommandLine.h"
 #include "llvm/Support/Debug.h"
 #include "llvm/Support/raw_ostream.h"
+#include "llvm/Transforms/IPO/ArgumentPromotion.h"
 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
 #include "llvm/Transforms/Utils/Local.h"
 
@@ -130,6 +133,7 @@ PIPE_OPERATOR(AAHeapToStack)
 PIPE_OPERATOR(AAReachability)
 PIPE_OPERATOR(AAMemoryBehavior)
 PIPE_OPERATOR(AAValueConstantRange)
+PIPE_OPERATOR(AAPrivatizablePtr)
 
 #undef PIPE_OPERATOR
 } // namespace llvm
@@ -309,6 +313,75 @@ static const Value *getPointerOperand(const Instruction *I,
   return nullptr;
 }
 
+/// Helper function to create a pointer of type \p ResTy, based on \p Ptr, and
+/// advanced by \p Offset bytes. To aid later analysis the method tries to build
+/// getelement pointer instructions that traverse the natural type of \p Ptr if
+/// possible. If that fails, the remaining offset is adjusted byte-wise, hence
+/// through a cast to i8*.
+///
+/// TODO: This could probably live somewhere more prominantly if it doesn't
+///       already exist.
+static Value *constructPointer(Type *ResTy, Value *Ptr, int64_t Offset,
+                               IRBuilder<NoFolder> &IRB, const DataLayout &DL) {
+  assert(Offset >= 0 && "Negative offset not supported yet!");
+  LLVM_DEBUG(dbgs() << "Construct pointer: " << *Ptr << " + " << Offset
+                    << "-bytes as " << *ResTy << "\n");
+
+  // The initial type we are trying to traverse to get nice GEPs.
+  Type *Ty = Ptr->getType();
+
+  SmallVector<Value *, 4> Indices;
+  std::string GEPName = Ptr->getName().str();
+  while (Offset) {
+    uint64_t Idx, Rem;
+
+    if (auto *STy = dyn_cast<StructType>(Ty)) {
+      const StructLayout *SL = DL.getStructLayout(STy);
+      if (int64_t(SL->getSizeInBytes()) < Offset)
+        break;
+      Idx = SL->getElementContainingOffset(Offset);
+      assert(Idx < STy->getNumElements() && "Offset calculation error!");
+      Rem = Offset - SL->getElementOffset(Idx);
+      Ty = STy->getElementType(Idx);
+    } else if (auto *PTy = dyn_cast<PointerType>(Ty)) {
+      Ty = PTy->getElementType();
+      if (!Ty->isSized())
+        break;
+      uint64_t ElementSize = DL.getTypeAllocSize(Ty);
+      assert(ElementSize && "Expected type with size!");
+      Idx = Offset / ElementSize;
+      Rem = Offset % ElementSize;
+    } else {
+      // Non-aggregate type, we cast and make byte-wise progress now.
+      break;
+    }
+
+    LLVM_DEBUG(errs() << "Ty: " << *Ty << " Offset: " << Offset
+                      << " Idx: " << Idx << " Rem: " << Rem << "\n");
+
+    GEPName += "." + std::to_string(Idx);
+    Indices.push_back(ConstantInt::get(IRB.getInt32Ty(), Idx));
+    Offset = Rem;
+  }
+
+  // Create a GEP if we collected indices above.
+  if (Indices.size())
+    Ptr = IRB.CreateGEP(Ptr, Indices, GEPName);
+
+  // If an offset is left we use byte-wise adjustment.
+  if (Offset) {
+    Ptr = IRB.CreateBitCast(Ptr, IRB.getInt8PtrTy());
+    Ptr = IRB.CreateGEP(Ptr, IRB.getInt32(Offset),
+                        GEPName + ".b" + Twine(Offset));
+  }
+
+  // Ensure the result has the requested type.
+  Ptr = IRB.CreateBitOrPointerCast(Ptr, ResTy, Ptr->getName() + ".cast");
+
+  LLVM_DEBUG(dbgs() << "Constructed pointer: " << *Ptr << "\n");
+  return Ptr;
+}
+
 /// Recursively visit all values that might become \p IRP at some point. This
 /// will be done by looking through cast instructions, selects, phis, and calls
 /// with the "returned" attribute. Once we cannot look through the value any
@@ -2796,7 +2869,7 @@ struct AAIsDeadArgument : public AAIsDeadFloating {
   ChangeStatus manifest(Attributor &A) override {
     ChangeStatus Changed = AAIsDeadFloating::manifest(A);
     Argument &Arg = *getAssociatedArgument();
-    if (Arg.getParent()->hasLocalLinkage())
+    if (A.isValidFunctionSignatureRewrite(Arg, /* ReplacementTypes */ {}))
       if (A.registerFunctionSignatureRewrite(
               Arg, /* ReplacementTypes */ {},
               Attributor::ArgumentReplacementInfo::CalleeRepairCBTy{},
@@ -4761,6 +4834,575 @@ struct AAHeapToStackFunction final : public AAHeapToStackImpl {
   }
 };
 
+/// ----------------------- Privatizable Pointers ------------------------------
+struct AAPrivatizablePtrImpl : public AAPrivatizablePtr {
+  AAPrivatizablePtrImpl(const IRPosition &IRP)
+      : AAPrivatizablePtr(IRP), PrivatizableType(llvm::None) {}
+
+  ChangeStatus indicatePessimisticFixpoint() override {
+    AAPrivatizablePtr::indicatePessimisticFixpoint();
+    PrivatizableType = nullptr;
+    return ChangeStatus::CHANGED;
+  }
+
+  /// Identify the type we can chose for a private copy of the underlying
+  /// argument. None means it is not clear yet, nullptr means there is none.
+  virtual Optional<Type *> identifyPrivatizableType(Attributor &A) = 0;
+
+  /// Return a privatizable type that encloses both T0 and T1.
+  /// TODO: This is merely a stub for now as we should manage a mapping as well.
+  Optional<Type *> combineTypes(Optional<Type *> T0, Optional<Type *> T1) {
+    if (!T0.hasValue())
+      return T1;
+    if (!T1.hasValue())
+      return T0;
+    if (T0 == T1)
+      return T0;
+    return nullptr;
+  }
+
+  Optional<Type *> getPrivatizableType() const override {
+    return PrivatizableType;
+  }
+
+  const std::string getAsStr() const override {
+    return isAssumedPrivatizablePtr() ? "[priv]" : "[no-priv]";
+  }
+
+protected:
+  Optional<Type *> PrivatizableType;
+};
+
+// TODO: Do this for call site arguments (probably also other values) as well.
+
+struct AAPrivatizablePtrArgument final : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    // If this is a byval argument and we know all the call sites (so we can
+    // rewrite them), there is no need to check them explicitly.
+    if (getIRPosition().hasAttr(Attribute::ByVal) &&
+        A.checkForAllCallSites([](AbstractCallSite ACS) { return true; }, *this,
+                               true))
+      return getAssociatedValue().getType()->getPointerElementType();
+
+    Optional<Type *> Ty;
+    unsigned ArgNo = getIRPosition().getArgNo();
+
+    // Make sure the associated call site argument has the same type at all call
+    // sites and it is an allocation we know is safe to privatize, for now that
+    // means we only allow alloca instructions.
+    // TODO: We can additionally analyze the accesses in the callee to  create
+    //       the type from that information instead. That is a little more
+    //       involved and will be done in a follow up patch.
+    auto CallSiteCheck = [&](AbstractCallSite ACS) {
+      IRPosition ACSArgPos = IRPosition::callsite_argument(ACS, ArgNo);
+      // Check if a coresponding argument was found or if it is one not
+      // associated (which can happen for callback calls).
+      if (ACSArgPos.getPositionKind() == IRPosition::IRP_INVALID)
+        return false;
+
+      // Check that all call sites agree on a type.
+      auto &PrivCSArgAA = A.getAAFor<AAPrivatizablePtr>(*this, ACSArgPos);
+      Optional<Type *> CSTy = PrivCSArgAA.getPrivatizableType();
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] ACSPos: " << ACSArgPos << ", CSTy: ";
+        if (CSTy.hasValue() && CSTy.getValue())
+          CSTy.getValue()->print(dbgs());
+        else if (CSTy.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+      });
+
+      Ty = combineTypes(Ty, CSTy);
+
+      LLVM_DEBUG({
+        dbgs() << " : New Type: ";
+        if (Ty.hasValue() && Ty.getValue())
+          Ty.getValue()->print(dbgs());
+        else if (Ty.hasValue())
+          dbgs() << "<nullptr>";
+        else
+          dbgs() << "<none>";
+        dbgs() << "\n";
+      });
+
+      return !Ty.hasValue() || Ty.getValue();
+    };
+
+    if (!A.checkForAllCallSites(CallSiteCheck, *this, true))
+      return nullptr;
+    return Ty;
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    // Avoid arguments with padding for now.
+    if (!getIRPosition().hasAttr(Attribute::ByVal) &&
+        !ArgumentPromotionPass::isDenselyPacked(PrivatizableType.getValue(),
+                                                A.getInfoCache().getDL())) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Padding detected\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Verify callee and caller agree on how the promoted argument would be
+    // passed.
+    // TODO: The use of the ArgumentPromotion interface here is ugly, we need a
+    // specialized form of TargetTransformInfo::areFunctionArgsABICompatible
+    // which doesn't require the arguments ArgumentPromotion wanted to pass.
+    Function &Fn = *getIRPosition().getAnchorScope();
+    SmallPtrSet<Argument *, 1> ArgsToPromote, Dummy;
+    ArgsToPromote.insert(getAssociatedArgument());
+    const auto *TTI =
+        A.getInfoCache().getAnalysisResultForFunction<TargetIRAnalysis>(Fn);
+    if (!TTI ||
+        !ArgumentPromotionPass::areFunctionArgsABICompatible(
+            Fn, *TTI, ArgsToPromote, Dummy) ||
+        ArgsToPromote.empty()) {
+      LLVM_DEBUG(
+          dbgs() << "[AAPrivatizablePtr] ABI incompatibility detected for "
+                 << Fn.getName() << "\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    // Collect the types that will replace the privatizable type in the function
+    // signature.
+    SmallVector<Type *, 16> ReplacementTypes;
+    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+    // Register a rewrite of the argument.
+    Argument *Arg = getAssociatedArgument();
+    if (!A.isValidFunctionSignatureRewrite(*Arg, ReplacementTypes)) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Rewrite not valid\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    unsigned ArgNo = Arg->getArgNo();
+
+    // Helper to check if for the given call site the associated argument is
+    // passed to a callback where the privatization would be 
diff erent.
+    auto IsCompatiblePrivArgOfCallback = [&](CallSite CS) {
+      Value *CSArgOp = CS.getArgOperand(ArgNo);
+      SmallVector<const Use *, 4> CBUses;
+      AbstractCallSite::getCallbackUses(CS, CBUses);
+      for (const Use *U : CBUses) {
+        AbstractCallSite CBACS(U);
+        assert(CBACS && CBACS.isCallbackCall());
+        for (Argument &CBArg : CBACS.getCalledFunction()->args()) {
+          int CBArgNo = CBACS.getCallArgOperandNo(CBArg);
+
+          LLVM_DEBUG({
+            dbgs()
+                << "[AAPrivatizablePtr] Argument " << *Arg
+                << "check if can be privatized in the context of its parent ("
+                << Arg->getParent()->getName()
+                << ")\n[AAPrivatizablePtr] because it is an argument in a "
+                   "callback ("
+                << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
+                << ")\n[AAPrivatizablePtr] " << CBArg << " : "
+                << CBACS.getCallArgOperand(CBArg) << " vs " << CSArgOp << "\n"
+                << "[AAPrivatizablePtr] " << CBArg << " : "
+                << CBACS.getCallArgOperandNo(CBArg) << " vs " << ArgNo << "\n";
+          });
+
+          if (CBArgNo != int(ArgNo))
+            continue;
+          const auto &CBArgPrivAA =
+              A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(CBArg));
+          if (CBArgPrivAA.isValidState()) {
+            auto CBArgPrivTy = CBArgPrivAA.getPrivatizableType();
+            if (!CBArgPrivTy.hasValue())
+              continue;
+            if (CBArgPrivTy.getValue() == PrivatizableType)
+              continue;
+          }
+
+          LLVM_DEBUG({
+            dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+                   << " cannot be privatized in the context of its parent ("
+                   << Arg->getParent()->getName()
+                   << ")\n[AAPrivatizablePtr] because it is an argument in a "
+                      "callback ("
+                   << CBArgNo << "@" << CBACS.getCalledFunction()->getName()
+                   << ").\n[AAPrivatizablePtr] for which the argument "
+                      "privatization is not compatible.\n";
+          });
+          return false;
+        }
+      }
+      return true;
+    };
+
+    // Helper to check if for the given call site the associated argument is
+    // passed to a direct call where the privatization would be 
diff erent.
+    auto IsCompatiblePrivArgOfDirectCS = [&](AbstractCallSite ACS) {
+      CallBase *DC = cast<CallBase>(ACS.getInstruction());
+      int DCArgNo = ACS.getCallArgOperandNo(ArgNo);
+      assert(DCArgNo >= 0 && unsigned(DCArgNo) < DC->getNumArgOperands() &&
+             "Expected a direct call operand for callback call operand");
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+               << " check if be privatized in the context of its parent ("
+               << Arg->getParent()->getName()
+               << ")\n[AAPrivatizablePtr] because it is an argument in a "
+                  "direct call of ("
+               << DCArgNo << "@" << DC->getCalledFunction()->getName()
+               << ").\n";
+      });
+
+      Function *DCCallee = DC->getCalledFunction();
+      if (unsigned(DCArgNo) < DCCallee->arg_size()) {
+        const auto &DCArgPrivAA = A.getAAFor<AAPrivatizablePtr>(
+            *this, IRPosition::argument(*DCCallee->getArg(DCArgNo)));
+        if (DCArgPrivAA.isValidState()) {
+          auto DCArgPrivTy = DCArgPrivAA.getPrivatizableType();
+          if (!DCArgPrivTy.hasValue())
+            return true;
+          if (DCArgPrivTy.getValue() == PrivatizableType)
+            return true;
+        }
+      }
+
+      LLVM_DEBUG({
+        dbgs() << "[AAPrivatizablePtr] Argument " << *Arg
+               << " cannot be privatized in the context of its parent ("
+               << Arg->getParent()->getName()
+               << ")\n[AAPrivatizablePtr] because it is an argument in a "
+                  "direct call of ("
+               << ACS.getCallSite().getCalledFunction()->getName()
+               << ").\n[AAPrivatizablePtr] for which the argument "
+                  "privatization is not compatible.\n";
+      });
+      return false;
+    };
+
+    // Helper to check if the associated argument is used at the given abstract
+    // call site in a way that is incompatible with the privatization assumed
+    // here.
+    auto IsCompatiblePrivArgOfOtherCallSite = [&](AbstractCallSite ACS) {
+      if (ACS.isDirectCall())
+        return IsCompatiblePrivArgOfCallback(ACS.getCallSite());
+      if (ACS.isCallbackCall())
+        return IsCompatiblePrivArgOfDirectCS(ACS);
+      return false;
+    };
+
+    if (!A.checkForAllCallSites(IsCompatiblePrivArgOfOtherCallSite, *this,
+                                true))
+      return indicatePessimisticFixpoint();
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// Given a type to private \p PrivType, collect the constituates (which are
+  /// used) in \p ReplacementTypes.
+  static void
+  identifyReplacementTypes(Type *PrivType,
+                           SmallVectorImpl<Type *> &ReplacementTypes) {
+    // TODO: For now we expand the privatization type to the fullest which can
+    //       lead to dead arguments that need to be removed later.
+    assert(PrivType && "Expected privatizable type!");
+
+    // Traverse the type, extract constituate types on the outermost level.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++)
+        ReplacementTypes.push_back(PrivStructType->getElementType(u));
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      ReplacementTypes.append(PrivArrayType->getNumElements(),
+                              PrivArrayType->getElementType());
+    } else {
+      ReplacementTypes.push_back(PrivType);
+    }
+  }
+
+  /// Initialize \p Base according to the type \p PrivType at position \p IP.
+  /// The values needed are taken from the arguments of \p F starting at
+  /// position \p ArgNo.
+  static void createInitialization(Type *PrivType, Value &Base, Function &F,
+                                   unsigned ArgNo, Instruction &IP) {
+    assert(PrivType && "Expected privatizable type!");
+
+    IRBuilder<NoFolder> IRB(&IP);
+    const DataLayout &DL = F.getParent()->getDataLayout();
+
+    // Traverse the type, build GEPs and stores.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u)->getPointerTo();
+        Value *Ptr = constructPointer(
+            PointeeTy, &Base, PrivStructLayout->getElementOffset(u), IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeePtrTy = PrivArrayType->getElementType()->getPointerTo();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeePtrTy);
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, &Base, u * PointeeTySize, IRB, DL);
+        new StoreInst(F.getArg(ArgNo + u), Ptr, &IP);
+      }
+    } else {
+      new StoreInst(F.getArg(ArgNo), &Base, &IP);
+    }
+  }
+
+  /// Extract values from \p Base according to the type \p PrivType at the
+  /// call position \p ACS. The values are appended to \p ReplacementValues.
+  void createReplacementValues(Type *PrivType, AbstractCallSite ACS,
+                               Value *Base,
+                               SmallVectorImpl<Value *> &ReplacementValues) {
+    assert(Base && "Expected base value!");
+    assert(PrivType && "Expected privatizable type!");
+    Instruction *IP = ACS.getInstruction();
+
+    IRBuilder<NoFolder> IRB(IP);
+    const DataLayout &DL = IP->getModule()->getDataLayout();
+
+    if (Base->getType()->getPointerElementType() != PrivType)
+      Base = BitCastInst::CreateBitOrPointerCast(Base, PrivType->getPointerTo(),
+                                                 "", ACS.getInstruction());
+
+    // TODO: Improve the alignment of the loads.
+    // Traverse the type, build GEPs and loads.
+    if (auto *PrivStructType = dyn_cast<StructType>(PrivType)) {
+      const StructLayout *PrivStructLayout = DL.getStructLayout(PrivStructType);
+      for (unsigned u = 0, e = PrivStructType->getNumElements(); u < e; u++) {
+        Type *PointeeTy = PrivStructType->getElementType(u);
+        Value *Ptr =
+            constructPointer(PointeeTy->getPointerTo(), Base,
+                             PrivStructLayout->getElementOffset(u), IRB, DL);
+        LoadInst *L = new LoadInst(PointeeTy, Ptr, "", IP);
+        L->setAlignment(MaybeAlign(1));
+        ReplacementValues.push_back(L);
+      }
+    } else if (auto *PrivArrayType = dyn_cast<ArrayType>(PrivType)) {
+      Type *PointeeTy = PrivArrayType->getElementType();
+      uint64_t PointeeTySize = DL.getTypeStoreSize(PointeeTy);
+      Type *PointeePtrTy = PointeeTy->getPointerTo();
+      for (unsigned u = 0, e = PrivArrayType->getNumElements(); u < e; u++) {
+        Value *Ptr =
+            constructPointer(PointeePtrTy, Base, u * PointeeTySize, IRB, DL);
+        LoadInst *L = new LoadInst(PointeePtrTy, Ptr, "", IP);
+        L->setAlignment(MaybeAlign(1));
+        ReplacementValues.push_back(L);
+      }
+    } else {
+      LoadInst *L = new LoadInst(PrivType, Base, "", IP);
+      L->setAlignment(MaybeAlign(1));
+      ReplacementValues.push_back(L);
+    }
+  }
+
+  /// See AbstractAttribute::manifest(...)
+  ChangeStatus manifest(Attributor &A) override {
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    assert(PrivatizableType.getValue() && "Expected privatizable type!");
+
+    // Collect all tail calls in the function as we cannot allow new allocas to
+    // escape into tail recursion.
+    // TODO: Be smarter about new allocas escaping into tail calls.
+    SmallVector<CallInst *, 16> TailCalls;
+    if (!A.checkForAllInstructions(
+            [&](Instruction &I) {
+              CallInst &CI = cast<CallInst>(I);
+              if (CI.isTailCall())
+                TailCalls.push_back(&CI);
+              return true;
+            },
+            *this, {Instruction::Call}))
+      return ChangeStatus::UNCHANGED;
+
+    Argument *Arg = getAssociatedArgument();
+
+    // Callback to repair the associated function. A new alloca is placed at the
+    // beginning and initialized with the values passed through arguments. The
+    // new alloca replaces the use of the old pointer argument.
+    Attributor::ArgumentReplacementInfo::CalleeRepairCBTy FnRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            Function &ReplacementFn, Function::arg_iterator ArgIt) {
+          BasicBlock &EntryBB = ReplacementFn.getEntryBlock();
+          Instruction *IP = &*EntryBB.getFirstInsertionPt();
+          auto *AI = new AllocaInst(PrivatizableType.getValue(), 0,
+                                    Arg->getName() + ".priv", IP);
+          createInitialization(PrivatizableType.getValue(), *AI, ReplacementFn,
+                               ArgIt->getArgNo(), *IP);
+          Arg->replaceAllUsesWith(AI);
+
+          for (CallInst *CI : TailCalls)
+            CI->setTailCall(false);
+        };
+
+    // Callback to repair a call site of the associated function. The elements
+    // of the privatizable type are loaded prior to the call and passed to the
+    // new function version.
+    Attributor::ArgumentReplacementInfo::ACSRepairCBTy ACSRepairCB =
+        [=](const Attributor::ArgumentReplacementInfo &ARI,
+            AbstractCallSite ACS, SmallVectorImpl<Value *> &NewArgOperands) {
+          createReplacementValues(
+              PrivatizableType.getValue(), ACS,
+              ACS.getCallArgOperand(ARI.getReplacedArg().getArgNo()),
+                                  NewArgOperands);
+        };
+
+    // Collect the types that will replace the privatizable type in the function
+    // signature.
+    SmallVector<Type *, 16> ReplacementTypes;
+    identifyReplacementTypes(PrivatizableType.getValue(), ReplacementTypes);
+
+    // Register a rewrite of the argument.
+    if (A.registerFunctionSignatureRewrite(*Arg, ReplacementTypes,
+                                           std::move(FnRepairCB),
+                                           std::move(ACSRepairCB)))
+      return ChangeStatus::CHANGED;
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_ARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrFloating : public AAPrivatizablePtrImpl {
+  AAPrivatizablePtrFloating(const IRPosition &IRP)
+      : AAPrivatizablePtrImpl(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  virtual void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  ChangeStatus updateImpl(Attributor &A) override {
+    llvm_unreachable("AAPrivatizablePtr(Floating|Returned|CallSiteReturned)::"
+                     "updateImpl will not be called");
+  }
+
+  /// See AAPrivatizablePtrImpl::identifyPrivatizableType(...)
+  Optional<Type *> identifyPrivatizableType(Attributor &A) override {
+    Value *Obj =
+        GetUnderlyingObject(&getAssociatedValue(), A.getInfoCache().getDL());
+    if (!Obj) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] No underlying object found!\n");
+      return nullptr;
+    }
+
+    if (auto *AI = dyn_cast<AllocaInst>(Obj))
+      if (auto *CI = dyn_cast<ConstantInt>(AI->getArraySize()))
+        if (CI->isOne())
+          return Obj->getType()->getPointerElementType();
+    if (auto *Arg = dyn_cast<Argument>(Obj)) {
+      auto &PrivArgAA =
+          A.getAAFor<AAPrivatizablePtr>(*this, IRPosition::argument(*Arg));
+      if (PrivArgAA.isAssumedPrivatizablePtr())
+        return Obj->getType()->getPointerElementType();
+    }
+
+    LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] Underlying object neither valid "
+                         "alloca nor privatizable argument: "
+                      << *Obj << "!\n");
+    return nullptr;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FLOATING_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteArgument final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteArgument(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    if (getIRPosition().hasAttr(Attribute::ByVal))
+      indicateOptimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::updateImpl(...).
+  ChangeStatus updateImpl(Attributor &A) override {
+    PrivatizableType = identifyPrivatizableType(A);
+    if (!PrivatizableType.hasValue())
+      return ChangeStatus::UNCHANGED;
+    if (!PrivatizableType.getValue())
+      return indicatePessimisticFixpoint();
+
+    const IRPosition &IRP = getIRPosition();
+    auto &NoCaptureAA = A.getAAFor<AANoCapture>(*this, IRP);
+    if (!NoCaptureAA.isAssumedNoCapture()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might be captured!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    auto &NoAliasAA = A.getAAFor<AANoAlias>(*this, IRP);
+    if (!NoAliasAA.isAssumedNoAlias()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer might alias!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    const auto &MemBehaviorAA = A.getAAFor<AAMemoryBehavior>(*this, IRP);
+    if (!MemBehaviorAA.isAssumedReadOnly()) {
+      LLVM_DEBUG(dbgs() << "[AAPrivatizablePtr] pointer is written!\n");
+      return indicatePessimisticFixpoint();
+    }
+
+    return ChangeStatus::UNCHANGED;
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSARG_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrCallSiteReturned final
+    : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrCallSiteReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_CSRET_ATTR(privatizable_ptr);
+  }
+};
+
+struct AAPrivatizablePtrReturned final : public AAPrivatizablePtrFloating {
+  AAPrivatizablePtrReturned(const IRPosition &IRP)
+      : AAPrivatizablePtrFloating(IRP) {}
+
+  /// See AbstractAttribute::initialize(...).
+  void initialize(Attributor &A) override {
+    // TODO: We can privatize more than arguments.
+    indicatePessimisticFixpoint();
+  }
+
+  /// See AbstractAttribute::trackStatistics()
+  void trackStatistics() const override {
+    STATS_DECLTRACK_FNRET_ATTR(privatizable_ptr);
+  }
+};
+
 /// -------------------- Memory Behavior Attributes ----------------------------
 /// Includes read-none, read-only, and write-only.
 /// ----------------------------------------------------------------------------
@@ -6311,10 +6953,8 @@ ChangeStatus Attributor::run(Module &M) {
   return ManifestChange;
 }
 
-bool Attributor::registerFunctionSignatureRewrite(
-    Argument &Arg, ArrayRef<Type *> ReplacementTypes,
-    ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
-    ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
+bool Attributor::isValidFunctionSignatureRewrite(
+    Argument &Arg, ArrayRef<Type *> ReplacementTypes) {
 
   auto CallSiteCanBeChanged = [](AbstractCallSite ACS) {
     // Forbid must-tail calls for now.
@@ -6360,8 +7000,22 @@ bool Attributor::registerFunctionSignatureRewrite(
     return false;
   }
 
+  return true;
+}
+
+bool Attributor::registerFunctionSignatureRewrite(
+    Argument &Arg, ArrayRef<Type *> ReplacementTypes,
+    ArgumentReplacementInfo::CalleeRepairCBTy &&CalleeRepairCB,
+    ArgumentReplacementInfo::ACSRepairCBTy &&ACSRepairCB) {
+  LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
+                    << Arg.getParent()->getName() << " with "
+                    << ReplacementTypes.size() << " replacements\n");
+  assert(isValidFunctionSignatureRewrite(Arg, ReplacementTypes) &&
+         "Cannot register an invalid rewrite");
+
+  Function *Fn = Arg.getParent();
   SmallVectorImpl<ArgumentReplacementInfo *> &ARIs = ArgumentReplacementMap[Fn];
-  if (ARIs.size() == 0)
+  if (ARIs.empty())
     ARIs.resize(Fn->arg_size());
 
   // If we have a replacement already with less than or equal new arguments,
@@ -6377,6 +7031,10 @@ bool Attributor::registerFunctionSignatureRewrite(
   if (ARI)
     delete ARI;
 
+  LLVM_DEBUG(dbgs() << "[Attributor] Register new rewrite of " << Arg << " in "
+                    << Arg.getParent()->getName() << " with "
+                    << ReplacementTypes.size() << " replacements\n");
+
   // Remember the replacement.
   ARI = new ArgumentReplacementInfo(*this, Arg, ReplacementTypes,
                                     std::move(CalleeRepairCB),
@@ -6718,6 +7376,9 @@ void Attributor::identifyDefaultAbstractAttributes(Function &F) {
 
       // Every argument with pointer type might be marked nofree.
       getOrCreateAAFor<AANoFree>(ArgPos);
+
+      // Every argument with pointer type might be privatizable (or promotable)
+      getOrCreateAAFor<AAPrivatizablePtr>(ArgPos);
     }
   }
 
@@ -6972,6 +7633,7 @@ const char AAAlign::ID = 0;
 const char AANoCapture::ID = 0;
 const char AAValueSimplify::ID = 0;
 const char AAHeapToStack::ID = 0;
+const char AAPrivatizablePtr::ID = 0;
 const char AAMemoryBehavior::ID = 0;
 const char AAValueConstantRange::ID = 0;
 
@@ -7076,6 +7738,7 @@ CREATE_FUNCTION_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAReturnedValues)
 
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANonNull)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoAlias)
+CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAPrivatizablePtr)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AADereferenceable)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AAAlign)
 CREATE_VALUE_ABSTRACT_ATTRIBUTE_FOR_POSITION(AANoCapture)

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
index d19cb444acf4..ddb115ef6c16 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/2008-02-01-ReturnAttrs.ll
@@ -3,9 +3,11 @@
 
 define internal i32 @deref(i32* %x) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@deref
-; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 4
+; CHECK-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[X_PRIV]]
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X_PRIV]], align 4
 ; CHECK-NEXT:    ret i32 [[TMP2]]
 ;
 entry:
@@ -19,7 +21,8 @@ define i32 @f(i32 %x) {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[X_ADDR:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 [[X]], i32* [[X_ADDR]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X_ADDR]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[X_ADDR]], align 1
+; CHECK-NEXT:    [[TMP1:%.*]] = call i32 @deref(i32 [[TMP0]])
 ; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
index b66e0987593a..c79dfe0cc5bf 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/attributes.ll
@@ -45,9 +45,11 @@ bb:
 
 define internal fastcc void @promote_avx2(<4 x i64>* %arg, <4 x i64>* readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@promote_avx2
-; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[ARG1:%.*]])
+; CHECK-SAME: (<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[ARG:%.*]], <4 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <4 x i64>
+; CHECK-NEXT:    store <4 x i64> [[TMP0]], <4 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <4 x i64>, <4 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <4 x i64> [[TMP]], <4 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -65,7 +67,8 @@ define void @promote(<4 x i64>* %arg) #0 {
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <4 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <4 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(32) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(32) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <4 x i64>, <4 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @promote_avx2(<4 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(32) [[TMP2]], <4 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <4 x i64>, <4 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <4 x i64> [[TMP4]], <4 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
index 158e950ed287..98d1bc2384c4 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll
@@ -8,9 +8,11 @@ target triple = "x86_64-unknown-linux-gnu"
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -28,7 +30,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -47,9 +50,11 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -67,7 +72,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -86,9 +92,11 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #1 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -106,7 +114,8 @@ define void @avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -125,9 +134,11 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #0 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -145,7 +156,8 @@ define void @avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>*
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -242,9 +254,11 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #3 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -262,7 +276,8 @@ define void @avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* %ar
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void
@@ -281,9 +296,11 @@ bb:
 ; This should promote
 define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %arg, <8 x i64>* readonly %arg1) #4 {
 ; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256
-; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[ARG1:%.*]])
+; CHECK-SAME: (<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]])
 ; CHECK-NEXT:  bb:
-; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1]], align 32
+; CHECK-NEXT:    [[ARG1_PRIV:%.*]] = alloca <8 x i64>
+; CHECK-NEXT:    store <8 x i64> [[TMP0]], <8 x i64>* [[ARG1_PRIV]]
+; CHECK-NEXT:    [[TMP:%.*]] = load <8 x i64>, <8 x i64>* [[ARG1_PRIV]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP]], <8 x i64>* [[ARG]], align 32
 ; CHECK-NEXT:    ret void
 ;
@@ -301,7 +318,8 @@ define void @avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* %ar
 ; CHECK-NEXT:    [[TMP2:%.*]] = alloca <8 x i64>, align 32
 ; CHECK-NEXT:    [[TMP3:%.*]] = bitcast <8 x i64>* [[TMP]] to i8*
 ; CHECK-NEXT:    call void @llvm.memset.p0i8.i64(i8* nonnull align 32 dereferenceable(64) [[TMP3]], i8 0, i64 32, i1 false)
-; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64>* noalias nocapture nofree nonnull readonly align 32 dereferenceable(64) [[TMP]])
+; CHECK-NEXT:    [[TMP0:%.*]] = load <8 x i64>, <8 x i64>* [[TMP]], align 1
+; CHECK-NEXT:    call fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(<8 x i64>* noalias nocapture nofree nonnull writeonly align 32 dereferenceable(64) [[TMP2]], <8 x i64> [[TMP0]])
 ; CHECK-NEXT:    [[TMP4:%.*]] = load <8 x i64>, <8 x i64>* [[TMP2]], align 32
 ; CHECK-NEXT:    store <8 x i64> [[TMP4]], <8 x i64>* [[ARG]], align 2
 ; CHECK-NEXT:    ret void

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
new file mode 100644
index 000000000000..ace6b60e99a7
--- /dev/null
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/alignment.ll
@@ -0,0 +1,32 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --function-signature --scrub-attributes
+; RUN: opt -S -passes='attributor' -aa-pipeline='basic-aa' -attributor-disable=false -attributor-max-iterations-verify -attributor-max-iterations=2 < %s | FileCheck %s
+
+define void @f() {
+; CHECK-LABEL: define {{[^@]+}}@f()
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A:%.*]] = alloca i32, align 1
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    call void @g(i32 [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %a = alloca i32, align 1
+  call void @g(i32* %a)
+  ret void
+}
+
+define internal void @g(i32* %a) {
+; CHECK-LABEL: define {{[^@]+}}@g
+; CHECK-SAME: (i32 [[TMP0:%.*]])
+; CHECK-NEXT:    [[A_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV]]
+; CHECK-NEXT:    [[AA:%.*]] = load i32, i32* [[A_PRIV]], align 1
+; CHECK-NEXT:    call void @z(i32 [[AA]])
+; CHECK-NEXT:    ret void
+;
+  %aa = load i32, i32* %a, align 1
+  call void @z(i32 %aa)
+  ret void
+}
+
+declare void @z(i32)

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
index ac9b0766b387..f9e6ab5c4d8b 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/attrs.ll
@@ -6,13 +6,20 @@
 ; Don't drop 'byval' on %X here.
 define internal void @f(%struct.ss* byval %b, i32* byval %X, i32 %i) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]], i32 [[I:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]], i32 [[I:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]]
+; CHECK-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; CHECK-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
 ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
-; CHECK-NEXT:    store i32 0, i32* [[X]]
+; CHECK-NEXT:    store i32 0, i32* [[X_PRIV]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -36,7 +43,12 @@ define i32 @test(i32* %X) {
 ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; CHECK-NEXT:    call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]], i32 zeroext 0)
+; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]], i32 zeroext 0)
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
index ea3feffe6811..45721e5528b1 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/basictest.ll
@@ -4,9 +4,13 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 define internal i32 @test(i32* %X, i32* %Y) {
 ; CHECK-LABEL: define {{[^@]+}}@test
-; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[X:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[Y:%.*]])
-; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[X]], align 4
-; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[Y]], align 4
+; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; CHECK-NEXT:    [[Y_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[Y_PRIV]]
+; CHECK-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[X_PRIV]]
+; CHECK-NEXT:    [[A:%.*]] = load i32, i32* [[X_PRIV]], align 4
+; CHECK-NEXT:    [[B:%.*]] = load i32, i32* [[Y_PRIV]], align 4
 ; CHECK-NEXT:    [[C:%.*]] = add i32 [[A]], [[B]]
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
@@ -18,10 +22,14 @@ define internal i32 @test(i32* %X, i32* %Y) {
 
 define internal i32 @caller(i32* %B) {
 ; CHECK-LABEL: define {{[^@]+}}@caller
-; CHECK-SAME: (i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]])
+; CHECK-NEXT:    [[B_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV]]
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 1, i32* [[A]], align 4
-; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[B_PRIV]], align 1
+; CHECK-NEXT:    [[C:%.*]] = call i32 @test(i32 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[C]]
 ;
   %A = alloca i32
@@ -34,7 +42,8 @@ define i32 @callercaller() {
 ; CHECK-LABEL: define {{[^@]+}}@callercaller()
 ; CHECK-NEXT:    [[B:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 2, i32* [[B]], align 4
-; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[B]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[B]], align 1
+; CHECK-NEXT:    [[X:%.*]] = call i32 @caller(i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
   %B = alloca i32

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
index 12e02f855f73..be039e8206c4 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval-2.ll
@@ -5,13 +5,20 @@
 
 define internal void @f(%struct.ss* byval  %b, i32* byval %X) nounwind  {
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]], i32* noalias nocapture nofree nonnull writeonly byval dereferenceable(4) [[X:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]], i32 [[TMP2:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[X_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP2]], i32* [[X_PRIV]]
+; CHECK-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; CHECK-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
 ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
-; CHECK-NEXT:    store i32 0, i32* [[X]]
+; CHECK-NEXT:    store i32 0, i32* [[X_PRIV]]
 ; CHECK-NEXT:    ret void
 ;
 entry:
@@ -33,7 +40,12 @@ define i32 @test(i32* %X) {
 ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; CHECK-NEXT:    call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]], i32* nocapture nofree readonly byval [[X]])
+; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    call void @f(i32 [[TMP0]], i64 [[TMP1]], i32 [[TMP2]])
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
index bba9f6f20b61..104d71a9a815 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/byval.ll
@@ -7,9 +7,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 define internal void @f(%struct.ss* byval  %b) nounwind  {
 ; CHECK-LABEL: define {{[^@]+}}@f
-; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 8 dereferenceable(12) [[B:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; CHECK-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 8
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
 ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 8
@@ -26,9 +31,14 @@ entry:
 
 define internal void @g(%struct.ss* byval align 32 %b) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@g
-; CHECK-SAME: (%struct.ss* noalias nocapture nofree nonnull byval align 32 dereferenceable(12) [[B:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[B]], i32 0, i32 0
+; CHECK-NEXT:    [[B_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; CHECK-NEXT:    [[B_PRIV_CAST:%.*]] = bitcast %struct.ss* [[B_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[B_PRIV_CAST]]
+; CHECK-NEXT:    [[B_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[B_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[B_PRIV]], i32 0, i32 0
 ; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP]], align 32
 ; CHECK-NEXT:    [[TMP2:%.*]] = add i32 [[TMP1]], 1
 ; CHECK-NEXT:    store i32 [[TMP2]], i32* [[TMP]], align 32
@@ -51,8 +61,16 @@ define i32 @main() nounwind  {
 ; CHECK-NEXT:    store i32 1, i32* [[TMP1]], align 8
 ; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; CHECK-NEXT:    store i64 2, i64* [[TMP4]], align 4
-; CHECK-NEXT:    call void @f(%struct.ss* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(12) [[S]])
-; CHECK-NEXT:    call void @g(%struct.ss* noalias nocapture nofree nonnull readonly byval align 32 dereferenceable(12) [[S]])
+; CHECK-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; CHECK-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[S_0_1]], align 1
+; CHECK-NEXT:    call void @f(i32 [[TMP0]], i64 [[TMP1]])
+; CHECK-NEXT:    [[S_CAST1:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[S_CAST1]], align 1
+; CHECK-NEXT:    [[S_0_12:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i64, i64* [[S_0_12]], align 1
+; CHECK-NEXT:    call void @g(i32 [[TMP2]], i64 [[TMP3]])
 ; CHECK-NEXT:    ret i32 0
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
index 92257857f27a..9ee1386b9869 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/control-flow2.ll
@@ -5,12 +5,14 @@ target datalayout = "E-p:64:64:64-a0:0:8-f32:32:32-f64:64:64-i1:8:8-i8:8:8-i16:1
 
 define internal i32 @callee(i1 %C, i32* %P) {
 ; CHECK-LABEL: define {{[^@]+}}@callee
-; CHECK-SAME: (i1 [[C:%.*]], i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[P:%.*]])
+; CHECK-SAME: (i1 [[C:%.*]], i32 [[TMP0:%.*]])
+; CHECK-NEXT:    [[P_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[P_PRIV]]
 ; CHECK-NEXT:    br label [[F:%.*]]
 ; CHECK:       T:
 ; CHECK-NEXT:    unreachable
 ; CHECK:       F:
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P]], align 4
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[P_PRIV]], align 4
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
   br i1 %C, label %T, label %F
@@ -27,7 +29,8 @@ define i32 @foo() {
 ; CHECK-LABEL: define {{[^@]+}}@foo()
 ; CHECK-NEXT:    [[A:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 17, i32* [[A]], align 4
-; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) [[A]])
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[A]], align 1
+; CHECK-NEXT:    [[X:%.*]] = call i32 @callee(i1 false, i32 [[TMP1]])
 ; CHECK-NEXT:    ret i32 [[X]]
 ;
   %A = alloca i32         ; <i32*> [#uses=2]

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
index 6a6864b3b3e0..b77520a105b5 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/fp80.ll
@@ -15,7 +15,11 @@ target triple = "x86_64-unknown-linux-gnu"
 define void @run() {
 ; CHECK-LABEL: define {{[^@]+}}@run()
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = call i64 @CaptureAStruct(%struct.Foo* nofree nonnull readonly align 8 dereferenceable(16) @a)
+; CHECK-NEXT:    [[A_CAST:%.*]] = bitcast %struct.Foo* @a to i32*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32, i32* [[A_CAST]], align 1
+; CHECK-NEXT:    [[A_0_1:%.*]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* @a, i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i64, i64* [[A_0_1]], align 1
+; CHECK-NEXT:    [[TMP2:%.*]] = call i64 @CaptureAStruct(i32 [[TMP0]], i64 [[TMP1]])
 ; CHECK-NEXT:    unreachable
 ;
 entry:
@@ -48,15 +52,20 @@ define internal i64 @AccessPaddingOfStruct(%struct.Foo* byval %a) {
 
 define internal i64 @CaptureAStruct(%struct.Foo* byval %a) {
 ; CHECK-LABEL: define {{[^@]+}}@CaptureAStruct
-; CHECK-SAME: (%struct.Foo* noalias nofree nonnull byval align 8 dereferenceable(16) [[A:%.*]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i64 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[A_PRIV:%.*]] = alloca [[STRUCT_FOO:%.*]]
+; CHECK-NEXT:    [[A_PRIV_CAST:%.*]] = bitcast %struct.Foo* [[A_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[A_PRIV_CAST]]
+; CHECK-NEXT:    [[A_PRIV_0_1:%.*]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i64 [[TMP1]], i64* [[A_PRIV_0_1]]
 ; CHECK-NEXT:    [[A_PTR:%.*]] = alloca %struct.Foo*
 ; CHECK-NEXT:    br label [[LOOP:%.*]]
 ; CHECK:       loop:
 ; CHECK-NEXT:    [[PHI:%.*]] = phi %struct.Foo* [ null, [[ENTRY:%.*]] ], [ [[GEP:%.*]], [[LOOP]] ]
-; CHECK-NEXT:    [[TMP0:%.*]] = phi %struct.Foo* [ [[A]], [[ENTRY]] ], [ [[TMP0]], [[LOOP]] ]
+; CHECK-NEXT:    [[TMP2:%.*]] = phi %struct.Foo* [ [[A_PRIV]], [[ENTRY]] ], [ [[TMP2]], [[LOOP]] ]
 ; CHECK-NEXT:    store %struct.Foo* [[PHI]], %struct.Foo** [[A_PTR]], align 8
-; CHECK-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO:%.*]], %struct.Foo* [[A]], i64 0
+; CHECK-NEXT:    [[GEP]] = getelementptr [[STRUCT_FOO]], %struct.Foo* [[A_PRIV]], i64 0
 ; CHECK-NEXT:    br label [[LOOP]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
index 88c13ac98693..495408485fe1 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/inalloca.ll
@@ -19,10 +19,15 @@ define internal i32 @f(%struct.ss* inalloca  %s) {
 ; ATTRIBUTOR-NEXT:    ret i32 [[R]]
 ;
 ; GLOBALOPT_ATTRIBUTOR-LABEL: define {{[^@]+}}@f
-; GLOBALOPT_ATTRIBUTOR-SAME: (%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S:%.*]]) unnamed_addr
+; GLOBALOPT_ATTRIBUTOR-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]]) unnamed_addr
 ; GLOBALOPT_ATTRIBUTOR-NEXT:  entry:
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[F0:%.*]] = getelementptr [[STRUCT_SS:%.*]], %struct.ss* [[S]], i32 0, i32 0
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_PRIV:%.*]] = alloca [[STRUCT_SS:%.*]]
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_PRIV_CAST:%.*]] = bitcast %struct.ss* [[S_PRIV]] to i32*
+; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 [[TMP0]], i32* [[S_PRIV_CAST]]
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_PRIV_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 [[TMP1]], i32* [[S_PRIV_0_1]]
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[F0:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 0
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S_PRIV]], i32 0, i32 1
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[A:%.*]] = load i32, i32* [[F0]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[B:%.*]] = load i32, i32* [[F1]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[R:%.*]] = add i32 [[A]], [[B]]
@@ -55,7 +60,11 @@ define i32 @main() {
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    [[F1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 1, i32* [[F0]], align 4
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    store i32 2, i32* [[F1]], align 4
-; GLOBALOPT_ATTRIBUTOR-NEXT:    [[R:%.*]] = call fastcc i32 @f(%struct.ss* noalias nocapture nofree nonnull readonly align 4 dereferenceable(8) [[S]])
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_CAST:%.*]] = bitcast %struct.ss* [[S]] to i32*
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP0:%.*]] = load i32, i32* [[S_CAST]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[S_0_1:%.*]] = getelementptr [[STRUCT_SS]], %struct.ss* [[S]], i32 0, i32 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[TMP1:%.*]] = load i32, i32* [[S_0_1]], align 1
+; GLOBALOPT_ATTRIBUTOR-NEXT:    [[R:%.*]] = call fastcc i32 @f(i32 [[TMP0]], i32 [[TMP1]])
 ; GLOBALOPT_ATTRIBUTOR-NEXT:    ret i32 [[R]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
index 5eda92356f5d..1bf7ed6d2fe5 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/profile.ll
@@ -8,7 +8,8 @@ define void @caller() #0 {
 ; CHECK-LABEL: define {{[^@]+}}@caller()
 ; CHECK-NEXT:    [[X:%.*]] = alloca i32
 ; CHECK-NEXT:    store i32 42, i32* [[X]], align 4
-; CHECK-NEXT:    call void @promote_i32_ptr(i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[X]]), !prof !0
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[X]], align 1
+; CHECK-NEXT:    call void @promote_i32_ptr(i32 [[TMP1]]), !prof !0
 ; CHECK-NEXT:    ret void
 ;
   %x = alloca i32
@@ -19,8 +20,10 @@ define void @caller() #0 {
 
 define internal void @promote_i32_ptr(i32* %xp) {
 ; CHECK-LABEL: define {{[^@]+}}@promote_i32_ptr
-; CHECK-SAME: (i32* noalias nocapture nonnull readonly align 4 dereferenceable(4) [[XP:%.*]])
-; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[XP]], align 4
+; CHECK-SAME: (i32 [[TMP0:%.*]])
+; CHECK-NEXT:    [[XP_PRIV:%.*]] = alloca i32
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[XP_PRIV]]
+; CHECK-NEXT:    [[X:%.*]] = load i32, i32* [[XP_PRIV]], align 4
 ; CHECK-NEXT:    call void @use_i32(i32 [[X]])
 ; CHECK-NEXT:    ret void
 ;

diff  --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
index 6f55d4acab96..6141c5f3073b 100644
--- a/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
+++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/tail.ll
@@ -10,8 +10,13 @@ declare i8* @foo(%pair*)
 
 define internal void @bar(%pair* byval %Data) {
 ; CHECK-LABEL: define {{[^@]+}}@bar
-; CHECK-SAME: (%pair* noalias byval [[DATA:%.*]])
-; CHECK-NEXT:    [[TMP1:%.*]] = tail call i8* @foo(%pair* [[DATA]])
+; CHECK-SAME: (i32 [[TMP0:%.*]], i32 [[TMP1:%.*]])
+; CHECK-NEXT:    [[DATA_PRIV:%.*]] = alloca [[PAIR:%.*]]
+; CHECK-NEXT:    [[DATA_PRIV_CAST:%.*]] = bitcast %pair* [[DATA_PRIV]] to i32*
+; CHECK-NEXT:    store i32 [[TMP0]], i32* [[DATA_PRIV_CAST]]
+; CHECK-NEXT:    [[DATA_PRIV_0_1:%.*]] = getelementptr [[PAIR]], %pair* [[DATA_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[DATA_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP3:%.*]] = call i8* @foo(%pair* [[DATA_PRIV]])
 ; CHECK-NEXT:    ret void
 ;
   tail call i8* @foo(%pair* %Data)
@@ -21,7 +26,11 @@ define internal void @bar(%pair* byval %Data) {
 define void @zed(%pair* byval %Data) {
 ; CHECK-LABEL: define {{[^@]+}}@zed
 ; CHECK-SAME: (%pair* noalias nocapture readonly byval [[DATA:%.*]])
-; CHECK-NEXT:    call void @bar(%pair* noalias nocapture readonly byval [[DATA]])
+; CHECK-NEXT:    [[DATA_CAST:%.*]] = bitcast %pair* [[DATA]] to i32*
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[DATA_CAST]], align 1
+; CHECK-NEXT:    [[DATA_0_1:%.*]] = getelementptr [[PAIR:%.*]], %pair* [[DATA]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP2:%.*]] = load i32, i32* [[DATA_0_1]], align 1
+; CHECK-NEXT:    call void @bar(i32 [[TMP1]], i32 [[TMP2]])
 ; CHECK-NEXT:    ret void
 ;
   call void @bar(%pair* byval %Data)

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
index 55175d70db6d..dedaa3dd3063 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/2009-09-24-byval-ptr.ll
@@ -6,12 +6,17 @@
 @mystr = internal global %struct.MYstr zeroinitializer ; <%struct.MYstr*> [#uses=3]
 define internal void @vfu1(%struct.MYstr* byval align 4 %u) nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@vfu1
-; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull writeonly byval align 8 dereferenceable(8) [[U:%.*]])
+; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1
-; CHECK-NEXT:    store i32 99, i32* [[TMP0]], align 4
-; CHECK-NEXT:    [[TMP1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0
-; CHECK-NEXT:    store i8 97, i8* [[TMP1]], align 8
+; CHECK-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
+; CHECK-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; CHECK-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
+; CHECK-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i32 99, i32* [[TMP2]], align 4
+; CHECK-NEXT:    [[TMP3:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
+; CHECK-NEXT:    store i8 97, i8* [[TMP3]], align 8
 ; CHECK-NEXT:    br label [[RETURN:%.*]]
 ; CHECK:       return:
 ; CHECK-NEXT:    ret void
@@ -29,15 +34,20 @@ return:                                           ; preds = %entry
 
 define internal i32 @vfu2(%struct.MYstr* byval align 4 %u) nounwind readonly {
 ; CHECK-LABEL: define {{[^@]+}}@vfu2
-; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull readonly byval align 8 dereferenceable(8) [[U:%.*]])
+; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP1]]
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
+; CHECK-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; CHECK-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
+; CHECK-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP7]]
 ;
 entry:
   %0 = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1 ; <i32*> [#uses=1]
@@ -52,8 +62,16 @@ entry:
 define i32 @unions() nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@unions()
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @vfu1(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
-; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
+; CHECK-NEXT:    [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 1
+; CHECK-NEXT:    [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 1
+; CHECK-NEXT:    call void @vfu1(i8 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2(i8 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;
 entry:
@@ -64,17 +82,22 @@ entry:
 
 define internal i32 @vfu2_v2(%struct.MYstr* byval align 4 %u) nounwind readonly {
 ; CHECK-LABEL: define {{[^@]+}}@vfu2_v2
-; CHECK-SAME: (%struct.MYstr* noalias nocapture nofree nonnull byval align 8 dereferenceable(8) [[U:%.*]])
+; CHECK-SAME: (i8 [[TMP0:%.*]], i32 [[TMP1:%.*]])
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* [[U]], i32 0, i32 1
+; CHECK-NEXT:    [[U_PRIV:%.*]] = alloca [[STRUCT_MYSTR:%.*]]
+; CHECK-NEXT:    [[U_PRIV_CAST:%.*]] = bitcast %struct.MYstr* [[U_PRIV]] to i8*
+; CHECK-NEXT:    store i8 [[TMP0]], i8* [[U_PRIV_CAST]]
+; CHECK-NEXT:    [[U_PRIV_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    store i32 [[TMP1]], i32* [[U_PRIV_0_1]]
+; CHECK-NEXT:    [[Z:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
 ; CHECK-NEXT:    store i32 99, i32* [[Z]], align 4
-; CHECK-NEXT:    [[TMP0:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 1
-; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[TMP0]]
-; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U]], i32 0, i32 0
-; CHECK-NEXT:    [[TMP3:%.*]] = load i8, i8* [[TMP2]], align 8
-; CHECK-NEXT:    [[TMP4:%.*]] = zext i8 [[TMP3]] to i32
-; CHECK-NEXT:    [[TMP5:%.*]] = add i32 [[TMP4]], [[TMP1]]
-; CHECK-NEXT:    ret i32 [[TMP5]]
+; CHECK-NEXT:    [[TMP2:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[TMP2]]
+; CHECK-NEXT:    [[TMP4:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* [[U_PRIV]], i32 0, i32 0
+; CHECK-NEXT:    [[TMP5:%.*]] = load i8, i8* [[TMP4]], align 8
+; CHECK-NEXT:    [[TMP6:%.*]] = zext i8 [[TMP5]] to i32
+; CHECK-NEXT:    [[TMP7:%.*]] = add i32 [[TMP6]], [[TMP3]]
+; CHECK-NEXT:    ret i32 [[TMP7]]
 ;
 entry:
   %z = getelementptr %struct.MYstr, %struct.MYstr* %u, i32 0, i32 1
@@ -91,8 +114,16 @@ entry:
 define i32 @unions_v2() nounwind {
 ; CHECK-LABEL: define {{[^@]+}}@unions_v2()
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    call void @vfu1(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
-; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(%struct.MYstr* nofree nonnull readonly byval align 8 dereferenceable(8) @mystr)
+; CHECK-NEXT:    [[MYSTR_CAST1:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; CHECK-NEXT:    [[TMP0:%.*]] = load i8, i8* [[MYSTR_CAST1]], align 1
+; CHECK-NEXT:    [[MYSTR_0_12:%.*]] = getelementptr [[STRUCT_MYSTR:%.*]], %struct.MYstr* @mystr, i32 0, i32 1
+; CHECK-NEXT:    [[TMP1:%.*]] = load i32, i32* [[MYSTR_0_12]], align 1
+; CHECK-NEXT:    call void @vfu1(i8 [[TMP0]], i32 [[TMP1]])
+; CHECK-NEXT:    [[MYSTR_CAST:%.*]] = bitcast %struct.MYstr* @mystr to i8*
+; CHECK-NEXT:    [[TMP2:%.*]] = load i8, i8* [[MYSTR_CAST]], align 1
+; CHECK-NEXT:    [[MYSTR_0_1:%.*]] = getelementptr [[STRUCT_MYSTR]], %struct.MYstr* @mystr, i32 0, i32 1
+; CHECK-NEXT:    [[TMP3:%.*]] = load i32, i32* [[MYSTR_0_1]], align 1
+; CHECK-NEXT:    [[RESULT:%.*]] = call i32 @vfu2_v2(i8 [[TMP2]], i32 [[TMP3]])
 ; CHECK-NEXT:    ret i32 [[RESULT]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
index 6ec7dde13f25..b15edccfadd3 100644
--- a/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
+++ b/llvm/test/Transforms/Attributor/IPConstantProp/PR16052.ll
@@ -9,7 +9,7 @@ define i64 @fn2() {
 ; CHECK-NEXT:  entry:
 ; CHECK-NEXT:    [[CONV:%.*]] = sext i32 undef to i64
 ; CHECK-NEXT:    [[DIV:%.*]] = sdiv i64 8, [[CONV]]
-; CHECK-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]])
+; CHECK-NEXT:    [[CALL2:%.*]] = call i64 @fn1(i64 [[DIV]]) #0, !range !0
 ; CHECK-NEXT:    ret i64 [[CALL2]]
 ;
 entry:

diff  --git a/llvm/test/Transforms/Attributor/callbacks.ll b/llvm/test/Transforms/Attributor/callbacks.ll
index d4487393ddb6..5e3ac0741b1a 100644
--- a/llvm/test/Transforms/Attributor/callbacks.ll
+++ b/llvm/test/Transforms/Attributor/callbacks.ll
@@ -63,5 +63,112 @@ declare void @t0_check(i32* align 256, i64, i32*)
 
 declare !callback !0 void @t0_callback_broker(i32*, i32*, void (i32*, i32*, ...)*, ...)
 
+; Test 1
+;
+; Similar to test 0 but with some additional annotations (noalias/nocapute) to make sure
+; we deduce and propagate noalias and others properly.
+
+define void @t1_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t1_caller
+; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT:    [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    store i32 42, i32* [[B]], align 32
+; CHECK-NEXT:    store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t1_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t1_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+define internal void @t1_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t1_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* noalias nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT:    tail call void @t1_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t1_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t1_check(i32* nocapture align 256, i64, i32* nocapture) nosync
+
+declare !callback !0 void @t1_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
+; Test 2
+;
+; Similar to test 1 but checking that the noalias is only placed if potential synchronization through @t2_check is preserved.
+
+define void @t2_caller(i32* noalias %a) {
+; CHECK-LABEL: define {{[^@]+}}@t2_caller
+; CHECK-SAME: (i32* noalias nocapture align 256 [[A:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[B:%.*]] = alloca i32, align 32
+; CHECK-NEXT:    [[C:%.*]] = alloca i32*, align 64
+; CHECK-NEXT:    [[PTR:%.*]] = alloca i32, align 128
+; CHECK-NEXT:    [[TMP0:%.*]] = bitcast i32* [[B]] to i8*
+; CHECK-NEXT:    store i32 42, i32* [[B]], align 32
+; CHECK-NEXT:    store i32* [[B]], i32** [[C]], align 64
+; CHECK-NEXT:    call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* noalias align 536870912 null, i32* noalias nonnull align 128 dereferenceable(4) [[PTR]], void (i32*, i32*, ...)* nonnull bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* noalias nocapture align 256 [[A]], i64 99, i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %b = alloca i32, align 32
+  %c = alloca i32*, align 64
+  %ptr = alloca i32, align 128
+  %0 = bitcast i32* %b to i8*
+  store i32 42, i32* %b, align 4
+  store i32* %b, i32** %c, align 8
+  call void (i32*, i32*, void (i32*, i32*, ...)*, ...) @t2_callback_broker(i32* null, i32* %ptr, void (i32*, i32*, ...)* bitcast (void (i32*, i32*, i32*, i64, i32**)* @t2_callback_callee to void (i32*, i32*, ...)*), i32* %a, i64 99, i32** %c)
+  ret void
+}
+
+; Note that the first two arguments are provided by the callback_broker according to the callback in !1 below!
+; The others are annotated with alignment information, amongst others, or even replaced by the constants passed to the call.
+;
+; FIXME: We should derive noalias for %a and add a "fake use" of %a in all potentially synchronizing calls.
+define internal void @t2_callback_callee(i32* %is_not_null, i32* %ptr, i32* %a, i64 %b, i32** %c) {
+; CHECK-LABEL: define {{[^@]+}}@t2_callback_callee
+; CHECK-SAME: (i32* nocapture nonnull writeonly dereferenceable(4) [[IS_NOT_NULL:%.*]], i32* nocapture nonnull readonly align 8 dereferenceable(4) [[PTR:%.*]], i32* nocapture align 256 [[A:%.*]], i64 [[B:%.*]], i32** noalias nocapture nonnull readonly align 64 dereferenceable(8) [[C:%.*]])
+; CHECK-NEXT:  entry:
+; CHECK-NEXT:    [[PTR_VAL:%.*]] = load i32, i32* [[PTR]], align 8
+; CHECK-NEXT:    store i32 [[PTR_VAL]], i32* [[IS_NOT_NULL]]
+; CHECK-NEXT:    [[TMP0:%.*]] = load i32*, i32** [[C]], align 64
+; CHECK-NEXT:    tail call void @t2_check(i32* nocapture align 256 [[A]], i64 99, i32* [[TMP0]])
+; CHECK-NEXT:    ret void
+;
+entry:
+  %ptr_val = load i32, i32* %ptr, align 8
+  store i32 %ptr_val, i32* %is_not_null
+  %0 = load i32*, i32** %c, align 8
+  tail call void @t2_check(i32* %a, i64 %b, i32* %0)
+  ret void
+}
+
+declare void @t2_check(i32* nocapture align 256, i64, i32* nocapture)
+
+declare !callback !0 void @t2_callback_broker(i32* nocapture , i32* nocapture , void (i32*, i32*, ...)* nocapture, ...)
+
 !0 = !{!1}
 !1 = !{i64 2, i64 -1, i64 -1, i1 true}

diff  --git a/llvm/test/Transforms/Attributor/internal-noalias.ll b/llvm/test/Transforms/Attributor/internal-noalias.ll
index 6c380b514ce3..14cf6f3ff73d 100644
--- a/llvm/test/Transforms/Attributor/internal-noalias.ll
+++ b/llvm/test/Transforms/Attributor/internal-noalias.ll
@@ -40,7 +40,7 @@ entry:
   ret i32 %add
 }
 
-; CHECK: define internal i32 @noalias_args_argmem_ro(i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) %A, i32* noalias nocapture nofree nonnull readonly align 4 dereferenceable(4) %B)
+; CHECK: define internal i32 @noalias_args_argmem_ro(i32 %0, i32 %1)
 define internal i32 @noalias_args_argmem_ro(i32* %A, i32* %B) #1 {
   %t0 = load i32, i32* %A, align 4
   %t1 = load i32, i32* %B, align 4


        


More information about the llvm-commits mailing list