[llvm] 8a88755 - Reland [X86] Codegen for preallocated

Arthur Eubanks via llvm-commits llvm-commits at lists.llvm.org
Wed May 20 11:32:29 PDT 2020


Author: Arthur Eubanks
Date: 2020-05-20T11:25:44-07:00
New Revision: 8a88755610d0f7ae630b5e6124cc0579cb1c32c8

URL: https://github.com/llvm/llvm-project/commit/8a88755610d0f7ae630b5e6124cc0579cb1c32c8
DIFF: https://github.com/llvm/llvm-project/commit/8a88755610d0f7ae630b5e6124cc0579cb1c32c8.diff

LOG: Reland [X86] Codegen for preallocated

See https://reviews.llvm.org/D74651 for the preallocated IR constructs
and LangRef changes.

In X86TargetLowering::LowerCall(), if a call is preallocated, record
each argument's offset from the stack pointer and the total stack
adjustment. Associate the call Value with an integer index. Store the
info in X86MachineFunctionInfo with the integer index as the key.

This adds two new target independent ISDOpcodes and two new target
dependent Opcodes corresponding to @llvm.call.preallocated.{setup,arg}.

The setup ISelDAG node takes in a chain and outputs a chain and a
SrcValue of the preallocated call Value. It is lowered to a target
dependent node with the SrcValue replaced with the integer index key by
looking in X86MachineFunctionInfo. In
X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to an
%esp adjustment, the exact amount determined by looking in
X86MachineFunctionInfo with the integer index key.

The arg ISelDAG node takes in a chain, a SrcValue of the preallocated
call Value, and the arg index int constant. It produces a chain and the
pointer fo the arg. It is lowered to a target dependent node with the
SrcValue replaced with the integer index key by looking in
X86MachineFunctionInfo. In
X86TargetLowering::EmitInstrWithCustomInserter() this is lowered to a
lea of the stack pointer plus an offset determined by looking in
X86MachineFunctionInfo with the integer index key.

Force any function containing a preallocated call to use the frame
pointer.

Does not yet handle a setup without a call, or a conditional call.
Does not yet handle musttail. That requires a LangRef change first.

Tried to look at all references to inalloca and see if they apply to
preallocated. I've made preallocated versions of tests testing inalloca
whenever possible and when they make sense (e.g. not alloca related,
inalloca edge cases).

Aside from the tests added here, I checked that this codegen produces
correct code for something like

```
struct A {
        A();
        A(A&&);
        ~A();
};

void bar() {
        foo(foo(foo(foo(foo(A(), 4), 5), 6), 7), 8);
}
```

by replacing the inalloca version of the .ll file with the appropriate
preallocated code. Running the executable produces the same results as
using the current inalloca implementation.

Reverted due to unexpectedly passing tests, added REQUIRES: asserts for reland.

Subscribers: hiraditya, llvm-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D77689

Added: 
    llvm/test/CodeGen/X86/preallocated-nocall.ll
    llvm/test/CodeGen/X86/preallocated-x64.ll
    llvm/test/CodeGen/X86/preallocated.ll
    llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll

Modified: 
    llvm/include/llvm/CodeGen/ISDOpcodes.h
    llvm/include/llvm/CodeGen/TargetCallingConv.h
    llvm/include/llvm/CodeGen/TargetLowering.h
    llvm/include/llvm/IR/Argument.h
    llvm/include/llvm/IR/Attributes.h
    llvm/include/llvm/IR/InstrTypes.h
    llvm/include/llvm/Support/TargetOpcodes.def
    llvm/include/llvm/Target/Target.td
    llvm/include/llvm/Target/TargetCallingConv.td
    llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
    llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
    llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
    llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
    llvm/lib/IR/Attributes.cpp
    llvm/lib/IR/Function.cpp
    llvm/lib/Target/X86/X86CallingConv.td
    llvm/lib/Target/X86/X86FastISel.cpp
    llvm/lib/Target/X86/X86FrameLowering.cpp
    llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
    llvm/lib/Target/X86/X86ISelLowering.cpp
    llvm/lib/Target/X86/X86MachineFunctionInfo.h
    llvm/lib/Target/X86/X86RegisterInfo.cpp
    llvm/lib/Transforms/Coroutines/CoroSplit.cpp
    llvm/lib/Transforms/IPO/Attributor.cpp
    llvm/lib/Transforms/IPO/AttributorAttributes.cpp
    llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
    llvm/lib/Transforms/IPO/FunctionAttrs.cpp
    llvm/lib/Transforms/IPO/GlobalOpt.cpp
    llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
    llvm/test/CodeGen/X86/arg-copy-elide.ll
    llvm/test/CodeGen/X86/musttail-indirect.ll
    llvm/test/CodeGen/X86/musttail-thiscall.ll
    llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
    llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
    llvm/test/Transforms/Attributor/value-simplify.ll
    llvm/test/Transforms/DeadArgElim/keepalive.ll
    llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
    llvm/test/Transforms/DeadStoreElimination/simple.ll
    llvm/test/Transforms/FunctionAttrs/readattrs.ll
    llvm/test/Transforms/GlobalOpt/fastcc.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/CodeGen/ISDOpcodes.h b/llvm/include/llvm/CodeGen/ISDOpcodes.h
index a0e00069fe0c..839e82d9d84f 100644
--- a/llvm/include/llvm/CodeGen/ISDOpcodes.h
+++ b/llvm/include/llvm/CodeGen/ISDOpcodes.h
@@ -906,6 +906,13 @@ enum NodeType {
   VAEND,
   VASTART,
 
+  // PREALLOCATED_SETUP - This has 2 operands: an input chain and a SRCVALUE
+  // with the preallocated call Value.
+  PREALLOCATED_SETUP,
+  // PREALLOCATED_ARG - This has 3 operands: an input chain, a SRCVALUE
+  // with the preallocated call Value, and a constant int.
+  PREALLOCATED_ARG,
+
   /// SRCVALUE - This is a node type that holds a Value* that is used to
   /// make reference to a value in the LLVM IR.
   SRCVALUE,

diff  --git a/llvm/include/llvm/CodeGen/TargetCallingConv.h b/llvm/include/llvm/CodeGen/TargetCallingConv.h
index 07f7f81d8c0a..6ccc1ce11d99 100644
--- a/llvm/include/llvm/CodeGen/TargetCallingConv.h
+++ b/llvm/include/llvm/CodeGen/TargetCallingConv.h
@@ -35,6 +35,7 @@ namespace ISD {
     unsigned IsReturned : 1; ///< Always returned
     unsigned IsSplit : 1;
     unsigned IsInAlloca : 1;   ///< Passed with inalloca
+    unsigned IsPreallocated : 1; ///< ByVal without the copy
     unsigned IsSplitEnd : 1;   ///< Last part of a split
     unsigned IsSwiftSelf : 1;  ///< Swift self parameter
     unsigned IsSwiftError : 1; ///< Swift error parameter
@@ -56,9 +57,9 @@ namespace ISD {
   public:
     ArgFlagsTy()
         : IsZExt(0), IsSExt(0), IsInReg(0), IsSRet(0), IsByVal(0), IsNest(0),
-          IsReturned(0), IsSplit(0), IsInAlloca(0), IsSplitEnd(0),
-          IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0), IsHva(0),
-          IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
+          IsReturned(0), IsSplit(0), IsInAlloca(0), IsPreallocated(0),
+          IsSplitEnd(0), IsSwiftSelf(0), IsSwiftError(0), IsCFGuardTarget(0),
+          IsHva(0), IsHvaStart(0), IsSecArgPass(0), ByValAlign(0), OrigAlign(0),
           IsInConsecutiveRegsLast(0), IsInConsecutiveRegs(0),
           IsCopyElisionCandidate(0), IsPointer(0), ByValSize(0),
           PointerAddrSpace(0) {
@@ -83,6 +84,9 @@ namespace ISD {
     bool isInAlloca() const { return IsInAlloca; }
     void setInAlloca() { IsInAlloca = 1; }
 
+    bool isPreallocated() const { return IsPreallocated; }
+    void setPreallocated() { IsPreallocated = 1; }
+
     bool isSwiftSelf() const { return IsSwiftSelf; }
     void setSwiftSelf() { IsSwiftSelf = 1; }
 

diff  --git a/llvm/include/llvm/CodeGen/TargetLowering.h b/llvm/include/llvm/CodeGen/TargetLowering.h
index 2cab2572335c..db501c8c64f2 100644
--- a/llvm/include/llvm/CodeGen/TargetLowering.h
+++ b/llvm/include/llvm/CodeGen/TargetLowering.h
@@ -273,17 +273,20 @@ class TargetLoweringBase {
     bool IsNest : 1;
     bool IsByVal : 1;
     bool IsInAlloca : 1;
+    bool IsPreallocated : 1;
     bool IsReturned : 1;
     bool IsSwiftSelf : 1;
     bool IsSwiftError : 1;
     bool IsCFGuardTarget : 1;
     MaybeAlign Alignment = None;
     Type *ByValType = nullptr;
+    Type *PreallocatedType = nullptr;
 
     ArgListEntry()
         : IsSExt(false), IsZExt(false), IsInReg(false), IsSRet(false),
-          IsNest(false), IsByVal(false), IsInAlloca(false), IsReturned(false),
-          IsSwiftSelf(false), IsSwiftError(false), IsCFGuardTarget(false) {}
+          IsNest(false), IsByVal(false), IsInAlloca(false),
+          IsPreallocated(false), IsReturned(false), IsSwiftSelf(false),
+          IsSwiftError(false), IsCFGuardTarget(false) {}
 
     void setAttributes(const CallBase *Call, unsigned ArgIdx);
   };
@@ -3608,6 +3611,7 @@ class TargetLowering : public TargetLoweringBase {
     bool IsReturnValueUsed : 1;
     bool IsConvergent      : 1;
     bool IsPatchPoint      : 1;
+    bool IsPreallocated : 1;
 
     // IsTailCall should be modified by implementations of
     // TargetLowering::LowerCall that perform tail call conversions.
@@ -3631,7 +3635,7 @@ class TargetLowering : public TargetLoweringBase {
     CallLoweringInfo(SelectionDAG &DAG)
         : RetSExt(false), RetZExt(false), IsVarArg(false), IsInReg(false),
           DoesNotReturn(false), IsReturnValueUsed(true), IsConvergent(false),
-          IsPatchPoint(false), DAG(DAG) {}
+          IsPatchPoint(false), IsPreallocated(false), DAG(DAG) {}
 
     CallLoweringInfo &setDebugLoc(const SDLoc &dl) {
       DL = dl;
@@ -3737,6 +3741,11 @@ class TargetLowering : public TargetLoweringBase {
       return *this;
     }
 
+    CallLoweringInfo &setIsPreallocated(bool Value = true) {
+      IsPreallocated = Value;
+      return *this;
+    }
+
     CallLoweringInfo &setIsPostTypeLegalization(bool Value=true) {
       IsPostTypeLegalization = Value;
       return *this;

diff  --git a/llvm/include/llvm/IR/Argument.h b/llvm/include/llvm/IR/Argument.h
index e23e9d08ca59..2bd8e99f12c4 100644
--- a/llvm/include/llvm/IR/Argument.h
+++ b/llvm/include/llvm/IR/Argument.h
@@ -110,6 +110,9 @@ class Argument final : public Value {
   /// Return true if this argument has the inalloca attribute.
   bool hasInAllocaAttr() const;
 
+  /// Return true if this argument has the preallocated attribute.
+  bool hasPreallocatedAttr() const;
+
   /// Return true if this argument has the zext attribute.
   bool hasZExtAttr() const;
 

diff  --git a/llvm/include/llvm/IR/Attributes.h b/llvm/include/llvm/IR/Attributes.h
index 05c076816320..a76e29132acc 100644
--- a/llvm/include/llvm/IR/Attributes.h
+++ b/llvm/include/llvm/IR/Attributes.h
@@ -623,6 +623,9 @@ class AttributeList {
   /// Return the byval type for the specified function parameter.
   Type *getParamByValType(unsigned ArgNo) const;
 
+  /// Return the preallocated type for the specified function parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const;
+
   /// Get the stack alignment.
   MaybeAlign getStackAlignment(unsigned Index) const;
 

diff  --git a/llvm/include/llvm/IR/InstrTypes.h b/llvm/include/llvm/IR/InstrTypes.h
index fd34bf30b909..8ad39add81d5 100644
--- a/llvm/include/llvm/IR/InstrTypes.h
+++ b/llvm/include/llvm/IR/InstrTypes.h
@@ -1604,6 +1604,12 @@ class CallBase : public Instruction {
     return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
   }
 
+  /// Extract the preallocated type for a call or parameter.
+  Type *getParamPreallocatedType(unsigned ArgNo) const {
+    Type *Ty = Attrs.getParamPreallocatedType(ArgNo);
+    return Ty ? Ty : getArgOperand(ArgNo)->getType()->getPointerElementType();
+  }
+
   /// Extract the number of dereferenceable bytes for a call or
   /// parameter (0=unknown).
   uint64_t getDereferenceableBytes(unsigned i) const {

diff  --git a/llvm/include/llvm/Support/TargetOpcodes.def b/llvm/include/llvm/Support/TargetOpcodes.def
index 5ab88f367ed3..8385af9de2a4 100644
--- a/llvm/include/llvm/Support/TargetOpcodes.def
+++ b/llvm/include/llvm/Support/TargetOpcodes.def
@@ -127,6 +127,12 @@ HANDLE_TARGET_OPCODE(PATCHPOINT)
 /// additionally expand this pseudo after register allocation.
 HANDLE_TARGET_OPCODE(LOAD_STACK_GUARD)
 
+/// These are used to support call sites that must have the stack adjusted
+/// before the call (e.g. to initialize an argument passed by value).
+/// See llvm.call.preallocated.{setup,arg} in the LangRef for more details.
+HANDLE_TARGET_OPCODE(PREALLOCATED_SETUP)
+HANDLE_TARGET_OPCODE(PREALLOCATED_ARG)
+
 /// Call instruction with associated vm state for deoptimization and list
 /// of live pointers for relocation by the garbage collector.  It is
 /// intended to support garbage collection with fully precise relocating

diff  --git a/llvm/include/llvm/Target/Target.td b/llvm/include/llvm/Target/Target.td
index 70b17bc10647..c628fa8b625f 100644
--- a/llvm/include/llvm/Target/Target.td
+++ b/llvm/include/llvm/Target/Target.td
@@ -1173,6 +1173,18 @@ def LOAD_STACK_GUARD : StandardPseudoInstruction {
   let hasSideEffects = 0;
   bit isPseudo = 1;
 }
+def PREALLOCATED_SETUP : StandardPseudoInstruction {
+  let OutOperandList = (outs);
+  let InOperandList = (ins i32imm:$a);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
+def PREALLOCATED_ARG : StandardPseudoInstruction {
+  let OutOperandList = (outs ptr_rc:$loc);
+  let InOperandList = (ins i32imm:$a, i32imm:$b);
+  let usesCustomInserter = 1;
+  let hasSideEffects = 1;
+}
 def LOCAL_ESCAPE : StandardPseudoInstruction {
   // This instruction is really just a label. It has to be part of the chain so
   // that it doesn't get dropped from the DAG, but it produces nothing and has

diff  --git a/llvm/include/llvm/Target/TargetCallingConv.td b/llvm/include/llvm/Target/TargetCallingConv.td
index d5f3931c3d5d..057f33083e08 100644
--- a/llvm/include/llvm/Target/TargetCallingConv.td
+++ b/llvm/include/llvm/Target/TargetCallingConv.td
@@ -41,6 +41,11 @@ class CCIf<string predicate, CCAction A> : CCPredicateAction<A> {
 class CCIfByVal<CCAction A> : CCIf<"ArgFlags.isByVal()", A> {
 }
 
+/// CCIfPreallocated - If the current argument has Preallocated parameter attribute,
+/// apply Action A.
+class CCIfPreallocated<CCAction A> : CCIf<"ArgFlags.isPreallocated()", A> {
+}
+
 /// CCIfSwiftSelf - If the current argument has swiftself parameter attribute,
 /// apply Action A.
 class CCIfSwiftSelf<CCAction A> : CCIf<"ArgFlags.isSwiftSelf()", A> {

diff  --git a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
index 86fbb4b9ae8a..1e85d0a3a19b 100644
--- a/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
+++ b/llvm/lib/CodeGen/GlobalISel/CallLowering.cpp
@@ -96,10 +96,12 @@ void CallLowering::setArgFlags(CallLowering::ArgInfo &Arg, unsigned OpIdx,
     Flags.setSwiftError();
   if (Attrs.hasAttribute(OpIdx, Attribute::ByVal))
     Flags.setByVal();
+  if (Attrs.hasAttribute(OpIdx, Attribute::Preallocated))
+    Flags.setPreallocated();
   if (Attrs.hasAttribute(OpIdx, Attribute::InAlloca))
     Flags.setInAlloca();
 
-  if (Flags.isByVal() || Flags.isInAlloca()) {
+  if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
     Type *ElementTy = cast<PointerType>(Arg.Ty)->getElementType();
 
     auto Ty = Attrs.getAttribute(OpIdx, Attribute::ByVal).getValueAsType();

diff  --git a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
index d117c6032d3c..eb5b7ef4dab5 100644
--- a/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/FastISel.cpp
@@ -1214,7 +1214,16 @@ bool FastISel::lowerCallTo(CallLoweringInfo &CLI) {
       // the various CC lowering callbacks.
       Flags.setByVal();
     }
-    if (Arg.IsByVal || Arg.IsInAlloca) {
+    if (Arg.IsPreallocated) {
+      Flags.setPreallocated();
+      // Set the byval flag for CCAssignFn callbacks that don't know about
+      // preallocated. This way we can know how many bytes we should've
+      // allocated and how many bytes a callee cleanup function will pop.  If we
+      // port preallocated to more targets, we'll have to add custom
+      // preallocated handling in the various CC lowering callbacks.
+      Flags.setByVal();
+    }
+    if (Arg.IsByVal || Arg.IsInAlloca || Arg.IsPreallocated) {
       PointerType *Ty = cast<PointerType>(Arg.Ty);
       Type *ElementTy = Ty->getElementType();
       unsigned FrameSize =

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
index 2000f511993e..2dcab73b177b 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAG.cpp
@@ -1873,9 +1873,6 @@ SDValue SelectionDAG::getBlockAddress(const BlockAddress *BA, EVT VT,
 }
 
 SDValue SelectionDAG::getSrcValue(const Value *V) {
-  assert((!V || V->getType()->isPointerTy()) &&
-         "SrcValue is not a pointer?");
-
   FoldingSetNodeID ID;
   AddNodeIDNode(ID, ISD::SRCVALUE, getVTList(MVT::Other), None);
   ID.AddPointer(V);

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
index 54157cc053bc..ee8a2f90fb94 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp
@@ -5606,6 +5606,23 @@ void SelectionDAGBuilder::lowerCallToExternalSymbol(const CallInst &I,
   LowerCallTo(I, Callee, I.isTailCall());
 }
 
+/// Given a @llvm.call.preallocated.setup, return the corresponding
+/// preallocated call.
+static const CallBase *FindPreallocatedCall(const Value *PreallocatedSetup) {
+  assert(cast<CallBase>(PreallocatedSetup)
+                 ->getCalledFunction()
+                 ->getIntrinsicID() == Intrinsic::call_preallocated_setup &&
+         "expected call_preallocated_setup Value");
+  for (auto *U : PreallocatedSetup->users()) {
+    auto *UseCall = cast<CallBase>(U);
+    const Function *Fn = UseCall->getCalledFunction();
+    if (!Fn || Fn->getIntrinsicID() != Intrinsic::call_preallocated_arg) {
+      return UseCall;
+    }
+  }
+  llvm_unreachable("expected corresponding call to preallocated setup/arg");
+}
+
 /// Lower the call to the specified intrinsic function.
 void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
                                              unsigned Intrinsic) {
@@ -5798,6 +5815,30 @@ void SelectionDAGBuilder::visitIntrinsicCall(const CallInst &I,
     updateDAGForMaybeTailCall(MC);
     return;
   }
+  case Intrinsic::call_preallocated_setup: {
+    const CallBase *PreallocatedCall = FindPreallocatedCall(&I);
+    SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+    SDValue Res = DAG.getNode(ISD::PREALLOCATED_SETUP, sdl, MVT::Other,
+                              getRoot(), SrcValue);
+    setValue(&I, Res);
+    DAG.setRoot(Res);
+    return;
+  }
+  case Intrinsic::call_preallocated_arg: {
+    const CallBase *PreallocatedCall = FindPreallocatedCall(I.getOperand(0));
+    SDValue SrcValue = DAG.getSrcValue(PreallocatedCall);
+    SDValue Ops[3];
+    Ops[0] = getRoot();
+    Ops[1] = SrcValue;
+    Ops[2] = DAG.getTargetConstant(*cast<ConstantInt>(I.getArgOperand(1)), sdl,
+                                   MVT::i32); // arg index
+    SDValue Res = DAG.getNode(
+        ISD::PREALLOCATED_ARG, sdl,
+        DAG.getVTList(TLI.getPointerTy(DAG.getDataLayout()), MVT::Other), Ops);
+    setValue(&I, Res);
+    DAG.setRoot(Res.getValue(1));
+    return;
+  }
   case Intrinsic::dbg_addr:
   case Intrinsic::dbg_declare: {
     const auto &DI = cast<DbgVariableIntrinsic>(I);
@@ -7116,7 +7157,9 @@ void SelectionDAGBuilder::LowerCallTo(const CallBase &CB, SDValue Callee,
       .setChain(getRoot())
       .setCallee(RetTy, FTy, Callee, std::move(Args), CB)
       .setTailCall(isTailCall)
-      .setConvergent(CB.isConvergent());
+      .setConvergent(CB.isConvergent())
+      .setIsPreallocated(
+          CB.countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
   std::pair<SDValue, SDValue> Result = lowerInvokable(CLI, EHPadBB);
 
   if (Result.first.getNode()) {
@@ -7642,9 +7685,9 @@ void SelectionDAGBuilder::visitCall(const CallInst &I) {
   // Deopt bundles are lowered in LowerCallSiteWithDeoptBundle, and we don't
   // have to do anything here to lower funclet bundles.
   // CFGuardTarget bundles are lowered in LowerCallTo.
-  assert(!I.hasOperandBundlesOtherThan({LLVMContext::OB_deopt,
-                                        LLVMContext::OB_funclet,
-                                        LLVMContext::OB_cfguardtarget}) &&
+  assert(!I.hasOperandBundlesOtherThan(
+             {LLVMContext::OB_deopt, LLVMContext::OB_funclet,
+              LLVMContext::OB_cfguardtarget, LLVMContext::OB_preallocated}) &&
          "Cannot lower calls with arbitrary operand bundles!");
 
   SDValue Callee = getValue(I.getCalledOperand());
@@ -8605,7 +8648,9 @@ void SelectionDAGBuilder::populateCallLoweringInfo(
       .setChain(getRoot())
       .setCallee(Call->getCallingConv(), ReturnTy, Callee, std::move(Args))
       .setDiscardResult(Call->use_empty())
-      .setIsPatchPoint(IsPatchPoint);
+      .setIsPatchPoint(IsPatchPoint)
+      .setIsPreallocated(
+          Call->countOperandBundlesOfType(LLVMContext::OB_preallocated) != 0);
 }
 
 /// Add a stack map intrinsic call's live variable operands to a stackmap
@@ -9125,6 +9170,15 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         Flags.setCFGuardTarget();
       if (Args[i].IsByVal)
         Flags.setByVal();
+      if (Args[i].IsPreallocated) {
+        Flags.setPreallocated();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // preallocated.  This way we can know how many bytes we should've
+        // allocated and how many bytes a callee cleanup function will pop.  If
+        // we port preallocated to more targets, we'll have to add custom
+        // preallocated handling in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
       if (Args[i].IsInAlloca) {
         Flags.setInAlloca();
         // Set the byval flag for CCAssignFn callbacks that don't know about
@@ -9134,7 +9188,7 @@ TargetLowering::LowerCallTo(TargetLowering::CallLoweringInfo &CLI) const {
         // in the various CC lowering callbacks.
         Flags.setByVal();
       }
-      if (Args[i].IsByVal || Args[i].IsInAlloca) {
+      if (Args[i].IsByVal || Args[i].IsInAlloca || Args[i].IsPreallocated) {
         PointerType *Ty = cast<PointerType>(Args[i].Ty);
         Type *ElementTy = Ty->getElementType();
 
@@ -9448,7 +9502,7 @@ findArgumentCopyElisionCandidates(const DataLayout &DL,
     // initializes the alloca. Don't elide copies from the same argument twice.
     const Value *Val = SI->getValueOperand()->stripPointerCasts();
     const auto *Arg = dyn_cast<Argument>(Val);
-    if (!Arg || Arg->hasInAllocaAttr() || Arg->hasByValAttr() ||
+    if (!Arg || Arg->hasPassPointeeByValueAttr() ||
         Arg->getType()->isEmptyTy() ||
         DL.getTypeStoreSize(Arg->getType()) !=
             DL.getTypeAllocSize(AI->getAllocatedType()) ||
@@ -9634,12 +9688,21 @@ void SelectionDAGISel::LowerArguments(const Function &F) {
         // in the various CC lowering callbacks.
         Flags.setByVal();
       }
+      if (Arg.hasAttribute(Attribute::Preallocated)) {
+        Flags.setPreallocated();
+        // Set the byval flag for CCAssignFn callbacks that don't know about
+        // preallocated.  This way we can know how many bytes we should've
+        // allocated and how many bytes a callee cleanup function will pop.  If
+        // we port preallocated to more targets, we'll have to add custom
+        // preallocated handling in the various CC lowering callbacks.
+        Flags.setByVal();
+      }
       if (F.getCallingConv() == CallingConv::X86_INTR) {
         // IA Interrupt passes frame (1st parameter) by value in the stack.
         if (ArgNo == 0)
           Flags.setByVal();
       }
-      if (Flags.isByVal() || Flags.isInAlloca()) {
+      if (Flags.isByVal() || Flags.isInAlloca() || Flags.isPreallocated()) {
         Type *ElementTy = Arg.getParamByValType();
 
         // For ByVal, size and alignment should be passed from FE.  BE will

diff  --git a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
index f81d18c6ad52..816b1dcded2e 100644
--- a/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp
@@ -393,6 +393,10 @@ std::string SDNode::getOperationName(const SelectionDAG *G) const {
   case ISD::GC_TRANSITION_END:          return "gc_transition.end";
   case ISD::GET_DYNAMIC_AREA_OFFSET:    return "get.dynamic.area.offset";
   case ISD::FREEZE:                     return "freeze";
+  case ISD::PREALLOCATED_SETUP:
+    return "call_setup";
+  case ISD::PREALLOCATED_ARG:
+    return "call_alloc";
 
   // Bit manipulation
   case ISD::ABS:                        return "abs";

diff  --git a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
index a8890788ccbf..0c7df1cae261 100644
--- a/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
+++ b/llvm/lib/CodeGen/SelectionDAG/TargetLowering.cpp
@@ -110,14 +110,18 @@ void TargetLoweringBase::ArgListEntry::setAttributes(const CallBase *Call,
   IsSRet = Call->paramHasAttr(ArgIdx, Attribute::StructRet);
   IsNest = Call->paramHasAttr(ArgIdx, Attribute::Nest);
   IsByVal = Call->paramHasAttr(ArgIdx, Attribute::ByVal);
+  IsPreallocated = Call->paramHasAttr(ArgIdx, Attribute::Preallocated);
   IsInAlloca = Call->paramHasAttr(ArgIdx, Attribute::InAlloca);
   IsReturned = Call->paramHasAttr(ArgIdx, Attribute::Returned);
   IsSwiftSelf = Call->paramHasAttr(ArgIdx, Attribute::SwiftSelf);
   IsSwiftError = Call->paramHasAttr(ArgIdx, Attribute::SwiftError);
   Alignment = Call->getParamAlign(ArgIdx);
   ByValType = nullptr;
-  if (Call->paramHasAttr(ArgIdx, Attribute::ByVal))
+  if (IsByVal)
     ByValType = Call->getParamByValType(ArgIdx);
+  PreallocatedType = nullptr;
+  if (IsPreallocated)
+    PreallocatedType = Call->getParamPreallocatedType(ArgIdx);
 }
 
 /// Generate a libcall taking the given operands as arguments and returning a

diff  --git a/llvm/lib/IR/Attributes.cpp b/llvm/lib/IR/Attributes.cpp
index bf397409478f..122cfe5d5fca 100644
--- a/llvm/lib/IR/Attributes.cpp
+++ b/llvm/lib/IR/Attributes.cpp
@@ -1437,6 +1437,10 @@ Type *AttributeList::getParamByValType(unsigned Index) const {
   return getAttributes(Index+FirstArgIndex).getByValType();
 }
 
+Type *AttributeList::getParamPreallocatedType(unsigned Index) const {
+  return getAttributes(Index + FirstArgIndex).getPreallocatedType();
+}
+
 MaybeAlign AttributeList::getStackAlignment(unsigned Index) const {
   return getAttributes(Index).getStackAlignment();
 }

diff  --git a/llvm/lib/IR/Function.cpp b/llvm/lib/IR/Function.cpp
index d8663be873c8..cb444292b95a 100644
--- a/llvm/lib/IR/Function.cpp
+++ b/llvm/lib/IR/Function.cpp
@@ -114,6 +114,12 @@ bool Argument::hasInAllocaAttr() const {
   return hasAttribute(Attribute::InAlloca);
 }
 
+bool Argument::hasPreallocatedAttr() const {
+  if (!getType()->isPointerTy())
+    return false;
+  return hasAttribute(Attribute::Preallocated);
+}
+
 bool Argument::hasPassPointeeByValueAttr() const {
   if (!getType()->isPointerTy()) return false;
   AttributeList Attrs = getParent()->getAttributes();

diff  --git a/llvm/lib/Target/X86/X86CallingConv.td b/llvm/lib/Target/X86/X86CallingConv.td
index 3ec947dfdb8c..802e694999b6 100644
--- a/llvm/lib/Target/X86/X86CallingConv.td
+++ b/llvm/lib/Target/X86/X86CallingConv.td
@@ -789,8 +789,9 @@ def CC_X86_32_Vector_Darwin : CallingConv<[
 /// CC_X86_32_Common - In all X86-32 calling conventions, extra integers and FP
 /// values are spilled on the stack.
 def CC_X86_32_Common : CallingConv<[
-  // Handles byval parameters.
+  // Handles byval/preallocated parameters.
   CCIfByVal<CCPassByVal<4, 4>>,
+  CCIfPreallocated<CCPassByVal<4, 4>>,
 
   // The first 3 float or double arguments, if marked 'inreg' and if the call
   // is not a vararg call and if SSE2 is available, are passed in SSE registers.

diff  --git a/llvm/lib/Target/X86/X86FastISel.cpp b/llvm/lib/Target/X86/X86FastISel.cpp
index 5bc4edcef222..5a51e249f5ce 100644
--- a/llvm/lib/Target/X86/X86FastISel.cpp
+++ b/llvm/lib/Target/X86/X86FastISel.cpp
@@ -3245,7 +3245,7 @@ bool X86FastISel::fastLowerCall(CallLoweringInfo &CLI) {
     return false;
 
   for (auto Flag : CLI.OutFlags)
-    if (Flag.isSwiftError())
+    if (Flag.isSwiftError() || Flag.isPreallocated())
       return false;
 
   SmallVector<MVT, 16> OutVTs;

diff  --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index b538d0b407df..e00a26049f1c 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -57,7 +57,8 @@ X86FrameLowering::X86FrameLowering(const X86Subtarget &STI,
 
 bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
   return !MF.getFrameInfo().hasVarSizedObjects() &&
-         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences();
+         !MF.getInfo<X86MachineFunctionInfo>()->getHasPushSequences() &&
+         !MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall();
 }
 
 /// canSimplifyCallFramePseudos - If there is a reserved call frame, the
@@ -67,6 +68,7 @@ bool X86FrameLowering::hasReservedCallFrame(const MachineFunction &MF) const {
 bool
 X86FrameLowering::canSimplifyCallFramePseudos(const MachineFunction &MF) const {
   return hasReservedCallFrame(MF) ||
+         MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
          (hasFP(MF) && !TRI->needsStackRealignment(MF)) ||
          TRI->hasBasePointer(MF);
 }
@@ -90,10 +92,10 @@ X86FrameLowering::needsFrameIndexResolution(const MachineFunction &MF) const {
 bool X86FrameLowering::hasFP(const MachineFunction &MF) const {
   const MachineFrameInfo &MFI = MF.getFrameInfo();
   return (MF.getTarget().Options.DisableFramePointerElim(MF) ||
-          TRI->needsStackRealignment(MF) ||
-          MFI.hasVarSizedObjects() ||
+          TRI->needsStackRealignment(MF) || MFI.hasVarSizedObjects() ||
           MFI.isFrameAddressTaken() || MFI.hasOpaqueSPAdjustment() ||
           MF.getInfo<X86MachineFunctionInfo>()->getForceFramePointer() ||
+          MF.getInfo<X86MachineFunctionInfo>()->hasPreallocatedCall() ||
           MF.callsUnwindInit() || MF.hasEHFunclets() || MF.callsEHReturn() ||
           MFI.hasStackMap() || MFI.hasPatchPoint() ||
           MFI.hasCopyImplyingStackAdjustment());

diff  --git a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
index ffe9d432ccdf..5586efbf561e 100644
--- a/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
+++ b/llvm/lib/Target/X86/X86ISelDAGToDAG.cpp
@@ -5552,6 +5552,39 @@ void X86DAGToDAGISel::Select(SDNode *Node) {
     CurDAG->RemoveDeadNode(Node);
     return;
   }
+  case ISD::PREALLOCATED_SETUP: {
+    auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->getPreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_SETUP, dl, MVT::Other, CallIdValue, Chain);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
+  case ISD::PREALLOCATED_ARG: {
+    auto *MFI = CurDAG->getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    auto CallId = MFI->getPreallocatedIdForCallSite(
+        cast<SrcValueSDNode>(Node->getOperand(1))->getValue());
+    SDValue Chain = Node->getOperand(0);
+    SDValue CallIdValue = CurDAG->getTargetConstant(CallId, dl, MVT::i32);
+    SDValue ArgIndex = Node->getOperand(2);
+    SDValue Ops[3];
+    Ops[0] = CallIdValue;
+    Ops[1] = ArgIndex;
+    Ops[2] = Chain;
+    MachineSDNode *New = CurDAG->getMachineNode(
+        TargetOpcode::PREALLOCATED_ARG, dl,
+        CurDAG->getVTList(TLI->getPointerTy(CurDAG->getDataLayout()),
+                          MVT::Other),
+        Ops);
+    ReplaceUses(SDValue(Node, 0), SDValue(New, 0)); // Arg pointer
+    ReplaceUses(SDValue(Node, 1), SDValue(New, 1)); // Chain
+    CurDAG->RemoveDeadNode(Node);
+    return;
+  }
   }
 
   SelectCode(Node);

diff  --git a/llvm/lib/Target/X86/X86ISelLowering.cpp b/llvm/lib/Target/X86/X86ISelLowering.cpp
index f4a88de7a1d7..8380a0df2f0e 100644
--- a/llvm/lib/Target/X86/X86ISelLowering.cpp
+++ b/llvm/lib/Target/X86/X86ISelLowering.cpp
@@ -3945,6 +3945,21 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
     if (ArgLocs.back().getLocMemOffset() != 0)
       report_fatal_error("any parameter with the inalloca attribute must be "
                          "the only memory argument");
+  } else if (CLI.IsPreallocated) {
+    assert(ArgLocs.back().isMemLoc() &&
+           "cannot use preallocated attribute on a register "
+           "parameter");
+    SmallVector<size_t, 4> PreallocatedOffsets;
+    for (size_t i = 0; i < CLI.OutVals.size(); ++i) {
+      if (CLI.CB->paramHasAttr(i, Attribute::Preallocated)) {
+        PreallocatedOffsets.push_back(ArgLocs[i].getLocMemOffset());
+      }
+    }
+    auto *MFI = DAG.getMachineFunction().getInfo<X86MachineFunctionInfo>();
+    size_t PreallocatedId = MFI->getPreallocatedIdForCallSite(CLI.CB);
+    MFI->setPreallocatedStackSize(PreallocatedId, NumBytes);
+    MFI->setPreallocatedArgOffsets(PreallocatedId, PreallocatedOffsets);
+    NumBytesToPush = 0;
   }
 
   if (!IsSibcall && !IsMustTail)
@@ -3972,9 +3987,9 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
   for (unsigned I = 0, OutIndex = 0, E = ArgLocs.size(); I != E;
        ++I, ++OutIndex) {
     assert(OutIndex < Outs.size() && "Invalid Out index");
-    // Skip inalloca arguments, they have already been written.
+    // Skip inalloca/preallocated arguments, they have already been written.
     ISD::ArgFlagsTy Flags = Outs[OutIndex].Flags;
-    if (Flags.isInAlloca())
+    if (Flags.isInAlloca() || Flags.isPreallocated())
       continue;
 
     CCValAssign &VA = ArgLocs[I];
@@ -4161,8 +4176,8 @@ X86TargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
       assert(VA.isMemLoc());
       SDValue Arg = OutVals[OutsIndex];
       ISD::ArgFlagsTy Flags = Outs[OutsIndex].Flags;
-      // Skip inalloca arguments.  They don't require any work.
-      if (Flags.isInAlloca())
+      // Skip inalloca/preallocated arguments.  They don't require any work.
+      if (Flags.isInAlloca() || Flags.isPreallocated())
         continue;
       // Create frame index.
       int32_t Offset = VA.getLocMemOffset()+FPDiff;
@@ -33076,6 +33091,36 @@ X86TargetLowering::EmitInstrWithCustomInserter(MachineInstr &MI,
       BB->addLiveIn(BasePtr);
     return BB;
   }
+  case TargetOpcode::PREALLOCATED_SETUP: {
+    assert(Subtarget.is32Bit() && "preallocated only used in 32-bit");
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    MFI->setHasPreallocatedCall(true);
+    int64_t PreallocatedId = MI.getOperand(0).getImm();
+    size_t StackAdjustment = MFI->getPreallocatedStackSize(PreallocatedId);
+    assert(StackAdjustment != 0 && "0 stack adjustment");
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_SETUP stack adjustment "
+                      << StackAdjustment << "\n");
+    BuildMI(*BB, MI, DL, TII->get(X86::SUB32ri), X86::ESP)
+        .addReg(X86::ESP)
+        .addImm(StackAdjustment);
+    MI.eraseFromParent();
+    return BB;
+  }
+  case TargetOpcode::PREALLOCATED_ARG: {
+    assert(Subtarget.is32Bit() && "preallocated calls only used in 32-bit");
+    int64_t PreallocatedId = MI.getOperand(1).getImm();
+    int64_t ArgIdx = MI.getOperand(2).getImm();
+    auto MFI = MF->getInfo<X86MachineFunctionInfo>();
+    size_t ArgOffset = MFI->getPreallocatedArgOffsets(PreallocatedId)[ArgIdx];
+    LLVM_DEBUG(dbgs() << "PREALLOCATED_ARG arg index " << ArgIdx
+                      << ", arg offset " << ArgOffset << "\n");
+    // stack pointer + offset
+    addRegOffset(
+        BuildMI(*BB, MI, DL, TII->get(X86::LEA32r), MI.getOperand(0).getReg()),
+        X86::ESP, false, ArgOffset);
+    MI.eraseFromParent();
+    return BB;
+  }
   }
 }
 

diff  --git a/llvm/lib/Target/X86/X86MachineFunctionInfo.h b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
index 70deef9e4904..eedad952c3b9 100644
--- a/llvm/lib/Target/X86/X86MachineFunctionInfo.h
+++ b/llvm/lib/Target/X86/X86MachineFunctionInfo.h
@@ -13,6 +13,8 @@
 #ifndef LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
 #define LLVM_LIB_TARGET_X86_X86MACHINEFUNCTIONINFO_H
 
+#include "llvm/ADT/ArrayRef.h"
+#include "llvm/ADT/SmallVector.h"
 #include "llvm/CodeGen/CallingConvLower.h"
 #include "llvm/CodeGen/MachineFunction.h"
 
@@ -103,6 +105,13 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
   /// True if this function has WIN_ALLOCA instructions.
   bool HasWinAlloca = false;
 
+  /// True if this function has any preallocated calls.
+  bool HasPreallocatedCall = false;
+
+  ValueMap<const Value *, size_t> PreallocatedIds;
+  SmallVector<size_t, 0> PreallocatedStackSizes;
+  SmallVector<SmallVector<size_t, 4>, 0> PreallocatedArgOffsets;
+
 private:
   /// ForwardedMustTailRegParms - A list of virtual and physical registers
   /// that must be forwarded to every musttail call.
@@ -184,6 +193,36 @@ class X86MachineFunctionInfo : public MachineFunctionInfo {
 
   bool hasWinAlloca() const { return HasWinAlloca; }
   void setHasWinAlloca(bool v) { HasWinAlloca = v; }
+
+  bool hasPreallocatedCall() const { return HasPreallocatedCall; }
+  void setHasPreallocatedCall(bool v) { HasPreallocatedCall = v; }
+
+  size_t getPreallocatedIdForCallSite(const Value *CS) {
+    auto Insert = PreallocatedIds.insert({CS, PreallocatedIds.size()});
+    if (Insert.second) {
+      PreallocatedStackSizes.push_back(0);
+      PreallocatedArgOffsets.emplace_back();
+    }
+    return Insert.first->second;
+  }
+
+  void setPreallocatedStackSize(size_t Id, size_t StackSize) {
+    PreallocatedStackSizes[Id] = StackSize;
+  }
+
+  size_t getPreallocatedStackSize(const size_t Id) {
+    assert(PreallocatedStackSizes[Id] != 0 && "stack size not set");
+    return PreallocatedStackSizes[Id];
+  }
+
+  void setPreallocatedArgOffsets(size_t Id, ArrayRef<size_t> AO) {
+    PreallocatedArgOffsets[Id].assign(AO.begin(), AO.end());
+  }
+
+  const ArrayRef<size_t> getPreallocatedArgOffsets(const size_t Id) {
+    assert(!PreallocatedArgOffsets[Id].empty() && "arg offsets not set");
+    return PreallocatedArgOffsets[Id];
+  }
 };
 
 } // End llvm namespace

diff  --git a/llvm/lib/Target/X86/X86RegisterInfo.cpp b/llvm/lib/Target/X86/X86RegisterInfo.cpp
index 78346327e289..f456728cf47b 100644
--- a/llvm/lib/Target/X86/X86RegisterInfo.cpp
+++ b/llvm/lib/Target/X86/X86RegisterInfo.cpp
@@ -627,18 +627,22 @@ static bool CantUseSP(const MachineFrameInfo &MFI) {
 }
 
 bool X86RegisterInfo::hasBasePointer(const MachineFunction &MF) const {
-   const MachineFrameInfo &MFI = MF.getFrameInfo();
-
-   if (!EnableBasePointer)
-     return false;
-
-   // When we need stack realignment, we can't address the stack from the frame
-   // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
-   // can't address variables from the stack pointer.  MS inline asm can
-   // reference locals while also adjusting the stack pointer.  When we can't
-   // use both the SP and the FP, we need a separate base pointer register.
-   bool CantUseFP = needsStackRealignment(MF);
-   return CantUseFP && CantUseSP(MFI);
+  const X86MachineFunctionInfo *X86FI = MF.getInfo<X86MachineFunctionInfo>();
+  if (X86FI->hasPreallocatedCall())
+    return true;
+
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+
+  if (!EnableBasePointer)
+    return false;
+
+  // When we need stack realignment, we can't address the stack from the frame
+  // pointer.  When we have dynamic allocas or stack-adjusting inline asm, we
+  // can't address variables from the stack pointer.  MS inline asm can
+  // reference locals while also adjusting the stack pointer.  When we can't
+  // use both the SP and the FP, we need a separate base pointer register.
+  bool CantUseFP = needsStackRealignment(MF);
+  return CantUseFP && CantUseSP(MFI);
 }
 
 bool X86RegisterInfo::canRealignStack(const MachineFunction &MF) const {

diff  --git a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
index 520aa9d04d32..4fd127656f21 100644
--- a/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
+++ b/llvm/lib/Transforms/Coroutines/CoroSplit.cpp
@@ -1015,9 +1015,9 @@ static bool shouldBeMustTail(const CallInst &CI, const Function &F) {
 
   // CI should not has any ABI-impacting function attributes.
   static const Attribute::AttrKind ABIAttrs[] = {
-      Attribute::StructRet, Attribute::ByVal,    Attribute::InAlloca,
-      Attribute::InReg,     Attribute::Returned, Attribute::SwiftSelf,
-      Attribute::SwiftError};
+      Attribute::StructRet,    Attribute::ByVal,     Attribute::InAlloca,
+      Attribute::Preallocated, Attribute::InReg,     Attribute::Returned,
+      Attribute::SwiftSelf,    Attribute::SwiftError};
   AttributeList Attrs = CI.getAttributes();
   for (auto AK : ABIAttrs)
     if (Attrs.hasParamAttribute(0, AK))

diff  --git a/llvm/lib/Transforms/IPO/Attributor.cpp b/llvm/lib/Transforms/IPO/Attributor.cpp
index a4f0965c27cf..df6cbba83be2 100644
--- a/llvm/lib/Transforms/IPO/Attributor.cpp
+++ b/llvm/lib/Transforms/IPO/Attributor.cpp
@@ -1363,7 +1363,8 @@ bool Attributor::isValidFunctionSignatureRewrite(
   AttributeList FnAttributeList = Fn->getAttributes();
   if (FnAttributeList.hasAttrSomewhere(Attribute::Nest) ||
       FnAttributeList.hasAttrSomewhere(Attribute::StructRet) ||
-      FnAttributeList.hasAttrSomewhere(Attribute::InAlloca)) {
+      FnAttributeList.hasAttrSomewhere(Attribute::InAlloca) ||
+      FnAttributeList.hasAttrSomewhere(Attribute::Preallocated)) {
     LLVM_DEBUG(
         dbgs() << "[Attributor] Cannot rewrite due to complex attribute\n");
     return false;

diff  --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
index 3af6d0898f96..0117a73003b1 100644
--- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
+++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp
@@ -4455,7 +4455,8 @@ struct AAValueSimplifyArgument final : AAValueSimplifyImpl {
     AAValueSimplifyImpl::initialize(A);
     if (!getAnchorScope() || getAnchorScope()->isDeclaration())
       indicatePessimisticFixpoint();
-    if (hasAttr({Attribute::InAlloca, Attribute::StructRet, Attribute::Nest},
+    if (hasAttr({Attribute::InAlloca, Attribute::Preallocated,
+                 Attribute::StructRet, Attribute::Nest},
                 /* IgnoreSubsumingPositions */ true))
       indicatePessimisticFixpoint();
 
@@ -5695,7 +5696,7 @@ struct AAMemoryBehaviorArgument : AAMemoryBehaviorFloating {
 
     // TODO: From readattrs.ll: "inalloca parameters are always
     //                           considered written"
-    if (hasAttr({Attribute::InAlloca})) {
+    if (hasAttr({Attribute::InAlloca, Attribute::Preallocated})) {
       removeKnownBits(NO_WRITES);
       removeAssumedBits(NO_WRITES);
     }

diff  --git a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
index 55039f42bdd5..5dc1a6ff6327 100644
--- a/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
+++ b/llvm/lib/Transforms/IPO/DeadArgumentElimination.cpp
@@ -483,9 +483,10 @@ DeadArgumentEliminationPass::SurveyUses(const Value *V,
 // We consider arguments of non-internal functions to be intrinsically alive as
 // well as arguments to functions which have their "address taken".
 void DeadArgumentEliminationPass::SurveyFunction(const Function &F) {
-  // Functions with inalloca parameters are expecting args in a particular
-  // register and memory layout.
-  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca)) {
+  // Functions with inalloca/preallocated parameters are expecting args in a
+  // particular register and memory layout.
+  if (F.getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      F.getAttributes().hasAttrSomewhere(Attribute::Preallocated)) {
     MarkLive(F);
     return;
   }

diff  --git a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
index 0701dbc50958..4baeaa6e1630 100644
--- a/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
+++ b/llvm/lib/Transforms/IPO/FunctionAttrs.cpp
@@ -447,7 +447,7 @@ determinePointerReadAttrs(Argument *A,
   SmallPtrSet<Use *, 32> Visited;
 
   // inalloca arguments are always clobbered by the call.
-  if (A->hasInAllocaAttr())
+  if (A->hasInAllocaAttr() || A->hasPreallocatedAttr())
     return Attribute::None;
 
   bool IsRead = false;

diff  --git a/llvm/lib/Transforms/IPO/GlobalOpt.cpp b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
index c5837ea87f34..0257be156f30 100644
--- a/llvm/lib/Transforms/IPO/GlobalOpt.cpp
+++ b/llvm/lib/Transforms/IPO/GlobalOpt.cpp
@@ -2333,6 +2333,7 @@ OptimizeFunctions(Module &M,
     // wouldn't be safe in the presence of inalloca.
     // FIXME: We should also hoist alloca affected by this to the entry
     // block if possible.
+    // FIXME: handle preallocated
     if (F->getAttributes().hasAttrSomewhere(Attribute::InAlloca) &&
         !F->hasAddressTaken()) {
       RemoveAttribute(F, Attribute::InAlloca);

diff  --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
index d5540115bf87..64ba81abd7ba 100644
--- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp
@@ -4737,6 +4737,7 @@ bool InstCombiner::transformConstExprCastCall(CallBase &Call) {
   //
   //  Similarly, avoid folding away bitcasts of byval calls.
   if (Callee->getAttributes().hasAttrSomewhere(Attribute::InAlloca) ||
+      Callee->getAttributes().hasAttrSomewhere(Attribute::Preallocated) ||
       Callee->getAttributes().hasAttrSomewhere(Attribute::ByVal))
     return false;
 

diff  --git a/llvm/test/CodeGen/X86/arg-copy-elide.ll b/llvm/test/CodeGen/X86/arg-copy-elide.ll
index 705a35615561..14db84fae449 100644
--- a/llvm/test/CodeGen/X86/arg-copy-elide.ll
+++ b/llvm/test/CodeGen/X86/arg-copy-elide.ll
@@ -246,6 +246,20 @@ entry:
 ; CHECK: calll _addrof_i32
 ; CHECK: retl
 
+define void @avoid_preallocated(i32* preallocated(i32) %x) {
+entry:
+  %x.p.p = alloca i32*
+  store i32* %x, i32** %x.p.p
+  call void @addrof_i32(i32* %x)
+  ret void
+}
+
+; CHECK-LABEL: _avoid_preallocated:
+; CHECK: leal {{[0-9]+}}(%esp), %[[reg:[^ ]*]]
+; CHECK: pushl %[[reg]]
+; CHECK: calll _addrof_i32
+; CHECK: retl
+
 ; Don't elide the copy when the alloca is escaped with a store.
 define void @escape_with_store(i32 %x) {
   %x1 = alloca i32

diff  --git a/llvm/test/CodeGen/X86/musttail-indirect.ll b/llvm/test/CodeGen/X86/musttail-indirect.ll
index c142ffae69d0..285ad9dcf4c9 100644
--- a/llvm/test/CodeGen/X86/musttail-indirect.ll
+++ b/llvm/test/CodeGen/X86/musttail-indirect.ll
@@ -22,6 +22,8 @@
 ; Each member pointer creates a thunk.  The ones with inalloca are required to
 ; tail calls by the ABI, even at O0.
 
+; TODO: add tests for preallocated/musttail once supported
+
 %struct.B = type { i32 (...)** }
 %struct.A = type { i32 }
 

diff  --git a/llvm/test/CodeGen/X86/musttail-thiscall.ll b/llvm/test/CodeGen/X86/musttail-thiscall.ll
index a1ddbd5d1cbc..5cc8faa48e75 100644
--- a/llvm/test/CodeGen/X86/musttail-thiscall.ll
+++ b/llvm/test/CodeGen/X86/musttail-thiscall.ll
@@ -1,6 +1,8 @@
 ; RUN: llc -verify-machineinstrs -mtriple=i686-- < %s | FileCheck %s
 ; RUN: llc -verify-machineinstrs -mtriple=i686-- -O0 < %s | FileCheck %s
 
+; TODO: add tests for preallocated/musttail once supported
+
 ; CHECK-LABEL: t1:
 ; CHECK: jmp {{_?}}t1_callee
 define x86_thiscallcc void @t1(i8* %this) {

diff  --git a/llvm/test/CodeGen/X86/preallocated-nocall.ll b/llvm/test/CodeGen/X86/preallocated-nocall.ll
new file mode 100644
index 000000000000..dc88eb8d0260
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-nocall.ll
@@ -0,0 +1,23 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+; REQUIRES: asserts
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @no_call() {
+; CHECK-LABEL: _no_call:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @init(%Foo* %b)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/preallocated-x64.ll b/llvm/test/CodeGen/X86/preallocated-x64.ll
new file mode 100644
index 000000000000..89b181955ca6
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated-x64.ll
@@ -0,0 +1,18 @@
+; RUN: llc %s -mtriple=x86_64-windows-msvc -o /dev/null 2>&1
+; REQUIRES: asserts
+; XFAIL: *
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare x86_thiscallcc void @f(i32, %Foo* preallocated(%Foo))
+
+define void @g() {
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  call void @f(i32 0, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/preallocated.ll b/llvm/test/CodeGen/X86/preallocated.ll
new file mode 100644
index 000000000000..8de69883e091
--- /dev/null
+++ b/llvm/test/CodeGen/X86/preallocated.ll
@@ -0,0 +1,187 @@
+; RUN: llc < %s -mtriple=i686-pc-win32 | FileCheck %s
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+%Foo = type { i32, i32 }
+
+declare void @init(%Foo*)
+
+
+
+declare void @foo_p(%Foo* preallocated(%Foo))
+
+define void @one_preallocated() {
+; CHECK-LABEL: _one_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @one_preallocated_two_blocks() {
+; CHECK-LABEL: _one_preallocated_two_blocks:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  br label %second
+second:
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_store() {
+; CHECK-LABEL: _preallocated_with_store:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+  %p0 = getelementptr %Foo, %Foo* %b, i32 0, i32 0
+  %p1 = getelementptr %Foo, %Foo* %b, i32 0, i32 1
+  store i32 13, i32* %p0
+  store i32 42, i32* %p1
+; CHECK-DAG: movl $13, ([[REGISTER]])
+; CHECK-DAG: movl $42, 4([[REGISTER]])
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+define void @preallocated_with_init() {
+; CHECK-LABEL: _preallocated_with_init:
+; CHECK: subl $8, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_p
+  call void @foo_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_p(%Foo* preallocated(%Foo), %Foo* preallocated(%Foo))
+
+define void @two_preallocated() {
+; CHECK-LABEL: _two_preallocated:
+  %t = call token @llvm.call.preallocated.setup(i32 2)
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t, i32 1) preallocated(%Foo)
+  %b2 = bitcast i8* %a2 to %Foo*
+; CHECK: subl $16, %esp
+; CHECK: calll _foo_p_p
+  call void @foo_p_p(%Foo* preallocated(%Foo) %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_p_int(%Foo* preallocated(%Foo), i32)
+
+define void @one_preallocated_one_normal() {
+; CHECK-LABEL: _one_preallocated_one_normal:
+; CHECK: subl $12, %esp
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: leal (%esp), [[REGISTER:%[a-z]+]]
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: pushl [[REGISTER]]
+; CHECK: calll _init
+  call void @init(%Foo* %b)
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: movl $2, 8(%esp)
+; CHECK: calll _foo_p_int
+  call void @foo_p_int(%Foo* preallocated(%Foo) %b, i32 2) ["preallocated"(token %t)]
+  ret void
+}
+
+declare void @foo_ret_p(%Foo* sret, %Foo* preallocated(%Foo))
+
+define void @nested_with_init() {
+; CHECK-LABEL: _nested_with_init:
+  %tmp = alloca %Foo
+
+  %t1 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a1 = call i8* @llvm.call.preallocated.arg(token %t1, i32 0) preallocated(%Foo)
+  %b1 = bitcast i8* %a1 to %Foo*
+; CHECK: leal 4(%esp), [[REGISTER1:%[a-z]+]]
+
+  %t2 = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK: subl $12, %esp
+  %a2 = call i8* @llvm.call.preallocated.arg(token %t2, i32 0) preallocated(%Foo)
+; CHECK: leal 4(%esp), [[REGISTER2:%[a-z]+]]
+  %b2 = bitcast i8* %a2 to %Foo*
+
+  call void @init(%Foo* %b2)
+; CHECK: pushl [[REGISTER2]]
+; CHECK: calll _init
+
+  call void @foo_ret_p(%Foo* %b1, %Foo* preallocated(%Foo) %b2) ["preallocated"(token %t2)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  call void @foo_ret_p(%Foo* %tmp, %Foo* preallocated(%Foo) %b1) ["preallocated"(token %t1)]
+; CHECK-NOT: subl {{\$[0-9]+}}, %esp
+; CHECK-NOT: pushl
+; CHECK: calll _foo_ret_p
+  ret void
+}
+
+declare void @foo_inreg_p(i32 inreg, %Foo* preallocated(%Foo))
+
+define void @inreg() {
+; CHECK-LABEL: _inreg:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: movl $9, %eax
+; CHECK: calll _foo_inreg_p
+  call void @foo_inreg_p(i32 9, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_thiscallcc void @foo_thiscall_p(i8*, %Foo* preallocated(%Foo))
+
+define void @thiscall() {
+; CHECK-LABEL: _thiscall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: xorl %ecx, %ecx
+; CHECK: calll _foo_thiscall_p
+  call x86_thiscallcc void @foo_thiscall_p(i8* null, %Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+  ret void
+}
+
+declare x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo))
+declare x86_stdcallcc void @i(i32)
+
+define void @stdcall() {
+; CHECK-LABEL: _stdcall:
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(%Foo)
+  %b = bitcast i8* %a to %Foo*
+; CHECK: subl $8, %esp
+; CHECK: calll _foo_stdcall_p at 8
+  call x86_stdcallcc void @foo_stdcall_p(%Foo* preallocated(%Foo) %b) ["preallocated"(token %t)]
+; CHECK-NOT: %esp
+; CHECK: pushl
+; CHECK: calll _i at 4
+  call x86_stdcallcc void @i(i32 0)
+  ret void
+}

diff  --git a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
index 1364732813ad..32e046df6bb0 100644
--- a/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
+++ b/llvm/test/CodeGen/X86/shrink-wrap-chkstk.ll
@@ -1,5 +1,8 @@
 ; RUN: llc < %s -enable-shrink-wrap=true | FileCheck %s
 
+; TODO: add preallocated versions of tests
+; we don't yet support conditionally called preallocated calls after the setup
+
 ; chkstk cannot come before the usual prologue, since it adjusts ESP.
 ; If chkstk is used in the prologue, we also have to be careful about preserving
 ; EAX if it is used.

diff  --git a/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll b/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
index 491bbba8c2fc..34db632ec205 100644
--- a/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
+++ b/llvm/test/CodeGen/X86/tail-call-mutable-memarg.ll
@@ -9,6 +9,21 @@ target triple = "i386-pc-windows-msvc19.0.24215"
 declare x86_stdcallcc void @tail_std(i32)
 declare void @capture(i32*)
 
+define x86_thiscallcc void @preallocated(i32* %this, i32* preallocated(i32) %args) {
+entry:
+  %val = load i32, i32* %args
+  store i32 0, i32* %args
+  tail call x86_stdcallcc void @tail_std(i32 %val)
+  ret void
+}
+
+; CHECK-LABEL: _preallocated:                              # @preallocated
+; CHECK:         movl    4(%esp), %[[reg:[^ ]*]]
+; CHECK:         movl    $0, 4(%esp)
+; CHECK:         pushl   %[[reg]]
+; CHECK:         calll   _tail_std at 4
+; CHECK:         retl    $4
+
 define x86_thiscallcc void @inalloca(i32* %this, i32* inalloca %args) {
 entry:
   %val = load i32, i32* %args

diff  --git a/llvm/test/Transforms/Attributor/value-simplify.ll b/llvm/test/Transforms/Attributor/value-simplify.ll
index 915591114c26..ac0fcaef1693 100644
--- a/llvm/test/Transforms/Attributor/value-simplify.ll
+++ b/llvm/test/Transforms/Attributor/value-simplify.ll
@@ -6,6 +6,8 @@
 
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 declare void @f(i32)
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
 
 ; Test1: Replace argument with constant
 define internal void @test1(i32 %a) {
@@ -280,6 +282,24 @@ define i32* @complicated_args_inalloca() {
   ret i32* %call
 }
 
+define internal i32* @test_preallocated(i32* preallocated(i32) %a) {
+; CHECK-LABEL: define {{[^@]+}}@test_preallocated
+; CHECK-SAME: (i32* noalias nofree returned writeonly preallocated(i32) align 536870912 "no-capture-maybe-returned" [[A:%.*]])
+; CHECK-NEXT:    ret i32* [[A]]
+;
+  ret i32* %a
+}
+define i32* @complicated_args_preallocated() {
+; CHECK-LABEL: define {{[^@]+}}@complicated_args_preallocated()
+; CHECK-NEXT:    [[C:%.*]] = call token @llvm.call.preallocated.setup(i32 1)
+; CHECK-NEXT:    [[CALL:%.*]] = call i32* @test_preallocated(i32* noalias nocapture nofree writeonly preallocated(i32) align 536870912 null)
+; CHECK-NEXT:    ret i32* [[CALL]]
+;
+  %c = call token @llvm.call.preallocated.setup(i32 1)
+  %call = call i32* @test_preallocated(i32* preallocated(i32) null) ["preallocated"(token %c)]
+  ret i32* %call
+}
+
 define internal void @test_sret(%struct.X* sret %a, %struct.X** %b) {
 ;
 ; CHECK-LABEL: define {{[^@]+}}@test_sret

diff  --git a/llvm/test/Transforms/DeadArgElim/keepalive.ll b/llvm/test/Transforms/DeadArgElim/keepalive.ll
index d8a09933dcf9..c53ee7654836 100644
--- a/llvm/test/Transforms/DeadArgElim/keepalive.ll
+++ b/llvm/test/Transforms/DeadArgElim/keepalive.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -deadargelim -S | FileCheck %s
 
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
 %Ty = type <{ i32, i32 }>
 
 ; Check if the pass doesn't modify anything that doesn't need changing. We feed
@@ -44,4 +47,22 @@ define i32 @caller2() {
 	ret i32 %v
 }
 
+; We can't remove 'this' here, as that would put argmem in ecx instead of
+; memory.
+define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem) {
+	%v = load i32, i32* %argmem
+	ret i32 %v
+}
+; CHECK-LABEL: define internal x86_thiscallcc i32 @unused_this_preallocated(i32* %this, i32* preallocated(i32) %argmem)
+
+define i32 @caller3() {
+	%t = alloca i32
+	%c = call token @llvm.call.preallocated.setup(i32 1)
+	%M = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+	%m = bitcast i8* %M to i32*
+	store i32 42, i32* %m
+	%v = call x86_thiscallcc i32 @unused_this_preallocated(i32* %t, i32* preallocated(i32) %m) ["preallocated"(token %c)]
+	ret i32 %v
+}
+
 ; CHECK: attributes #0 = { nounwind }

diff  --git a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
index d0a3cef71a7e..7e212187f030 100644
--- a/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/MSSA/simple-todo.ll
@@ -58,6 +58,16 @@ define void @test9_2(%struct.x* inalloca  %a) nounwind  {
   ret void
 }
 
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x)  %a) nounwind  {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
 ; DSE should delete the dead trampoline.
 declare void @test11f()
 define void @test11() {

diff  --git a/llvm/test/Transforms/DeadStoreElimination/simple.ll b/llvm/test/Transforms/DeadStoreElimination/simple.ll
index ee902a020277..9c381fc5d82d 100644
--- a/llvm/test/Transforms/DeadStoreElimination/simple.ll
+++ b/llvm/test/Transforms/DeadStoreElimination/simple.ll
@@ -169,6 +169,16 @@ define void @test9_2(%struct.x* inalloca  %a) nounwind  {
   ret void
 }
 
+; Test for preallocated handling.
+define void @test9_3(%struct.x* preallocated(%struct.x)  %a) nounwind  {
+; CHECK-LABEL: @test9_3(
+; CHECK-NEXT:    ret void
+;
+  %tmp2 = getelementptr %struct.x, %struct.x* %a, i32 0, i32 0
+  store i32 1, i32* %tmp2, align 4
+  ret void
+}
+
 ; va_arg has fuzzy dependence, the store shouldn't be zapped.
 define double @test10(i8* %X) {
 ; CHECK-LABEL: @test10(

diff  --git a/llvm/test/Transforms/FunctionAttrs/readattrs.ll b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
index b11b3edcebfc..e566c96d42b0 100644
--- a/llvm/test/Transforms/FunctionAttrs/readattrs.ll
+++ b/llvm/test/Transforms/FunctionAttrs/readattrs.ll
@@ -56,6 +56,12 @@ define void @test7_1(i32* inalloca %a) {
   ret void
 }
 
+; CHECK: define void @test7_2(i32* nocapture preallocated(i32) %a)
+; preallocated parameters are always considered written
+define void @test7_2(i32* preallocated(i32) %a) {
+  ret void
+}
+
 ; CHECK: define i32* @test8_1(i32* readnone returned %p)
 define i32* @test8_1(i32* %p) {
 entry:

diff  --git a/llvm/test/Transforms/GlobalOpt/fastcc.ll b/llvm/test/Transforms/GlobalOpt/fastcc.ll
index 39542d0b0cb7..7bf3e9700f1c 100644
--- a/llvm/test/Transforms/GlobalOpt/fastcc.ll
+++ b/llvm/test/Transforms/GlobalOpt/fastcc.ll
@@ -1,5 +1,8 @@
 ; RUN: opt < %s -globalopt -S | FileCheck %s
 
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
 define internal i32 @f(i32* %m) {
 ; CHECK-LABEL: define internal fastcc i32 @f
   %v = load i32, i32* %m
@@ -32,6 +35,13 @@ define internal i32 @inalloca(i32* inalloca %p) {
   ret i32 %rv
 }
 
+define internal i32 @preallocated(i32* preallocated(i32) %p) {
+; TODO: handle preallocated:
+; CHECK-NOT-LABEL: define internal fastcc i32 @preallocated(i32* %p)
+  %rv = load i32, i32* %p
+  ret i32 %rv
+}
+
 define void @call_things() {
   %m = alloca i32
   call i32 @f(i32* %m)
@@ -40,6 +50,11 @@ define void @call_things() {
   call i32 @j(i32* %m)
   %args = alloca inalloca i32
   call i32 @inalloca(i32* inalloca %args)
+  ; TODO: handle preallocated
+  ;%c = call token @llvm.call.preallocated.setup(i32 1)
+  ;%N = call i8* @llvm.call.preallocated.arg(token %c, i32 0) preallocated(i32)
+  ;%n = bitcast i8* %N to i32*
+   ;call i32 @preallocated(i32* preallocated(i32) %n) ["preallocated"(token %c)]
   ret void
 }
 

diff  --git a/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll b/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll
new file mode 100644
index 000000000000..fc96a16fa8d0
--- /dev/null
+++ b/llvm/test/Transforms/InstCombine/call-cast-target-preallocated.ll
@@ -0,0 +1,28 @@
+; RUN: opt < %s -instcombine -S | FileCheck %s
+
+target datalayout = "e-p:32:32"
+target triple = "i686-pc-win32"
+
+
+declare token @llvm.call.preallocated.setup(i32)
+declare i8* @llvm.call.preallocated.arg(token, i32)
+
+declare void @takes_i32(i32)
+declare void @takes_i32_preallocated(i32* preallocated(i32))
+
+define void @f() {
+; CHECK-LABEL: define void @f()
+  %t = call token @llvm.call.preallocated.setup(i32 1)
+  %a = call i8* @llvm.call.preallocated.arg(token %t, i32 0) preallocated(i32)
+  %arg = bitcast i8* %a to i32*
+  call void bitcast (void (i32)* @takes_i32 to void (i32*)*)(i32* preallocated(i32) %arg) ["preallocated"(token %t)]
+; CHECK: call void bitcast{{.*}}@takes_i32
+  ret void
+}
+
+define void @g() {
+; CHECK-LABEL: define void @g()
+  call void bitcast (void (i32*)* @takes_i32_preallocated to void (i32)*)(i32 0)
+; CHECK: call void bitcast{{.*}}@takes_i32_preallocated
+  ret void
+}


        


More information about the llvm-commits mailing list