[llvm] 92600c2 - [VE] call isel with stack passing

Simon Moll via llvm-commits llvm-commits at lists.llvm.org
Tue Jan 28 01:59:20 PST 2020


Author: Kazushi (Jam) Marukawa
Date: 2020-01-28T10:55:47+01:00
New Revision: 92600c2ec83233f897b306f8c20986f0055edf8b

URL: https://github.com/llvm/llvm-project/commit/92600c2ec83233f897b306f8c20986f0055edf8b
DIFF: https://github.com/llvm/llvm-project/commit/92600c2ec83233f897b306f8c20986f0055edf8b.diff

LOG: [VE] call isel with stack passing

Summary:
Function calls and stack-passing of function arguments.
Custom lowering, isel patterns and tests.

Reviewed By: arsenm

Differential Revision: https://reviews.llvm.org/D73461

Added: 
    llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
    llvm/lib/Target/VE/VEMachineFunctionInfo.h
    llvm/test/CodeGen/VE/call.ll
    llvm/test/CodeGen/VE/callee.ll
    llvm/test/CodeGen/VE/callstruct.ll

Modified: 
    llvm/lib/Target/VE/CMakeLists.txt
    llvm/lib/Target/VE/VECallingConv.td
    llvm/lib/Target/VE/VEFrameLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.cpp
    llvm/lib/Target/VE/VEISelLowering.h
    llvm/lib/Target/VE/VEInstrInfo.td

Removed: 
    


################################################################################
diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
index 661f0d41dc09..5b32fd50e583 100644
--- a/llvm/lib/Target/VE/CMakeLists.txt
+++ b/llvm/lib/Target/VE/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(VECodeGen
   VEISelDAGToDAG.cpp
   VEISelLowering.cpp
   VEInstrInfo.cpp
+  VEMachineFunctionInfo.cpp
   VEMCInstLower.cpp
   VERegisterInfo.cpp
   VESubtarget.cpp

diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
index 110505674312..2fbbced744f9 100644
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -13,6 +13,17 @@
 //===----------------------------------------------------------------------===//
 // Aurora VE
 //===----------------------------------------------------------------------===//
+def CC_VE_C_Stack: CallingConv<[
+  // float --> need special handling like below.
+  //    0      4
+  //    +------+------+
+  //    | empty| float|
+  //    +------+------+
+  CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+  // All of the rest are assigned to the stack in 8-byte aligned units.
+  CCAssignToStack<0, 8>
+]>;
 
 def CC_VE : CallingConv<[
   // All arguments get passed in generic registers if there is space.
@@ -33,6 +44,9 @@ def CC_VE : CallingConv<[
   // long long/double --> generic 64 bit registers
   CCIfType<[i64, f64],
            CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+  // Alternatively, they are assigned to the stack in 8-byte aligned units.
+  CCDelegateTo<CC_VE_C_Stack>
 ]>;
 
 def RetCC_VE : CallingConv<[

diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
index 7e8f15948bf4..dcbb4bc75f5d 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -12,6 +12,7 @@
 
 #include "VEFrameLowering.h"
 #include "VEInstrInfo.h"
+#include "VEMachineFunctionInfo.h"
 #include "VESubtarget.h"
 #include "llvm/CodeGen/MachineFrameInfo.h"
 #include "llvm/CodeGen/MachineFunction.h"
@@ -297,9 +298,40 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
 
 int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
                                             unsigned &FrameReg) const {
+  const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
+  const MachineFrameInfo &MFI = MF.getFrameInfo();
+  const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+  const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+  bool isFixed = MFI.isFixedObjectIndex(FI);
+
   // Addressable stack objects are accessed using neg. offsets from
   // %fp, or positive offsets from %sp.
+  bool UseFP = true;
+
+  // VE uses FP-based references in general, even when "hasFP" is
+  // false. That function is rather a misnomer, because %fp is
+  // actually always available, unless isLeafProc.
+  if (FuncInfo->isLeafProc()) {
+    // If there's a leaf proc, all offsets need to be %sp-based,
+    // because we haven't caused %fp to actually point to our frame.
+    UseFP = false;
+  } else if (isFixed) {
+    // Otherwise, argument access should always use %fp.
+    UseFP = true;
+  } else if (RegInfo->needsStackRealignment(MF)) {
+    // If there is dynamic stack realignment, all local object
+    // references need to be via %sp, to take account of the
+    // re-alignment.
+    UseFP = false;
+  }
+
   int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
+
+  if (UseFP) {
+    FrameReg = RegInfo->getFrameRegister(MF);
+    return FrameOffset;
+  }
+
   FrameReg = VE::SX11; // %sp
   return FrameOffset + MF.getFrameInfo().getStackSize();
 }
@@ -321,5 +353,8 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
                                            RegScavenger *RS) const {
   TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
 
-  assert(isLeafProc(MF) && "TODO implement for non-leaf procs");
+  if (isLeafProc(MF)) {
+    VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>();
+    MFI->setLeafProc(true);
+  }
 }

diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index e7af52862c60..ffbc7287cab5 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -37,6 +37,28 @@ using namespace llvm;
 // Calling Convention Implementation
 //===----------------------------------------------------------------------===//
 
+static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
+                          CCValAssign::LocInfo LocInfo,
+                          ISD::ArgFlagsTy ArgFlags, CCState &State) {
+  switch (LocVT.SimpleTy) {
+  case MVT::f32: {
+    // Allocate stack like below
+    //    0      4
+    //    +------+------+
+    //    | empty| float|
+    //    +------+------+
+    // Use align=8 for dummy area to align the beginning of these 2 area.
+    State.AllocateStack(4, 8); // for empty area
+    // Use align=4 for value to place it at just after the dummy area.
+    unsigned Offset = State.AllocateStack(4, 4); // for float value area
+    State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+    return true;
+  }
+  default:
+    return false;
+  }
+}
+
 #include "VEGenCallingConv.inc"
 
 bool VETargetLowering::CanLowerReturn(
@@ -114,6 +136,8 @@ SDValue VETargetLowering::LowerFormalArguments(
     SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
   MachineFunction &MF = DAG.getMachineFunction();
 
+  // Get the base offset of the incoming arguments stack space.
+  unsigned ArgsBaseOffset = 176;
   // Get the size of the preserved arguments area
   unsigned ArgsPreserved = 64;
 
@@ -129,7 +153,6 @@ SDValue VETargetLowering::LowerFormalArguments(
 
   for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
     CCValAssign &VA = ArgLocs[i];
-    assert(VA.isRegLoc() && "TODO implement argument passing on stack");
     if (VA.isRegLoc()) {
       // This argument is passed in a register.
       // All integer register arguments are promoted by the caller to i64.
@@ -166,6 +189,18 @@ SDValue VETargetLowering::LowerFormalArguments(
       InVals.push_back(Arg);
       continue;
     }
+
+    // The registers are exhausted. This argument was passed on the stack.
+    assert(VA.isMemLoc());
+    // The CC_VE_Full/Half functions compute stack offsets relative to the
+    // beginning of the arguments area at %fp+176.
+    unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
+    unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+    int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
+    InVals.push_back(
+        DAG.getLoad(VA.getValVT(), DL, Chain,
+                    DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
+                    MachinePointerInfo::getFixedStack(MF, FI)));
   }
 
   assert(!IsVarArg && "TODO implement var args");
@@ -198,6 +233,224 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
 // TargetLowering Implementation
 //===----------------------------------------------------------------------===//
 
+SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                                    SmallVectorImpl<SDValue> &InVals) const {
+  SelectionDAG &DAG = CLI.DAG;
+  SDLoc DL = CLI.DL;
+  SDValue Chain = CLI.Chain;
+  auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+  // VE target does not yet support tail call optimization.
+  CLI.IsTailCall = false;
+
+  // Get the base offset of the outgoing arguments stack space.
+  unsigned ArgsBaseOffset = 176;
+  // Get the size of the preserved arguments area
+  unsigned ArgsPreserved = 8 * 8u;
+
+  // Analyze operands of the call, assigning locations to each operand.
+  SmallVector<CCValAssign, 16> ArgLocs;
+  CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
+                 *DAG.getContext());
+  // Allocate the preserved area first.
+  CCInfo.AllocateStack(ArgsPreserved, 8);
+  // We already allocated the preserved area, so the stack offset computed
+  // by CC_VE would be correct now.
+  CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+
+  assert(!CLI.IsVarArg);
+
+  // Get the size of the outgoing arguments stack space requirement.
+  unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+  // Keep stack frames 16-byte aligned.
+  ArgsSize = alignTo(ArgsSize, 16);
+
+  // Adjust the stack pointer to make room for the arguments.
+  // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+  // with more than 6 arguments.
+  Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
+
+  // Collect the set of registers to pass to the function and their values.
+  // This will be emitted as a sequence of CopyToReg nodes glued to the call
+  // instruction.
+  SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+  // Collect chains from all the memory opeations that copy arguments to the
+  // stack. They must follow the stack pointer adjustment above and precede the
+  // call instruction itself.
+  SmallVector<SDValue, 8> MemOpChains;
+
+  // VE needs to get address of callee function in a register
+  // So, prepare to copy it to SX12 here.
+
+  // If the callee is a GlobalAddress node (quite common, every direct call is)
+  // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+  // Likewise ExternalSymbol -> TargetExternalSymbol.
+  SDValue Callee = CLI.Callee;
+
+  assert(!isPositionIndependent() && "TODO PIC");
+
+  // Turn GlobalAddress/ExternalSymbol node into a value node
+  // containing the address of them here.
+  if (isa<GlobalAddressSDNode>(Callee)) {
+    Callee =
+        makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+  } else if (isa<ExternalSymbolSDNode>(Callee)) {
+    Callee =
+        makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+  }
+
+  RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
+
+  for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+    CCValAssign &VA = ArgLocs[i];
+    SDValue Arg = CLI.OutVals[i];
+
+    // Promote the value if needed.
+    switch (VA.getLocInfo()) {
+    default:
+      llvm_unreachable("Unknown location info!");
+    case CCValAssign::Full:
+      break;
+    case CCValAssign::SExt:
+      Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::ZExt:
+      Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    case CCValAssign::AExt:
+      Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+      break;
+    }
+
+    if (VA.isRegLoc()) {
+      RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+      continue;
+    }
+
+    assert(VA.isMemLoc());
+
+    // Create a store off the stack pointer for this argument.
+    SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
+    // The argument area starts at %fp+176 in the callee frame,
+    // %sp+176 in ours.
+    SDValue PtrOff =
+        DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
+    PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
+    MemOpChains.push_back(
+        DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
+  }
+
+  // Emit all stores, make sure they occur before the call.
+  if (!MemOpChains.empty())
+    Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+  // Build a sequence of CopyToReg nodes glued together with token chain and
+  // glue operands which copy the outgoing args into registers. The InGlue is
+  // necessary since all emitted instructions must be stuck together in order
+  // to pass the live physical registers.
+  SDValue InGlue;
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+    Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
+                             RegsToPass[i].second, InGlue);
+    InGlue = Chain.getValue(1);
+  }
+
+  // Build the operands for the call instruction itself.
+  SmallVector<SDValue, 8> Ops;
+  Ops.push_back(Chain);
+  for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+    Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+                                  RegsToPass[i].second.getValueType()));
+
+  // Add a register mask operand representing the call-preserved registers.
+  const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
+  const uint32_t *Mask =
+      TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
+  assert(Mask && "Missing call preserved mask for calling convention");
+  Ops.push_back(DAG.getRegisterMask(Mask));
+
+  // Make sure the CopyToReg nodes are glued to the call instruction which
+  // consumes the registers.
+  if (InGlue.getNode())
+    Ops.push_back(InGlue);
+
+  // Now the call itself.
+  SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+  Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
+  InGlue = Chain.getValue(1);
+
+  // Revert the stack pointer immediately after the call.
+  Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
+                             DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
+  InGlue = Chain.getValue(1);
+
+  // Now extract the return values. This is more or less the same as
+  // LowerFormalArguments.
+
+  // Assign locations to each value returned by this call.
+  SmallVector<CCValAssign, 16> RVLocs;
+  CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
+                 *DAG.getContext());
+
+  // Set inreg flag manually for codegen generated library calls that
+  // return float.
+  if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS)
+    CLI.Ins[0].Flags.setInReg();
+
+  RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+
+  // Copy all of the result registers out of their specified physreg.
+  for (unsigned i = 0; i != RVLocs.size(); ++i) {
+    CCValAssign &VA = RVLocs[i];
+    unsigned Reg = VA.getLocReg();
+
+    // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+    // reside in the same register in the high and low bits. Reuse the
+    // CopyFromReg previous node to avoid duplicate copies.
+    SDValue RV;
+    if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+      if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+        RV = Chain.getValue(0);
+
+    // But usually we'll create a new CopyFromReg for a different register.
+    if (!RV.getNode()) {
+      RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+      Chain = RV.getValue(1);
+      InGlue = Chain.getValue(2);
+    }
+
+    // Get the high bits for i32 struct elements.
+    if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+      RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+                       DAG.getConstant(32, DL, MVT::i32));
+
+    // The callee promoted the return value, so insert an Assert?ext SDNode so
+    // we won't promote the value again in this function.
+    switch (VA.getLocInfo()) {
+    case CCValAssign::SExt:
+      RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    case CCValAssign::ZExt:
+      RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+                       DAG.getValueType(VA.getValVT()));
+      break;
+    default:
+      break;
+    }
+
+    // Truncate the register down to the return value type.
+    if (VA.isExtInLoc())
+      RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+    InVals.push_back(RV);
+  }
+
+  return Chain;
+}
+
 /// isFPImmLegal - Returns true if the target can instruction select the
 /// specified FP immediate natively. If false, the legalizer will
 /// materialize the FP immediate as a load from a constant pool.
@@ -268,6 +521,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
     break;
     TARGET_NODE_CASE(Lo)
     TARGET_NODE_CASE(Hi)
+    TARGET_NODE_CASE(CALL)
     TARGET_NODE_CASE(RET_FLAG)
   }
 #undef TARGET_NODE_CASE
@@ -320,6 +574,7 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
 }
 
 /// Custom Lower {
+
 SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
                                              SelectionDAG &DAG) const {
   return makeAddress(Op, DAG);

diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index d6b719568307..eb7835e6a8ae 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,6 +27,7 @@ enum NodeType : unsigned {
   Hi,
   Lo, // Hi/Lo operations, typically on a global address.
 
+  CALL,            // A call instruction.
   RET_FLAG, // Return with a flag operand.
 };
 }
@@ -55,6 +56,9 @@ class VETargetLowering : public TargetLowering {
                                const SDLoc &dl, SelectionDAG &DAG,
                                SmallVectorImpl<SDValue> &InVals) const override;
 
+  SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+                    SmallVectorImpl<SDValue> &InVals) const override;
+
   bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
                       bool isVarArg,
                       const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,

diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 0c5fd29e1e89..3bd50d3d0759 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -150,6 +150,11 @@ def brtarget32 : Operand<OtherVT> {
   let EncoderMethod = "getBranchTarget32OpValue";
 }
 
+def calltarget : Operand<i64> {
+  let EncoderMethod = "getCallTargetOpValue";
+  let DecoderMethod = "DecodeCall";
+}
+
 def simm7Op32 : Operand<i32> {
   let DecoderMethod = "DecodeSIMM7";
 }
@@ -192,7 +197,10 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
 def callseq_end   : SDNode<"ISD::CALLSEQ_END",   SDT_SPCallSeqEnd,
                            [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
 
-// def SDT_SPCall    : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def SDT_SPCall    : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def call          : SDNode<"VEISD::CALL", SDT_SPCall,
+                           [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+                            SDNPVariadic]>;
 
 def retflag       : SDNode<"VEISD::RET_FLAG", SDTNone,
                            [SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -549,6 +557,11 @@ let cx = 0 in
 defm LEA32 : RMm<"lea", 0x06, I32, i32, simm7Op32, simm32Op32, add>;
 }
 
+let cx = 0, cy = 1, cz = 0, sz = 0, hasSideEffects = 0 in {
+  def LEAasx : RM<
+      0x06, (outs I64:$sx), (ins MEMri:$addr),
+      "lea $sx,$addr", [(set iPTR:$sx, ADDRri:$addr)]>;
+}
 
 // 5.3.2.2. Fixed-Point Arithmetic Operation Instructions
 
@@ -775,6 +788,27 @@ def MONC : RR<
     0x3F, (outs), (ins),
     "monc">;
 
+//===----------------------------------------------------------------------===//
+// Instructions for CodeGenOnly
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+
+// Call instruction
+let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in {
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in
+def CALL : RM<
+    0x08, (outs), (ins calltarget:$imm32, variable_ops),
+    "bsic %lr, $imm32">;
+// use sz to represent a register
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in
+def CALLr : RM<
+    0x08, (outs), (ins I64:$sz, variable_ops),
+    "bsic %lr, (,$sz)">;
+}
+
+}
+
 //===----------------------------------------------------------------------===//
 // Pattern Matchings
 //===----------------------------------------------------------------------===//
@@ -893,6 +927,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
           (LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
                     (tglobaladdr:$in1))>;
 
+// Calls
+def : Pat<(call tglobaladdr:$dst),
+          (CALL tglobaladdr:$dst)>;
+def : Pat<(call i64:$dst),
+          (CALLr i64:$dst)>;
+
+
 //===----------------------------------------------------------------------===//
 // Pseudo Instructions
 //===----------------------------------------------------------------------===//

diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..1addfc7174eb
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
@@ -0,0 +1,13 @@
+//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void VEMachineFunctionInfo::anchor() {}

diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
new file mode 100644
index 000000000000..b89520fd2174
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -0,0 +1,35 @@
+//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares  VE specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class VEMachineFunctionInfo : public MachineFunctionInfo {
+  virtual void anchor();
+
+private:
+  /// IsLeafProc - True if the function is a leaf procedure.
+  bool IsLeafProc;
+
+public:
+  VEMachineFunctionInfo() : IsLeafProc(false) {}
+  explicit VEMachineFunctionInfo(MachineFunction &MF) : IsLeafProc(false) {}
+
+  void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+  bool isLeafProc() const { return IsLeafProc; }
+};
+} // namespace llvm
+
+#endif

diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll
new file mode 100644
index 000000000000..c03f5bcf84be
--- /dev/null
+++ b/llvm/test/CodeGen/VE/call.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define i32 @sample_call() {
+; CHECK-LABEL: sample_call:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, sample_add@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, sample_add@hi(%s0)
+; CHECK-NEXT:    or %s0, 1, (0)1
+; CHECK-NEXT:    or %s1, 2, (0)1
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %r = tail call i32 @sample_add(i32 1, i32 2)
+  ret i32 %r
+}
+
+declare i32 @sample_add(i32, i32)
+
+define i32 @stack_call_int() {
+; CHECK-LABEL: stack_call_int:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s0, 10, (0)1
+; CHECK-NEXT:    stl %s0, 248(,%s11)
+; CHECK-NEXT:    or %s34, 9, (0)1
+; CHECK-NEXT:    lea %s0, stack_callee_int@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_int@hi(%s0)
+; CHECK-NEXT:    or %s0, 1, (0)1
+; CHECK-NEXT:    or %s1, 2, (0)1
+; CHECK-NEXT:    or %s2, 3, (0)1
+; CHECK-NEXT:    or %s3, 4, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s5, 6, (0)1
+; CHECK-NEXT:    or %s6, 7, (0)1
+; CHECK-NEXT:    or %s7, 8, (0)1
+; CHECK-NEXT:    stl %s34, 240(,%s11)
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %r = tail call i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+  ret i32 %r
+}
+
+declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define i32 @stack_call_int_szext() {
+; CHECK-LABEL: stack_call_int_szext:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    or %s0, -1, (0)1
+; CHECK-NEXT:    stl %s0, 248(,%s11)
+; CHECK-NEXT:    lea %s34, 65535
+; CHECK-NEXT:    lea %s1, stack_callee_int_szext@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_int_szext@hi(%s1)
+; CHECK-NEXT:    lea %s1, 255
+; CHECK-NEXT:    or %s2, 3, (0)1
+; CHECK-NEXT:    or %s3, 4, (0)1
+; CHECK-NEXT:    or %s4, 5, (0)1
+; CHECK-NEXT:    or %s5, 6, (0)1
+; CHECK-NEXT:    or %s6, 7, (0)1
+; CHECK-NEXT:    or %s7, 8, (0)1
+; CHECK-NEXT:    stl %s34, 240(,%s11)
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %r = tail call i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1)
+  ret i32 %r
+}
+
+declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext)
+
+define float @stack_call_float() {
+; CHECK-LABEL: stack_call_float:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, 1092616192
+; CHECK-NEXT:    stl %s0, 252(,%s11)
+; CHECK-NEXT:    lea %s0, 1091567616
+; CHECK-NEXT:    lea %s1, stack_callee_float@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(%s1)
+; CHECK-NEXT:    lea.sl %s1, 1065353216
+; CHECK-NEXT:    lea.sl %s2, 1073741824
+; CHECK-NEXT:    lea.sl %s3, 1077936128
+; CHECK-NEXT:    lea.sl %s4, 1082130432
+; CHECK-NEXT:    lea.sl %s5, 1084227584
+; CHECK-NEXT:    lea.sl %s6, 1086324736
+; CHECK-NEXT:    lea.sl %s7, 1088421888
+; CHECK-NEXT:    lea.sl %s34, 1090519040
+; CHECK-NEXT:    stl %s0, 244(,%s11)
+; CHECK-NEXT:    or %s0, 0, %s1
+; CHECK-NEXT:    or %s1, 0, %s2
+; CHECK-NEXT:    or %s2, 0, %s3
+; CHECK-NEXT:    or %s3, 0, %s4
+; CHECK-NEXT:    or %s4, 0, %s5
+; CHECK-NEXT:    or %s5, 0, %s6
+; CHECK-NEXT:    or %s6, 0, %s7
+; CHECK-NEXT:    or %s7, 0, %s34
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %r = tail call float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
+  ret float %r
+}
+
+declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float)
+
+define float @stack_call_float2(float %p0) {
+; CHECK-LABEL: stack_call_float2:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    stu %s0, 252(,%s11)
+; CHECK-NEXT:    lea %s1, stack_callee_float@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s12, stack_callee_float@hi(%s1)
+; CHECK-NEXT:    stu %s0, 244(,%s11)
+; CHECK-NEXT:    or %s1, 0, %s0
+; CHECK-NEXT:    or %s2, 0, %s0
+; CHECK-NEXT:    or %s3, 0, %s0
+; CHECK-NEXT:    or %s4, 0, %s0
+; CHECK-NEXT:    or %s5, 0, %s0
+; CHECK-NEXT:    or %s6, 0, %s0
+; CHECK-NEXT:    or %s7, 0, %s0
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %r = tail call float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
+  ret float %r
+}
+

diff --git a/llvm/test/CodeGen/VE/callee.ll b/llvm/test/CodeGen/VE/callee.ll
new file mode 100644
index 000000000000..08d271c6f9c5
--- /dev/null
+++ b/llvm/test/CodeGen/VE/callee.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define i32 @stack_stack_arg_i32_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
+; CHECK-LABEL: stack_stack_arg_i32_r9:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ldl.sx %s0, 424(,%s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret i32 %9
+}
+
+define i64 @stack_stack_arg_i64_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) {
+; CHECK-LABEL: stack_stack_arg_i64_r9:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ld %s0, 424(,%s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret i64 %9
+}
+
+define float @stack_stack_arg_f32_r9(float %p0, float %p1, float %p2, float %p3, float %p4, float %p5, float %p6, float %p7, float %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_f32_r9:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ldu %s0, 428(,%s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret float %s1
+}
+
+define i32 @stack_stack_arg_i32f32_r8(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_i32f32_r8:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ldl.sx %s0, 416(,%s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret i32 %s0
+}
+
+define float @stack_stack_arg_i32f32_r9(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_i32f32_r9:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    ldu %s0, 428(,%s11)
+; CHECK-NEXT:    or %s11, 0, %s9
+  ret float %s1
+}

diff --git a/llvm/test/CodeGen/VE/callstruct.ll b/llvm/test/CodeGen/VE/callstruct.ll
new file mode 100644
index 000000000000..a76a9511f73f
--- /dev/null
+++ b/llvm/test/CodeGen/VE/callstruct.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+%struct.a = type { i32, i32 }
+
+ at A = common global %struct.a zeroinitializer, align 4
+
+; Function Attrs: norecurse nounwind
+define void @fun(%struct.a* noalias nocapture sret %a, i32 %p1, i32 %p2) {
+; CHECK-LABEL: fun:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    stl %s1, (,%s0)
+; CHECK-NEXT:    stl %s2, 4(,%s0)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %a.zero = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 0
+  store i32 %p1, i32* %a.zero, align 4
+  %a.one = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 1
+  store i32 %p2, i32* %a.one, align 4
+  ret void
+}
+
+; Function Attrs: nounwind
+define void @caller() {
+; CHECK-LABEL: caller:
+; CHECK:       .LBB{{[0-9]+}}_2:
+; CHECK-NEXT:    lea %s0, callee@lo
+; CHECK-NEXT:    and %s0, %s0, (32)0
+; CHECK-NEXT:    lea.sl %s12, callee@hi(%s0)
+; CHECK-NEXT:    lea %s0,-8(,%s9)
+; CHECK-NEXT:    or %s1, 3, (0)1
+; CHECK-NEXT:    or %s2, 4, (0)1
+; CHECK-NEXT:    bsic %lr, (,%s12)
+; CHECK-NEXT:    ld %s0, -8(,%s9)
+; CHECK-NEXT:    lea %s1, A@lo
+; CHECK-NEXT:    and %s1, %s1, (32)0
+; CHECK-NEXT:    lea.sl %s1, A@hi(%s1)
+; CHECK-NEXT:    st %s0, (,%s1)
+; CHECK-NEXT:    or %s11, 0, %s9
+  %a = alloca i64, align 8
+  %a.bc = bitcast i64* %a to %struct.a*
+  call void @callee(%struct.a* nonnull sret %a.bc, i32 3, i32 4)
+  %a.val = load i64, i64* %a, align 8
+  store i64 %a.val, i64* bitcast (%struct.a* @A to i64*), align 4
+  ret void
+}
+
+declare void @callee(%struct.a* sret, i32, i32)


        


More information about the llvm-commits mailing list