[llvm] 92600c2 - [VE] call isel with stack passing
Simon Moll via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 28 01:59:20 PST 2020
Author: Kazushi (Jam) Marukawa
Date: 2020-01-28T10:55:47+01:00
New Revision: 92600c2ec83233f897b306f8c20986f0055edf8b
URL: https://github.com/llvm/llvm-project/commit/92600c2ec83233f897b306f8c20986f0055edf8b
DIFF: https://github.com/llvm/llvm-project/commit/92600c2ec83233f897b306f8c20986f0055edf8b.diff
LOG: [VE] call isel with stack passing
Summary:
Function calls and stack-passing of function arguments.
Custom lowering, isel patterns and tests.
Reviewed By: arsenm
Differential Revision: https://reviews.llvm.org/D73461
Added:
llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
llvm/lib/Target/VE/VEMachineFunctionInfo.h
llvm/test/CodeGen/VE/call.ll
llvm/test/CodeGen/VE/callee.ll
llvm/test/CodeGen/VE/callstruct.ll
Modified:
llvm/lib/Target/VE/CMakeLists.txt
llvm/lib/Target/VE/VECallingConv.td
llvm/lib/Target/VE/VEFrameLowering.cpp
llvm/lib/Target/VE/VEISelLowering.cpp
llvm/lib/Target/VE/VEISelLowering.h
llvm/lib/Target/VE/VEInstrInfo.td
Removed:
################################################################################
diff --git a/llvm/lib/Target/VE/CMakeLists.txt b/llvm/lib/Target/VE/CMakeLists.txt
index 661f0d41dc09..5b32fd50e583 100644
--- a/llvm/lib/Target/VE/CMakeLists.txt
+++ b/llvm/lib/Target/VE/CMakeLists.txt
@@ -14,6 +14,7 @@ add_llvm_target(VECodeGen
VEISelDAGToDAG.cpp
VEISelLowering.cpp
VEInstrInfo.cpp
+ VEMachineFunctionInfo.cpp
VEMCInstLower.cpp
VERegisterInfo.cpp
VESubtarget.cpp
diff --git a/llvm/lib/Target/VE/VECallingConv.td b/llvm/lib/Target/VE/VECallingConv.td
index 110505674312..2fbbced744f9 100644
--- a/llvm/lib/Target/VE/VECallingConv.td
+++ b/llvm/lib/Target/VE/VECallingConv.td
@@ -13,6 +13,17 @@
//===----------------------------------------------------------------------===//
// Aurora VE
//===----------------------------------------------------------------------===//
+def CC_VE_C_Stack: CallingConv<[
+ // float --> need special handling like below.
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ CCIfType<[f32], CCCustom<"allocateFloat">>,
+
+ // All of the rest are assigned to the stack in 8-byte aligned units.
+ CCAssignToStack<0, 8>
+]>;
def CC_VE : CallingConv<[
// All arguments get passed in generic registers if there is space.
@@ -33,6 +44,9 @@ def CC_VE : CallingConv<[
// long long/double --> generic 64 bit registers
CCIfType<[i64, f64],
CCAssignToReg<[SX0, SX1, SX2, SX3, SX4, SX5, SX6, SX7]>>,
+
+ // Alternatively, they are assigned to the stack in 8-byte aligned units.
+ CCDelegateTo<CC_VE_C_Stack>
]>;
def RetCC_VE : CallingConv<[
diff --git a/llvm/lib/Target/VE/VEFrameLowering.cpp b/llvm/lib/Target/VE/VEFrameLowering.cpp
index 7e8f15948bf4..dcbb4bc75f5d 100644
--- a/llvm/lib/Target/VE/VEFrameLowering.cpp
+++ b/llvm/lib/Target/VE/VEFrameLowering.cpp
@@ -12,6 +12,7 @@
#include "VEFrameLowering.h"
#include "VEInstrInfo.h"
+#include "VEMachineFunctionInfo.h"
#include "VESubtarget.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
@@ -297,9 +298,40 @@ bool VEFrameLowering::hasFP(const MachineFunction &MF) const {
int VEFrameLowering::getFrameIndexReference(const MachineFunction &MF, int FI,
unsigned &FrameReg) const {
+ const VESubtarget &Subtarget = MF.getSubtarget<VESubtarget>();
+ const MachineFrameInfo &MFI = MF.getFrameInfo();
+ const VERegisterInfo *RegInfo = Subtarget.getRegisterInfo();
+ const VEMachineFunctionInfo *FuncInfo = MF.getInfo<VEMachineFunctionInfo>();
+ bool isFixed = MFI.isFixedObjectIndex(FI);
+
// Addressable stack objects are accessed using neg. offsets from
// %fp, or positive offsets from %sp.
+ bool UseFP = true;
+
+ // VE uses FP-based references in general, even when "hasFP" is
+ // false. That function is rather a misnomer, because %fp is
+ // actually always available, unless isLeafProc.
+ if (FuncInfo->isLeafProc()) {
+ // If there's a leaf proc, all offsets need to be %sp-based,
+ // because we haven't caused %fp to actually point to our frame.
+ UseFP = false;
+ } else if (isFixed) {
+ // Otherwise, argument access should always use %fp.
+ UseFP = true;
+ } else if (RegInfo->needsStackRealignment(MF)) {
+ // If there is dynamic stack realignment, all local object
+ // references need to be via %sp, to take account of the
+ // re-alignment.
+ UseFP = false;
+ }
+
int64_t FrameOffset = MF.getFrameInfo().getObjectOffset(FI);
+
+ if (UseFP) {
+ FrameReg = RegInfo->getFrameRegister(MF);
+ return FrameOffset;
+ }
+
FrameReg = VE::SX11; // %sp
return FrameOffset + MF.getFrameInfo().getStackSize();
}
@@ -321,5 +353,8 @@ void VEFrameLowering::determineCalleeSaves(MachineFunction &MF,
RegScavenger *RS) const {
TargetFrameLowering::determineCalleeSaves(MF, SavedRegs, RS);
- assert(isLeafProc(MF) && "TODO implement for non-leaf procs");
+ if (isLeafProc(MF)) {
+ VEMachineFunctionInfo *MFI = MF.getInfo<VEMachineFunctionInfo>();
+ MFI->setLeafProc(true);
+ }
}
diff --git a/llvm/lib/Target/VE/VEISelLowering.cpp b/llvm/lib/Target/VE/VEISelLowering.cpp
index e7af52862c60..ffbc7287cab5 100644
--- a/llvm/lib/Target/VE/VEISelLowering.cpp
+++ b/llvm/lib/Target/VE/VEISelLowering.cpp
@@ -37,6 +37,28 @@ using namespace llvm;
// Calling Convention Implementation
//===----------------------------------------------------------------------===//
+static bool allocateFloat(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State) {
+ switch (LocVT.SimpleTy) {
+ case MVT::f32: {
+ // Allocate stack like below
+ // 0 4
+ // +------+------+
+ // | empty| float|
+ // +------+------+
+ // Use align=8 for the dummy area to align the beginning of these 2 areas.
+ State.AllocateStack(4, 8); // for empty area
+ // Use align=4 for value to place it at just after the dummy area.
+ unsigned Offset = State.AllocateStack(4, 4); // for float value area
+ State.addLoc(CCValAssign::getMem(ValNo, ValVT, Offset, LocVT, LocInfo));
+ return true;
+ }
+ default:
+ return false;
+ }
+}
+
#include "VEGenCallingConv.inc"
bool VETargetLowering::CanLowerReturn(
@@ -114,6 +136,8 @@ SDValue VETargetLowering::LowerFormalArguments(
SelectionDAG &DAG, SmallVectorImpl<SDValue> &InVals) const {
MachineFunction &MF = DAG.getMachineFunction();
+ // Get the base offset of the incoming arguments stack space.
+ unsigned ArgsBaseOffset = 176;
// Get the size of the preserved arguments area
unsigned ArgsPreserved = 64;
@@ -129,7 +153,6 @@ SDValue VETargetLowering::LowerFormalArguments(
for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
CCValAssign &VA = ArgLocs[i];
- assert(VA.isRegLoc() && "TODO implement argument passing on stack");
if (VA.isRegLoc()) {
// This argument is passed in a register.
// All integer register arguments are promoted by the caller to i64.
@@ -166,6 +189,18 @@ SDValue VETargetLowering::LowerFormalArguments(
InVals.push_back(Arg);
continue;
}
+
+ // The registers are exhausted. This argument was passed on the stack.
+ assert(VA.isMemLoc());
+ // The CC_VE_Full/Half functions compute stack offsets relative to the
+ // beginning of the arguments area at %fp+176.
+ unsigned Offset = VA.getLocMemOffset() + ArgsBaseOffset;
+ unsigned ValSize = VA.getValVT().getSizeInBits() / 8;
+ int FI = MF.getFrameInfo().CreateFixedObject(ValSize, Offset, true);
+ InVals.push_back(
+ DAG.getLoad(VA.getValVT(), DL, Chain,
+ DAG.getFrameIndex(FI, getPointerTy(MF.getDataLayout())),
+ MachinePointerInfo::getFixedStack(MF, FI)));
}
assert(!IsVarArg && "TODO implement var args");
@@ -198,6 +233,224 @@ Register VETargetLowering::getRegisterByName(const char *RegName, LLT VT,
// TargetLowering Implementation
//===----------------------------------------------------------------------===//
+SDValue VETargetLowering::LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const {
+ SelectionDAG &DAG = CLI.DAG;
+ SDLoc DL = CLI.DL;
+ SDValue Chain = CLI.Chain;
+ auto PtrVT = getPointerTy(DAG.getDataLayout());
+
+ // VE target does not yet support tail call optimization.
+ CLI.IsTailCall = false;
+
+ // Get the base offset of the outgoing arguments stack space.
+ unsigned ArgsBaseOffset = 176;
+ // Get the size of the preserved arguments area
+ unsigned ArgsPreserved = 8 * 8u;
+
+ // Analyze operands of the call, assigning locations to each operand.
+ SmallVector<CCValAssign, 16> ArgLocs;
+ CCState CCInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), ArgLocs,
+ *DAG.getContext());
+ // Allocate the preserved area first.
+ CCInfo.AllocateStack(ArgsPreserved, 8);
+ // We already allocated the preserved area, so the stack offset computed
+ // by CC_VE would be correct now.
+ CCInfo.AnalyzeCallOperands(CLI.Outs, CC_VE);
+
+ assert(!CLI.IsVarArg);
+
+ // Get the size of the outgoing arguments stack space requirement.
+ unsigned ArgsSize = CCInfo.getNextStackOffset();
+
+ // Keep stack frames 16-byte aligned.
+ ArgsSize = alignTo(ArgsSize, 16);
+
+ // Adjust the stack pointer to make room for the arguments.
+ // FIXME: Use hasReservedCallFrame to avoid %sp adjustments around all calls
+ // with more than 6 arguments.
+ Chain = DAG.getCALLSEQ_START(Chain, ArgsSize, 0, DL);
+
+ // Collect the set of registers to pass to the function and their values.
+ // This will be emitted as a sequence of CopyToReg nodes glued to the call
+ // instruction.
+ SmallVector<std::pair<unsigned, SDValue>, 8> RegsToPass;
+
+ // Collect chains from all the memory operations that copy arguments to the
+ // stack. They must follow the stack pointer adjustment above and precede the
+ // call instruction itself.
+ SmallVector<SDValue, 8> MemOpChains;
+
+ // VE needs to get address of callee function in a register
+ // So, prepare to copy it to SX12 here.
+
+ // If the callee is a GlobalAddress node (quite common, every direct call is)
+ // turn it into a TargetGlobalAddress node so that legalize doesn't hack it.
+ // Likewise ExternalSymbol -> TargetExternalSymbol.
+ SDValue Callee = CLI.Callee;
+
+ assert(!isPositionIndependent() && "TODO PIC");
+
+ // Turn GlobalAddress/ExternalSymbol node into a value node
+ // containing the address of them here.
+ if (isa<GlobalAddressSDNode>(Callee)) {
+ Callee =
+ makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ } else if (isa<ExternalSymbolSDNode>(Callee)) {
+ Callee =
+ makeHiLoPair(Callee, VEMCExpr::VK_VE_HI32, VEMCExpr::VK_VE_LO32, DAG);
+ }
+
+ RegsToPass.push_back(std::make_pair(VE::SX12, Callee));
+
+ for (unsigned i = 0, e = ArgLocs.size(); i != e; ++i) {
+ CCValAssign &VA = ArgLocs[i];
+ SDValue Arg = CLI.OutVals[i];
+
+ // Promote the value if needed.
+ switch (VA.getLocInfo()) {
+ default:
+ llvm_unreachable("Unknown location info!");
+ case CCValAssign::Full:
+ break;
+ case CCValAssign::SExt:
+ Arg = DAG.getNode(ISD::SIGN_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::ZExt:
+ Arg = DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ case CCValAssign::AExt:
+ Arg = DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Arg);
+ break;
+ }
+
+ if (VA.isRegLoc()) {
+ RegsToPass.push_back(std::make_pair(VA.getLocReg(), Arg));
+ continue;
+ }
+
+ assert(VA.isMemLoc());
+
+ // Create a store off the stack pointer for this argument.
+ SDValue StackPtr = DAG.getRegister(VE::SX11, PtrVT);
+ // The argument area starts at %fp+176 in the callee frame,
+ // %sp+176 in ours.
+ SDValue PtrOff =
+ DAG.getIntPtrConstant(VA.getLocMemOffset() + ArgsBaseOffset, DL);
+ PtrOff = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr, PtrOff);
+ MemOpChains.push_back(
+ DAG.getStore(Chain, DL, Arg, PtrOff, MachinePointerInfo()));
+ }
+
+ // Emit all stores, make sure they occur before the call.
+ if (!MemOpChains.empty())
+ Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
+
+ // Build a sequence of CopyToReg nodes glued together with token chain and
+ // glue operands which copy the outgoing args into registers. The InGlue is
+ // necessary since all emitted instructions must be stuck together in order
+ // to pass the live physical registers.
+ SDValue InGlue;
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i) {
+ Chain = DAG.getCopyToReg(Chain, DL, RegsToPass[i].first,
+ RegsToPass[i].second, InGlue);
+ InGlue = Chain.getValue(1);
+ }
+
+ // Build the operands for the call instruction itself.
+ SmallVector<SDValue, 8> Ops;
+ Ops.push_back(Chain);
+ for (unsigned i = 0, e = RegsToPass.size(); i != e; ++i)
+ Ops.push_back(DAG.getRegister(RegsToPass[i].first,
+ RegsToPass[i].second.getValueType()));
+
+ // Add a register mask operand representing the call-preserved registers.
+ const VERegisterInfo *TRI = Subtarget->getRegisterInfo();
+ const uint32_t *Mask =
+ TRI->getCallPreservedMask(DAG.getMachineFunction(), CLI.CallConv);
+ assert(Mask && "Missing call preserved mask for calling convention");
+ Ops.push_back(DAG.getRegisterMask(Mask));
+
+ // Make sure the CopyToReg nodes are glued to the call instruction which
+ // consumes the registers.
+ if (InGlue.getNode())
+ Ops.push_back(InGlue);
+
+ // Now the call itself.
+ SDVTList NodeTys = DAG.getVTList(MVT::Other, MVT::Glue);
+ Chain = DAG.getNode(VEISD::CALL, DL, NodeTys, Ops);
+ InGlue = Chain.getValue(1);
+
+ // Revert the stack pointer immediately after the call.
+ Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(ArgsSize, DL, true),
+ DAG.getIntPtrConstant(0, DL, true), InGlue, DL);
+ InGlue = Chain.getValue(1);
+
+ // Now extract the return values. This is more or less the same as
+ // LowerFormalArguments.
+
+ // Assign locations to each value returned by this call.
+ SmallVector<CCValAssign, 16> RVLocs;
+ CCState RVInfo(CLI.CallConv, CLI.IsVarArg, DAG.getMachineFunction(), RVLocs,
+ *DAG.getContext());
+
+ // Set inreg flag manually for codegen generated library calls that
+ // return float.
+ if (CLI.Ins.size() == 1 && CLI.Ins[0].VT == MVT::f32 && !CLI.CS)
+ CLI.Ins[0].Flags.setInReg();
+
+ RVInfo.AnalyzeCallResult(CLI.Ins, RetCC_VE);
+
+ // Copy all of the result registers out of their specified physreg.
+ for (unsigned i = 0; i != RVLocs.size(); ++i) {
+ CCValAssign &VA = RVLocs[i];
+ unsigned Reg = VA.getLocReg();
+
+ // When returning 'inreg {i32, i32 }', two consecutive i32 arguments can
+ // reside in the same register in the high and low bits. Reuse the
+ // CopyFromReg previous node to avoid duplicate copies.
+ SDValue RV;
+ if (RegisterSDNode *SrcReg = dyn_cast<RegisterSDNode>(Chain.getOperand(1)))
+ if (SrcReg->getReg() == Reg && Chain->getOpcode() == ISD::CopyFromReg)
+ RV = Chain.getValue(0);
+
+ // But usually we'll create a new CopyFromReg for a different register.
+ if (!RV.getNode()) {
+ RV = DAG.getCopyFromReg(Chain, DL, Reg, RVLocs[i].getLocVT(), InGlue);
+ Chain = RV.getValue(1);
+ InGlue = Chain.getValue(2);
+ }
+
+ // Get the high bits for i32 struct elements.
+ if (VA.getValVT() == MVT::i32 && VA.needsCustom())
+ RV = DAG.getNode(ISD::SRL, DL, VA.getLocVT(), RV,
+ DAG.getConstant(32, DL, MVT::i32));
+
+ // The callee promoted the return value, so insert an Assert?ext SDNode so
+ // we won't promote the value again in this function.
+ switch (VA.getLocInfo()) {
+ case CCValAssign::SExt:
+ RV = DAG.getNode(ISD::AssertSext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ case CCValAssign::ZExt:
+ RV = DAG.getNode(ISD::AssertZext, DL, VA.getLocVT(), RV,
+ DAG.getValueType(VA.getValVT()));
+ break;
+ default:
+ break;
+ }
+
+ // Truncate the register down to the return value type.
+ if (VA.isExtInLoc())
+ RV = DAG.getNode(ISD::TRUNCATE, DL, VA.getValVT(), RV);
+
+ InVals.push_back(RV);
+ }
+
+ return Chain;
+}
+
/// isFPImmLegal - Returns true if the target can instruction select the
/// specified FP immediate natively. If false, the legalizer will
/// materialize the FP immediate as a load from a constant pool.
@@ -268,6 +521,7 @@ const char *VETargetLowering::getTargetNodeName(unsigned Opcode) const {
break;
TARGET_NODE_CASE(Lo)
TARGET_NODE_CASE(Hi)
+ TARGET_NODE_CASE(CALL)
TARGET_NODE_CASE(RET_FLAG)
}
#undef TARGET_NODE_CASE
@@ -320,6 +574,7 @@ SDValue VETargetLowering::makeAddress(SDValue Op, SelectionDAG &DAG) const {
}
/// Custom Lower {
+
SDValue VETargetLowering::LowerGlobalAddress(SDValue Op,
SelectionDAG &DAG) const {
return makeAddress(Op, DAG);
diff --git a/llvm/lib/Target/VE/VEISelLowering.h b/llvm/lib/Target/VE/VEISelLowering.h
index d6b719568307..eb7835e6a8ae 100644
--- a/llvm/lib/Target/VE/VEISelLowering.h
+++ b/llvm/lib/Target/VE/VEISelLowering.h
@@ -27,6 +27,7 @@ enum NodeType : unsigned {
Hi,
Lo, // Hi/Lo operations, typically on a global address.
+ CALL, // A call instruction.
RET_FLAG, // Return with a flag operand.
};
}
@@ -55,6 +56,9 @@ class VETargetLowering : public TargetLowering {
const SDLoc &dl, SelectionDAG &DAG,
SmallVectorImpl<SDValue> &InVals) const override;
+ SDValue LowerCall(TargetLowering::CallLoweringInfo &CLI,
+ SmallVectorImpl<SDValue> &InVals) const override;
+
bool CanLowerReturn(CallingConv::ID CallConv, MachineFunction &MF,
bool isVarArg,
const SmallVectorImpl<ISD::OutputArg> &ArgsFlags,
diff --git a/llvm/lib/Target/VE/VEInstrInfo.td b/llvm/lib/Target/VE/VEInstrInfo.td
index 0c5fd29e1e89..3bd50d3d0759 100644
--- a/llvm/lib/Target/VE/VEInstrInfo.td
+++ b/llvm/lib/Target/VE/VEInstrInfo.td
@@ -150,6 +150,11 @@ def brtarget32 : Operand<OtherVT> {
let EncoderMethod = "getBranchTarget32OpValue";
}
+def calltarget : Operand<i64> {
+ let EncoderMethod = "getCallTargetOpValue";
+ let DecoderMethod = "DecodeCall";
+}
+
def simm7Op32 : Operand<i32> {
let DecoderMethod = "DecodeSIMM7";
}
@@ -192,7 +197,10 @@ def callseq_start : SDNode<"ISD::CALLSEQ_START", SDT_SPCallSeqStart,
def callseq_end : SDNode<"ISD::CALLSEQ_END", SDT_SPCallSeqEnd,
[SDNPHasChain, SDNPOptInGlue, SDNPOutGlue]>;
-// def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def SDT_SPCall : SDTypeProfile<0, -1, [SDTCisVT<0, i64>]>;
+def call : SDNode<"VEISD::CALL", SDT_SPCall,
+ [SDNPHasChain, SDNPOptInGlue, SDNPOutGlue,
+ SDNPVariadic]>;
def retflag : SDNode<"VEISD::RET_FLAG", SDTNone,
[SDNPHasChain, SDNPOptInGlue, SDNPVariadic]>;
@@ -549,6 +557,11 @@ let cx = 0 in
defm LEA32 : RMm<"lea", 0x06, I32, i32, simm7Op32, simm32Op32, add>;
}
+let cx = 0, cy = 1, cz = 0, sz = 0, hasSideEffects = 0 in {
+ def LEAasx : RM<
+ 0x06, (outs I64:$sx), (ins MEMri:$addr),
+ "lea $sx,$addr", [(set iPTR:$sx, ADDRri:$addr)]>;
+}
// 5.3.2.2. Fixed-Point Arithmetic Operation Instructions
@@ -775,6 +788,27 @@ def MONC : RR<
0x3F, (outs), (ins),
"monc">;
+//===----------------------------------------------------------------------===//
+// Instructions for CodeGenOnly
+//===----------------------------------------------------------------------===//
+
+let isCodeGenOnly = 1 in {
+
+// Call instruction
+let Defs = [SX10], Uses = [SX11], hasDelaySlot = 1, isCall = 1, hasSideEffects = 0 in {
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 0, sz = 0 in
+def CALL : RM<
+ 0x08, (outs), (ins calltarget:$imm32, variable_ops),
+ "bsic %lr, $imm32">;
+// use sz to represent a register
+let cx = 0, sx = 10, cy = 0, sy = 0, cz = 1, imm32 = 0 in
+def CALLr : RM<
+ 0x08, (outs), (ins I64:$sz, variable_ops),
+ "bsic %lr, (,$sz)">;
+}
+
+}
+
//===----------------------------------------------------------------------===//
// Pattern Matchings
//===----------------------------------------------------------------------===//
@@ -893,6 +927,13 @@ def : Pat<(add (VEhi tglobaladdr:$in1), (VElo tglobaladdr:$in2)),
(LEASLrzi (ANDrm0 (LEAzzi tglobaladdr:$in2), 32),
(tglobaladdr:$in1))>;
+// Calls
+def : Pat<(call tglobaladdr:$dst),
+ (CALL tglobaladdr:$dst)>;
+def : Pat<(call i64:$dst),
+ (CALLr i64:$dst)>;
+
+
//===----------------------------------------------------------------------===//
// Pseudo Instructions
//===----------------------------------------------------------------------===//
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
new file mode 100644
index 000000000000..1addfc7174eb
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.cpp
@@ -0,0 +1,13 @@
+//===-- VEMachineFunctionInfo.cpp - VE Machine Function Info --------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "VEMachineFunctionInfo.h"
+
+using namespace llvm;
+
+void VEMachineFunctionInfo::anchor() {}
diff --git a/llvm/lib/Target/VE/VEMachineFunctionInfo.h b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
new file mode 100644
index 000000000000..b89520fd2174
--- /dev/null
+++ b/llvm/lib/Target/VE/VEMachineFunctionInfo.h
@@ -0,0 +1,35 @@
+//===- VEMachineFunctionInfo.h - VE Machine Function Info -------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file declares VE specific per-machine-function information.
+//
+//===----------------------------------------------------------------------===//
+#ifndef LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+#define LLVM_LIB_TARGET_VE_VEMACHINEFUNCTIONINFO_H
+
+#include "llvm/CodeGen/MachineFunction.h"
+
+namespace llvm {
+
+class VEMachineFunctionInfo : public MachineFunctionInfo {
+ virtual void anchor();
+
+private:
+ /// IsLeafProc - True if the function is a leaf procedure.
+ bool IsLeafProc;
+
+public:
+ VEMachineFunctionInfo() : IsLeafProc(false) {}
+ explicit VEMachineFunctionInfo(MachineFunction &MF) : IsLeafProc(false) {}
+
+ void setLeafProc(bool rhs) { IsLeafProc = rhs; }
+ bool isLeafProc() const { return IsLeafProc; }
+};
+} // namespace llvm
+
+#endif
diff --git a/llvm/test/CodeGen/VE/call.ll b/llvm/test/CodeGen/VE/call.ll
new file mode 100644
index 000000000000..c03f5bcf84be
--- /dev/null
+++ b/llvm/test/CodeGen/VE/call.ll
@@ -0,0 +1,124 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define i32 @sample_call() {
+; CHECK-LABEL: sample_call:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, sample_add@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, sample_add@hi(%s0)
+; CHECK-NEXT: or %s0, 1, (0)1
+; CHECK-NEXT: or %s1, 2, (0)1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @sample_add(i32 1, i32 2)
+ ret i32 %r
+}
+
+declare i32 @sample_add(i32, i32)
+
+define i32 @stack_call_int() {
+; CHECK-LABEL: stack_call_int:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, 10, (0)1
+; CHECK-NEXT: stl %s0, 248(,%s11)
+; CHECK-NEXT: or %s34, 9, (0)1
+; CHECK-NEXT: lea %s0, stack_callee_int@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, stack_callee_int@hi(%s0)
+; CHECK-NEXT: or %s0, 1, (0)1
+; CHECK-NEXT: or %s1, 2, (0)1
+; CHECK-NEXT: or %s2, 3, (0)1
+; CHECK-NEXT: or %s3, 4, (0)1
+; CHECK-NEXT: or %s4, 5, (0)1
+; CHECK-NEXT: or %s5, 6, (0)1
+; CHECK-NEXT: or %s6, 7, (0)1
+; CHECK-NEXT: or %s7, 8, (0)1
+; CHECK-NEXT: stl %s34, 240(,%s11)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @stack_callee_int(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ ret i32 %r
+}
+
+declare i32 @stack_callee_int(i32, i32, i32, i32, i32, i32, i32, i32, i32, i32)
+
+define i32 @stack_call_int_szext() {
+; CHECK-LABEL: stack_call_int_szext:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: or %s0, -1, (0)1
+; CHECK-NEXT: stl %s0, 248(,%s11)
+; CHECK-NEXT: lea %s34, 65535
+; CHECK-NEXT: lea %s1, stack_callee_int_szext@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, stack_callee_int_szext@hi(%s1)
+; CHECK-NEXT: lea %s1, 255
+; CHECK-NEXT: or %s2, 3, (0)1
+; CHECK-NEXT: or %s3, 4, (0)1
+; CHECK-NEXT: or %s4, 5, (0)1
+; CHECK-NEXT: or %s5, 6, (0)1
+; CHECK-NEXT: or %s6, 7, (0)1
+; CHECK-NEXT: or %s7, 8, (0)1
+; CHECK-NEXT: stl %s34, 240(,%s11)
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call i32 @stack_callee_int_szext(i1 -1, i8 -1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i16 -1, i8 -1)
+ ret i32 %r
+}
+
+declare i32 @stack_callee_int_szext(i1 signext, i8 zeroext, i32, i32, i32, i32, i32, i32, i16 zeroext, i8 signext)
+
+define float @stack_call_float() {
+; CHECK-LABEL: stack_call_float:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, 1092616192
+; CHECK-NEXT: stl %s0, 252(,%s11)
+; CHECK-NEXT: lea %s0, 1091567616
+; CHECK-NEXT: lea %s1, stack_callee_float@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(%s1)
+; CHECK-NEXT: lea.sl %s1, 1065353216
+; CHECK-NEXT: lea.sl %s2, 1073741824
+; CHECK-NEXT: lea.sl %s3, 1077936128
+; CHECK-NEXT: lea.sl %s4, 1082130432
+; CHECK-NEXT: lea.sl %s5, 1084227584
+; CHECK-NEXT: lea.sl %s6, 1086324736
+; CHECK-NEXT: lea.sl %s7, 1088421888
+; CHECK-NEXT: lea.sl %s34, 1090519040
+; CHECK-NEXT: stl %s0, 244(,%s11)
+; CHECK-NEXT: or %s0, 0, %s1
+; CHECK-NEXT: or %s1, 0, %s2
+; CHECK-NEXT: or %s2, 0, %s3
+; CHECK-NEXT: or %s3, 0, %s4
+; CHECK-NEXT: or %s4, 0, %s5
+; CHECK-NEXT: or %s5, 0, %s6
+; CHECK-NEXT: or %s6, 0, %s7
+; CHECK-NEXT: or %s7, 0, %s34
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call float @stack_callee_float(float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0)
+ ret float %r
+}
+
+declare float @stack_callee_float(float, float, float, float, float, float, float, float, float, float)
+
+define float @stack_call_float2(float %p0) {
+; CHECK-LABEL: stack_call_float2:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: stu %s0, 252(,%s11)
+; CHECK-NEXT: lea %s1, stack_callee_float@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s12, stack_callee_float@hi(%s1)
+; CHECK-NEXT: stu %s0, 244(,%s11)
+; CHECK-NEXT: or %s1, 0, %s0
+; CHECK-NEXT: or %s2, 0, %s0
+; CHECK-NEXT: or %s3, 0, %s0
+; CHECK-NEXT: or %s4, 0, %s0
+; CHECK-NEXT: or %s5, 0, %s0
+; CHECK-NEXT: or %s6, 0, %s0
+; CHECK-NEXT: or %s7, 0, %s0
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: or %s11, 0, %s9
+ %r = tail call float @stack_callee_float(float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0, float %p0)
+ ret float %r
+}
+
diff --git a/llvm/test/CodeGen/VE/callee.ll b/llvm/test/CodeGen/VE/callee.ll
new file mode 100644
index 000000000000..08d271c6f9c5
--- /dev/null
+++ b/llvm/test/CodeGen/VE/callee.ll
@@ -0,0 +1,41 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+define i32 @stack_stack_arg_i32_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i32 %5, i32 %6, i32 %7, i32 %8, i32 %9) {
+; CHECK-LABEL: stack_stack_arg_i32_r9:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldl.sx %s0, 424(,%s11)
+; CHECK-NEXT: or %s11, 0, %s9
+ ret i32 %9
+}
+
+define i64 @stack_stack_arg_i64_r9(i1 %0, i8 %1, i16 %2, i32 %3, i64 %4, i64 %5, i64 %6, i64 %7, i64 %8, i64 %9) {
+; CHECK-LABEL: stack_stack_arg_i64_r9:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ld %s0, 424(,%s11)
+; CHECK-NEXT: or %s11, 0, %s9
+ ret i64 %9
+}
+
+define float @stack_stack_arg_f32_r9(float %p0, float %p1, float %p2, float %p3, float %p4, float %p5, float %p6, float %p7, float %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_f32_r9:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldu %s0, 428(,%s11)
+; CHECK-NEXT: or %s11, 0, %s9
+ ret float %s1
+}
+
+define i32 @stack_stack_arg_i32f32_r8(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_i32f32_r8:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldl.sx %s0, 416(,%s11)
+; CHECK-NEXT: or %s11, 0, %s9
+ ret i32 %s0
+}
+
+define float @stack_stack_arg_i32f32_r9(i32 %p0, float %p1, i32 %p2, float %p3, i32 %p4, float %p5, i32 %p6, float %p7, i32 %s0, float %s1) {
+; CHECK-LABEL: stack_stack_arg_i32f32_r9:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: ldu %s0, 428(,%s11)
+; CHECK-NEXT: or %s11, 0, %s9
+ ret float %s1
+}
diff --git a/llvm/test/CodeGen/VE/callstruct.ll b/llvm/test/CodeGen/VE/callstruct.ll
new file mode 100644
index 000000000000..a76a9511f73f
--- /dev/null
+++ b/llvm/test/CodeGen/VE/callstruct.ll
@@ -0,0 +1,46 @@
+; RUN: llc < %s -mtriple=ve-unknown-unknown | FileCheck %s
+
+%struct.a = type { i32, i32 }
+
+@A = common global %struct.a zeroinitializer, align 4
+
+; Function Attrs: norecurse nounwind
+define void @fun(%struct.a* noalias nocapture sret %a, i32 %p1, i32 %p2) {
+; CHECK-LABEL: fun:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: stl %s1, (,%s0)
+; CHECK-NEXT: stl %s2, 4(,%s0)
+; CHECK-NEXT: or %s11, 0, %s9
+ %a.zero = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 0
+ store i32 %p1, i32* %a.zero, align 4
+ %a.one = getelementptr inbounds %struct.a, %struct.a* %a, i64 0, i32 1
+ store i32 %p2, i32* %a.one, align 4
+ ret void
+}
+
+; Function Attrs: nounwind
+define void @caller() {
+; CHECK-LABEL: caller:
+; CHECK: .LBB{{[0-9]+}}_2:
+; CHECK-NEXT: lea %s0, callee@lo
+; CHECK-NEXT: and %s0, %s0, (32)0
+; CHECK-NEXT: lea.sl %s12, callee@hi(%s0)
+; CHECK-NEXT: lea %s0,-8(,%s9)
+; CHECK-NEXT: or %s1, 3, (0)1
+; CHECK-NEXT: or %s2, 4, (0)1
+; CHECK-NEXT: bsic %lr, (,%s12)
+; CHECK-NEXT: ld %s0, -8(,%s9)
+; CHECK-NEXT: lea %s1, A@lo
+; CHECK-NEXT: and %s1, %s1, (32)0
+; CHECK-NEXT: lea.sl %s1, A@hi(%s1)
+; CHECK-NEXT: st %s0, (,%s1)
+; CHECK-NEXT: or %s11, 0, %s9
+ %a = alloca i64, align 8
+ %a.bc = bitcast i64* %a to %struct.a*
+ call void @callee(%struct.a* nonnull sret %a.bc, i32 3, i32 4)
+ %a.val = load i64, i64* %a, align 8
+ store i64 %a.val, i64* bitcast (%struct.a* @A to i64*), align 4
+ ret void
+}
+
+declare void @callee(%struct.a* sret, i32, i32)
More information about the llvm-commits
mailing list