[llvm] aa3519f - [SystemZ][z/OS] Initial implementation for lowerCall on z/OS
Anirudh Prasad via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 21 06:49:04 PDT 2021
Author: Anirudh Prasad
Date: 2021-10-21T09:48:59-04:00
New Revision: aa3519f178fc6ea563f950a4077b34d8dc6c4470
URL: https://github.com/llvm/llvm-project/commit/aa3519f178fc6ea563f950a4077b34d8dc6c4470
DIFF: https://github.com/llvm/llvm-project/commit/aa3519f178fc6ea563f950a4077b34d8dc6c4470.diff
LOG: [SystemZ][z/OS] Initial implementation for lowerCall on z/OS
- This patch provides an initial implementation of call lowering (lowerCall) on z/OS according to the XPLINK64 calling convention.
- A series of changes have been made to SystemZCallingConv.td to account for XPLINK64, including a new helper function that shadows the stack whenever a register is allocated, keeping stack slots in step with register assignments.
- For copying an f64 to a gr64, and an f128 / 128-bit vector type to a gr64 pair (R2Q), a `CCBitConvertToType` rule has been added, and the value is bitcast accordingly in the lowering phase (see the sketch below).
- Support for the ADA register (R5) will be provided in a later patch.
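As an illustrative sketch of the new vararg path (adapted from the added call-zos-vararg.ll test; the @demo name is invented): the non-fixed double below is bitcast to i64 by the new CCBitConvertToType rule and passed in GPR3, after the two fixed i64 arguments take GPR1 and GPR2:

  declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)

  define i64 @demo() {
  entry:
    %ret = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
    ret i64 %ret
  }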
Reviewed By: uweigand
Differential Revision: https://reviews.llvm.org/D111662
Added:
llvm/test/CodeGen/SystemZ/call-zos-01.ll
llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
llvm/test/CodeGen/SystemZ/call-zos-vec.ll
Modified:
llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
llvm/lib/Target/SystemZ/SystemZCallingConv.h
llvm/lib/Target/SystemZ/SystemZCallingConv.td
llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
llvm/lib/Target/TargetMachine.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
index 86eb8365d527f..9c73757d7f5cf 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.cpp
@@ -28,3 +28,7 @@ const MCPhysReg SystemZ::XPLINK64ArgGPRs[SystemZ::XPLINK64NumArgGPRs] = {
const MCPhysReg SystemZ::XPLINK64ArgFPRs[SystemZ::XPLINK64NumArgFPRs] = {
SystemZ::F0D, SystemZ::F2D, SystemZ::F4D, SystemZ::F6D
};
+
+const MCPhysReg SystemZ::XPLINK64ArgVRs[SystemZ::XPLINK64NumArgVRs] = {
+ SystemZ::V24, SystemZ::V25, SystemZ::V26, SystemZ::V27,
+ SystemZ::V28, SystemZ::V29, SystemZ::V30, SystemZ::V31};
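A hedged sketch of how this new vector argument register list interacts with the GPR slots (adapted from the added call-zos-vec.ll test; @demo_vec is an invented name): the fixed vector is passed in V24 but still occupies a 16-byte slot in the parameter area, shadowing GPR1 and GPR2, so the trailing i64 is passed in GPR3:

  declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)

  define i64 @demo_vec(i64 %n, <2 x i64> %v1) {
  entry:
    %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
    ret i64 %ret
  }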
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.h b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
index 96c1080d52375..f82c61c0f344e 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.h
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.h
@@ -27,6 +27,9 @@ namespace SystemZ {
const unsigned XPLINK64NumArgFPRs = 4;
extern const MCPhysReg XPLINK64ArgFPRs[XPLINK64NumArgFPRs];
+
+ const unsigned XPLINK64NumArgVRs = 8;
+ extern const MCPhysReg XPLINK64ArgVRs[XPLINK64NumArgVRs];
} // end namespace SystemZ
class SystemZCCState : public CCState {
@@ -124,7 +127,9 @@ inline bool CC_SystemZ_I128Indirect(unsigned &ValNo, MVT &ValVT,
else
llvm_unreachable("Unknown Calling Convention!");
- unsigned Offset = Reg ? 0 : State.AllocateStack(8, Align(8));
+ unsigned Offset = Reg && !Subtarget.isTargetXPLINK64()
+ ? 0
+ : State.AllocateStack(8, Align(8));
// Use that same location for all the pending parts.
for (auto &It : PendingMembers) {
@@ -167,12 +172,6 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
CCValAssign::LocInfo &LocInfo,
ISD::ArgFlagsTy &ArgFlags,
CCState &State) {
- if (LocVT.getSizeInBits() < 128)
- return false;
-
- if (static_cast<SystemZCCState *>(&State)->IsFixed(ValNo))
- return false;
-
// For any C or C++ program, this should always be
// false, since it is illegal to have a function
// where the first argument is variadic. Therefore
@@ -185,21 +184,59 @@ inline bool CC_XPLINK64_Allocate128BitVararg(unsigned &ValNo, MVT &ValVT,
bool AllocGPR3 = State.AllocateReg(SystemZ::R3D);
// If GPR2 and GPR3 are available, then we may pass vararg in R2Q.
- if (AllocGPR2 && AllocGPR3) {
- State.addLoc(
- CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ // If only GPR3 is available, we need to set custom handling to copy
+ // hi bits into GPR3.
+ // Either way, we allocate on the stack.
+ if (AllocGPR3) {
+ // For f128 and vector var arg case, set the bitcast flag to bitcast to
+ // i128.
+ LocVT = MVT::i128;
+ LocInfo = CCValAssign::BCvt;
+ auto Offset = State.AllocateStack(16, Align(8));
+ if (AllocGPR2)
+ State.addLoc(
+ CCValAssign::getReg(ValNo, ValVT, SystemZ::R2Q, LocVT, LocInfo));
+ else
+ State.addLoc(
+ CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
return true;
}
- // If only GPR3 is available, we allocate on stack but need to
- // set custom handling to copy hi bits into GPR3.
- if (!AllocGPR2 && AllocGPR3) {
- auto Offset = State.AllocateStack(16, Align(8));
- State.addLoc(
- CCValAssign::getCustomMem(ValNo, ValVT, Offset, LocVT, LocInfo));
- return true;
+ return false;
+}
+
+inline bool CC_XPLINK64_Shadow_Stack(unsigned &ValNo, MVT &ValVT, MVT &LocVT,
+ CCValAssign::LocInfo &LocInfo,
+ ISD::ArgFlagsTy &ArgFlags,
+ CCState &State) {
+ ArrayRef<MCPhysReg> RegList;
+
+ switch (LocVT.SimpleTy) {
+ case MVT::i64:
+ RegList = SystemZ::XPLINK64ArgGPRs;
+ break;
+ case MVT::v16i8:
+ case MVT::v8i16:
+ case MVT::v4i32:
+ case MVT::v2i64:
+ case MVT::v4f32:
+ case MVT::v2f64:
+ RegList = SystemZ::XPLINK64ArgVRs;
+ break;
+ case MVT::f32:
+ case MVT::f64:
+ case MVT::f128:
+ RegList = SystemZ::XPLINK64ArgFPRs;
+ break;
+ default:
+ return false;
}
+ unsigned UnallocatedRegisterIndex = State.getFirstUnallocated(RegList);
+ // Whenever a register from the list is still available, shadow it by
+ // allocating the corresponding stack slot as well.
+ if (UnallocatedRegisterIndex < RegList.size())
+ State.AllocateStack(LocVT.getSizeInBits() / 8, Align(8));
+
return false;
}
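A sketch of the effect of CC_XPLINK64_Shadow_Stack (mirroring the added call-zos-vararg.ll test; @demo_slots is an invented name): the first three 8-byte slots are shadowed by GPR1-GPR3, so with the 2048-byte stack pointer bias and the 128-byte call frame, the fourth argument is stored at 2048 + 128 + 24 = 2200(4):

  declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)

  define i64 @demo_slots() {
  entry:
    %ret = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00)
    ret i64 %ret
  }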
diff --git a/llvm/lib/Target/SystemZ/SystemZCallingConv.td b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
index c606e78b69b6c..373023effb4a1 100644
--- a/llvm/lib/Target/SystemZ/SystemZCallingConv.td
+++ b/llvm/lib/Target/SystemZ/SystemZCallingConv.td
@@ -224,6 +224,17 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// XPLINK64 ABI compliant code widens integral types smaller than i64
// to i64 before placing the parameters either on the stack or in registers.
CCIfType<[i32], CCIfExtend<CCPromoteToType<i64>>>,
+ // Promote f32 to f64 and bitcast to i64, if it needs to be passed in GPRs.
+ CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
+ CCIfType<[f64], CCIfNotFixed<CCBitConvertToType<i64>>>,
+ // A long double can only be passed in GPR2 and GPR3, if available;
+ // hence R2Q.
+ CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
+ // Non fixed vector arguments are treated in the same way as long
+ // doubles.
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
// A SwiftSelf is passed in callee-saved R10.
CCIfSwiftSelf<CCIfType<[i64], CCAssignToReg<[R10D]>>>,
@@ -238,7 +249,7 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 3 integer arguments are passed in registers R1D-R3D.
// The rest will be passed in the user area. The address offset of the user
// area can be found in register R4D.
- CCIfType<[i32], CCAssignToReg<[R1L, R2L, R3L]>>,
+ CCIfType<[i64], CCCustom<"CC_XPLINK64_Shadow_Stack">>,
CCIfType<[i64], CCAssignToReg<[R1D, R2D, R3D]>>,
// The first 8 named vector arguments are passed in V24-V31. Sub-128 vectors
@@ -247,6 +258,9 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>>,
+ CCIfSubtarget<"hasVector()",
+ CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
+ CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>>,
CCIfSubtarget<"hasVector()",
CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
CCIfFixed<CCAssignToReg<[V24, V25, V26, V27,
@@ -255,28 +269,15 @@ def CC_SystemZ_XPLINK64 : CallingConv<[
// The first 4 named float and double arguments are passed in registers FPR0-FPR6.
// The rest will be passed in the user area.
CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f32, f64], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f32], CCIfFixed<CCAssignToReg<[F0S, F2S, F4S, F6S]>>>,
CCIfType<[f64], CCIfFixed<CCAssignToReg<[F0D, F2D, F4D, F6D]>>>,
// The first 2 long double arguments are passed in register FPR0/FPR2
// and FPR4/FPR6. The rest will be passed in the user area.
CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Reg">>>,
+ CCIfType<[f128], CCIfFixed<CCCustom<"CC_XPLINK64_Shadow_Stack">>>,
CCIfType<[f128], CCIfFixed<CCAssignToReg<[F0Q, F4Q]>>>,
- // Non fixed floats are passed in GPRs
- // Promote f32 to f64, if it needs to be passed in GPRs.
- CCIfType<[f32], CCIfNotFixed<CCPromoteToType<f64>>>,
- // Assign f64 varargs to their proper GPRs.
- CCIfType<[f64], CCIfNotFixed<CCAssignToReg<[R1D, R2D, R3D]>>>,
- // long double, can only be passed in GPR2 and GPR3, if available,
- // hence R2Q
- CCIfType<[f128], CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>,
-
- // Non fixed vector arguments are treated in the same way as long
- // doubles.
- CCIfSubtarget<"hasVector()",
- CCIfType<[v16i8, v8i16, v4i32, v2i64, v4f32, v2f64],
- CCIfNotFixed<CCCustom<"CC_XPLINK64_Allocate128BitVararg">>>>,
-
// Other arguments are passed in 8-byte-aligned 8-byte stack slots.
CCIfType<[i32, i64, f32, f64], CCAssignToStack<8, 8>>,
// Other f128 arguments are passed in 8-byte-aligned 16-byte stack slots.
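A hedged sketch of the fixed-argument shadowing rules above (mirroring the added call-zos-01.ll test; @demo_fixed is an invented name): the leading double takes FPR0, which shadows the F0Q pair, so the fixed fp128 is assigned the next long double pair, F4Q (FPR4/FPR6):

  declare i64 @pass_floats1(double %arg0, fp128 %arg1)

  define i64 @demo_fixed(fp128 %x, double %d) {
  entry:
    %ret = call i64 (double, fp128) @pass_floats1(double %d, fp128 %x)
    ret i64 %ret
  }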
diff --git a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
index e3a79e6d1a994..a9bc4f30fff66 100644
--- a/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
+++ b/llvm/lib/Target/SystemZ/SystemZISelLowering.cpp
@@ -1358,14 +1358,21 @@ static SDValue convertValVTToLocVT(SelectionDAG &DAG, const SDLoc &DL,
return DAG.getNode(ISD::ZERO_EXTEND, DL, VA.getLocVT(), Value);
case CCValAssign::AExt:
return DAG.getNode(ISD::ANY_EXTEND, DL, VA.getLocVT(), Value);
- case CCValAssign::BCvt:
- // If this is a short vector argument to be stored to the stack,
+ case CCValAssign::BCvt: {
+ assert(VA.getLocVT() == MVT::i64 || VA.getLocVT() == MVT::i128);
+ assert(VA.getValVT().isVector() || VA.getValVT() == MVT::f64 ||
+ VA.getValVT() == MVT::f128);
+ MVT BitCastToType = VA.getValVT().isVector() && VA.getLocVT() == MVT::i64
+ ? MVT::v2i64
+ : VA.getLocVT();
+ Value = DAG.getNode(ISD::BITCAST, DL, BitCastToType, Value);
+ // For ELF, this is a short vector argument to be stored to the stack,
// bitcast to v2i64 and then extract first element.
- assert(VA.getLocVT() == MVT::i64);
- assert(VA.getValVT().isVector());
- Value = DAG.getNode(ISD::BITCAST, DL, MVT::v2i64, Value);
- return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
- DAG.getConstant(0, DL, MVT::i32));
+ if (BitCastToType == MVT::v2i64)
+ return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, DL, VA.getLocVT(), Value,
+ DAG.getConstant(0, DL, MVT::i32));
+ return Value;
+ }
case CCValAssign::Full:
return Value;
default:
@@ -1472,6 +1479,10 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
NumFixedFPRs += 1;
RC = &SystemZ::FP64BitRegClass;
break;
+ case MVT::f128:
+ NumFixedFPRs += 2;
+ RC = &SystemZ::FP128BitRegClass;
+ break;
case MVT::v16i8:
case MVT::v8i16:
case MVT::v4i32:
@@ -1525,7 +1536,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
InVals.push_back(convertLocVTToValVT(DAG, DL, VA, Chain, ArgValue));
}
- if (IsVarArg) {
+ // FIXME: Add support for lowering varargs for XPLINK64 in a later patch.
+ if (IsVarArg && Subtarget.isTargetELF()) {
// Save the number of non-varargs registers for later use by va_start, etc.
FuncInfo->setVarArgsFirstGPR(NumFixedGPRs);
FuncInfo->setVarArgsFirstFPR(NumFixedFPRs);
@@ -1564,6 +1576,8 @@ SDValue SystemZTargetLowering::LowerFormalArguments(
}
}
+ // FIXME: For XPLINK64, add support for handling the incoming "ADA" special
+ // register (R5).
return Chain;
}
@@ -1604,6 +1618,11 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
MachineFunction &MF = DAG.getMachineFunction();
EVT PtrVT = getPointerTy(MF.getDataLayout());
LLVMContext &Ctx = *DAG.getContext();
+ SystemZCallingConventionRegisters *Regs = Subtarget.getSpecialRegisters();
+
+ // FIXME: z/OS support to be added in a later patch.
+ if (Subtarget.isTargetXPLINK64())
+ IsTailCall = false;
// Detect unsupported vector argument and return types.
if (Subtarget.hasVector()) {
@@ -1624,6 +1643,13 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Get a count of how many bytes are to be pushed on the stack.
unsigned NumBytes = ArgCCInfo.getNextStackOffset();
+ if (Subtarget.isTargetXPLINK64())
+ // Although the XPLINK specifications for AMODE64 state that the minimum
+ // size of the param area is 32 bytes and no rounding is otherwise
+ // specified, we round this area up in 64-byte increments to be
+ // compatible with existing compilers.
+ NumBytes = std::max(64U, (unsigned)alignTo(NumBytes, 64));
+
// Mark the start of the call.
if (!IsTailCall)
Chain = DAG.getCALLSEQ_START(Chain, NumBytes, 0, DL);
@@ -1674,17 +1700,24 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
} else
ArgValue = convertValVTToLocVT(DAG, DL, VA, ArgValue);
- if (VA.isRegLoc())
+ if (VA.isRegLoc()) {
+ // In XPLINK64, for the 128-bit vararg case, ArgValue is bitcast to an
+ // MVT::i128 type. We decompose the 128-bit type into a pair of its high
+ // and low values.
+ if (VA.getLocVT() == MVT::i128)
+ ArgValue = lowerI128ToGR128(DAG, ArgValue);
// Queue up the argument copies and emit them at the end.
RegsToPass.push_back(std::make_pair(VA.getLocReg(), ArgValue));
- else {
+ } else {
assert(VA.isMemLoc() && "Argument not register or memory");
// Work out the address of the stack slot. Unpromoted ints and
// floats are passed as right-justified 8-byte values.
if (!StackPtr.getNode())
- StackPtr = DAG.getCopyFromReg(Chain, DL, SystemZ::R15D, PtrVT);
- unsigned Offset = SystemZMC::ELFCallFrameSize + VA.getLocMemOffset();
+ StackPtr = DAG.getCopyFromReg(Chain, DL,
+ Regs->getStackPointerRegister(), PtrVT);
+ unsigned Offset = Regs->getStackPointerBias() + Regs->getCallFrameSize() +
+ VA.getLocMemOffset();
if (VA.getLocVT() == MVT::i32 || VA.getLocVT() == MVT::f32)
Offset += 4;
SDValue Address = DAG.getNode(ISD::ADD, DL, PtrVT, StackPtr,
@@ -1693,6 +1726,17 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// Emit the store.
MemOpChains.push_back(
DAG.getStore(Chain, DL, ArgValue, Address, MachinePointerInfo()));
+
+ // Although long doubles and vectors are passed on the stack when they
+ // are varargs (non-fixed arguments), if a long double or vector
+ // occupies the third and fourth slots of the argument list, GPR3
+ // should still shadow the third slot of the argument list.
+ if (Subtarget.isTargetXPLINK64() && VA.needsCustom()) {
+ SDValue ShadowArgValue =
+ DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i64, ArgValue,
+ DAG.getIntPtrConstant(1, DL));
+ RegsToPass.push_back(std::make_pair(SystemZ::R3D, ShadowArgValue));
+ }
}
}
@@ -1704,6 +1748,7 @@ SystemZTargetLowering::LowerCall(CallLoweringInfo &CLI,
// associated Target* opcodes. Force %r1 to be used for indirect
// tail calls.
SDValue Glue;
+ // FIXME: Add support for XPLINK using the ADA register.
if (auto *G = dyn_cast<GlobalAddressSDNode>(Callee)) {
Callee = DAG.getTargetGlobalAddress(G->getGlobal(), DL, PtrVT);
Callee = DAG.getNode(SystemZISD::PCREL_WRAPPER, DL, PtrVT, Callee);
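A sketch of the straddle case handled by the custom-memory path above (mirroring the added call-zos-vararg.ll test; @demo_straddle is an invented name): the fp128 vararg is stored in the third and fourth 8-byte slots, at 2192(4) and 2200(4), while its high 64 bits are also copied into GPR3, which shadows the third slot:

  declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)

  define i64 @demo_straddle(fp128 %x) {
  entry:
    %ret = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %x)
    ret i64 %ret
  }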
diff --git a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
index 2a4253e2deafa..8ce01074873a2 100644
--- a/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
+++ b/llvm/lib/Target/SystemZ/SystemZRegisterInfo.h
@@ -66,6 +66,12 @@ class SystemZCallingConventionRegisters {
virtual const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const = 0;
+ /// \returns the offset to the locals area.
+ virtual int getCallFrameSize() = 0;
+
+ /// \returns the stack pointer bias.
+ virtual int getStackPointerBias() = 0;
+
/// Destroys the object. Bogus destructor allowing derived classes
/// to override it.
virtual ~SystemZCallingConventionRegisters(){};
@@ -91,6 +97,10 @@ class SystemZXPLINK64Registers : public SystemZCallingConventionRegisters {
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return 128; }
+
+ int getStackPointerBias() override final { return 2048; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZXPLINK64Registers(){};
};
@@ -113,6 +123,10 @@ class SystemZELFRegisters : public SystemZCallingConventionRegisters {
const uint32_t *getCallPreservedMask(const MachineFunction &MF,
CallingConv::ID CC) const override final;
+ int getCallFrameSize() override final { return SystemZMC::ELFCallFrameSize; }
+
+ int getStackPointerBias() override final { return 0; }
+
/// Destroys the object. Bogus destructor overriding base class destructor
~SystemZELFRegisters(){};
};
diff --git a/llvm/lib/Target/TargetMachine.cpp b/llvm/lib/Target/TargetMachine.cpp
index 08295df376e13..390457dbb2bc0 100644
--- a/llvm/lib/Target/TargetMachine.cpp
+++ b/llvm/lib/Target/TargetMachine.cpp
@@ -135,6 +135,9 @@ bool TargetMachine::shouldAssumeDSOLocal(const Module &M,
return true;
}
+ if (TT.isOSBinFormatGOFF())
+ return true;
+
if (TT.isOSBinFormatMachO()) {
if (RM == Reloc::Static)
return true;
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-01.ll b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
new file mode 100644
index 0000000000000..7194d09cba16d
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/call-zos-01.ll
@@ -0,0 +1,191 @@
+; Test the passing of scalar values in GPRs and FPRs in 64-bit calls on z/OS.
+;
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
+
+; CHECK-LABEL: call_char:
+; CHECK: lghi 1, 8
+define i8 @call_char(){
+ %retval = call i8 (i8) @pass_char(i8 8)
+ ret i8 %retval
+}
+
+; CHECK-LABEL: call_short:
+; CHECK: lghi 1, 16
+define i16 @call_short() {
+entry:
+ %retval = call i16 (i16) @pass_short(i16 16)
+ ret i16 %retval
+}
+
+; CHECK-LABEL: call_int:
+; CHECK: lghi 1, 32
+; CHECK: lghi 2, 33
+define i32 @call_int() {
+entry:
+ %retval = call i32 (i32, i32) @pass_int(i32 32, i32 33)
+ ret i32 %retval
+}
+
+; CHECK-LABEL: call_long:
+; CHECK: lghi 1, 64
+; CHECK: lghi 2, 65
+; CHECK: lghi 3, 66
+define i64 @call_long() {
+entry:
+ %retval = call i64 (i64, i64, i64) @pass_long(i64 64, i64 65, i64 66)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_ptr:
+; CHECK: lgr 1, 2
+define i32 @call_ptr(i32* %p1, i32* %p2) {
+entry:
+ %retval = call i32 (i32*) @pass_ptr(i32* %p2)
+ ret i32 %retval
+}
+
+; CHECK-LABEL: call_integrals:
+; CHECK: lghi 1, 64
+; CHECK: lghi 2, 32
+; CHECK: lghi 3, 16
+define i64 @call_integrals() {
+entry:
+ %retval = call i64 (i64, i32, i16, i64) @pass_integrals0(i64 64, i32 32, i16 16, i64 128)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: pass_char:
+; CHECK: lgr 3, 1
+define signext i8 @pass_char(i8 signext %arg) {
+entry:
+ ret i8 %arg
+}
+
+; CHECK-LABEL: pass_short:
+; CHECK: lgr 3, 1
+define signext i16 @pass_short(i16 signext %arg) {
+entry:
+ ret i16 %arg
+}
+
+; CHECK-LABEL: pass_int:
+; CHECK: lgr 3, 2
+define signext i32 @pass_int(i32 signext %arg0, i32 signext %arg1) {
+entry:
+ ret i32 %arg1
+}
+
+; CHECK-LABEL: pass_long:
+; CHECK: agr 1, 2
+; CHECK: agr 3, 1
+define signext i64 @pass_long(i64 signext %arg0, i64 signext %arg1, i64 signext %arg2) {
+entry:
+ %N = add i64 %arg0, %arg1
+ %M = add i64 %N, %arg2
+ ret i64 %M
+}
+
+; CHECK-LABEL: pass_integrals0:
+; CHECK: ag 2, -{{[0-9]+}}(4)
+; CHECK-NEXT: lgr 3, 2
+define signext i64 @pass_integrals0(i64 signext %arg0, i32 signext %arg1, i16 signext %arg2, i64 signext %arg3) {
+entry:
+ %N = sext i32 %arg1 to i64
+ %M = add i64 %arg3, %N
+ ret i64 %M
+}
+
+; CHECK-LABEL: call_float:
+; CHECK: le 0, 0({{[0-9]}})
+define float @call_float() {
+entry:
+ %ret = call float (float) @pass_float(float 0x400921FB60000000)
+ ret float %ret
+}
+
+; CHECK-LABEL: call_double:
+; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: ld 0, 0([[GENREG]])
+define double @call_double() {
+entry:
+ %ret = call double (double) @pass_double(double 3.141000e+00)
+ ret double %ret
+}
+
+; CHECK-LABEL: call_longdouble:
+; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: ld 0, 0([[GENREG]])
+; CHECK-NEXT: ld 2, 8([[GENREG]])
+define fp128 @call_longdouble() {
+entry:
+ %ret = call fp128 (fp128) @pass_longdouble(fp128 0xLE0FC1518450562CD4000921FB5444261)
+ ret fp128 %ret
+}
+
+; CHECK-LABEL: call_floats0
+; CHECK: larl [[GENREG:[0-9]+]], @{{CPI[0-9]+_[0-9]+}}
+; CHECK-NEXT: ld 1, 0([[GENREG]])
+; CHECK-NEXT: ld 3, 8([[GENREG]])
+; CHECK: lxr 5, 0
+; CHECK: lxr 0, 1
+; CHECK: lxr 4, 5
+define i64 @call_floats0(fp128 %arg0, double %arg1) {
+entry:
+ %ret = call i64 (fp128, fp128, double) @pass_floats0(fp128 0xLE0FC1518450562CD4000921FB5444261, fp128 %arg0, double %arg1)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: call_floats1
+; CHECK: lxr 1, 0
+; CHECK: ldr 0, 4
+; CHECK: lxr 4, 1
+define i64 @call_floats1(fp128 %arg0, double %arg1) {
+entry:
+ %ret = call i64 (double, fp128) @pass_floats1(double %arg1, fp128 %arg0)
+ ret i64 %ret
+}
+
+; CHECK-LABEL: pass_float:
+; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
+; CHECK: aeb 0, 0(1)
+define float @pass_float(float %arg) {
+entry:
+ %X = fadd float %arg, 0x400821FB60000000
+ ret float %X
+}
+
+; CHECK-LABEL: pass_double:
+; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
+; CHECK: adb 0, 0(1)
+define double @pass_double(double %arg) {
+entry:
+ %X = fadd double %arg, 1.414213e+00
+ ret double %X
+}
+
+; CHECK-LABEL: pass_longdouble
+; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
+; CHECK: lxdb 1, 0(1)
+; CHECK: axbr 0, 1
+define fp128 @pass_longdouble(fp128 %arg) {
+entry:
+ %X = fadd fp128 %arg, 0xL10000000000000004000921FB53C8D4F
+ ret fp128 %X
+}
+
+; CHECK-LABEL: pass_floats0
+; CHECK: larl 1, @{{CPI[0-9]+_[0-9]+}}
+; CHECK: axbr 0, 4
+; CHECK: axbr 1, 0
+; CHECK: cxbr 1, 5
+define i64 @pass_floats0(fp128 %arg0, fp128 %arg1, double %arg2) {
+ %X = fadd fp128 %arg0, %arg1
+ %arg2_ext = fpext double %arg2 to fp128
+ %Y = fadd fp128 %X, %arg2_ext
+ %ret_bool = fcmp ueq fp128 %Y, 0xLE0FC1518450562CD4000921FB5444261
+ %ret = sext i1 %ret_bool to i64
+ ret i64 %ret
+}
+
+declare i64 @pass_floats1(double %arg0, fp128 %arg1)
+declare i32 @pass_ptr(i32* %arg)
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
new file mode 100644
index 0000000000000..2efe27172efcc
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vararg.ll
@@ -0,0 +1,195 @@
+; Test passing variable argument lists in 64-bit calls on z/OS.
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z10 | FileCheck %s
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z14 | FileCheck %s -check-prefix=ARCH12
+; CHECK-LABEL: call_vararg_double0
+; CHECK: llihf 3, 1074118262
+; CHECK-NEXT: oilf 3, 3367254360
+; CHECK: lghi 1, 1
+; CHECK: lghi 2, 2
+define i64 @call_vararg_double0() {
+entry:
+ %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 2.718000e+00)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_double1
+; CHECK: llihf 0, 1074118262
+; CHECK-NEXT: oilf 0, 3367254360
+; CHECK: llihf 3, 1074340036
+; CHECK-NEXT: oilf 3, 2611340116
+; CHECK: lghi 1, 1
+; CHECK: lghi 2, 2
+; CHECK: stg 0, 2200(4)
+define i64 @call_vararg_double1() {
+entry:
+ %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, double 3.141000e+00, double 2.718000e+00)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_double2
+; CHECK-NOT: llihf 0
+; CHECK-NOT: oilf 0
+; CHECK: llihf 2, 1074118262
+; CHECK-NEXT: oilf 2, 3367254360
+; CHECK: lghi 1, 8200
+define i64 @call_vararg_double2() {
+entry:
+ %retval = call i64 (i64, ...) @pass_vararg2(i64 8200, double 2.718000e+00)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_double3
+; CHECK: llihf 0, 1072703839
+; CHECK-NEXT: oilf 0, 2861204133
+; CHECK: llihf 1, 1074118262
+; CHECK-NEXT: oilf 1, 3367254360
+; CHECK: llihf 2, 1074340036
+; CHECK-NEXT: oilf 2, 2611340116
+; CHECK: llihf 3, 1073127358
+; CHECK-NEXT: oilf 3, 1992864825
+; CHECK: stg 0, 2200(4)
+define i64 @call_vararg_double3() {
+entry:
+ %retval = call i64 (...) @pass_vararg3(double 2.718000e+00, double 3.141000e+00, double 1.414000e+00, double 1.010101e+00)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_both0
+; CHECK: lgr 2, 1
+; CHECK: lgdr 1, 0
+define i64 @call_vararg_both0(i64 %arg0, double %arg1) {
+ %retval = call i64(...) @pass_vararg3(double %arg1, i64 %arg0)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_long_double0
+; CHECK: larl 1, @CPI5_0
+; CHECK-NEXT: ld 0, 0(1)
+; CHECK-NEXT: ld 2, 8(1)
+; CHECK-NEXT: lgdr 3, 0
+; CHECK: lghi 1, 1
+; CHECK: lghi 2, 2
+; CHECK: std 0, 2192(4)
+; CHECK-NEXT: std 2, 2200(4)
+define i64 @call_vararg_long_double0() {
+entry:
+ %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 0xLE0FC1518450562CD4000921FB5444261)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_long_double1
+; CHECK: lgdr 3, 0
+; CHECK: lghi 1, 1
+; CHECK: lghi 2, 2
+; CHECK: std 0, 2192(4)
+; CHECK-NEXT: std 2, 2200(4)
+define i64 @call_vararg_long_double1(fp128 %arg0) {
+entry:
+ %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_long_double2
+; CHECK: std 4, 2208(4)
+; CHECK-NEXT: std 6, 2216(4)
+; CHECK: lgdr 3, 0
+; CHECK: lghi 1, 1
+; CHECK: lghi 2, 2
+; CHECK: std 0, 2192(4)
+; CHECK-NEXT: std 2, 2200(4)
+define i64 @call_vararg_long_double2(fp128 %arg0, fp128 %arg1) {
+entry:
+ %retval = call i64 (i64, i64, ...) @pass_vararg0(i64 1, i64 2, fp128 %arg0, fp128 %arg1)
+ ret i64 %retval
+}
+
+; CHECK-LABEL: call_vararg_long_double3
+; CHECK: lgdr 3, 2
+; CHECK-NEXT: lgdr 2, 0
+define i64 @call_vararg_long_double3(fp128 %arg0) {
+entry:
+ %retval = call i64 (...) @pass_vararg3(fp128 %arg0)
+ ret i64 %retval
+}
+
+; ARCH12-LABEL: call_vec_vararg_test0
+; ARCH12: vlgvg 3, 24, 1
+; ARCH12: vlgvg 2, 24, 0
+; ARCH12: lghi 1, 1
+define void @call_vec_vararg_test0(<2 x double> %v) {
+ %retval = call i64(i64, ...) @pass_vararg2(i64 1, <2 x double> %v)
+ ret void
+}
+
+; ARCH12-LABEL: call_vec_vararg_test1
+; ARCH12: larl 1, @CPI10_0
+; ARCH12: vl 0, 0(1), 3
+; ARCH12: vlgvg 3, 24, 0
+; ARCH12: vrepg 2, 0, 1
+; ARCH12: vst 25, 2208(4), 3
+; ARCH12: vst 24, 2192(4), 3
+define void @call_vec_vararg_test1(<4 x i32> %v, <2 x i64> %w) {
+ %retval = call i64(fp128, ...) @pass_vararg1(fp128 0xLE0FC1518450562CD4000921FB5444261, <4 x i32> %v, <2 x i64> %w)
+ ret void
+}
+
+; ARCH12-LABEL: call_vec_char_vararg_straddle
+; ARCH12: vlgvg 3, 24, 0
+; ARCH12: lghi 1, 1
+; ARCH12: lghi 2, 2
+; ARCH12: vst 24, 2192(4), 3
+define void @call_vec_char_vararg_straddle(<16 x i8> %v) {
+ %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <16 x i8> %v)
+ ret void
+}
+
+; ARCH12-LABEL: call_vec_short_vararg_straddle
+; ARCH12: vlgvg 3, 24, 0
+; ARCH12: lghi 1, 1
+; ARCH12: lghi 2, 2
+; ARCH12: vst 24, 2192(4), 3
+define void @call_vec_short_vararg_straddle(<8 x i16> %v) {
+ %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <8 x i16> %v)
+ ret void
+}
+
+; ARCH12-LABEL: call_vec_int_vararg_straddle
+; ARCH12: vlgvg 3, 24, 0
+; ARCH12: lghi 1, 1
+; ARCH12: lghi 2, 2
+; ARCH12: vst 24, 2192(4), 3
+define void @call_vec_int_vararg_straddle(<4 x i32> %v) {
+ %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <4 x i32> %v)
+ ret void
+}
+
+; ARCH12-LABEL: call_vec_double_vararg_straddle
+; ARCH12: vlgvg 3, 24, 0
+; ARCH12: lghi 1, 1
+; ARCH12: lghi 2, 2
+; ARCH12: vst 24, 2192(4), 3
+define void @call_vec_double_vararg_straddle(<2 x double> %v) {
+ %retval = call i64(i64, i64, ...) @pass_vararg0(i64 1, i64 2, <2 x double> %v)
+ ret void
+}
+
+; CHECK-LABEL: call_vararg_integral0
+; Since arguments 0, 1, and 2 are already in the correct
+; registers, we should have no loads of any sort into
+; GPRs 1, 2, and 3.
+; CHECK-NOT: lg 1
+; CHECK-NOT: lgr 1
+; CHECK-NOT: lg 2
+; CHECK-NOT: lgr 2
+; CHECK-NOT: lg 3
+; CHECK-NOT: lgr 3
+define i64 @call_vararg_integral0(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3) {
+entry:
+ %retval = call i64(...) @pass_vararg3(i32 signext %arg0, i16 signext %arg1, i64 signext %arg2, i8 signext %arg3)
+ ret i64 %retval
+}
+
+declare i64 @pass_vararg0(i64 %arg0, i64 %arg1, ...)
+declare i64 @pass_vararg1(fp128 %arg0, ...)
+declare i64 @pass_vararg2(i64 %arg0, ...)
+declare i64 @pass_vararg3(...)
diff --git a/llvm/test/CodeGen/SystemZ/call-zos-vec.ll b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
new file mode 100644
index 0000000000000..8d6b93387330f
--- /dev/null
+++ b/llvm/test/CodeGen/SystemZ/call-zos-vec.ll
@@ -0,0 +1,66 @@
+; RUN: llc < %s -mtriple=s390x-ibm-zos -mcpu=z13 | FileCheck %s
+
+; CHECK-LABEL: sum_vecs0
+; CHECK: vag 24, 24, 25
+define <2 x i64> @sum_vecs0(<2 x i64> %v1, <2 x i64> %v2) {
+entry:
+ %add0 = add <2 x i64> %v1, %v2
+ ret <2 x i64> %add0
+}
+
+; CHECK-LABEL: sum_vecs1
+; CHECK: vaf 1, 24, 25
+; CHECK: vaf 1, 1, 26
+; CHECK: vaf 1, 1, 27
+; CHECK: vaf 1, 1, 28
+; CHECK: vaf 1, 1, 29
+; CHECK: vl 0, 32(4), 4
+; CHECK: vaf 1, 1, 30
+; CHECK: vaf 1, 1, 31
+; CHECK: vaf 24, 1, 0
+define <4 x i32> @sum_vecs1(<4 x i32> %v1, <4 x i32> %v2, <4 x i32> %v3, <4 x i32> %v4, <4 x i32> %v5, <4 x i32> %v6, <4 x i32> %v7, <4 x i32> %v8, <4 x i32> %v9) {
+entry:
+ %add0 = add <4 x i32> %v1, %v2
+ %add1 = add <4 x i32> %add0, %v3
+ %add2 = add <4 x i32> %add1, %v4
+ %add3 = add <4 x i32> %add2, %v5
+ %add4 = add <4 x i32> %add3, %v6
+ %add5 = add <4 x i32> %add4, %v7
+ %add6 = add <4 x i32> %add5, %v8
+ %add7 = add <4 x i32> %add6, %v9
+ ret <4 x i32> %add7
+}
+
+; Verify that 3 is used for passing integral types if
+; only 24 is used.
+; CHECK-LABEL: call_vecs0
+; CHECK: lgr 3, 1
+define i64 @call_vecs0(i64 %n, <2 x i64> %v1) {
+entry:
+ %ret = call i64 (<2 x i64>, i64) @pass_vecs0(<2 x i64> %v1, i64 %n)
+ ret i64 %ret
+}
+
+; Verify that 3 is not allocated for passing integral types
+; if 24 and %f0 are used.
+; CHECK-LABEL: call_vecs1
+; CHECK: vlr 24, 25
+; CHECK: stg 1, 2200(4)
+define i64 @call_vecs1(i64 %n, <2 x i64> %v1, double %x, <2 x i64> %v2) {
+entry:
+ %ret = call i64 (<2 x i64>, double, i64) @pass_vecs1(<2 x i64> %v2, double %x, i64 %n)
+ ret i64 %ret
+}
+
+; Verify that 3 is not allocated for passing integral types
+; if 24 and 25 are used.
+; CHECK-LABEL: call_vecs2
+; CHECK: mvghi 2208(4), 55
+define i64 @call_vecs2(<2 x i64> %v1, <2 x i64> %v2) {
+ %ret = call i64 (<2 x i64>, <2 x i64>, i64) @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 55)
+ ret i64 %ret
+}
+
+declare i64 @pass_vecs0(<2 x i64> %v1, i64 %n)
+declare i64 @pass_vecs1(<2 x i64> %v1, double %x, i64 %n)
+declare i64 @pass_vecs2(<2 x i64> %v1, <2 x i64> %v2, i64 %n)