[llvm] 5637ec0 - [ARM64EC 4/?] Add LLVM support for varargs calling convention.
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Mon Sep 5 13:10:12 PDT 2022
Author: Eli Friedman
Date: 2022-09-05T13:05:48-07:00
New Revision: 5637ec0983041b26cdcd2064d8ec6636f352e3c9
URL: https://github.com/llvm/llvm-project/commit/5637ec0983041b26cdcd2064d8ec6636f352e3c9
DIFF: https://github.com/llvm/llvm-project/commit/5637ec0983041b26cdcd2064d8ec6636f352e3c9.diff
LOG: [ARM64EC 4/?] Add LLVM support for varargs calling convention.
Part of a patchset to add initial support for ARM64EC.
The ARM64EC calling convention is the same as ARM64 for non-varargs
functions, but the varargs convention is significantly different: only
registers x0-x3 are used for passing arguments, and x4 and x5 describe
the address and size of the arguments passed in memory. (See
https://docs.microsoft.com/en-us/windows/uwp/porting/arm64ec-abi for
more details; see
https://docs.microsoft.com/en-us/cpp/build/x64-calling-convention for
the x64 calling convention rules, which this convention needs to match.)
Note that this currently doesn't handle i128 arguments correctly: as
noted in review, handling them is somewhat involved, so I'm leaving it
for a followup.
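For illustration, consider a call like the following (a hypothetical
example, not taken from this patch):

  declare void @f(i32, ...)

  define void @caller() {
    call void (i32, ...) @f(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6)
    ret void
  }

Under this convention, the values 1-4 are passed in w0-w3, 5 and 6 land
in two 8-byte stack slots, x4 is set to the address of the first stack
slot (sp at the call site), and x5 is set to 16, the total size in
bytes of the stack-passed arguments.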
Differential Revision: https://reviews.llvm.org/D125415
Added:
llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
Modified:
llvm/lib/Target/AArch64/AArch64CallingConvention.h
llvm/lib/Target/AArch64/AArch64CallingConvention.td
llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
Removed:
################################################################################
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.h b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
index 59939e0684ed..d7ab83c946ce 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.h
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.h
@@ -19,6 +19,9 @@ namespace llvm {
bool CC_AArch64_AAPCS(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags,
CCState &State);
+bool CC_AArch64_Arm64EC_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
+ CCValAssign::LocInfo LocInfo,
+ ISD::ArgFlagsTy ArgFlags, CCState &State);
bool CC_AArch64_DarwinPCS_VarArg(unsigned ValNo, MVT ValVT, MVT LocVT,
CCValAssign::LocInfo LocInfo,
ISD::ArgFlagsTy ArgFlags, CCState &State);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index b6e2929bb9d3..6cf7bf6d1cfc 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -162,6 +162,46 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
CCDelegateTo<CC_AArch64_AAPCS>
]>;
+// Vararg functions on Arm64EC ABI use a different convention, using
+// a stack layout compatible with the x64 calling convention.
+let Entry = 1 in
+def CC_AArch64_Arm64EC_VarArg : CallingConv<[
+ // Convert small floating-point values to integer.
+ CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
+ CCIfType<[f32], CCBitConvertToType<i32>>,
+ CCIfType<[f64, v1f64, v1i64, v2f32, v2i32, v4i16, v4f16, v4bf16, v8i8, iPTR],
+ CCBitConvertToType<i64>>,
+
+ // Larger floating-point/vector values are passed indirectly.
+ CCIfType<[f128, v2f64, v2i64, v4i32, v4f32, v8i16, v8f16, v8bf16, v16i8],
+ CCPassIndirect<i64>>,
+ CCIfType<[nxv16i8, nxv8i16, nxv4i32, nxv2i64, nxv2f16, nxv4f16, nxv8f16,
+ nxv2bf16, nxv4bf16, nxv8bf16, nxv2f32, nxv4f32, nxv2f64],
+ CCPassIndirect<i64>>,
+ CCIfType<[nxv2i1, nxv4i1, nxv8i1, nxv16i1],
+ CCPassIndirect<i64>>,
+
+ // Handle SRet. See comment in CC_AArch64_AAPCS.
+ CCIfInReg<CCIfType<[i64],
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X0, X1]>>>>>,
+ CCIfSRet<CCIfType<[i64], CCAssignToReg<[X8]>>>,
+
+ // Put ByVal arguments directly on the stack. Minimum size and alignment of a
+ // slot is 64-bit. (Shouldn't normally come up; the Microsoft ABI doesn't
+ // use byval.)
+ CCIfByVal<CCPassByVal<8, 8>>,
+
+ // Promote small integers to i32
+ CCIfType<[i1, i8, i16], CCPromoteToType<i32>>,
+
+ // Pass first four arguments in x0-x3.
+ CCIfType<[i32], CCAssignToReg<[W0, W1, W2, W3]>>,
+ CCIfType<[i64], CCAssignToReg<[X0, X1, X2, X3]>>,
+
+ // Put remaining arguments on stack.
+ CCIfType<[i32, i64], CCAssignToStack<8, 8>>,
+]>;
+
// Windows Control Flow Guard checks take a single argument (the target function
// address) and have no return value.
let Entry = 1 in
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index c949c5fbb748..b5d8d3d6b4ef 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -5810,8 +5810,11 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::Swift:
case CallingConv::SwiftTail:
case CallingConv::Tail:
- if (Subtarget->isTargetWindows() && IsVarArg)
+ if (Subtarget->isTargetWindows() && IsVarArg) {
+ if (Subtarget->isWindowsArm64EC())
+ return CC_AArch64_Arm64EC_VarArg;
return CC_AArch64_Win64_VarArg;
+ }
if (!Subtarget->isTargetDarwin())
return CC_AArch64_AAPCS;
if (!IsVarArg)
@@ -5819,7 +5822,12 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
return Subtarget->isTargetILP32() ? CC_AArch64_DarwinPCS_ILP32_VarArg
: CC_AArch64_DarwinPCS_VarArg;
case CallingConv::Win64:
- return IsVarArg ? CC_AArch64_Win64_VarArg : CC_AArch64_AAPCS;
+ if (IsVarArg) {
+ if (Subtarget->isWindowsArm64EC())
+ return CC_AArch64_Arm64EC_VarArg;
+ return CC_AArch64_Win64_VarArg;
+ }
+ return CC_AArch64_AAPCS;
case CallingConv::CFGuard_Check:
return CC_AArch64_Win64_CFGuard_Check;
case CallingConv::AArch64_VectorCall:
@@ -5955,8 +5963,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
case CCValAssign::Full:
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert((VA.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
break;
case CCValAssign::BCvt:
ArgValue = DAG.getNode(ISD::BITCAST, DL, VA.getValVT(), ArgValue);
@@ -5983,10 +5992,24 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
!Ins[i].Flags.isInConsecutiveRegs())
BEAlign = 8 - ArgSize;
- int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
+ SDValue FIN;
+ MachinePointerInfo PtrInfo;
+ if (isVarArg && Subtarget->isWindowsArm64EC()) {
+ // In the ARM64EC varargs convention, fixed arguments on the stack are
+ // accessed relative to x4, not sp.
+ unsigned ObjOffset = ArgOffset + BEAlign;
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ FIN = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
+ DAG.getConstant(ObjOffset, DL, MVT::i64));
+ PtrInfo = MachinePointerInfo::getUnknownStack(MF);
+ } else {
+ int FI = MFI.CreateFixedObject(ArgSize, ArgOffset + BEAlign, true);
- // Create load nodes to retrieve arguments from the stack.
- SDValue FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ // Create load nodes to retrieve arguments from the stack.
+ FIN = DAG.getFrameIndex(FI, getPointerTy(DAG.getDataLayout()));
+ PtrInfo = MachinePointerInfo::getFixedStack(MF, FI);
+ }
// For NON_EXTLOAD, generic code in getLoad assert(ValVT == MemVT)
ISD::LoadExtType ExtType = ISD::NON_EXTLOAD;
@@ -6000,8 +6023,9 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
MemVT = VA.getLocVT();
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert((VA.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
MemVT = VA.getLocVT();
break;
case CCValAssign::SExt:
@@ -6015,14 +6039,14 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
break;
}
- ArgValue =
- DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN,
- MachinePointerInfo::getFixedStack(MF, FI), MemVT);
+ ArgValue = DAG.getExtLoad(ExtType, DL, VA.getLocVT(), Chain, FIN, PtrInfo,
+ MemVT);
}
if (VA.getLocInfo() == CCValAssign::Indirect) {
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ assert(
+ (VA.getValVT().isScalableVector() || Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
uint64_t PartSize = VA.getValVT().getStoreSize().getKnownMinSize();
unsigned NumParts = 1;
@@ -6042,9 +6066,16 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
InVals.push_back(ArgValue);
NumParts--;
if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDValue BytesIncrement;
+ if (PartLoad.isScalableVector()) {
+ BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ } else {
+ BytesIncrement = DAG.getConstant(
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize), DL,
+ Ptr.getValueType());
+ }
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
Ptr = DAG.getNode(ISD::ADD, DL, Ptr.getValueType(), Ptr,
@@ -6090,6 +6121,7 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
unsigned StackOffset = CCInfo.getNextStackOffset();
// We currently pass all varargs at 8-byte alignment, or 4 for ILP32
StackOffset = alignTo(StackOffset, Subtarget->isTargetILP32() ? 4 : 8);
+ FuncInfo->setVarArgsStackOffset(StackOffset);
FuncInfo->setVarArgsStackIndex(MFI.CreateFixedObject(4, StackOffset, true));
if (MFI.hasMustTailInVarArgFunc()) {
@@ -6171,7 +6203,12 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
static const MCPhysReg GPRArgRegs[] = { AArch64::X0, AArch64::X1, AArch64::X2,
AArch64::X3, AArch64::X4, AArch64::X5,
AArch64::X6, AArch64::X7 };
- static const unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+ unsigned NumGPRArgRegs = array_lengthof(GPRArgRegs);
+ if (Subtarget->isWindowsArm64EC()) {
+ // In the ARM64EC ABI, only x0-x3 are used to pass arguments to varargs
+ // functions.
+ NumGPRArgRegs = 4;
+ }
unsigned FirstVariadicGPR = CCInfo.getFirstUnallocated(GPRArgRegs);
unsigned GPRSaveSize = 8 * (NumGPRArgRegs - FirstVariadicGPR);
@@ -6185,7 +6222,19 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
} else
GPRIdx = MFI.CreateStackObject(GPRSaveSize, Align(8), false);
- SDValue FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
+ SDValue FIN;
+ if (Subtarget->isWindowsArm64EC()) {
+ // With the Arm64EC ABI, we reserve the save area as usual, but we
+ // compute its address relative to x4. For a normal AArch64->AArch64
+ // call, x4 == sp on entry, but calls from an entry thunk can pass in a
+ // different address.
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(Chain, DL, VReg, MVT::i64);
+ FIN = DAG.getNode(ISD::SUB, DL, MVT::i64, Val,
+ DAG.getConstant(GPRSaveSize, DL, MVT::i64));
+ } else {
+ FIN = DAG.getFrameIndex(GPRIdx, PtrVT);
+ }
for (unsigned i = FirstVariadicGPR; i < NumGPRArgRegs; ++i) {
Register VReg = MF.addLiveIn(GPRArgRegs[i], &AArch64::GPR64RegClass);
@@ -6493,9 +6542,10 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// 'getBytesInStackArgArea' is not sufficient to determine whether we need to
// allocate space on the stack. That is why we determine this explicitly here
// the call cannot be a tailcall.
- if (llvm::any_of(ArgLocs, [](CCValAssign &A) {
+ if (llvm::any_of(ArgLocs, [&](CCValAssign &A) {
assert((A.getLocInfo() != CCValAssign::Indirect ||
- A.getValVT().isScalableVector()) &&
+ A.getValVT().isScalableVector() ||
+ Subtarget->isWindowsArm64EC()) &&
"Expected value to be scalable");
return A.getLocInfo() == CCValAssign::Indirect;
}))
@@ -6760,8 +6810,9 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Arg = DAG.getNode(ISD::FP_EXTEND, DL, VA.getLocVT(), Arg);
break;
case CCValAssign::Indirect:
- assert(VA.getValVT().isScalableVector() &&
- "Only scalable vectors can be passed indirectly");
+ bool isScalable = VA.getValVT().isScalableVector();
+ assert((isScalable || Subtarget->isWindowsArm64EC()) &&
+ "Indirect arguments should be scalable on most subtargets");
uint64_t StoreSize = VA.getValVT().getStoreSize().getKnownMinSize();
uint64_t PartSize = StoreSize;
@@ -6777,7 +6828,8 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Type *Ty = EVT(VA.getValVT()).getTypeForEVT(*DAG.getContext());
Align Alignment = DAG.getDataLayout().getPrefTypeAlign(Ty);
int FI = MFI.CreateStackObject(StoreSize, Alignment, false);
- MFI.setStackID(FI, TargetStackID::ScalableVector);
+ if (isScalable)
+ MFI.setStackID(FI, TargetStackID::ScalableVector);
MachinePointerInfo MPI = MachinePointerInfo::getFixedStack(MF, FI);
SDValue Ptr = DAG.getFrameIndex(
@@ -6790,9 +6842,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
Chain = DAG.getStore(Chain, DL, OutVals[i], Ptr, MPI);
NumParts--;
if (NumParts > 0) {
- SDValue BytesIncrement = DAG.getVScale(
- DL, Ptr.getValueType(),
- APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ SDValue BytesIncrement;
+ if (isScalable) {
+ BytesIncrement = DAG.getVScale(
+ DL, Ptr.getValueType(),
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize));
+ } else {
+ BytesIncrement = DAG.getConstant(
+ APInt(Ptr.getValueSizeInBits().getFixedSize(), PartSize), DL,
+ Ptr.getValueType());
+ }
SDNodeFlags Flags;
Flags.setNoUnsignedWrap(true);
@@ -6911,6 +6970,16 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
}
+ if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+ // For vararg calls, the Arm64EC ABI requires values in x4 and x5
+ // describing the argument list. x4 contains the address of the
+ // first stack parameter. x5 contains the size in bytes of all parameters
+ // passed on the stack.
+ RegsToPass.emplace_back(AArch64::X4, StackPtr);
+ RegsToPass.emplace_back(AArch64::X5,
+ DAG.getConstant(NumBytes, DL, MVT::i64));
+ }
+
if (!MemOpChains.empty())
Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, MemOpChains);
@@ -8599,14 +8668,30 @@ SDValue AArch64TargetLowering::LowerDarwin_VASTART(SDValue Op,
SDValue AArch64TargetLowering::LowerWin64_VASTART(SDValue Op,
SelectionDAG &DAG) const {
- AArch64FunctionInfo *FuncInfo =
- DAG.getMachineFunction().getInfo<AArch64FunctionInfo>();
+ MachineFunction &MF = DAG.getMachineFunction();
+ AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
SDLoc DL(Op);
- SDValue FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
- ? FuncInfo->getVarArgsGPRIndex()
- : FuncInfo->getVarArgsStackIndex(),
- getPointerTy(DAG.getDataLayout()));
+ SDValue FR;
+ if (Subtarget->isWindowsArm64EC()) {
+ // With the Arm64EC ABI, we compute the address of the varargs save area
+ // relative to x4. For a normal AArch64->AArch64 call, x4 == sp on entry,
+ // but calls from an entry thunk can pass in a different address.
+ Register VReg = MF.addLiveIn(AArch64::X4, &AArch64::GPR64RegClass);
+ SDValue Val = DAG.getCopyFromReg(DAG.getEntryNode(), DL, VReg, MVT::i64);
+ uint64_t StackOffset;
+ if (FuncInfo->getVarArgsGPRSize() > 0)
+ StackOffset = -(uint64_t)FuncInfo->getVarArgsGPRSize();
+ else
+ StackOffset = FuncInfo->getVarArgsStackOffset();
+ FR = DAG.getNode(ISD::ADD, DL, MVT::i64, Val,
+ DAG.getConstant(StackOffset, DL, MVT::i64));
+ } else {
+ FR = DAG.getFrameIndex(FuncInfo->getVarArgsGPRSize() > 0
+ ? FuncInfo->getVarArgsGPRIndex()
+ : FuncInfo->getVarArgsStackIndex(),
+ getPointerTy(DAG.getDataLayout()));
+ }
const Value *SV = cast<SrcValueSDNode>(Op.getOperand(2))->getValue();
return DAG.getStore(Op.getOperand(0), DL, FR, Op.getOperand(1),
MachinePointerInfo(SV));
diff --git a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
index f070f989a5b7..85fbf88bfc0e 100644
--- a/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
+++ b/llvm/lib/Target/AArch64/AArch64MachineFunctionInfo.h
@@ -85,6 +85,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
/// stack.
int VarArgsStackIndex = 0;
+ /// Offset of start of varargs area for arguments passed on the stack.
+ unsigned VarArgsStackOffset = 0;
+
/// FrameIndex for start of varargs area for arguments passed in
/// general purpose registers.
int VarArgsGPRIndex = 0;
@@ -329,6 +332,9 @@ class AArch64FunctionInfo final : public MachineFunctionInfo {
int getVarArgsStackIndex() const { return VarArgsStackIndex; }
void setVarArgsStackIndex(int Index) { VarArgsStackIndex = Index; }
+ unsigned getVarArgsStackOffset() const { return VarArgsStackOffset; }
+ void setVarArgsStackOffset(unsigned Offset) { VarArgsStackOffset = Offset; }
+
int getVarArgsGPRIndex() const { return VarArgsGPRIndex; }
void setVarArgsGPRIndex(int Index) { VarArgsGPRIndex = Index; }
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index a45221273dc4..32f1bf867019 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -1100,6 +1100,11 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
MachineRegisterInfo &MRI = MF.getRegInfo();
auto &DL = F.getParent()->getDataLayout();
const AArch64TargetLowering &TLI = *getTLI<AArch64TargetLowering>();
+ const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
+
+ // Arm64EC has extra requirements for varargs calls; bail out for now.
+ if (Info.IsVarArg && Subtarget.isWindowsArm64EC())
+ return false;
SmallVector<ArgInfo, 8> OutArgs;
for (auto &OrigArg : Info.OrigArgs) {
@@ -1153,7 +1158,6 @@ bool AArch64CallLowering::lowerCall(MachineIRBuilder &MIRBuilder,
// Create a temporarily-floating call instruction so we can add the implicit
// uses of arg registers.
- const AArch64Subtarget &Subtarget = MF.getSubtarget<AArch64Subtarget>();
unsigned Opc = 0;
// Calls with operand bundle "clang.arc.attachedcall" are special. They should
// be expanded to the call, directly followed by a special marker sequence and
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
new file mode 100644
index 000000000000..212c9408d6e4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -0,0 +1,98 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s | FileCheck %s
+; RUN: llc -mtriple=arm64ec-pc-windows-msvc < %s -global-isel=1 -global-isel-abort=0 | FileCheck %s
+
+define void @varargs_callee(double %x, ...) nounwind {
+; CHECK-LABEL: varargs_callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: stp x1, x2, [x4, #-24]!
+; CHECK-NEXT: str x3, [x4, #16]
+; CHECK-NEXT: str x4, [sp, #8]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %list = alloca i8*, align 8
+ %listx = bitcast i8** %list to i8*
+ call void @llvm.va_start(i8* nonnull %listx)
+ ret void
+}
+
+define void @varargs_callee_manyargs(i64, i64, i64, i64, i64, ...) nounwind {
+; CHECK-LABEL: varargs_callee_manyargs:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, x4, #8
+; CHECK-NEXT: str x8, [sp, #8]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %list = alloca i8*, align 8
+ %listx = bitcast i8** %list to i8*
+ call void @llvm.va_start(i8* nonnull %listx)
+ ret void
+}
+
+define void @varargs_caller() nounwind {
+; CHECK-LABEL: varargs_caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024
+; CHECK-NEXT: mov x0, #4607182418800017408
+; CHECK-NEXT: mov w1, #2
+; CHECK-NEXT: mov x2, #4613937818241073152
+; CHECK-NEXT: mov w3, #4
+; CHECK-NEXT: mov w5, #16
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x8, xzr, [sp, #8]
+; CHECK-NEXT: str x9, [sp]
+; CHECK-NEXT: bl varargs_callee
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+ ret void
+}
+
+define <2 x double> @varargs_many_argscallee(double %a, double %b, double %c,
+; CHECK-LABEL: varargs_many_argscallee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: ldr x8, [x4]
+; CHECK-NEXT: ldr q0, [x3]
+; CHECK-NEXT: ldr q1, [x8]
+; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
+; CHECK-NEXT: ret
+ <2 x double> %d, <2 x double> %e, ...) nounwind {
+ %rval = fadd <2 x double> %d, %e
+ ret <2 x double> %rval
+}
+
+define void @varargs_many_argscalleer() nounwind {
+; CHECK-LABEL: varargs_many_argscalleer:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: movi v0.2d, #0000000000000000
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x8, #4618441417868443648
+; CHECK-NEXT: add x9, sp, #16
+; CHECK-NEXT: add x3, sp, #32
+; CHECK-NEXT: mov x0, #4607182418800017408
+; CHECK-NEXT: mov x1, #4611686018427387904
+; CHECK-NEXT: mov x2, #4613937818241073152
+; CHECK-NEXT: mov w5, #16
+; CHECK-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-NEXT: stp q0, q0, [sp, #16]
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: bl varargs_many_argscallee
+; CHECK-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ call <2 x double> (double, double, double, <2 x double>, <2 x double>, ...)
+ @varargs_many_argscallee(double 1., double 2., double 3.,
+ <2 x double> zeroinitializer,
+ <2 x double> zeroinitializer, double 6.)
+ ret void
+}
+
+
+declare void @llvm.va_start(i8*)
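As the first test above shows, va_start under this convention reduces to
storing a pointer derived from the incoming x4: the callee spills the
variadic GPRs immediately below x4 so that register-passed and
stack-passed varargs form one contiguous array of 8-byte slots. A
minimal sketch of reading the first variadic slot through that pointer
(assuming the x4/x5 contract described above; not part of this patch):

  declare void @llvm.va_start(i8*)

  define i64 @first_vararg(...) nounwind {
    %list = alloca i8*, align 8
    %listx = bitcast i8** %list to i8*
    call void @llvm.va_start(i8* nonnull %listx)
    ; The stored pointer is x4 minus the GPR save size (32 here, since
    ; x0-x3 are all variadic), so this load reads the value passed in x0.
    %p = load i8*, i8** %list, align 8
    %slot = bitcast i8* %p to i64*
    %v = load i64, i64* %slot, align 8
    ret i64 %v
  }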