[llvm] [AArch64] Support varargs for preserve_nonecc (PR #99434)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Jul 18 23:19:59 PDT 2024
https://github.com/antangelo updated https://github.com/llvm/llvm-project/pull/99434
>From b92d76db65d44aa2a7e0d0c169e5438afb3e9f87 Mon Sep 17 00:00:00 2001
From: antangelo <contact at antangelo.com>
Date: Thu, 18 Jul 2024 00:49:30 -0400
Subject: [PATCH 1/2] [AArch64] Support varargs for preserve_nonecc
Adds varargs support for preserve_none by falling back to the C
argument-passing convention of the target platform.
---
.../AArch64/AArch64CallingConvention.td | 73 +++++++-----
.../Target/AArch64/AArch64FrameLowering.cpp | 17 ++-
.../Target/AArch64/AArch64ISelLowering.cpp | 39 +++++--
.../Target/AArch64/AArch64RegisterInfo.cpp | 7 +-
llvm/lib/Target/AArch64/AArch64Subtarget.h | 4 +-
.../AArch64/GISel/AArch64CallLowering.cpp | 12 +-
.../GISel/AArch64InstructionSelector.cpp | 2 +-
.../AArch64/preserve_nonecc_varargs_aapcs.ll | 105 ++++++++++++++++++
.../AArch64/preserve_nonecc_varargs_darwin.ll | 49 ++++++++
.../AArch64/preserve_nonecc_varargs_win64.ll | 51 +++++++++
10 files changed, 305 insertions(+), 54 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
create mode 100644 llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 2f7e226fd09b2..0167bfe5743eb 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -17,6 +17,11 @@ class CCIfBigEndian<CCAction A> :
class CCIfILP32<CCAction A> :
CCIf<"State.getMachineFunction().getDataLayout().getPointerSize() == 4", A>;
+/// CCIfSubtarget - Match if the current subtarget has a feature F.
+class CCIfSubtarget<string F, CCAction A>
+ : CCIf<!strconcat("State.getMachineFunction()"
+ ".getSubtarget<AArch64Subtarget>().", F),
+ A>;
//===----------------------------------------------------------------------===//
// ARM AAPCS64 Calling Convention
@@ -496,36 +501,44 @@ def CC_AArch64_GHC : CallingConv<[
let Entry = 1 in
def CC_AArch64_Preserve_None : CallingConv<[
- // We can pass arguments in all general registers, except:
- // - X8, used for sret
- // - X16/X17, used by the linker as IP0/IP1
- // - X18, the platform register
- // - X19, the base pointer
- // - X29, the frame pointer
- // - X30, the link register
- // General registers are not preserved with the exception of
- // FP, LR, and X18
- // Non-volatile registers are used first, so functions may call
- // normal functions without saving and reloading arguments.
- // X9 is assigned last as it is used in FrameLowering as the first
- // choice for a scratch register.
- CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
- W24, W25, W26, W27, W28,
- W0, W1, W2, W3, W4, W5,
- W6, W7, W10, W11,
- W12, W13, W14, W9]>>,
- CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
- X24, X25, X26, X27, X28,
- X0, X1, X2, X3, X4, X5,
- X6, X7, X10, X11,
- X12, X13, X14, X9]>>,
-
- // Windows uses X15 for stack allocation
- CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
- CCIfType<[i32], CCAssignToReg<[W15]>>>,
- CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
- CCIfType<[i64], CCAssignToReg<[X15]>>>,
- CCDelegateTo<CC_AArch64_AAPCS>
+ // VarArgs are only supported using the C calling convention.
+ // This handles the non-variadic parameter case. Variadic parameters
+ // are handled in CCAssignFnForCall.
+ CCIfVarArg<CCIfSubtarget<"isTargetDarwin()", CCDelegateTo<CC_AArch64_DarwinPCS>>>,
+ CCIfVarArg<CCIfSubtarget<"isTargetWindows()", CCDelegateTo<CC_AArch64_Win64PCS>>>,
+ CCIfVarArg<CCDelegateTo<CC_AArch64_AAPCS>>,
+
+ // We can pass arguments in all general registers, except:
+ // - X8, used for sret
+ // - X16/X17, used by the linker as IP0/IP1
+ // - X18, the platform register
+ // - X19, the base pointer
+ // - X29, the frame pointer
+ // - X30, the link register
+ // General registers are not preserved with the exception of
+ // FP, LR, and X18
+ // Non-volatile registers are used first, so functions may call
+ // normal functions without saving and reloading arguments.
+ // X9 is assigned last as it is used in FrameLowering as the first
+ // choice for a scratch register.
+ CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
+ W24, W25, W26, W27, W28,
+ W0, W1, W2, W3, W4, W5,
+ W6, W7, W10, W11,
+ W12, W13, W14, W9]>>,
+ CCIfType<[i64], CCAssignToReg<[X20, X21, X22, X23,
+ X24, X25, X26, X27, X28,
+ X0, X1, X2, X3, X4, X5,
+ X6, X7, X10, X11,
+ X12, X13, X14, X9]>>,
+
+ // Windows uses X15 for stack allocation
+ CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+ CCIfType<[i32], CCAssignToReg<[W15]>>>,
+ CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
+ CCIfType<[i64], CCAssignToReg<[X15]>>>,
+
+ CCDelegateTo<CC_AArch64_AAPCS>
]>;
// The order of the callee-saves in this file is important, because the
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 0f1e860fac732..73d7dfe33b36e 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1870,8 +1870,7 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
return;
}
- bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+ bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
auto PrologueSaveSize = AFI->getCalleeSavedStackSize() + FixedObject;
@@ -2277,8 +2276,8 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// How much of the stack used by incoming arguments this function is expected
// to restore in this particular epilogue.
int64_t ArgumentStackToRestore = getArgumentStackToRestore(MF, MBB);
- bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+ bool IsWin64 = Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+ MF.getFunction().isVarArg());
unsigned FixedObject = getFixedObjectSize(MF, AFI, IsWin64, IsFunclet);
int64_t AfterCSRPopSize = ArgumentStackToRestore;
@@ -2584,8 +2583,8 @@ static StackOffset getFPOffset(const MachineFunction &MF,
int64_t ObjectOffset) {
const auto *AFI = MF.getInfo<AArch64FunctionInfo>();
const auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- bool IsWin64 =
- Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv());
+ const Function &F = MF.getFunction();
+ bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg());
unsigned FixedObject =
getFixedObjectSize(MF, AFI, IsWin64, /*IsFunclet=*/false);
int64_t CalleeSaveSize = AFI->getCalleeSavedStackSize(MF.getFrameInfo());
@@ -2691,9 +2690,9 @@ StackOffset AArch64FrameLowering::resolveFrameOffsetReference(
// via the frame pointer, so we have to use the FP in the parent
// function.
(void) Subtarget;
- assert(
- Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv()) &&
- "Funclets should only be present on Win64");
+ assert(Subtarget.isCallingConvWin64(MF.getFunction().getCallingConv(),
+ MF.getFunction().isVarArg()) &&
+ "Funclets should only be present on Win64");
UseFP = true;
} else {
// We have the choice between FP and (SP or BP).
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index eef83a845e2c3..7e7087efcfb39 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7091,7 +7091,13 @@ CCAssignFn *AArch64TargetLowering::CCAssignFnForCall(CallingConv::ID CC,
case CallingConv::GHC:
return CC_AArch64_GHC;
case CallingConv::PreserveNone:
- return CC_AArch64_Preserve_None;
+ // The VarArg implementation makes assumptions about register
+ // argument passing that do not hold for preserve_none, so we
+ // instead fall back to C argument passing.
+ // The non-vararg case is handled in the CC function itself.
+ if (!IsVarArg)
+ return CC_AArch64_Preserve_None;
+ [[fallthrough]];
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
@@ -7164,7 +7170,8 @@ SDValue AArch64TargetLowering::LowerFormalArguments(
MachineFunction &MF = DAG.getMachineFunction();
const Function &F = MF.getFunction();
MachineFrameInfo &MFI = MF.getFrameInfo();
- bool IsWin64 = Subtarget->isCallingConvWin64(F.getCallingConv());
+ bool IsWin64 =
+ Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
bool StackViaX4 = CallConv == CallingConv::ARM64EC_Thunk_X64 ||
(isVarArg && Subtarget->isWindowsArm64EC());
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
@@ -7616,7 +7623,9 @@ void AArch64TargetLowering::saveVarArgRegisters(CCState &CCInfo,
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto PtrVT = getPointerTy(DAG.getDataLayout());
- bool IsWin64 = Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv());
+ Function &F = MF.getFunction();
+ bool IsWin64 =
+ Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg());
SmallVector<SDValue, 8> MemOps;
@@ -7787,6 +7796,21 @@ static bool mayTailCallThisCC(CallingConv::ID CC) {
}
}
+/// Return true if the call convention supports varargs
+/// Currently only those that pass varargs like the C
+/// calling convention does are eligible
+/// Calling conventions listed in this function must also
+/// be properly handled in AArch64Subtarget::isCallingConvWin64
+static bool callConvSupportsVarArgs(CallingConv::ID CC) {
+ switch (CC) {
+ case CallingConv::C:
+ case CallingConv::PreserveNone:
+ return true;
+ default:
+ return false;
+ }
+}
+
static void analyzeCallOperands(const AArch64TargetLowering &TLI,
const AArch64Subtarget *Subtarget,
const TargetLowering::CallLoweringInfo &CLI,
@@ -7795,7 +7819,7 @@ static void analyzeCallOperands(const AArch64TargetLowering &TLI,
CallingConv::ID CalleeCC = CLI.CallConv;
bool IsVarArg = CLI.IsVarArg;
const SmallVector<ISD::OutputArg, 32> &Outs = CLI.Outs;
- bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC);
+ bool IsCalleeWin64 = Subtarget->isCallingConvWin64(CalleeCC, IsVarArg);
// For Arm64EC thunks, allocate 32 extra bytes at the bottom of the stack
// for the shadow store.
@@ -7923,8 +7947,8 @@ bool AArch64TargetLowering::isEligibleForTailCallOptimization(
// I want anyone implementing a new calling convention to think long and hard
// about this assert.
- assert((!IsVarArg || CalleeCC == CallingConv::C) &&
- "Unexpected variadic calling convention");
+ if (IsVarArg && !callConvSupportsVarArgs(CalleeCC))
+ report_fatal_error("Unsupported variadic calling convention");
LLVMContext &C = *DAG.getContext();
// Check that the call results are passed in the same way.
@@ -10854,8 +10878,9 @@ SDValue AArch64TargetLowering::LowerAAPCS_VASTART(SDValue Op,
SDValue AArch64TargetLowering::LowerVASTART(SDValue Op,
SelectionDAG &DAG) const {
MachineFunction &MF = DAG.getMachineFunction();
+ Function &F = MF.getFunction();
- if (Subtarget->isCallingConvWin64(MF.getFunction().getCallingConv()))
+ if (Subtarget->isCallingConvWin64(F.getCallingConv(), F.isVarArg()))
return LowerWin64_VASTART(Op, DAG);
else if (Subtarget->isTargetDarwin())
return LowerDarwin_VASTART(Op, DAG);
diff --git a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
index 1e069f4790c53..435cc18cdea62 100644
--- a/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
+++ b/llvm/lib/Target/AArch64/AArch64RegisterInfo.cpp
@@ -611,7 +611,8 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
MCRegister Reg) const {
CallingConv::ID CC = MF.getFunction().getCallingConv();
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv());
+ bool IsVarArg = STI.isCallingConvWin64(MF.getFunction().getCallingConv(),
+ MF.getFunction().isVarArg());
auto HasReg = [](ArrayRef<MCRegister> RegList, MCRegister Reg) {
return llvm::is_contained(RegList, Reg);
@@ -623,7 +624,9 @@ bool AArch64RegisterInfo::isArgumentRegister(const MachineFunction &MF,
case CallingConv::GHC:
return HasReg(CC_AArch64_GHC_ArgRegs, Reg);
case CallingConv::PreserveNone:
- return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+ if (!MF.getFunction().isVarArg())
+ return HasReg(CC_AArch64_Preserve_None_ArgRegs, Reg);
+ [[fallthrough]];
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::PreserveMost:
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index 4b840b24ba134..12c3d25d32ee7 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -322,13 +322,15 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
std::unique_ptr<PBQPRAConstraint> getCustomPBQPConstraints() const override;
- bool isCallingConvWin64(CallingConv::ID CC) const {
+ bool isCallingConvWin64(CallingConv::ID CC, bool IsVarArg) const {
switch (CC) {
case CallingConv::C:
case CallingConv::Fast:
case CallingConv::Swift:
case CallingConv::SwiftTail:
return isTargetWindows();
+ case CallingConv::PreserveNone:
+ return IsVarArg && isTargetWindows();
case CallingConv::Win64:
return true;
default:
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
index 5206ba46260ed..b4d2a3388c1df 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64CallLowering.cpp
@@ -117,7 +117,9 @@ struct AArch64OutgoingValueAssigner
CCValAssign::LocInfo LocInfo,
const CallLowering::ArgInfo &Info, ISD::ArgFlagsTy Flags,
CCState &State) override {
- bool IsCalleeWin = Subtarget.isCallingConvWin64(State.getCallingConv());
+ const Function &F = State.getMachineFunction().getFunction();
+ bool IsCalleeWin =
+ Subtarget.isCallingConvWin64(State.getCallingConv(), F.isVarArg());
bool UseVarArgsCCForFixed = IsCalleeWin && State.isVarArg();
bool Res;
@@ -557,8 +559,8 @@ void AArch64CallLowering::saveVarArgRegisters(
MachineFrameInfo &MFI = MF.getFrameInfo();
AArch64FunctionInfo *FuncInfo = MF.getInfo<AArch64FunctionInfo>();
auto &Subtarget = MF.getSubtarget<AArch64Subtarget>();
- bool IsWin64CC =
- Subtarget.isCallingConvWin64(CCInfo.getCallingConv());
+ bool IsWin64CC = Subtarget.isCallingConvWin64(CCInfo.getCallingConv(),
+ MF.getFunction().isVarArg());
const LLT p0 = LLT::pointer(0, 64);
const LLT s64 = LLT::scalar(64);
@@ -653,7 +655,9 @@ bool AArch64CallLowering::lowerFormalArguments(
F.getCallingConv() == CallingConv::ARM64EC_Thunk_X64)
return false;
- bool IsWin64 = Subtarget.isCallingConvWin64(F.getCallingConv()) && !Subtarget.isWindowsArm64EC();
+ bool IsWin64 =
+ Subtarget.isCallingConvWin64(F.getCallingConv(), F.isVarArg()) &&
+ !Subtarget.isWindowsArm64EC();
SmallVector<ArgInfo, 8> SplitArgs;
SmallVector<std::pair<Register, Register>> BoolArgs;
diff --git a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
index 24d65624e09e9..c079283bc4e51 100644
--- a/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
+++ b/llvm/lib/Target/AArch64/GISel/AArch64InstructionSelector.cpp
@@ -2001,7 +2001,7 @@ bool AArch64InstructionSelector::selectVaStartDarwin(
int FrameIdx = FuncInfo->getVarArgsStackIndex();
if (MF.getSubtarget<AArch64Subtarget>().isCallingConvWin64(
- MF.getFunction().getCallingConv())) {
+ MF.getFunction().getCallingConv(), MF.getFunction().isVarArg())) {
FrameIdx = FuncInfo->getVarArgsGPRSize() > 0
? FuncInfo->getVarArgsGPRIndex()
: FuncInfo->getVarArgsStackIndex();
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
new file mode 100644
index 0000000000000..fdac9f76843a8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
@@ -0,0 +1,105 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64 < %s | FileCheck %s
+
+%va_list = type { ptr, ptr, ptr, i32, i32 }
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #192
+; CHECK-NEXT: mov x8, #-24 // =0xffffffffffffffe8
+; CHECK-NEXT: mov x9, sp
+; CHECK-NEXT: add x10, sp, #136
+; CHECK-NEXT: movk x8, #65408, lsl #32
+; CHECK-NEXT: add x9, x9, #128
+; CHECK-NEXT: stp x6, x7, [sp, #144]
+; CHECK-NEXT: stp x9, x8, [sp, #176]
+; CHECK-NEXT: add x9, x10, #24
+; CHECK-NEXT: add x10, sp, #192
+; CHECK-NEXT: mov w8, #-24 // =0xffffffe8
+; CHECK-NEXT: str x5, [sp, #136]
+; CHECK-NEXT: stp q0, q1, [sp]
+; CHECK-NEXT: stp q2, q3, [sp, #32]
+; CHECK-NEXT: stp q4, q5, [sp, #64]
+; CHECK-NEXT: stp q6, q7, [sp, #96]
+; CHECK-NEXT: stp x10, x9, [sp, #160]
+; CHECK-NEXT: tbz w8, #31, .LBB0_3
+; CHECK-NEXT: // %bb.1: // %maybe_reg
+; CHECK-NEXT: add w9, w8, #8
+; CHECK-NEXT: cmp w9, #0
+; CHECK-NEXT: str w9, [sp, #184]
+; CHECK-NEXT: b.gt .LBB0_3
+; CHECK-NEXT: // %bb.2: // %in_reg
+; CHECK-NEXT: ldr x9, [sp, #168]
+; CHECK-NEXT: add x8, x9, w8, sxtw
+; CHECK-NEXT: b .LBB0_4
+; CHECK-NEXT: .LBB0_3: // %on_stack
+; CHECK-NEXT: ldr x8, [sp, #160]
+; CHECK-NEXT: add x9, x8, #8
+; CHECK-NEXT: str x9, [sp, #160]
+; CHECK-NEXT: .LBB0_4: // %end
+; CHECK-NEXT: ldr w0, [x8]
+; CHECK-NEXT: add sp, sp, #192
+; CHECK-NEXT: ret
+entry:
+ %args = alloca %va_list, align 8
+ call void @llvm.va_start(ptr %args)
+ %gr_offs_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 3
+ %gr_offs = load i32, ptr %gr_offs_p, align 8
+ %0 = icmp sge i32 %gr_offs, 0
+ br i1 %0, label %on_stack, label %maybe_reg
+
+maybe_reg:
+ %new_reg_offs = add i32 %gr_offs, 8
+ store i32 %new_reg_offs, ptr %gr_offs_p, align 8
+ %inreg = icmp sle i32 %new_reg_offs, 0
+ br i1 %inreg, label %in_reg, label %on_stack
+
+in_reg:
+ %reg_top_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 1
+ %reg_top = load ptr, ptr %reg_top_p, align 8
+ %reg = getelementptr inbounds i8, ptr %reg_top, i32 %gr_offs
+ br label %end
+
+on_stack:
+ %stack_p = getelementptr inbounds %va_list, ptr %args, i32 0, i32 0
+ %stack = load ptr, ptr %stack_p, align 8
+ %new_stack = getelementptr inbounds i8, ptr %stack, i64 8
+ store ptr %new_stack, ptr %stack_p, align 8
+ br label %end
+
+end:
+ %p = phi ptr [ %reg, %in_reg ], [ %stack, %on_stack ]
+ %10 = load i32, ptr %p, align 8
+ call void @llvm.va_end.p0(ptr %args)
+ ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: mov w8, #10 // =0xa
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w4, #5 // =0x5
+; CHECK-NEXT: mov w5, #6 // =0x6
+; CHECK-NEXT: mov w6, #7 // =0x7
+; CHECK-NEXT: mov w7, #8 // =0x8
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: str w8, [sp, #8]
+; CHECK-NEXT: str w9, [sp]
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
new file mode 100644
index 0000000000000..db6ec0dcc8da8
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -0,0 +1,49 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: ldr w0, [sp, #16]
+; CHECK-NEXT: orr x8, x8, #0x8
+; CHECK-NEXT: str x8, [sp, #8]
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: ret
+ %args = alloca ptr, align 8
+ call void @llvm.va_start(ptr %args)
+ %10 = va_arg ptr %args, i32
+ call void @llvm.va_end(ptr %args)
+ ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK: ; %bb.0:
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: mov w8, #10 ; =0xa
+; CHECK-NEXT: mov w9, #9 ; =0x9
+; CHECK-NEXT: mov w0, #1 ; =0x1
+; CHECK-NEXT: stp x9, x8, [sp, #24]
+; CHECK-NEXT: mov w8, #8 ; =0x8
+; CHECK-NEXT: mov w9, #6 ; =0x6
+; CHECK-NEXT: str x8, [sp, #16]
+; CHECK-NEXT: mov w8, #7 ; =0x7
+; CHECK-NEXT: mov w1, #2 ; =0x2
+; CHECK-NEXT: mov w2, #3 ; =0x3
+; CHECK-NEXT: mov w3, #4 ; =0x4
+; CHECK-NEXT: mov w4, #5 ; =0x5
+; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: bl _callee
+; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ret
+ %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ ret i32 %r
+}
+
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
new file mode 100644
index 0000000000000..94ee9ba5215d9
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -0,0 +1,51 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-pc-windows < %s | FileCheck %s
+
+define preserve_nonecc i32 @callee(i32 %a1, i32 %a2, i32 %a3, i32 %a4, i32 %a5, ...) nounwind noinline ssp {
+; CHECK-LABEL: callee:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x0, x5
+; CHECK-NEXT: add x8, sp, #24
+; CHECK-NEXT: stp x6, x7, [sp, #32]
+; CHECK-NEXT: str x5, [sp, #24]
+; CHECK-NEXT: // kill: def $w0 killed $w0 killed $x0
+; CHECK-NEXT: str x8, [sp, #8]
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: ret
+ %args = alloca ptr, align 8
+ call void @llvm.va_start(ptr %args)
+ %p = load ptr, ptr %args, align 8
+ %10 = load i32, ptr %p, align 8
+ call void @llvm.va_end(ptr %args)
+ ret i32 %10
+}
+
+declare void @llvm.va_start(ptr) nounwind
+declare void @llvm.va_end(ptr) nounwind
+
+define i32 @caller() nounwind ssp {
+; CHECK-LABEL: caller:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: mov w8, #10 // =0xa
+; CHECK-NEXT: mov w9, #9 // =0x9
+; CHECK-NEXT: mov w0, #1 // =0x1
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w4, #5 // =0x5
+; CHECK-NEXT: mov w5, #6 // =0x6
+; CHECK-NEXT: mov w6, #7 // =0x7
+; CHECK-NEXT: mov w7, #8 // =0x8
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: str w8, [sp, #8]
+; CHECK-NEXT: str w9, [sp]
+; CHECK-NEXT: bl callee
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ret
+ %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ ret i32 %r
+}
+
>From 80bac77f8f5a5646e1ad2890f5a0f8d6d440ed05 Mon Sep 17 00:00:00 2001
From: antangelo <contact at antangelo.com>
Date: Fri, 19 Jul 2024 02:19:11 -0400
Subject: [PATCH 2/2] Fix calling convention in test calls
---
.../AArch64/preserve_nonecc_varargs_aapcs.ll | 28 +++++++++++++++----
.../AArch64/preserve_nonecc_varargs_darwin.ll | 28 +++++++++++++++----
.../AArch64/preserve_nonecc_varargs_win64.ll | 28 +++++++++++++++----
3 files changed, 69 insertions(+), 15 deletions(-)
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
index fdac9f76843a8..48898719f40ce 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_aapcs.ll
@@ -81,7 +81,7 @@ declare void @llvm.va_end(ptr) nounwind
define i32 @caller() nounwind ssp {
; CHECK-LABEL: caller:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: sub sp, sp, #176
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: mov w0, #1 // =0x1
@@ -92,14 +92,32 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: mov w5, #6 // =0x6
; CHECK-NEXT: mov w6, #7 // =0x7
; CHECK-NEXT: mov w7, #8 // =0x8
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: str w9, [sp]
; CHECK-NEXT: bl callee
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
- %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
ret i32 %r
}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
index db6ec0dcc8da8..e227f14542cc1 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_darwin.ll
@@ -24,7 +24,7 @@ declare void @llvm.va_end(ptr) nounwind
define i32 @caller() nounwind ssp {
; CHECK-LABEL: caller:
; CHECK: ; %bb.0:
-; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: sub sp, sp, #208
; CHECK-NEXT: mov w8, #10 ; =0xa
; CHECK-NEXT: mov w9, #9 ; =0x9
; CHECK-NEXT: mov w0, #1 ; =0x1
@@ -37,13 +37,31 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: mov w2, #3 ; =0x3
; CHECK-NEXT: mov w3, #4 ; =0x4
; CHECK-NEXT: mov w4, #5 ; =0x5
-; CHECK-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #48] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #64] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #80] ; 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #96] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #112] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #128] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #144] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #160] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #176] ; 16-byte Folded Spill
+; CHECK-NEXT: stp x29, x30, [sp, #192] ; 16-byte Folded Spill
; CHECK-NEXT: stp x9, x8, [sp]
; CHECK-NEXT: bl _callee
-; CHECK-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: ldp x29, x30, [sp, #192] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #176] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #160] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #144] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #128] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #112] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #96] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #80] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #64] ; 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #48] ; 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #208
; CHECK-NEXT: ret
- %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
ret i32 %r
}
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
index 94ee9ba5215d9..83dd240a6540f 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_varargs_win64.ll
@@ -27,7 +27,7 @@ declare void @llvm.va_end(ptr) nounwind
define i32 @caller() nounwind ssp {
; CHECK-LABEL: caller:
; CHECK: // %bb.0:
-; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: sub sp, sp, #176
; CHECK-NEXT: mov w8, #10 // =0xa
; CHECK-NEXT: mov w9, #9 // =0x9
; CHECK-NEXT: mov w0, #1 // =0x1
@@ -38,14 +38,32 @@ define i32 @caller() nounwind ssp {
; CHECK-NEXT: mov w5, #6 // =0x6
; CHECK-NEXT: mov w6, #7 // =0x7
; CHECK-NEXT: mov w7, #8 // =0x8
-; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
; CHECK-NEXT: str w8, [sp, #8]
; CHECK-NEXT: str w9, [sp]
; CHECK-NEXT: bl callee
-; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
-; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
- %r = tail call i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
+ %r = tail call preserve_nonecc i32 (i32, i32, i32, i32, i32, ...) @callee(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10)
ret i32 %r
}
More information about the llvm-commits
mailing list