[llvm-branch-commits] [llvm] Backport ARM64EC variadic args fixes to LLVM 18 (PR #81800)
Daniel Paoliello via llvm-branch-commits
llvm-branch-commits at lists.llvm.org
Fri Mar 8 15:42:18 PST 2024
https://github.com/dpaoliello updated https://github.com/llvm/llvm-project/pull/81800
>From 064dd621c19b3738af5db29afd5b986a6d739ab1 Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05 at gmail.com>
Date: Wed, 31 Jan 2024 02:32:15 +0000
Subject: [PATCH 1/3] [AArch64] Fix variadic tail-calls on ARM64EC (#79774)
ARM64EC varargs calls expect that x4 = sp at entry, special handling is
needed to ensure this with tail calls since they occur after the
epilogue and the x4 write happens before.
I tried going through AArch64MachineFrameLowering for this, hoping to
avoid creating the dummy object but this was the best I could do since
the stack info that uses isn't populated at this stage,
CreateFixedObject also explicitly forbids 0 sized objects.
---
.../Target/AArch64/AArch64ISelLowering.cpp | 10 ++++-
llvm/test/CodeGen/AArch64/arm64ec-varargs.ll | 37 +++++++++++++++++++
llvm/test/CodeGen/AArch64/vararg-tallcall.ll | 8 ++++
3 files changed, 54 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0287856560e91a..196aa50cf4060b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -8007,11 +8007,19 @@ AArch64TargetLowering::LowerCall(CallLoweringInfo &CLI,
}
if (IsVarArg && Subtarget->isWindowsArm64EC()) {
+ SDValue ParamPtr = StackPtr;
+ if (IsTailCall) {
+ // Create a dummy object at the top of the stack that can be used to get
+ // the SP after the epilogue
+ int FI = MF.getFrameInfo().CreateFixedObject(1, FPDiff, true);
+ ParamPtr = DAG.getFrameIndex(FI, PtrVT);
+ }
+
// For vararg calls, the Arm64EC ABI requires values in x4 and x5
// describing the argument list. x4 contains the address of the
// first stack parameter. x5 contains the size in bytes of all parameters
// passed on the stack.
- RegsToPass.emplace_back(AArch64::X4, StackPtr);
+ RegsToPass.emplace_back(AArch64::X4, ParamPtr);
RegsToPass.emplace_back(AArch64::X5,
DAG.getConstant(NumBytes, DL, MVT::i64));
}
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
index dc16b3a1a0f270..844fc52ddade63 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-varargs.ll
@@ -100,5 +100,42 @@ define void @varargs_many_argscalleer() nounwind {
ret void
}
+define void @varargs_caller_tail() nounwind {
+; CHECK-LABEL: varargs_caller_tail:
+; CHECK: // %bb.0:
+; CHECK-NEXT: sub sp, sp, #48
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x8, sp, #16
+; CHECK-NEXT: mov x9, #4617315517961601024 // =0x4014000000000000
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #2 // =0x2
+; CHECK-NEXT: mov x2, #4613937818241073152 // =0x4008000000000000
+; CHECK-NEXT: mov w3, #4 // =0x4
+; CHECK-NEXT: mov w5, #16 // =0x10
+; CHECK-NEXT: stp xzr, x30, [sp, #24] // 8-byte Folded Spill
+; CHECK-NEXT: stp x9, x8, [sp]
+; CHECK-NEXT: str xzr, [sp, #16]
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee at WEAKREF
+; CHECK-NEXT: bl "#varargs_callee"
+; CHECK-NEXT: ldr x30, [sp, #32] // 8-byte Folded Reload
+; CHECK-NEXT: add x4, sp, #48
+; CHECK-NEXT: mov x0, #4607182418800017408 // =0x3ff0000000000000
+; CHECK-NEXT: mov w1, #4 // =0x4
+; CHECK-NEXT: mov w2, #3 // =0x3
+; CHECK-NEXT: mov w3, #2 // =0x2
+; CHECK-NEXT: mov x5, xzr
+; CHECK-NEXT: add sp, sp, #48
+; CHECK-NEXT: .weak_anti_dep varargs_callee
+; CHECK-NEXT:.set varargs_callee, "#varargs_callee"@WEAKREF
+; CHECK-NEXT: .weak_anti_dep "#varargs_callee"
+; CHECK-NEXT:.set "#varargs_callee", varargs_callee at WEAKREF
+; CHECK-NEXT: b "#varargs_callee"
+ call void (double, ...) @varargs_callee(double 1.0, i32 2, double 3.0, i32 4, double 5.0, <2 x double> <double 0.0, double 0.0>)
+ tail call void (double, ...) @varargs_callee(double 1.0, i32 4, i32 3, i32 2)
+ ret void
+}
declare void @llvm.va_start(ptr)
diff --git a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
index 2d6db1642247d7..812837639196e6 100644
--- a/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
+++ b/llvm/test/CodeGen/AArch64/vararg-tallcall.ll
@@ -1,5 +1,6 @@
; RUN: llc -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
+; RUN: llc -mtriple=arm64ec-windows-msvc %s -o - | FileCheck %s --check-prefixes=CHECK-EC
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-windows-msvc %s -o - | FileCheck %s
; RUN: llc -global-isel -global-isel-abort=2 -verify-machineinstrs -mtriple=aarch64-linux-gnu %s -o - | FileCheck %s
@@ -32,3 +33,10 @@ attributes #1 = { noinline optnone "thunk" }
; CHECK: ldr x9, [x9]
; CHECK: mov v0.16b, v16.16b
; CHECK: br x9
+; CHECK-EC: mov v7.16b, v0.16b
+; CHECK-EC: ldr x9, [x0]
+; CHECK-EC: ldr x11, [x9]
+; CHECK-EC: mov v0.16b, v7.16b
+; CHECK-EC: add x4, sp, #64
+; CHECK-EC: add sp, sp, #64
+; CHECK-EC: br x11
>From 780b4dc9186f6e7d3549358f84666fc85fad414f Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05 at gmail.com>
Date: Mon, 5 Feb 2024 17:26:16 +0000
Subject: [PATCH 2/3] [AArch64] Fix generated types for ARM64EC variadic entry
thunk targets (#80595)
ISel handles filling in x4/x5 when calling variadic functions as they
don't correspond to the 5th/6th X64 arguments but rather to the end of
the shadow space on the stack and the size in bytes of all stack
parameters (ignored and written as 0 for calls from entry thunks).
Will PR a follow up with ISel handling after this is merged.
---
.../AArch64/AArch64Arm64ECCallLowering.cpp | 48 +++++++++++--------
.../CodeGen/AArch64/arm64ec-entry-thunks.ll | 4 +-
2 files changed, 29 insertions(+), 23 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 11248bb7aef31f..91b4f18c73c935 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -43,6 +43,8 @@ static cl::opt<bool> GenerateThunks("arm64ec-generate-thunks", cl::Hidden,
namespace {
+enum class ThunkType { GuestExit, Entry, Exit };
+
class AArch64Arm64ECCallLowering : public ModulePass {
public:
static char ID;
@@ -69,14 +71,14 @@ class AArch64Arm64ECCallLowering : public ModulePass {
Type *I64Ty;
Type *VoidTy;
- void getThunkType(FunctionType *FT, AttributeList AttrList, bool EntryThunk,
+ void getThunkType(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out, FunctionType *&Arm64Ty,
FunctionType *&X64Ty);
void getThunkRetType(FunctionType *FT, AttributeList AttrList,
raw_ostream &Out, Type *&Arm64RetTy, Type *&X64RetTy,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool &HasSretPtr);
- void getThunkArgTypes(FunctionType *FT, AttributeList AttrList,
+ void getThunkArgTypes(FunctionType *FT, AttributeList AttrList, ThunkType TT,
raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr);
@@ -89,10 +91,11 @@ class AArch64Arm64ECCallLowering : public ModulePass {
void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
AttributeList AttrList,
- bool EntryThunk, raw_ostream &Out,
+ ThunkType TT, raw_ostream &Out,
FunctionType *&Arm64Ty,
FunctionType *&X64Ty) {
- Out << (EntryThunk ? "$ientry_thunk$cdecl$" : "$iexit_thunk$cdecl$");
+ Out << (TT == ThunkType::Entry ? "$ientry_thunk$cdecl$"
+ : "$iexit_thunk$cdecl$");
Type *Arm64RetTy;
Type *X64RetTy;
@@ -102,8 +105,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
// The first argument to a thunk is the called function, stored in x9.
// For exit thunks, we pass the called function down to the emulator;
- // for entry thunks, we just call the Arm64 function directly.
- if (!EntryThunk)
+ // for entry/guest exit thunks, we just call the Arm64 function directly.
+ if (TT == ThunkType::Exit)
Arm64ArgTypes.push_back(PtrTy);
X64ArgTypes.push_back(PtrTy);
@@ -111,14 +114,16 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
getThunkRetType(FT, AttrList, Out, Arm64RetTy, X64RetTy, Arm64ArgTypes,
X64ArgTypes, HasSretPtr);
- getThunkArgTypes(FT, AttrList, Out, Arm64ArgTypes, X64ArgTypes, HasSretPtr);
+ getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
+ HasSretPtr);
- Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+ Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
+ TT == ThunkType::Entry && FT->isVarArg());
X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
}
void AArch64Arm64ECCallLowering::getThunkArgTypes(
- FunctionType *FT, AttributeList AttrList, raw_ostream &Out,
+ FunctionType *FT, AttributeList AttrList, ThunkType TT, raw_ostream &Out,
SmallVectorImpl<Type *> &Arm64ArgTypes,
SmallVectorImpl<Type *> &X64ArgTypes, bool HasSretPtr) {
@@ -151,14 +156,16 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(I64Ty);
}
- // x4
- Arm64ArgTypes.push_back(PtrTy);
- X64ArgTypes.push_back(PtrTy);
- // x5
- Arm64ArgTypes.push_back(I64Ty);
- // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
- // have proper isel for varargs
- X64ArgTypes.push_back(I64Ty);
+ if (TT != ThunkType::Entry) {
+ // x4
+ Arm64ArgTypes.push_back(PtrTy);
+ X64ArgTypes.push_back(PtrTy);
+ // x5
+ Arm64ArgTypes.push_back(I64Ty);
+ // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+ // have proper isel for varargs
+ X64ArgTypes.push_back(I64Ty);
+ }
return;
}
@@ -339,8 +346,7 @@ Function *AArch64Arm64ECCallLowering::buildExitThunk(FunctionType *FT,
SmallString<256> ExitThunkName;
llvm::raw_svector_ostream ExitThunkStream(ExitThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(FT, Attrs, /*EntryThunk*/ false, ExitThunkStream, Arm64Ty,
- X64Ty);
+ getThunkType(FT, Attrs, ThunkType::Exit, ExitThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(ExitThunkName))
return F;
@@ -443,7 +449,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
SmallString<256> EntryThunkName;
llvm::raw_svector_ostream EntryThunkStream(EntryThunkName);
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::Entry,
EntryThunkStream, Arm64Ty, X64Ty);
if (Function *F = M->getFunction(EntryThunkName))
return F;
@@ -518,7 +524,7 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Function *AArch64Arm64ECCallLowering::buildGuestExitThunk(Function *F) {
llvm::raw_null_ostream NullThunkName;
FunctionType *Arm64Ty, *X64Ty;
- getThunkType(F->getFunctionType(), F->getAttributes(), /*EntryThunk*/ true,
+ getThunkType(F->getFunctionType(), F->getAttributes(), ThunkType::GuestExit,
NullThunkName, Arm64Ty, X64Ty);
auto MangledName = getArm64ECMangledFunctionName(F->getName().str());
assert(MangledName && "Can't guest exit to function that's already native");
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 5c56f51e1ca554..0083818def1514 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,8 +147,8 @@ define void @has_varargs(...) nounwind {
; CHECK-NEXT: add x29, sp, #160
; CHECK-NEXT: .seh_add_fp 160
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: ldp x8, x5, [x4, #32]
-; CHECK-NEXT: mov x4, x8
+; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: blr x9
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
; CHECK-NEXT: ldr x0, [x8, :lo12:__os_arm64x_dispatch_ret]
>From e90c108369333ba9138a2cbefa2134bf2ea2a497 Mon Sep 17 00:00:00 2001
From: Billy Laws <blaws05 at gmail.com>
Date: Tue, 27 Feb 2024 18:32:15 +0000
Subject: [PATCH 3/3] [AArch64] Skip over shadow space for ARM64EC entry thunk
variadic calls (#80994)
When in an entry thunk the x64 SP is passed in x4 but this cannot be
directly passed through since x64 varargs calls have a 32 byte shadow
store at SP followed by the in-stack parameters. ARM64EC varargs calls
on the other hand expect x4 to point to the first in-stack parameter.
---
.../AArch64/AArch64Arm64ECCallLowering.cpp | 35 ++++++++++++++-----
.../AArch64/AArch64CallingConvention.td | 3 ++
.../CodeGen/AArch64/arm64ec-entry-thunks.ll | 2 +-
3 files changed, 30 insertions(+), 10 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
index 91b4f18c73c935..03d641d04413ef 100644
--- a/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Arm64ECCallLowering.cpp
@@ -117,8 +117,8 @@ void AArch64Arm64ECCallLowering::getThunkType(FunctionType *FT,
getThunkArgTypes(FT, AttrList, TT, Out, Arm64ArgTypes, X64ArgTypes,
HasSretPtr);
- Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes,
- TT == ThunkType::Entry && FT->isVarArg());
+ Arm64Ty = FunctionType::get(Arm64RetTy, Arm64ArgTypes, false);
+
X64Ty = FunctionType::get(X64RetTy, X64ArgTypes, false);
}
@@ -156,13 +156,13 @@ void AArch64Arm64ECCallLowering::getThunkArgTypes(
X64ArgTypes.push_back(I64Ty);
}
+ // x4
+ Arm64ArgTypes.push_back(PtrTy);
+ X64ArgTypes.push_back(PtrTy);
+ // x5
+ Arm64ArgTypes.push_back(I64Ty);
if (TT != ThunkType::Entry) {
- // x4
- Arm64ArgTypes.push_back(PtrTy);
- X64ArgTypes.push_back(PtrTy);
- // x5
- Arm64ArgTypes.push_back(I64Ty);
- // FIXME: x5 isn't actually passed/used by the x64 side; revisit once we
+ // FIXME: x5 isn't actually used by the x64 side; revisit once we
// have proper isel for varargs
X64ArgTypes.push_back(I64Ty);
}
@@ -471,10 +471,11 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
bool TransformDirectToSRet = X64RetType->isVoidTy() && !RetTy->isVoidTy();
unsigned ThunkArgOffset = TransformDirectToSRet ? 2 : 1;
+ unsigned PassthroughArgSize = F->isVarArg() ? 5 : Thunk->arg_size();
// Translate arguments to call.
SmallVector<Value *> Args;
- for (unsigned i = ThunkArgOffset, e = Thunk->arg_size(); i != e; ++i) {
+ for (unsigned i = ThunkArgOffset, e = PassthroughArgSize; i != e; ++i) {
Value *Arg = Thunk->getArg(i);
Type *ArgTy = Arm64Ty->getParamType(i - ThunkArgOffset);
if (ArgTy->isArrayTy() || ArgTy->isStructTy() ||
@@ -491,6 +492,22 @@ Function *AArch64Arm64ECCallLowering::buildEntryThunk(Function *F) {
Args.push_back(Arg);
}
+ if (F->isVarArg()) {
+ // The 5th argument to variadic entry thunks is used to model the x64 sp
+ // which is passed to the thunk in x4, this can be passed to the callee as
+ // the variadic argument start address after skipping over the 32 byte
+ // shadow store.
+
+ // The EC thunk CC will assign any argument marked as InReg to x4.
+ Thunk->addParamAttr(5, Attribute::InReg);
+ Value *Arg = Thunk->getArg(5);
+ Arg = IRB.CreatePtrAdd(Arg, IRB.getInt64(0x20));
+ Args.push_back(Arg);
+
+ // Pass in a zero variadic argument size (in x5).
+ Args.push_back(IRB.getInt64(0));
+ }
+
// Call the function passed to the thunk.
Value *Callee = Thunk->getArg(0);
Callee = IRB.CreateBitCast(Callee, PtrTy);
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 78ea4a5180f703..8e67f0f5c8815f 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -213,6 +213,9 @@ def CC_AArch64_Arm64EC_VarArg : CallingConv<[
// address is passed in X9.
let Entry = 1 in
def CC_AArch64_Arm64EC_Thunk : CallingConv<[
+ // ARM64EC-specific: the InReg attribute can be used to access the x64 sp passed into entry thunks in x4 from the IR.
+ CCIfInReg<CCIfType<[i64], CCAssignToReg<[X4]>>>,
+
// Byval aggregates are passed by pointer
CCIfByVal<CCPassIndirect<i64>>,
diff --git a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
index 0083818def1514..bb9ba05f7a2724 100644
--- a/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
+++ b/llvm/test/CodeGen/AArch64/arm64ec-entry-thunks.ll
@@ -147,7 +147,7 @@ define void @has_varargs(...) nounwind {
; CHECK-NEXT: add x29, sp, #160
; CHECK-NEXT: .seh_add_fp 160
; CHECK-NEXT: .seh_endprologue
-; CHECK-NEXT: mov x4, sp
+; CHECK-NEXT: add x4, x4, #32
; CHECK-NEXT: mov x5, xzr
; CHECK-NEXT: blr x9
; CHECK-NEXT: adrp x8, __os_arm64x_dispatch_ret
More information about the llvm-branch-commits
mailing list