[compiler-rt] [flang] [llvm] [AArch64] fix trampoline implementation: use X15 (PR #126743)
Jameson Nash via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 12 09:58:58 PST 2025
https://github.com/vtjnash updated https://github.com/llvm/llvm-project/pull/126743
>From 546fed81109e575b5b44693c3940e08ea0231ebc Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Mon, 10 Feb 2025 19:21:38 +0000
Subject: [PATCH 1/5] [AArch64] fix trampoline implementation: use X15
AAPCS64 reserves any of X9-X15 for this purpose, and says not to use X16
or X18 like GCC did. Simply choosing a different register fixes the
problem of this being broken on any platform that actually follows the
platform ABI. As a side benefit, also generate slightly better code by
following the XCore implementation instead of the PPC one (although
following the RISCV one might have been slightly more readable in hindsight).
---
compiler-rt/lib/builtins/README.txt | 5 -
compiler-rt/lib/builtins/trampoline_setup.c | 42 ---
.../builtins/Unit/trampoline_setup_test.c | 2 +-
.../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 4 +-
.../AArch64/AArch64CallingConvention.td | 36 ++-
.../Target/AArch64/AArch64FrameLowering.cpp | 26 ++
.../Target/AArch64/AArch64ISelLowering.cpp | 85 +++---
llvm/test/CodeGen/AArch64/nest-register.ll | 16 +-
.../CodeGen/AArch64/preserve_nonecc_call.ll | 116 ++++----
.../AArch64/statepoint-call-lowering.ll | 2 +-
llvm/test/CodeGen/AArch64/trampoline.ll | 257 +++++++++++++++++-
llvm/test/CodeGen/AArch64/win64cc-x18.ll | 27 +-
.../CodeGen/AArch64/zero-call-used-regs.ll | 16 +-
13 files changed, 435 insertions(+), 199 deletions(-)
diff --git a/compiler-rt/lib/builtins/README.txt b/compiler-rt/lib/builtins/README.txt
index 19f26c92a0f94..2d213d95f333a 100644
--- a/compiler-rt/lib/builtins/README.txt
+++ b/compiler-rt/lib/builtins/README.txt
@@ -272,11 +272,6 @@ switch32
switch8
switchu8
-// This function generates a custom trampoline function with the specific
-// realFunc and localsPtr values.
-void __trampoline_setup(uint32_t* trampOnStack, int trampSizeAllocated,
- const void* realFunc, void* localsPtr);
-
// There is no C interface to the *_vfp_d8_d15_regs functions. There are
// called in the prolog and epilog of Thumb1 functions. When the C++ ABI use
// SJLJ for exceptions, each function with a catch clause or destructors needs
diff --git a/compiler-rt/lib/builtins/trampoline_setup.c b/compiler-rt/lib/builtins/trampoline_setup.c
index 830e25e4c0303..844eb27944142 100644
--- a/compiler-rt/lib/builtins/trampoline_setup.c
+++ b/compiler-rt/lib/builtins/trampoline_setup.c
@@ -41,45 +41,3 @@ COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
__clear_cache(trampOnStack, &trampOnStack[10]);
}
#endif // __powerpc__ && !defined(__powerpc64__)
-
-// The AArch64 compiler generates calls to __trampoline_setup() when creating
-// trampoline functions on the stack for use with nested functions.
-// This function creates a custom 36-byte trampoline function on the stack
-// which loads x18 with a pointer to the outer function's locals
-// and then jumps to the target nested function.
-// Note: x18 is a reserved platform register on Windows and macOS.
-
-#if defined(__aarch64__) && defined(__ELF__)
-COMPILER_RT_ABI void __trampoline_setup(uint32_t *trampOnStack,
- int trampSizeAllocated,
- const void *realFunc, void *localsPtr) {
- // This should never happen, but if compiler did not allocate
- // enough space on stack for the trampoline, abort.
- if (trampSizeAllocated < 36)
- compilerrt_abort();
-
- // create trampoline
- // Load realFunc into x17. mov/movk 16 bits at a time.
- trampOnStack[0] =
- 0xd2800000u | ((((uint64_t)realFunc >> 0) & 0xffffu) << 5) | 0x11;
- trampOnStack[1] =
- 0xf2a00000u | ((((uint64_t)realFunc >> 16) & 0xffffu) << 5) | 0x11;
- trampOnStack[2] =
- 0xf2c00000u | ((((uint64_t)realFunc >> 32) & 0xffffu) << 5) | 0x11;
- trampOnStack[3] =
- 0xf2e00000u | ((((uint64_t)realFunc >> 48) & 0xffffu) << 5) | 0x11;
- // Load localsPtr into x18
- trampOnStack[4] =
- 0xd2800000u | ((((uint64_t)localsPtr >> 0) & 0xffffu) << 5) | 0x12;
- trampOnStack[5] =
- 0xf2a00000u | ((((uint64_t)localsPtr >> 16) & 0xffffu) << 5) | 0x12;
- trampOnStack[6] =
- 0xf2c00000u | ((((uint64_t)localsPtr >> 32) & 0xffffu) << 5) | 0x12;
- trampOnStack[7] =
- 0xf2e00000u | ((((uint64_t)localsPtr >> 48) & 0xffffu) << 5) | 0x12;
- trampOnStack[8] = 0xd61f0220; // br x17
-
- // Clear instruction cache.
- __clear_cache(trampOnStack, &trampOnStack[9]);
-}
-#endif // defined(__aarch64__) && !defined(__APPLE__) && !defined(_WIN64)
diff --git a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
index d51d35acaa02f..da115fe764271 100644
--- a/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
+++ b/compiler-rt/test/builtins/Unit/trampoline_setup_test.c
@@ -7,7 +7,7 @@
/*
* Tests nested functions
- * The ppc and aarch64 compilers generates a call to __trampoline_setup
+ * The ppc compiler generates a call to __trampoline_setup
* The i386 and x86_64 compilers generate a call to ___enable_execute_stack
*/
diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index 26f4aee21d8bd..f402404121da0 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -274,10 +274,10 @@ class BoxedProcedurePass
auto loc = embox.getLoc();
mlir::Type i8Ty = builder.getI8Type();
mlir::Type i8Ptr = builder.getRefType(i8Ty);
- // For AArch64, PPC32 and PPC64, the thunk is populated by a call to
+ // For PPC32 and PPC64, the thunk is populated by a call to
// __trampoline_setup, which is defined in
// compiler-rt/lib/builtins/trampoline_setup.c and requires the
- // thunk size greater than 32 bytes. For RISCV and x86_64, the
+ // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the
// thunk setup doesn't go through __trampoline_setup and fits in 32
// bytes.
fir::SequenceType::Extent thunkSize = triple.getTrampolineSize();
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 7cca6d9bc6b9c..8355463dea94e 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -28,6 +28,12 @@ class CCIfSubtarget<string F, CCAction A>
//===----------------------------------------------------------------------===//
defvar AArch64_Common = [
+ // The 'nest' parameter, if any, is passed in X15.
+ // The register previously used here (X18) is defined to be unavailable for
+ // this purpose, whereas all of X9-X15 are defined to be free for LLVM to use,
+ // so use X15 (which LLVM often already clobbers anyway).
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -117,16 +123,12 @@ defvar AArch64_Common = [
];
let Entry = 1 in
-def CC_AArch64_AAPCS : CallingConv<!listconcat(
- // The 'nest' parameter, if any, is passed in X18.
- // Darwin and Windows use X18 as the platform register and hence 'nest' isn't
- // currently supported there.
- [CCIfNest<CCAssignToReg<[X18]>>],
- AArch64_Common
-)>;
+def CC_AArch64_AAPCS : CallingConv<AArch64_Common>;
let Entry = 1 in
def RetCC_AArch64_AAPCS : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32], CCBitConvertToType<v2i64>>,
@@ -177,6 +179,8 @@ def CC_AArch64_Win64_VarArg : CallingConv<[
// a stack layout compatible with the x64 calling convention.
let Entry = 1 in
def CC_AArch64_Arm64EC_VarArg : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
// Convert small floating-point values to integer.
CCIfType<[f16, bf16], CCBitConvertToType<i16>>,
CCIfType<[f32], CCBitConvertToType<i32>>,
@@ -295,6 +299,8 @@ def CC_AArch64_Arm64EC_Thunk_Native : CallingConv<[
let Entry = 1 in
def RetCC_AArch64_Arm64EC_Thunk : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
// The X86-Win64 calling convention always returns __m64 values in RAX.
CCIfType<[x86mmx], CCBitConvertToType<i64>>,
@@ -353,6 +359,8 @@ def RetCC_AArch64_Arm64EC_CFGuard_Check : CallingConv<[
// + Stack slots are sized as needed rather than being at least 64-bit.
let Entry = 1 in
def CC_AArch64_DarwinPCS : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -427,6 +435,8 @@ def CC_AArch64_DarwinPCS : CallingConv<[
let Entry = 1 in
def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[iPTR], CCBitConvertToType<i64>>,
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -450,6 +460,8 @@ def CC_AArch64_DarwinPCS_VarArg : CallingConv<[
// same as the normal Darwin VarArgs handling.
let Entry = 1 in
def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[v2f32], CCBitConvertToType<v2i32>>,
CCIfType<[v2f64, v4f32, f128], CCBitConvertToType<v2i64>>,
@@ -494,6 +506,8 @@ def CC_AArch64_DarwinPCS_ILP32_VarArg : CallingConv<[
let Entry = 1 in
def CC_AArch64_GHC : CallingConv<[
+ CCIfNest<CCAssignToReg<[X15]>>,
+
CCIfType<[iPTR], CCBitConvertToType<i64>>,
// Handle all vector types as either f64 or v2f64.
@@ -523,6 +537,7 @@ def CC_AArch64_Preserve_None : CallingConv<[
// We can pass arguments in all general registers, except:
// - X8, used for sret
// - X16/X17, used by the linker as IP0/IP1
+ // - X15, the nest register and used by Windows for stack allocation
// - X18, the platform register
// - X19, the base pointer
// - X29, the frame pointer
@@ -533,6 +548,7 @@ def CC_AArch64_Preserve_None : CallingConv<[
// normal functions without saving and reloading arguments.
// X9 is assigned last as it is used in FrameLowering as the first
// choice for a scratch register.
+ CCIfNest<CCAssignToReg<[X15]>>,
CCIfType<[i32], CCAssignToReg<[W20, W21, W22, W23,
W24, W25, W26, W27, W28,
W0, W1, W2, W3, W4, W5,
@@ -544,12 +560,6 @@ def CC_AArch64_Preserve_None : CallingConv<[
X6, X7, X10, X11,
X12, X13, X14, X9]>>,
- // Windows uses X15 for stack allocation
- CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
- CCIfType<[i32], CCAssignToReg<[W15]>>>,
- CCIf<"!State.getMachineFunction().getSubtarget<AArch64Subtarget>().isTargetWindows()",
- CCIfType<[i64], CCAssignToReg<[X15]>>>,
-
CCDelegateTo<CC_AArch64_AAPCS>
]>;
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d3abd79b85a75..ced3ff7b742ad 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2044,6 +2044,25 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
: 0;
if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
+ // X15 is used by the stack probe sequence below, so if it is live here,
+ // find an available scratch register to preserve its value in.
+ unsigned X15Scratch = AArch64::NoRegister;
+ if (LiveRegs.contains(AArch64::X15)) {
+ // if (llvm::any_of(
+ // MBB.liveins(),
+ // [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ // return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ // AArch64::X15, LiveIn.PhysReg);
+ // }))
+ X15Scratch = findScratchNonCalleeSaveRegister(&MBB);
+ assert(X15Scratch != AArch64::NoRegister);
+ LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
+ .addReg(AArch64::XZR)
+ .addReg(AArch64::X15, RegState::Undef)
+ .addReg(AArch64::X15, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
+
uint64_t NumWords = (NumBytes + RealignmentPadding) >> 4;
if (NeedsWinCFI) {
HasWinCFI = true;
@@ -2166,6 +2185,13 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// we've set a frame pointer and already finished the SEH prologue.
assert(!NeedsWinCFI);
}
+ if (X15Scratch != AArch64::NoRegister) {
+ BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), AArch64::X15)
+ .addReg(AArch64::XZR)
+ .addReg(X15Scratch, RegState::Undef)
+ .addReg(X15Scratch, RegState::Implicit)
+ .setMIFlag(MachineInstr::FrameSetup);
+ }
}
StackOffset SVECalleeSavesSize = {}, SVELocalsSize = SVEStackSize;
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 0d1608a97bfd3..1404077446420 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7290,59 +7290,66 @@ static SDValue LowerFLDEXP(SDValue Op, SelectionDAG &DAG) {
SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
- // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
- if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
- report_fatal_error(
- "ADJUST_TRAMPOLINE operation is only supported on Linux.");
-
return Op.getOperand(0);
}
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
-
- // Note: x18 cannot be used for the Nest parameter on Windows and macOS.
- if (Subtarget->isTargetDarwin() || Subtarget->isTargetWindows())
- report_fatal_error("INIT_TRAMPOLINE operation is only supported on Linux.");
-
SDValue Chain = Op.getOperand(0);
- SDValue Trmp = Op.getOperand(1); // trampoline
+ SDValue Trmp = Op.getOperand(1); // trampoline, 36 bytes
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
- SDLoc dl(Op);
- EVT PtrVT = getPointerTy(DAG.getDataLayout());
- Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext());
-
- TargetLowering::ArgListTy Args;
- TargetLowering::ArgListEntry Entry;
+ const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- Entry.Ty = IntPtrTy;
- Entry.Node = Trmp;
- Args.push_back(Entry);
+ // ldr x15, .+16
+ // ldr x17, .+20
+ // br x17
+ // 0
+ // .nest: .qword nest
+ // .fptr: .qword fptr
+ SDValue OutChains[5];
- if (auto *FI = dyn_cast<FrameIndexSDNode>(Trmp.getNode())) {
- MachineFunction &MF = DAG.getMachineFunction();
- MachineFrameInfo &MFI = MF.getFrameInfo();
- Entry.Node =
- DAG.getConstant(MFI.getObjectSize(FI->getIndex()), dl, MVT::i64);
- } else
- Entry.Node = DAG.getConstant(36, dl, MVT::i64);
+ const char X15 = 0x0f;
+ const char X17 = 0x11;
- Args.push_back(Entry);
- Entry.Node = FPtr;
- Args.push_back(Entry);
- Entry.Node = Nest;
- Args.push_back(Entry);
+ SDValue Addr = Trmp;
- // Lower to a call to __trampoline_setup(Trmp, TrampSize, FPtr, ctx_reg)
- TargetLowering::CallLoweringInfo CLI(DAG);
- CLI.setDebugLoc(dl).setChain(Chain).setLibCallee(
- CallingConv::C, Type::getVoidTy(*DAG.getContext()),
- DAG.getExternalSymbol("__trampoline_setup", PtrVT), std::move(Args));
+ SDLoc dl(Op);
+ OutChains[0] =
+ DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | X15, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr));
- std::pair<SDValue, SDValue> CallResult = LowerCallTo(CLI);
- return CallResult.second;
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(4, dl, MVT::i64));
+ OutChains[1] =
+ DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | X17, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr, 4));
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(8, dl, MVT::i64));
+ OutChains[2] =
+ DAG.getStore(Chain, dl, DAG.getConstant(0xd61f0220u, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr, 8));
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(16, dl, MVT::i64));
+ OutChains[3] =
+ DAG.getStore(Chain, dl, Nest, Addr, MachinePointerInfo(TrmpAddr, 16));
+
+ Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(24, dl, MVT::i64));
+ OutChains[4] =
+ DAG.getStore(Chain, dl, FPtr, Addr, MachinePointerInfo(TrmpAddr, 24));
+
+ SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
+
+ SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
+ DAG.getConstant(12, dl, MVT::i64));
+
+ // Call clear cache on the trampoline instructions.
+ return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
+ Trmp, EndOfTrmp);
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
diff --git a/llvm/test/CodeGen/AArch64/nest-register.ll b/llvm/test/CodeGen/AArch64/nest-register.ll
index 1e1c1b044bab6..2e94dfba1fa52 100644
--- a/llvm/test/CodeGen/AArch64/nest-register.ll
+++ b/llvm/test/CodeGen/AArch64/nest-register.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -disable-post-ra -verify-machineinstrs < %s -mtriple=aarch64-none-linux-gnu | FileCheck %s
; Tests that the 'nest' parameter attribute causes the relevant parameter to be
@@ -5,18 +6,21 @@
define ptr @nest_receiver(ptr nest %arg) nounwind {
; CHECK-LABEL: nest_receiver:
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: mov x0, x18
-; CHECK-NEXT: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: mov x0, x15
+; CHECK-NEXT: ret
ret ptr %arg
}
define ptr @nest_caller(ptr %arg) nounwind {
; CHECK-LABEL: nest_caller:
-; CHECK: mov x18, x0
-; CHECK-NEXT: bl nest_receiver
-; CHECK: ret
+; CHECK: // %bb.0:
+; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-NEXT: mov x15, x0
+; CHECK-NEXT: bl nest_receiver
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ret
%result = call ptr @nest_receiver(ptr nest %arg)
ret ptr %result
diff --git a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
index 9b9717c19321e..e0d7b5abe7bea 100644
--- a/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
+++ b/llvm/test/CodeGen/AArch64/preserve_nonecc_call.ll
@@ -184,10 +184,11 @@ declare preserve_nonecc i64 @callee_with_many_param2(i64 %a1, i64 %a2, i64 %a3,
define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i64 %a4, i64 %a5, i64 %a6, i64 %a7, i64 %a8, i64 %a9, i64 %a10, i64 %a11, i64 %a12, i64 %a13, i64 %a14, i64 %a15, i64 %a16, i64 %a17, i64 %a18, i64 %a19, i64 %a20, i64 %a21, i64 %a22, i64 %a23, i64 %a24) {
; CHECK-LABEL: callee_with_many_param:
; CHECK: // %bb.0:
-; CHECK-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 16
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -16
-; CHECK-NEXT: mov x8, x15
+; CHECK-NEXT: ldr x8, [sp, #32]
; CHECK-NEXT: mov x15, x20
; CHECK-NEXT: mov x20, x21
; CHECK-NEXT: mov x21, x22
@@ -212,17 +213,20 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
; CHECK-NEXT: mov x13, x14
; CHECK-NEXT: mov x14, x9
; CHECK-NEXT: mov x9, x8
+; CHECK-NEXT: str x15, [sp]
; CHECK-NEXT: bl callee_with_many_param2
-; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: ret
;
; DARWIN-LABEL: callee_with_many_param:
; DARWIN: ; %bb.0:
-; DARWIN-NEXT: stp x29, x30, [sp, #-16]! ; 16-byte Folded Spill
-; DARWIN-NEXT: .cfi_def_cfa_offset 16
+; DARWIN-NEXT: sub sp, sp, #32
+; DARWIN-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; DARWIN-NEXT: .cfi_def_cfa_offset 32
; DARWIN-NEXT: .cfi_offset w30, -8
; DARWIN-NEXT: .cfi_offset w29, -16
-; DARWIN-NEXT: mov x8, x15
+; DARWIN-NEXT: ldr x8, [sp, #32]
; DARWIN-NEXT: mov x15, x20
; DARWIN-NEXT: mov x20, x21
; DARWIN-NEXT: mov x21, x22
@@ -247,8 +251,10 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
; DARWIN-NEXT: mov x13, x14
; DARWIN-NEXT: mov x14, x9
; DARWIN-NEXT: mov x9, x8
+; DARWIN-NEXT: str x15, [sp]
; DARWIN-NEXT: bl _callee_with_many_param2
-; DARWIN-NEXT: ldp x29, x30, [sp], #16 ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; DARWIN-NEXT: add sp, sp, #32
; DARWIN-NEXT: ret
;
; WIN-LABEL: callee_with_many_param:
@@ -302,17 +308,18 @@ define preserve_nonecc i64 @callee_with_many_param(i64 %a1, i64 %a2, i64 %a3, i6
define i64 @caller3() {
; CHECK-LABEL: caller3:
; CHECK: // %bb.0:
-; CHECK-NEXT: stp d15, d14, [sp, #-160]! // 16-byte Folded Spill
-; CHECK-NEXT: stp d13, d12, [sp, #16] // 16-byte Folded Spill
-; CHECK-NEXT: stp d11, d10, [sp, #32] // 16-byte Folded Spill
-; CHECK-NEXT: stp d9, d8, [sp, #48] // 16-byte Folded Spill
-; CHECK-NEXT: str x30, [sp, #64] // 8-byte Folded Spill
-; CHECK-NEXT: stp x28, x27, [sp, #80] // 16-byte Folded Spill
-; CHECK-NEXT: stp x26, x25, [sp, #96] // 16-byte Folded Spill
-; CHECK-NEXT: stp x24, x23, [sp, #112] // 16-byte Folded Spill
-; CHECK-NEXT: stp x22, x21, [sp, #128] // 16-byte Folded Spill
-; CHECK-NEXT: stp x20, x19, [sp, #144] // 16-byte Folded Spill
-; CHECK-NEXT: .cfi_def_cfa_offset 160
+; CHECK-NEXT: sub sp, sp, #176
+; CHECK-NEXT: stp d15, d14, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: stp d13, d12, [sp, #32] // 16-byte Folded Spill
+; CHECK-NEXT: stp d11, d10, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: stp d9, d8, [sp, #64] // 16-byte Folded Spill
+; CHECK-NEXT: str x30, [sp, #80] // 8-byte Folded Spill
+; CHECK-NEXT: stp x28, x27, [sp, #96] // 16-byte Folded Spill
+; CHECK-NEXT: stp x26, x25, [sp, #112] // 16-byte Folded Spill
+; CHECK-NEXT: stp x24, x23, [sp, #128] // 16-byte Folded Spill
+; CHECK-NEXT: stp x22, x21, [sp, #144] // 16-byte Folded Spill
+; CHECK-NEXT: stp x20, x19, [sp, #160] // 16-byte Folded Spill
+; CHECK-NEXT: .cfi_def_cfa_offset 176
; CHECK-NEXT: .cfi_offset w19, -8
; CHECK-NEXT: .cfi_offset w20, -16
; CHECK-NEXT: .cfi_offset w21, -24
@@ -332,6 +339,7 @@ define i64 @caller3() {
; CHECK-NEXT: .cfi_offset b13, -144
; CHECK-NEXT: .cfi_offset b14, -152
; CHECK-NEXT: .cfi_offset b15, -160
+; CHECK-NEXT: mov w8, #24 // =0x18
; CHECK-NEXT: mov w20, #1 // =0x1
; CHECK-NEXT: mov w21, #2 // =0x2
; CHECK-NEXT: mov w22, #3 // =0x3
@@ -355,33 +363,35 @@ define i64 @caller3() {
; CHECK-NEXT: mov w13, #21 // =0x15
; CHECK-NEXT: mov w14, #22 // =0x16
; CHECK-NEXT: mov w9, #23 // =0x17
-; CHECK-NEXT: mov w15, #24 // =0x18
+; CHECK-NEXT: str x8, [sp]
; CHECK-NEXT: bl callee_with_many_param
-; CHECK-NEXT: ldp x20, x19, [sp, #144] // 16-byte Folded Reload
-; CHECK-NEXT: ldr x30, [sp, #64] // 8-byte Folded Reload
-; CHECK-NEXT: ldp x22, x21, [sp, #128] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x24, x23, [sp, #112] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x26, x25, [sp, #96] // 16-byte Folded Reload
-; CHECK-NEXT: ldp x28, x27, [sp, #80] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d11, d10, [sp, #32] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d13, d12, [sp, #16] // 16-byte Folded Reload
-; CHECK-NEXT: ldp d15, d14, [sp], #160 // 16-byte Folded Reload
+; CHECK-NEXT: ldp x20, x19, [sp, #160] // 16-byte Folded Reload
+; CHECK-NEXT: ldr x30, [sp, #80] // 8-byte Folded Reload
+; CHECK-NEXT: ldp x22, x21, [sp, #144] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x24, x23, [sp, #128] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x26, x25, [sp, #112] // 16-byte Folded Reload
+; CHECK-NEXT: ldp x28, x27, [sp, #96] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d9, d8, [sp, #64] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d11, d10, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d13, d12, [sp, #32] // 16-byte Folded Reload
+; CHECK-NEXT: ldp d15, d14, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #176
; CHECK-NEXT: ret
;
; DARWIN-LABEL: caller3:
; DARWIN: ; %bb.0:
-; DARWIN-NEXT: stp d15, d14, [sp, #-160]! ; 16-byte Folded Spill
-; DARWIN-NEXT: stp d13, d12, [sp, #16] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp d11, d10, [sp, #32] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp d9, d8, [sp, #48] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x28, x27, [sp, #64] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x26, x25, [sp, #80] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x24, x23, [sp, #96] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x22, x21, [sp, #112] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x20, x19, [sp, #128] ; 16-byte Folded Spill
-; DARWIN-NEXT: stp x29, x30, [sp, #144] ; 16-byte Folded Spill
-; DARWIN-NEXT: .cfi_def_cfa_offset 160
+; DARWIN-NEXT: sub sp, sp, #176
+; DARWIN-NEXT: stp d15, d14, [sp, #16] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp d13, d12, [sp, #32] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp d11, d10, [sp, #48] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp d9, d8, [sp, #64] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x28, x27, [sp, #80] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x26, x25, [sp, #96] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x24, x23, [sp, #112] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x22, x21, [sp, #128] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x20, x19, [sp, #144] ; 16-byte Folded Spill
+; DARWIN-NEXT: stp x29, x30, [sp, #160] ; 16-byte Folded Spill
+; DARWIN-NEXT: .cfi_def_cfa_offset 176
; DARWIN-NEXT: .cfi_offset w30, -8
; DARWIN-NEXT: .cfi_offset w29, -16
; DARWIN-NEXT: .cfi_offset w19, -24
@@ -402,6 +412,7 @@ define i64 @caller3() {
; DARWIN-NEXT: .cfi_offset b13, -144
; DARWIN-NEXT: .cfi_offset b14, -152
; DARWIN-NEXT: .cfi_offset b15, -160
+; DARWIN-NEXT: mov w8, #24 ; =0x18
; DARWIN-NEXT: mov w20, #1 ; =0x1
; DARWIN-NEXT: mov w21, #2 ; =0x2
; DARWIN-NEXT: mov w22, #3 ; =0x3
@@ -425,18 +436,19 @@ define i64 @caller3() {
; DARWIN-NEXT: mov w13, #21 ; =0x15
; DARWIN-NEXT: mov w14, #22 ; =0x16
; DARWIN-NEXT: mov w9, #23 ; =0x17
-; DARWIN-NEXT: mov w15, #24 ; =0x18
+; DARWIN-NEXT: str x8, [sp]
; DARWIN-NEXT: bl _callee_with_many_param
-; DARWIN-NEXT: ldp x29, x30, [sp, #144] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp x20, x19, [sp, #128] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp x22, x21, [sp, #112] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp x24, x23, [sp, #96] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp x26, x25, [sp, #80] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp x28, x27, [sp, #64] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp d9, d8, [sp, #48] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp d11, d10, [sp, #32] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp d13, d12, [sp, #16] ; 16-byte Folded Reload
-; DARWIN-NEXT: ldp d15, d14, [sp], #160 ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x29, x30, [sp, #160] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x20, x19, [sp, #144] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x22, x21, [sp, #128] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x24, x23, [sp, #112] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x26, x25, [sp, #96] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp x28, x27, [sp, #80] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp d9, d8, [sp, #64] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp d11, d10, [sp, #48] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp d13, d12, [sp, #32] ; 16-byte Folded Reload
+; DARWIN-NEXT: ldp d15, d14, [sp, #16] ; 16-byte Folded Reload
+; DARWIN-NEXT: add sp, sp, #176
; DARWIN-NEXT: ret
;
; WIN-LABEL: caller3:
diff --git a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
index 9619895c450ca..32c3eaeb9c876 100644
--- a/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
+++ b/llvm/test/CodeGen/AArch64/statepoint-call-lowering.ll
@@ -207,7 +207,7 @@ define void @test_attributes(ptr byval(%struct2) %s) gc "statepoint-example" {
; CHECK-NEXT: .cfi_offset w30, -16
; CHECK-NEXT: ldr x8, [sp, #64]
; CHECK-NEXT: ldr q0, [sp, #48]
-; CHECK-NEXT: mov x18, xzr
+; CHECK-NEXT: mov x15, xzr
; CHECK-NEXT: mov w0, #42 // =0x2a
; CHECK-NEXT: mov w1, #17 // =0x11
; CHECK-NEXT: str x8, [sp, #16]
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
index 30ac2aa283b3e..0e682704afbf8 100644
--- a/llvm/test/CodeGen/AArch64/trampoline.ll
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -1,32 +1,265 @@
-; RUN: llc -mtriple=aarch64-- < %s | FileCheck %s
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc -mtriple=aarch64-linux-gnu < %s | FileCheck %s --check-prefixes=CHECK-LINUX
+; RUN: llc -mtriple=aarch64-none-eabi < %s | FileCheck %s --check-prefixes=CHECK-LINUX
+; RUN: llc -mtriple=aarch64-pc-windows-msvc < %s | FileCheck %s --check-prefix=CHECK-PC
+; RUN: llc -mtriple=aarch64-apple-darwin < %s | FileCheck %s --check-prefixes=CHECK-APPLE
@trampg = internal global [36 x i8] zeroinitializer, align 8
declare void @llvm.init.trampoline(ptr, ptr, ptr);
declare ptr @llvm.adjust.trampoline(ptr);
-define i64 @f(ptr nest %c, i64 %x, i64 %y) {
- %sum = add i64 %x, %y
- ret i64 %sum
+define ptr @f(ptr nest %x, i64 %y) {
+; CHECK-LINUX-LABEL: f:
+; CHECK-LINUX: // %bb.0:
+; CHECK-LINUX-NEXT: str x29, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-LINUX-NEXT: sub sp, sp, #237, lsl #12 // =970752
+; CHECK-LINUX-NEXT: sub sp, sp, #3264
+; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 974032
+; CHECK-LINUX-NEXT: .cfi_offset w29, -16
+; CHECK-LINUX-NEXT: add x0, x15, x0
+; CHECK-LINUX-NEXT: add sp, sp, #237, lsl #12 // =970752
+; CHECK-LINUX-NEXT: add sp, sp, #3264
+; CHECK-LINUX-NEXT: ldr x29, [sp], #16 // 8-byte Folded Reload
+; CHECK-LINUX-NEXT: ret
+;
+; CHECK-PC-LABEL: f:
+; CHECK-PC: .seh_proc f
+; CHECK-PC-NEXT: // %bb.0:
+; CHECK-PC-NEXT: stp x29, x30, [sp, #-16]! // 16-byte Folded Spill
+; CHECK-PC-NEXT: .seh_save_fplr_x 16
+; CHECK-PC-NEXT: mov x9, x15
+; CHECK-PC-NEXT: mov x15, #60876 // =0xedcc
+; CHECK-PC-NEXT: .seh_nop
+; CHECK-PC-NEXT: bl __chkstk
+; CHECK-PC-NEXT: .seh_nop
+; CHECK-PC-NEXT: sub sp, sp, x15, lsl #4
+; CHECK-PC-NEXT: .seh_stackalloc 974016
+; CHECK-PC-NEXT: mov x15, x9
+; CHECK-PC-NEXT: .seh_endprologue
+; CHECK-PC-NEXT: add x0, x15, x0
+; CHECK-PC-NEXT: .seh_startepilogue
+; CHECK-PC-NEXT: add sp, sp, #237, lsl #12 // =970752
+; CHECK-PC-NEXT: .seh_stackalloc 970752
+; CHECK-PC-NEXT: add sp, sp, #3264
+; CHECK-PC-NEXT: .seh_stackalloc 3264
+; CHECK-PC-NEXT: ldp x29, x30, [sp], #16 // 16-byte Folded Reload
+; CHECK-PC-NEXT: .seh_save_fplr_x 16
+; CHECK-PC-NEXT: .seh_endepilogue
+; CHECK-PC-NEXT: ret
+; CHECK-PC-NEXT: .seh_endfunclet
+; CHECK-PC-NEXT: .seh_endproc
+;
+; CHECK-APPLE-LABEL: f:
+; CHECK-APPLE: ; %bb.0:
+; CHECK-APPLE-NEXT: stp x28, x27, [sp, #-16]! ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT: sub sp, sp, #237, lsl #12 ; =970752
+; CHECK-APPLE-NEXT: sub sp, sp, #3264
+; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 974032
+; CHECK-APPLE-NEXT: .cfi_offset w27, -8
+; CHECK-APPLE-NEXT: .cfi_offset w28, -16
+; CHECK-APPLE-NEXT: add x0, x15, x0
+; CHECK-APPLE-NEXT: add sp, sp, #237, lsl #12 ; =970752
+; CHECK-APPLE-NEXT: add sp, sp, #3264
+; CHECK-APPLE-NEXT: ldp x28, x27, [sp], #16 ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT: ret
+ %chkstack = alloca [u0xedcba x i8]
+ %sum = getelementptr i8, ptr %x, i64 %y
+ ret ptr %sum
}
define i64 @func1() {
+; CHECK-LINUX-LABEL: func1:
+; CHECK-LINUX: // %bb.0:
+; CHECK-LINUX-NEXT: sub sp, sp, #64
+; CHECK-LINUX-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 64
+; CHECK-LINUX-NEXT: .cfi_offset w30, -16
+; CHECK-LINUX-NEXT: adrp x8, :got:f
+; CHECK-LINUX-NEXT: mov w9, #544 // =0x220
+; CHECK-LINUX-NEXT: add x0, sp, #8
+; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f]
+; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16
+; CHECK-LINUX-NEXT: str w9, [sp, #16]
+; CHECK-LINUX-NEXT: add x9, sp, #56
+; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24]
+; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f
+; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16
+; CHECK-LINUX-NEXT: movk x8, #177, lsl #32
+; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48
+; CHECK-LINUX-NEXT: str x8, [sp, #8]
+; CHECK-LINUX-NEXT: add x8, sp, #8
+; CHECK-LINUX-NEXT: add x1, x8, #12
+; CHECK-LINUX-NEXT: bl __clear_cache
+; CHECK-LINUX-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-LINUX-NEXT: mov x0, xzr
+; CHECK-LINUX-NEXT: add sp, sp, #64
+; CHECK-LINUX-NEXT: ret
+;
+; CHECK-PC-LABEL: func1:
+; CHECK-PC: .seh_proc func1
+; CHECK-PC-NEXT: // %bb.0:
+; CHECK-PC-NEXT: sub sp, sp, #64
+; CHECK-PC-NEXT: .seh_stackalloc 64
+; CHECK-PC-NEXT: str x30, [sp, #48] // 8-byte Folded Spill
+; CHECK-PC-NEXT: .seh_save_reg x30, 48
+; CHECK-PC-NEXT: .seh_endprologue
+; CHECK-PC-NEXT: adrp x8, f
+; CHECK-PC-NEXT: add x8, x8, :lo12:f
+; CHECK-PC-NEXT: add x9, sp, #56
+; CHECK-PC-NEXT: stp x9, x8, [sp, #24]
+; CHECK-PC-NEXT: mov w8, #544 // =0x220
+; CHECK-PC-NEXT: add x0, sp, #8
+; CHECK-PC-NEXT: movk w8, #54815, lsl #16
+; CHECK-PC-NEXT: str w8, [sp, #16]
+; CHECK-PC-NEXT: mov x8, #143 // =0x8f
+; CHECK-PC-NEXT: movk x8, #22528, lsl #16
+; CHECK-PC-NEXT: movk x8, #177, lsl #32
+; CHECK-PC-NEXT: movk x8, #22528, lsl #48
+; CHECK-PC-NEXT: str x8, [sp, #8]
+; CHECK-PC-NEXT: add x8, sp, #8
+; CHECK-PC-NEXT: add x1, x8, #12
+; CHECK-PC-NEXT: bl __clear_cache
+; CHECK-PC-NEXT: mov x0, xzr
+; CHECK-PC-NEXT: .seh_startepilogue
+; CHECK-PC-NEXT: ldr x30, [sp, #48] // 8-byte Folded Reload
+; CHECK-PC-NEXT: .seh_save_reg x30, 48
+; CHECK-PC-NEXT: add sp, sp, #64
+; CHECK-PC-NEXT: .seh_stackalloc 64
+; CHECK-PC-NEXT: .seh_endepilogue
+; CHECK-PC-NEXT: ret
+; CHECK-PC-NEXT: .seh_endfunclet
+; CHECK-PC-NEXT: .seh_endproc
+;
+; CHECK-APPLE-LABEL: func1:
+; CHECK-APPLE: ; %bb.0:
+; CHECK-APPLE-NEXT: sub sp, sp, #64
+; CHECK-APPLE-NEXT: stp x29, x30, [sp, #48] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 64
+; CHECK-APPLE-NEXT: .cfi_offset w30, -8
+; CHECK-APPLE-NEXT: .cfi_offset w29, -16
+; CHECK-APPLE-NEXT: Lloh0:
+; CHECK-APPLE-NEXT: adrp x8, _f@PAGE
+; CHECK-APPLE-NEXT: Lloh1:
+; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF
+; CHECK-APPLE-NEXT: add x9, sp, #40
+; CHECK-APPLE-NEXT: stp x9, x8, [sp, #16]
+; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220
+; CHECK-APPLE-NEXT: mov x0, sp
+; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16
+; CHECK-APPLE-NEXT: str w8, [sp, #8]
+; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f
+; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16
+; CHECK-APPLE-NEXT: movk x8, #177, lsl #32
+; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48
+; CHECK-APPLE-NEXT: str x8, [sp]
+; CHECK-APPLE-NEXT: mov x8, sp
+; CHECK-APPLE-NEXT: add x1, x8, #12
+; CHECK-APPLE-NEXT: bl ___clear_cache
+; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #48] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT: mov x0, xzr
+; CHECK-APPLE-NEXT: add sp, sp, #64
+; CHECK-APPLE-NEXT: ret
+; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh0, Lloh1
%val = alloca i64
- %nval = bitcast ptr %val to ptr
%tramp = alloca [36 x i8], align 8
- ; CHECK: mov w1, #36
- ; CHECK: bl __trampoline_setup
- call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %nval)
+ call void @llvm.init.trampoline(ptr %tramp, ptr @f, ptr %val)
%fp = call ptr @llvm.adjust.trampoline(ptr %tramp)
ret i64 0
}
define i64 @func2() {
+; CHECK-LINUX-LABEL: func2:
+; CHECK-LINUX: // %bb.0:
+; CHECK-LINUX-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-LINUX-NEXT: .cfi_def_cfa_offset 16
+; CHECK-LINUX-NEXT: .cfi_offset w30, -16
+; CHECK-LINUX-NEXT: adrp x8, :got:f
+; CHECK-LINUX-NEXT: mov w9, #544 // =0x220
+; CHECK-LINUX-NEXT: adrp x0, trampg
+; CHECK-LINUX-NEXT: add x0, x0, :lo12:trampg
+; CHECK-LINUX-NEXT: ldr x8, [x8, :got_lo12:f]
+; CHECK-LINUX-NEXT: movk w9, #54815, lsl #16
+; CHECK-LINUX-NEXT: str w9, [x0, #8]
+; CHECK-LINUX-NEXT: add x9, sp, #8
+; CHECK-LINUX-NEXT: add x1, x0, #12
+; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16]
+; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f
+; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16
+; CHECK-LINUX-NEXT: movk x8, #177, lsl #32
+; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48
+; CHECK-LINUX-NEXT: str x8, [x0]
+; CHECK-LINUX-NEXT: bl __clear_cache
+; CHECK-LINUX-NEXT: mov x0, xzr
+; CHECK-LINUX-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-LINUX-NEXT: ret
+;
+; CHECK-PC-LABEL: func2:
+; CHECK-PC: .seh_proc func2
+; CHECK-PC-NEXT: // %bb.0:
+; CHECK-PC-NEXT: str x30, [sp, #-16]! // 8-byte Folded Spill
+; CHECK-PC-NEXT: .seh_save_reg_x x30, 16
+; CHECK-PC-NEXT: .seh_endprologue
+; CHECK-PC-NEXT: adrp x0, trampg
+; CHECK-PC-NEXT: add x0, x0, :lo12:trampg
+; CHECK-PC-NEXT: adrp x8, f
+; CHECK-PC-NEXT: add x8, x8, :lo12:f
+; CHECK-PC-NEXT: add x9, sp, #8
+; CHECK-PC-NEXT: add x1, x0, #12
+; CHECK-PC-NEXT: stp x9, x8, [x0, #16]
+; CHECK-PC-NEXT: mov w8, #544 // =0x220
+; CHECK-PC-NEXT: movk w8, #54815, lsl #16
+; CHECK-PC-NEXT: str w8, [x0, #8]
+; CHECK-PC-NEXT: mov x8, #143 // =0x8f
+; CHECK-PC-NEXT: movk x8, #22528, lsl #16
+; CHECK-PC-NEXT: movk x8, #177, lsl #32
+; CHECK-PC-NEXT: movk x8, #22528, lsl #48
+; CHECK-PC-NEXT: str x8, [x0]
+; CHECK-PC-NEXT: bl __clear_cache
+; CHECK-PC-NEXT: mov x0, xzr
+; CHECK-PC-NEXT: .seh_startepilogue
+; CHECK-PC-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-PC-NEXT: .seh_save_reg_x x30, 16
+; CHECK-PC-NEXT: .seh_endepilogue
+; CHECK-PC-NEXT: ret
+; CHECK-PC-NEXT: .seh_endfunclet
+; CHECK-PC-NEXT: .seh_endproc
+;
+; CHECK-APPLE-LABEL: func2:
+; CHECK-APPLE: ; %bb.0:
+; CHECK-APPLE-NEXT: sub sp, sp, #32
+; CHECK-APPLE-NEXT: stp x29, x30, [sp, #16] ; 16-byte Folded Spill
+; CHECK-APPLE-NEXT: .cfi_def_cfa_offset 32
+; CHECK-APPLE-NEXT: .cfi_offset w30, -8
+; CHECK-APPLE-NEXT: .cfi_offset w29, -16
+; CHECK-APPLE-NEXT: Lloh2:
+; CHECK-APPLE-NEXT: adrp x0, _trampg@PAGE
+; CHECK-APPLE-NEXT: Lloh3:
+; CHECK-APPLE-NEXT: add x0, x0, _trampg@PAGEOFF
+; CHECK-APPLE-NEXT: Lloh4:
+; CHECK-APPLE-NEXT: adrp x8, _f@PAGE
+; CHECK-APPLE-NEXT: Lloh5:
+; CHECK-APPLE-NEXT: add x8, x8, _f@PAGEOFF
+; CHECK-APPLE-NEXT: add x9, sp, #8
+; CHECK-APPLE-NEXT: add x1, x0, #12
+; CHECK-APPLE-NEXT: stp x9, x8, [x0, #16]
+; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220
+; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16
+; CHECK-APPLE-NEXT: str w8, [x0, #8]
+; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f
+; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16
+; CHECK-APPLE-NEXT: movk x8, #177, lsl #32
+; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48
+; CHECK-APPLE-NEXT: str x8, [x0]
+; CHECK-APPLE-NEXT: bl ___clear_cache
+; CHECK-APPLE-NEXT: ldp x29, x30, [sp, #16] ; 16-byte Folded Reload
+; CHECK-APPLE-NEXT: mov x0, xzr
+; CHECK-APPLE-NEXT: add sp, sp, #32
+; CHECK-APPLE-NEXT: ret
+; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh4, Lloh5
+; CHECK-APPLE-NEXT: .loh AdrpAdd Lloh2, Lloh3
%val = alloca i64
- %nval = bitcast ptr %val to ptr
- ; CHECK: mov w1, #36
- ; CHECK: bl __trampoline_setup
- call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %nval)
+ call void @llvm.init.trampoline(ptr @trampg, ptr @f, ptr %val)
%fp = call ptr @llvm.adjust.trampoline(ptr @trampg)
ret i64 0
}
diff --git a/llvm/test/CodeGen/AArch64/win64cc-x18.ll b/llvm/test/CodeGen/AArch64/win64cc-x18.ll
index b3e78cc9bbb81..4b45c300e9c1d 100644
--- a/llvm/test/CodeGen/AArch64/win64cc-x18.ll
+++ b/llvm/test/CodeGen/AArch64/win64cc-x18.ll
@@ -1,35 +1,26 @@
-; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+;; Testing that nest uses x15 on all calling conventions (except Arm64EC)
-;; Testing that x18 is not clobbered when passing pointers with the nest
-;; attribute on windows
-
-; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s --check-prefixes=CHECK,CHECK-NO-X18
-; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s --check-prefixes=CHECK,CHECK-X18
+; RUN: llc < %s -mtriple=aarch64-pc-windows-msvc | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-linux-gnu | FileCheck %s
+; RUN: llc < %s -mtriple=aarch64-apple-darwin- | FileCheck %s
define dso_local i64 @other(ptr nest %p) #0 {
; CHECK-LABEL: other:
-; CHECK-X18: ldr x0, [x18]
-; CHECK-NO-X18: ldr x0, [x0]
+; CHECK: ldr x0, [x15]
+; CHECK: ret
%r = load i64, ptr %p
-; CHECK: ret
ret i64 %r
}
define dso_local void @func() #0 {
; CHECK-LABEL: func:
-
-
+; CHECK: add x15, sp, #8
+; CHECK: bl {{_?other}}
+; CHECK: ret
entry:
%p = alloca i64
-; CHECK: mov w8, #1
-; CHECK: stp x30, x8, [sp, #-16]
-; CHECK-X18: add x18, sp, #8
store i64 1, ptr %p
-; CHECK-NO-X18: add x0, sp, #8
-; CHECK: bl other
call void @other(ptr nest %p)
-; CHECK: ldr x30, [sp], #16
-; CHECK: ret
ret void
}
diff --git a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
index 4799ea3bcd19f..986666e015e9e 100644
--- a/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
+++ b/llvm/test/CodeGen/AArch64/zero-call-used-regs.ll
@@ -93,7 +93,7 @@ define dso_local i32 @all_gpr_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c
; CHECK-NEXT: mov x5, #0 // =0x0
; CHECK-NEXT: mov x6, #0 // =0x0
; CHECK-NEXT: mov x7, #0 // =0x0
-; CHECK-NEXT: mov x18, #0 // =0x0
+; CHECK-NEXT: mov x15, #0 // =0x0
; CHECK-NEXT: orr w0, w8, w2
; CHECK-NEXT: mov x2, #0 // =0x0
; CHECK-NEXT: mov x8, #0 // =0x0
@@ -146,7 +146,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
; DEFAULT-NEXT: mov x5, #0 // =0x0
; DEFAULT-NEXT: mov x6, #0 // =0x0
; DEFAULT-NEXT: mov x7, #0 // =0x0
-; DEFAULT-NEXT: mov x18, #0 // =0x0
+; DEFAULT-NEXT: mov x15, #0 // =0x0
; DEFAULT-NEXT: movi v0.2d, #0000000000000000
; DEFAULT-NEXT: orr w0, w8, w2
; DEFAULT-NEXT: mov x2, #0 // =0x0
@@ -169,7 +169,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
; SVE-OR-SME-NEXT: mov x5, #0 // =0x0
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
-; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
; SVE-OR-SME-NEXT: mov z0.d, #0 // =0x0
; SVE-OR-SME-NEXT: orr w0, w8, w2
; SVE-OR-SME-NEXT: mov x2, #0 // =0x0
@@ -196,7 +196,7 @@ define dso_local i32 @all_arg(i32 noundef %a, i32 noundef %b, i32 noundef %c) lo
; STREAMING-COMPAT-NEXT: mov x5, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
-; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d0, xzr
; STREAMING-COMPAT-NEXT: orr w0, w8, w2
; STREAMING-COMPAT-NEXT: mov x2, #0 // =0x0
@@ -492,7 +492,7 @@ define dso_local double @all_gpr_arg_float(double noundef %a, float noundef %b)
; CHECK-NEXT: mov x6, #0 // =0x0
; CHECK-NEXT: mov x7, #0 // =0x0
; CHECK-NEXT: mov x8, #0 // =0x0
-; CHECK-NEXT: mov x18, #0 // =0x0
+; CHECK-NEXT: mov x15, #0 // =0x0
; CHECK-NEXT: ret
entry:
@@ -547,7 +547,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
; DEFAULT-NEXT: mov x6, #0 // =0x0
; DEFAULT-NEXT: mov x7, #0 // =0x0
; DEFAULT-NEXT: mov x8, #0 // =0x0
-; DEFAULT-NEXT: mov x18, #0 // =0x0
+; DEFAULT-NEXT: mov x15, #0 // =0x0
; DEFAULT-NEXT: movi v1.2d, #0000000000000000
; DEFAULT-NEXT: movi v2.2d, #0000000000000000
; DEFAULT-NEXT: movi v3.2d, #0000000000000000
@@ -570,7 +570,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
; SVE-OR-SME-NEXT: mov x6, #0 // =0x0
; SVE-OR-SME-NEXT: mov x7, #0 // =0x0
; SVE-OR-SME-NEXT: mov x8, #0 // =0x0
-; SVE-OR-SME-NEXT: mov x18, #0 // =0x0
+; SVE-OR-SME-NEXT: mov x15, #0 // =0x0
; SVE-OR-SME-NEXT: mov z1.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z2.d, #0 // =0x0
; SVE-OR-SME-NEXT: mov z3.d, #0 // =0x0
@@ -597,7 +597,7 @@ define dso_local double @all_arg_float(double noundef %a, float noundef %b) loca
; STREAMING-COMPAT-NEXT: mov x6, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x7, #0 // =0x0
; STREAMING-COMPAT-NEXT: mov x8, #0 // =0x0
-; STREAMING-COMPAT-NEXT: mov x18, #0 // =0x0
+; STREAMING-COMPAT-NEXT: mov x15, #0 // =0x0
; STREAMING-COMPAT-NEXT: fmov d1, xzr
; STREAMING-COMPAT-NEXT: fmov d2, xzr
; STREAMING-COMPAT-NEXT: fmov d3, xzr
>From f6122fa0eeeef5b9598439a5e049a0fdef9bd84a Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 12 Feb 2025 09:31:34 -0500
Subject: [PATCH 2/5] fixup! [AArch64] fix trampoline implementation: use X15
---
flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp | 2 +-
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 16 +++++++++-------
2 files changed, 10 insertions(+), 8 deletions(-)
diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index f402404121da0..c91ead6f0c019 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -277,7 +277,7 @@ class BoxedProcedurePass
// For PPC32 and PPC64, the thunk is populated by a call to
// __trampoline_setup, which is defined in
// compiler-rt/lib/builtins/trampoline_setup.c and requires the
- // thunk size greater than 32 bytes. For Aarch64, RISCV and x86_64, the
+ // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the
// thunk setup doesn't go through __trampoline_setup and fits in 32
// bytes.
fir::SequenceType::Extent thunkSize = triple.getTrampolineSize();
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index ced3ff7b742ad..789bd6249b400 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2046,16 +2046,18 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
if (windowsRequiresStackProbe(MF, NumBytes + RealignmentPadding)) {
// Find an available register to store value of VG to.
unsigned X15Scratch = AArch64::NoRegister;
- if (LiveRegs.contains(AArch64::X15)) {
- // if (llvm::any_of(
- // MBB.liveins(),
- // [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
- // return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
- // AArch64::X15, LiveIn.PhysReg);
- // }))
+ const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
+ if (llvm::any_of(
+ MBB.liveins(),
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ AArch64::X15, LiveIn.PhysReg);
+ })) {
X15Scratch = findScratchNonCalleeSaveRegister(&MBB);
assert(X15Scratch != AArch64::NoRegister);
+#ifndef NDEBUG
LiveRegs.removeReg(AArch64::X15); // ignore X15 since we restore it
+#endif
BuildMI(MBB, MBBI, DL, TII->get(AArch64::ORRXrr), X15Scratch)
.addReg(AArch64::XZR)
.addReg(AArch64::X15, RegState::Undef)
>From 9aa05538f87ea17f349a9c9921a6171a3d83aa36 Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 12 Feb 2025 10:04:51 -0500
Subject: [PATCH 3/5] fix langref and Arm64EC CallingConvention
---
llvm/docs/LangRef.rst | 7 ++++-
.../Target/AArch64/AArch64ISelLowering.cpp | 26 ++++++++++++++-----
llvm/lib/TargetParser/Triple.cpp | 2 --
3 files changed, 26 insertions(+), 9 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index 2d72e548ec82a..c1dd9dcfd63f7 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -20903,7 +20903,12 @@ sufficiently aligned block of memory; this memory is written to by the
intrinsic. Note that the size and the alignment are target-specific -
LLVM currently provides no portable way of determining them, so a
front-end that generates this intrinsic needs to have some
-target-specific knowledge. The ``func`` argument must hold a function.
+target-specific knowledge.
+
+The ``func`` argument must be a constant (potentially bitcasted) pointer to a
+function declaration or definition, since the calling convention may affect the
+content of the trampoline that is created.
+
Semantics:
""""""""""
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 1404077446420..795e6bd1c27fa 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7302,28 +7302,42 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
const Value *TrmpAddr = cast<SrcValueSDNode>(Op.getOperand(4))->getValue();
- // ldr x15, .+16
+ // ldr NestReg, .+16
// ldr x17, .+20
// br x17
- // 0
+ // .word 0
// .nest: .qword nest
// .fptr: .qword fptr
SDValue OutChains[5];
- const char X15 = 0x0f;
- const char X17 = 0x11;
+ const Function *Func =
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ CallingConv::ID CC = Func->getCallingConv();
+ unsigned NestReg;
+
+ switch (CC) {
+ default:
+ NestReg = 0x0f; // X15
+ case CallingConv::ARM64EC_Thunk_Native:
+ case CallingConv::ARM64EC_Thunk_X64:
+ // Must be kept in sync with AArch64CallingConv.td
+ NestReg = 0x04; // X4
+ break;
+ }
+
+ const char FptrReg = 0x11; // X17
SDValue Addr = Trmp;
SDLoc dl(Op);
OutChains[0] =
- DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | X15, dl, MVT::i32), Addr,
+ DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr,
MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(4, dl, MVT::i64));
OutChains[1] =
- DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | X17, dl, MVT::i32), Addr,
+ DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr,
MachinePointerInfo(TrmpAddr, 4));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
diff --git a/llvm/lib/TargetParser/Triple.cpp b/llvm/lib/TargetParser/Triple.cpp
index e9e6f130f757c..8ada0d44f96f9 100644
--- a/llvm/lib/TargetParser/Triple.cpp
+++ b/llvm/lib/TargetParser/Triple.cpp
@@ -1725,8 +1725,6 @@ unsigned Triple::getTrampolineSize() const {
if (isOSLinux())
return 48;
break;
- case Triple::aarch64:
- return 36;
}
return 32;
}
>From 78745ede2a514e0c0c874ccceee500454d46a3fd Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 12 Feb 2025 10:13:49 -0500
Subject: [PATCH 4/5] fix more langref mistakes with aarch64 calling convention
The calling convention previously stated that X9 and X15 were
callee-preserved, but the implementation of AArch64FrameLowering.cpp
uses those as scratch registers and does not actually preserve it.
---
llvm/docs/LangRef.rst | 10 ++++-----
.../AArch64/AArch64CallingConvention.td | 2 +-
.../CodeGen/AArch64/arm64-preserve-all.ll | 16 +++++++-------
.../CodeGen/AArch64/arm64-preserve-most.ll | 21 +++++++++----------
llvm/test/CodeGen/AArch64/preserve.ll | 4 ++--
llvm/test/CodeGen/AArch64/trampoline.ll | 12 +++++------
6 files changed, 33 insertions(+), 32 deletions(-)
diff --git a/llvm/docs/LangRef.rst b/llvm/docs/LangRef.rst
index c1dd9dcfd63f7..f14d5f1a0e14b 100644
--- a/llvm/docs/LangRef.rst
+++ b/llvm/docs/LangRef.rst
@@ -410,8 +410,8 @@ added in the future:
calling convention: on most platforms, they are not preserved and need to
be saved by the caller, but on Windows, xmm6-xmm15 are preserved.
- - On AArch64 the callee preserve all general purpose registers, except X0-X8
- and X16-X18.
+ - On AArch64 the callee preserve all general purpose registers, except X0-X9
+ and X15-X18. X9 can be used as a scratch register.
The idea behind this convention is to support calls to runtime functions
that have a hot path and a cold path. The hot path is usually a small piece
@@ -447,9 +447,9 @@ added in the future:
R11. R11 can be used as a scratch register. Furthermore it also preserves
all floating-point registers (XMMs/YMMs).
- - On AArch64 the callee preserve all general purpose registers, except X0-X8
- and X16-X18. Furthermore it also preserves lower 128 bits of V8-V31 SIMD -
- floating point registers.
+ - On AArch64 the callee preserve all general purpose registers, except X0-X9
+ and X15-X18. Furthermore it also preserves lower 128 bits of V8-V31 SIMD -
+ floating point registers. X9 can be used as a scratch register.
The idea behind this convention is to support calls to runtime functions
that don't need to call out to any other functions.
diff --git a/llvm/lib/Target/AArch64/AArch64CallingConvention.td b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
index 8355463dea94e..366e8122b55dc 100644
--- a/llvm/lib/Target/AArch64/AArch64CallingConvention.td
+++ b/llvm/lib/Target/AArch64/AArch64CallingConvention.td
@@ -691,7 +691,7 @@ def CSR_AArch64_NoRegs : CalleeSavedRegs<(add)>;
def CSR_AArch64_NoneRegs : CalleeSavedRegs<(add LR, FP)>;
def CSR_AArch64_RT_MostRegs : CalleeSavedRegs<(add CSR_AArch64_AAPCS,
- (sequence "X%u", 9, 15))>;
+ (sequence "X%u", 10, 14))>;
def CSR_AArch64_RT_AllRegs : CalleeSavedRegs<(add CSR_AArch64_RT_MostRegs,
(sequence "Q%u", 8, 31))>;
diff --git a/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll b/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll
index 778f4e2f9ec01..b5257f6518166 100644
--- a/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-preserve-all.ll
@@ -7,8 +7,8 @@ target triple = "aarch64-unknown-linux-gnu"
declare void @normal_cc()
; Caller: preserve_allcc; callee: normalcc. Normally callee saved registers
-; x9~x15 need to be spilled. Since most of them will be spilled in pairs in
-; reverse order, we only check the odd number ones due to FileCheck not
+; x10~x14 need to be spilled. Since most of them will be spilled in pairs in
+; reverse order, we only check the even number ones due to FileCheck not
; matching the same line of assembly twice.
; CHECK-LABEL: preserve_all
; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q8(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
@@ -22,21 +22,23 @@ declare void @normal_cc()
; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q26(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q28(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
; CHECK-DAG: {{st[rp]}} {{(q[0-9]+, )?q30(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x9(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x11(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x13(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x15(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x10(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x12(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x14(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
define preserve_allcc void @preserve_all() {
call void @normal_cc()
ret void
}
; Caller: normalcc; callee: preserve_allcc. x9/q9 does not need to be spilled.
-; The same holds for other x and q registers, but we only check x9 and q9.
+; The same holds for other x and q registers, but we only check x9, x11, and q9.
; CHECK-LABEL: normal_cc_caller
; CHECK-NOT: stp {{x[0-9]+}}, x9, [sp, #{{[-0-9]+}}]
; CHECK-NOT: stp x9, {{x[0-9]+}}, [sp, #{{[-0-9]+}}]
; CHECK-NOT: str x9, [sp, {{#[-0-9]+}}]
+; CHECK-NOT: stp {{x[0-9]+}}, x11, [sp, #{{[-0-9]+}}]
+; CHECK-NOT: stp x10, {{x[0-9]+}}, [sp, #{{[-0-9]+}}]
+; CHECK-NOT: str x10, [sp, {{#[-0-9]+}}]
; CHECK-NOT: stp {{q[0-9]+}}, q9, [sp, #{{[-0-9]+}}]
; CHECK-NOT: stp q9, {{q[0-9]+}}, [sp, #{{[-0-9]+}}]
; CHECK-NOT: str q9, [sp, {{#[-0-9]+}}]
diff --git a/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll b/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll
index f8196860aa34f..78605991114a4 100644
--- a/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-preserve-most.ll
@@ -7,25 +7,24 @@ target triple = "aarch64-unknown-linux-gnu"
declare void @normal_cc()
; Caller: preserve_mostcc; callee: normalcc. Normally callee saved registers
-; x9~x15 need to be spilled. Since most of them will be spilled in pairs in
-; reverse order, we only check the odd number ones due to FileCheck not
+; x10~x14 need to be spilled. Since most of them will be spilled in pairs in
+; reverse order, we only check the even number ones due to FileCheck not
; matching the same line of assembly twice.
; CHECK-LABEL: preserve_most
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x9(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x11(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x13(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
-; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x15(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x10(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x12(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
+; CHECK-DAG: {{st[rp]}} {{(x[0-9]+, )?x14(, x[0-9]+)?}}, [sp, #{{[-0-9]+}}]
define preserve_mostcc void @preserve_most() {
call void @normal_cc()
ret void
}
-; Caller: normalcc; callee: preserve_mostcc. x9 does not need to be spilled.
-; The same holds for x10 through x15, but we only check x9.
+; Caller: normalcc; callee: preserve_mostcc. x9 does need to be spilled, but not x10 to x14.
+; (we only check x10).
; CHECK-LABEL: normal_cc_caller
-; CHECK-NOT: stp {{x[0-9]+}}, x9, [sp, #{{[-0-9]+}}]
-; CHECK-NOT: stp x9, {{x[0-9]+}}, [sp, #{{[-0-9]+}}]
-; CHECK-NOT: str x9, [sp, {{#[-0-9]+}}]
+; CHECK-NOT: stp {{x[0-9]+}}, x10, [sp, #{{[-0-9]+}}]
+; CHECK-NOT: stp x10, {{x[0-9]+}}, [sp, #{{[-0-9]+}}]
+; CHECK-NOT: str x10, [sp, {{#[-0-9]+}}]
define dso_local void @normal_cc_caller() {
entry:
%v = alloca i32, align 4
diff --git a/llvm/test/CodeGen/AArch64/preserve.ll b/llvm/test/CodeGen/AArch64/preserve.ll
index 49fb3685bcfc1..6f6a85c158082 100644
--- a/llvm/test/CodeGen/AArch64/preserve.ll
+++ b/llvm/test/CodeGen/AArch64/preserve.ll
@@ -8,13 +8,13 @@
target triple = "aarch64-unknown-unknown"
declare void @bar1()
define preserve_mostcc void @baz() #0 {
-; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi 
$q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 
$q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 
$z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15
+; CHECK: baz Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $b16 $b17 $b18 $b19 $b20 $b21 $b22 $b23 $b24 $b25 $b26 $b27 $b28 $b29 $b30 $b31 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $d16 $d17 $d18 $d19 $d20 $d21 $d22 $d23 $d24 $d25 $d26 $d27 $d28 $d29 $d30 $d31 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $h16 $h17 $h18 $h19 $h20 $h21 $h22 $h23 $h24 $h25 $h26 $h27 $h28 $h29 $h30 $h31 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $q8 $q9 $q10 $q11 $q12 $q13 $q14 $q15 $q16 $q17 $q18 $q19 $q20 $q21 $q22 $q23 $q24 $q25 $q26 $q27 $q28 $q29 $q30 $q31 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $s16 $s17 $s18 $s19 $s20 $s21 $s22 $s23 $s24 $s25 $s26 $s27 $s28 $s29 $s30 $s31 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $b16_hi $b17_hi $b18_hi $b19_hi $b20_hi $b21_hi $b22_hi $b23_hi $b24_hi $b25_hi $b26_hi $b27_hi $b28_hi $b29_hi $b30_hi $b31_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $d8_hi $d9_hi $d10_hi $d11_hi $d12_hi $d13_hi $d14_hi $d15_hi $d16_hi $d17_hi $d18_hi $d19_hi $d20_hi $d21_hi $d22_hi $d23_hi $d24_hi $d25_hi $d26_hi $d27_hi $d28_hi $d29_hi $d30_hi $d31_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $h16_hi $h17_hi $h18_hi $h19_hi $h20_hi $h21_hi $h22_hi $h23_hi $h24_hi $h25_hi $h26_hi $h27_hi $h28_hi $h29_hi $h30_hi $h31_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi 
$q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $s16_hi $s17_hi $s18_hi $s19_hi $s20_hi $s21_hi $s22_hi $s23_hi $s24_hi $s25_hi $s26_hi $s27_hi $s28_hi $s29_hi $s30_hi $s31_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w9_hi $w15_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 
$q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 
$z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15
call void @bar1()
call void @bar2()
ret void
}
define preserve_allcc void @foo() #0 {
-; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 $d27_d28_d29_d30 $d28_d29_d30_d31 
$d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 $w4_w5 $w6_w7 $w8_w9 $w10_w11 
$w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15
+; CHECK: foo Clobbered Registers: $ffr $fpcr $fpmr $fpsr $nzcv $sp $vg $wsp $wsp_hi $za $b0 $b1 $b2 $b3 $b4 $b5 $b6 $b7 $d0 $d1 $d2 $d3 $d4 $d5 $d6 $d7 $h0 $h1 $h2 $h3 $h4 $h5 $h6 $h7 $p0 $p1 $p2 $p3 $p4 $p5 $p6 $p7 $p8 $p9 $p10 $p11 $p12 $p13 $p14 $p15 $pn0 $pn1 $pn2 $pn3 $pn4 $pn5 $pn6 $pn7 $pn8 $pn9 $pn10 $pn11 $pn12 $pn13 $pn14 $pn15 $q0 $q1 $q2 $q3 $q4 $q5 $q6 $q7 $s0 $s1 $s2 $s3 $s4 $s5 $s6 $s7 $w0 $w1 $w2 $w3 $w4 $w5 $w6 $w7 $w8 $w9 $w15 $w16 $w17 $w18 $x0 $x1 $x2 $x3 $x4 $x5 $x6 $x7 $x8 $x9 $x15 $x16 $x17 $x18 $z0 $z1 $z2 $z3 $z4 $z5 $z6 $z7 $z8 $z9 $z10 $z11 $z12 $z13 $z14 $z15 $z16 $z17 $z18 $z19 $z20 $z21 $z22 $z23 $z24 $z25 $z26 $z27 $z28 $z29 $z30 $z31 $zab0 $zad0 $zad1 $zad2 $zad3 $zad4 $zad5 $zad6 $zad7 $zah0 $zah1 $zaq0 $zaq1 $zaq2 $zaq3 $zaq4 $zaq5 $zaq6 $zaq7 $zaq8 $zaq9 $zaq10 $zaq11 $zaq12 $zaq13 $zaq14 $zaq15 $zas0 $zas1 $zas2 $zas3 $zt0 $b0_hi $b1_hi $b2_hi $b3_hi $b4_hi $b5_hi $b6_hi $b7_hi $d0_hi $d1_hi $d2_hi $d3_hi $d4_hi $d5_hi $d6_hi $d7_hi $h0_hi $h1_hi $h2_hi $h3_hi $h4_hi $h5_hi $h6_hi $h7_hi $q0_hi $q1_hi $q2_hi $q3_hi $q4_hi $q5_hi $q6_hi $q7_hi $q8_hi $q9_hi $q10_hi $q11_hi $q12_hi $q13_hi $q14_hi $q15_hi $q16_hi $q17_hi $q18_hi $q19_hi $q20_hi $q21_hi $q22_hi $q23_hi $q24_hi $q25_hi $q26_hi $q27_hi $q28_hi $q29_hi $q30_hi $q31_hi $s0_hi $s1_hi $s2_hi $s3_hi $s4_hi $s5_hi $s6_hi $s7_hi $w0_hi $w1_hi $w2_hi $w3_hi $w4_hi $w5_hi $w6_hi $w7_hi $w8_hi $w9_hi $w15_hi $w16_hi $w17_hi $w18_hi $d0_d1 $d1_d2 $d2_d3 $d3_d4 $d4_d5 $d5_d6 $d6_d7 $d7_d8 $d15_d16 $d16_d17 $d17_d18 $d18_d19 $d19_d20 $d20_d21 $d21_d22 $d22_d23 $d23_d24 $d24_d25 $d25_d26 $d26_d27 $d27_d28 $d28_d29 $d29_d30 $d30_d31 $d31_d0 $d0_d1_d2_d3 $d1_d2_d3_d4 $d2_d3_d4_d5 $d3_d4_d5_d6 $d4_d5_d6_d7 $d5_d6_d7_d8 $d6_d7_d8_d9 $d7_d8_d9_d10 $d13_d14_d15_d16 $d14_d15_d16_d17 $d15_d16_d17_d18 $d16_d17_d18_d19 $d17_d18_d19_d20 $d18_d19_d20_d21 $d19_d20_d21_d22 $d20_d21_d22_d23 $d21_d22_d23_d24 $d22_d23_d24_d25 $d23_d24_d25_d26 $d24_d25_d26_d27 $d25_d26_d27_d28 $d26_d27_d28_d29 
$d27_d28_d29_d30 $d28_d29_d30_d31 $d29_d30_d31_d0 $d30_d31_d0_d1 $d31_d0_d1_d2 $d0_d1_d2 $d1_d2_d3 $d2_d3_d4 $d3_d4_d5 $d4_d5_d6 $d5_d6_d7 $d6_d7_d8 $d7_d8_d9 $d14_d15_d16 $d15_d16_d17 $d16_d17_d18 $d17_d18_d19 $d18_d19_d20 $d19_d20_d21 $d20_d21_d22 $d21_d22_d23 $d22_d23_d24 $d23_d24_d25 $d24_d25_d26 $d25_d26_d27 $d26_d27_d28 $d27_d28_d29 $d28_d29_d30 $d29_d30_d31 $d30_d31_d0 $d31_d0_d1 $p0_p1 $p1_p2 $p2_p3 $p3_p4 $p4_p5 $p5_p6 $p6_p7 $p7_p8 $p8_p9 $p9_p10 $p10_p11 $p11_p12 $p12_p13 $p13_p14 $p14_p15 $p15_p0 $q0_q1 $q1_q2 $q2_q3 $q3_q4 $q4_q5 $q5_q6 $q6_q7 $q7_q8 $q8_q9 $q9_q10 $q10_q11 $q11_q12 $q12_q13 $q13_q14 $q14_q15 $q15_q16 $q16_q17 $q17_q18 $q18_q19 $q19_q20 $q20_q21 $q21_q22 $q22_q23 $q23_q24 $q24_q25 $q25_q26 $q26_q27 $q27_q28 $q28_q29 $q29_q30 $q30_q31 $q31_q0 $q0_q1_q2_q3 $q1_q2_q3_q4 $q2_q3_q4_q5 $q3_q4_q5_q6 $q4_q5_q6_q7 $q5_q6_q7_q8 $q6_q7_q8_q9 $q7_q8_q9_q10 $q8_q9_q10_q11 $q9_q10_q11_q12 $q10_q11_q12_q13 $q11_q12_q13_q14 $q12_q13_q14_q15 $q13_q14_q15_q16 $q14_q15_q16_q17 $q15_q16_q17_q18 $q16_q17_q18_q19 $q17_q18_q19_q20 $q18_q19_q20_q21 $q19_q20_q21_q22 $q20_q21_q22_q23 $q21_q22_q23_q24 $q22_q23_q24_q25 $q23_q24_q25_q26 $q24_q25_q26_q27 $q25_q26_q27_q28 $q26_q27_q28_q29 $q27_q28_q29_q30 $q28_q29_q30_q31 $q29_q30_q31_q0 $q30_q31_q0_q1 $q31_q0_q1_q2 $q0_q1_q2 $q1_q2_q3 $q2_q3_q4 $q3_q4_q5 $q4_q5_q6 $q5_q6_q7 $q6_q7_q8 $q7_q8_q9 $q8_q9_q10 $q9_q10_q11 $q10_q11_q12 $q11_q12_q13 $q12_q13_q14 $q13_q14_q15 $q14_q15_q16 $q15_q16_q17 $q16_q17_q18 $q17_q18_q19 $q18_q19_q20 $q19_q20_q21 $q20_q21_q22 $q21_q22_q23 $q22_q23_q24 $q23_q24_q25 $q24_q25_q26 $q25_q26_q27 $q26_q27_q28 $q27_q28_q29 $q28_q29_q30 $q29_q30_q31 $q30_q31_q0 $q31_q0_q1 $x0_x1_x2_x3_x4_x5_x6_x7 $x2_x3_x4_x5_x6_x7_x8_x9 $x4_x5_x6_x7_x8_x9_x10_x11 $x6_x7_x8_x9_x10_x11_x12_x13 $x8_x9_x10_x11_x12_x13_x14_x15 $x10_x11_x12_x13_x14_x15_x16_x17 $x12_x13_x14_x15_x16_x17_x18_x19 $x14_x15_x16_x17_x18_x19_x20_x21 $x16_x17_x18_x19_x20_x21_x22_x23 $x18_x19_x20_x21_x22_x23_x24_x25 $w30_wzr $w0_w1 $w2_w3 
$w4_w5 $w6_w7 $w8_w9 $w10_w11 $w12_w13 $w14_w15 $w16_w17 $w18_w19 $lr_xzr $x0_x1 $x2_x3 $x4_x5 $x6_x7 $x8_x9 $x10_x11 $x12_x13 $x14_x15 $x16_x17 $x18_x19 $z0_z1 $z1_z2 $z2_z3 $z3_z4 $z4_z5 $z5_z6 $z6_z7 $z7_z8 $z8_z9 $z9_z10 $z10_z11 $z11_z12 $z12_z13 $z13_z14 $z14_z15 $z15_z16 $z16_z17 $z17_z18 $z18_z19 $z19_z20 $z20_z21 $z21_z22 $z22_z23 $z23_z24 $z24_z25 $z25_z26 $z26_z27 $z27_z28 $z28_z29 $z29_z30 $z30_z31 $z31_z0 $z0_z1_z2_z3 $z1_z2_z3_z4 $z2_z3_z4_z5 $z3_z4_z5_z6 $z4_z5_z6_z7 $z5_z6_z7_z8 $z6_z7_z8_z9 $z7_z8_z9_z10 $z8_z9_z10_z11 $z9_z10_z11_z12 $z10_z11_z12_z13 $z11_z12_z13_z14 $z12_z13_z14_z15 $z13_z14_z15_z16 $z14_z15_z16_z17 $z15_z16_z17_z18 $z16_z17_z18_z19 $z17_z18_z19_z20 $z18_z19_z20_z21 $z19_z20_z21_z22 $z20_z21_z22_z23 $z21_z22_z23_z24 $z22_z23_z24_z25 $z23_z24_z25_z26 $z24_z25_z26_z27 $z25_z26_z27_z28 $z26_z27_z28_z29 $z27_z28_z29_z30 $z28_z29_z30_z31 $z29_z30_z31_z0 $z30_z31_z0_z1 $z31_z0_z1_z2 $z0_z1_z2 $z1_z2_z3 $z2_z3_z4 $z3_z4_z5 $z4_z5_z6 $z5_z6_z7 $z6_z7_z8 $z7_z8_z9 $z8_z9_z10 $z9_z10_z11 $z10_z11_z12 $z11_z12_z13 $z12_z13_z14 $z13_z14_z15 $z14_z15_z16 $z15_z16_z17 $z16_z17_z18 $z17_z18_z19 $z18_z19_z20 $z19_z20_z21 $z20_z21_z22 $z21_z22_z23 $z22_z23_z24 $z23_z24_z25 $z24_z25_z26 $z25_z26_z27 $z26_z27_z28 $z27_z28_z29 $z28_z29_z30 $z29_z30_z31 $z30_z31_z0 $z31_z0_z1 $z16_z24 $z17_z25 $z18_z26 $z19_z27 $z20_z28 $z21_z29 $z22_z30 $z23_z31 $z0_z8 $z1_z9 $z2_z10 $z3_z11 $z4_z12 $z5_z13 $z6_z14 $z7_z15 $z16_z20_z24_z28 $z17_z21_z25_z29 $z18_z22_z26_z30 $z19_z23_z27_z31 $z0_z4_z8_z12 $z1_z5_z9_z13 $z2_z6_z10_z14 $z3_z7_z11_z15
call void @bar1()
call void @bar2()
ret void
diff --git a/llvm/test/CodeGen/AArch64/trampoline.ll b/llvm/test/CodeGen/AArch64/trampoline.ll
index 0e682704afbf8..d9016b02a0f80 100644
--- a/llvm/test/CodeGen/AArch64/trampoline.ll
+++ b/llvm/test/CodeGen/AArch64/trampoline.ll
@@ -83,7 +83,7 @@ define i64 @func1() {
; CHECK-LINUX-NEXT: str w9, [sp, #16]
; CHECK-LINUX-NEXT: add x9, sp, #56
; CHECK-LINUX-NEXT: stp x9, x8, [sp, #24]
-; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f
+; CHECK-LINUX-NEXT: mov x8, #132 // =0x84
; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16
; CHECK-LINUX-NEXT: movk x8, #177, lsl #32
; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48
@@ -112,7 +112,7 @@ define i64 @func1() {
; CHECK-PC-NEXT: add x0, sp, #8
; CHECK-PC-NEXT: movk w8, #54815, lsl #16
; CHECK-PC-NEXT: str w8, [sp, #16]
-; CHECK-PC-NEXT: mov x8, #143 // =0x8f
+; CHECK-PC-NEXT: mov x8, #132 // =0x84
; CHECK-PC-NEXT: movk x8, #22528, lsl #16
; CHECK-PC-NEXT: movk x8, #177, lsl #32
; CHECK-PC-NEXT: movk x8, #22528, lsl #48
@@ -148,7 +148,7 @@ define i64 @func1() {
; CHECK-APPLE-NEXT: mov x0, sp
; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16
; CHECK-APPLE-NEXT: str w8, [sp, #8]
-; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f
+; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84
; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16
; CHECK-APPLE-NEXT: movk x8, #177, lsl #32
; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48
@@ -184,7 +184,7 @@ define i64 @func2() {
; CHECK-LINUX-NEXT: add x9, sp, #8
; CHECK-LINUX-NEXT: add x1, x0, #12
; CHECK-LINUX-NEXT: stp x9, x8, [x0, #16]
-; CHECK-LINUX-NEXT: mov x8, #143 // =0x8f
+; CHECK-LINUX-NEXT: mov x8, #132 // =0x84
; CHECK-LINUX-NEXT: movk x8, #22528, lsl #16
; CHECK-LINUX-NEXT: movk x8, #177, lsl #32
; CHECK-LINUX-NEXT: movk x8, #22528, lsl #48
@@ -210,7 +210,7 @@ define i64 @func2() {
; CHECK-PC-NEXT: mov w8, #544 // =0x220
; CHECK-PC-NEXT: movk w8, #54815, lsl #16
; CHECK-PC-NEXT: str w8, [x0, #8]
-; CHECK-PC-NEXT: mov x8, #143 // =0x8f
+; CHECK-PC-NEXT: mov x8, #132 // =0x84
; CHECK-PC-NEXT: movk x8, #22528, lsl #16
; CHECK-PC-NEXT: movk x8, #177, lsl #32
; CHECK-PC-NEXT: movk x8, #22528, lsl #48
@@ -246,7 +246,7 @@ define i64 @func2() {
; CHECK-APPLE-NEXT: mov w8, #544 ; =0x220
; CHECK-APPLE-NEXT: movk w8, #54815, lsl #16
; CHECK-APPLE-NEXT: str w8, [x0, #8]
-; CHECK-APPLE-NEXT: mov x8, #143 ; =0x8f
+; CHECK-APPLE-NEXT: mov x8, #132 ; =0x84
; CHECK-APPLE-NEXT: movk x8, #22528, lsl #16
; CHECK-APPLE-NEXT: movk x8, #177, lsl #32
; CHECK-APPLE-NEXT: movk x8, #22528, lsl #48
>From 3e53925716d0cef9f77fd8057bea1aa1080f8e1b Mon Sep 17 00:00:00 2001
From: Jameson Nash <vtjnash at gmail.com>
Date: Wed, 12 Feb 2025 10:47:42 -0500
Subject: [PATCH 5/5] fixup! [AArch64] fix trampoline implementation: use X15
---
.../lib/Optimizer/CodeGen/BoxedProcedure.cpp | 6 ++---
flang/test/Fir/boxproc.fir | 4 ++--
.../Target/AArch64/AArch64FrameLowering.cpp | 11 +++++-----
.../Target/AArch64/AArch64ISelLowering.cpp | 22 +++++++++----------
4 files changed, 21 insertions(+), 22 deletions(-)
diff --git a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
index c91ead6f0c019..d47b404747bca 100644
--- a/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
+++ b/flang/lib/Optimizer/CodeGen/BoxedProcedure.cpp
@@ -277,9 +277,9 @@ class BoxedProcedurePass
// For PPC32 and PPC64, the thunk is populated by a call to
// __trampoline_setup, which is defined in
// compiler-rt/lib/builtins/trampoline_setup.c and requires the
- // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64, the
- // thunk setup doesn't go through __trampoline_setup and fits in 32
- // bytes.
+ // thunk size greater than 32 bytes. For AArch64, RISCV and x86_64,
+ // the thunk setup doesn't go through __trampoline_setup and fits in
+ // 32 bytes.
fir::SequenceType::Extent thunkSize = triple.getTrampolineSize();
mlir::Type buffTy = SequenceType::get({thunkSize}, i8Ty);
auto buffer = builder.create<AllocaOp>(loc, buffTy);
diff --git a/flang/test/Fir/boxproc.fir b/flang/test/Fir/boxproc.fir
index e99dfd0b92afd..9e5e41a94069c 100644
--- a/flang/test/Fir/boxproc.fir
+++ b/flang/test/Fir/boxproc.fir
@@ -3,7 +3,7 @@
// RUN: %if powerpc-registered-target %{tco --target=powerpc64le-unknown-linux-gnu %s | FileCheck %s --check-prefixes=CHECK,CHECK-PPC %}
// CHECK-LABEL: define void @_QPtest_proc_dummy()
-// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK-X86: %[[VAL_3:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK-PPC: %[[VAL_3:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
// CHECK: %[[VAL_1:.*]] = alloca { ptr }, i64 1, align 8
@@ -63,7 +63,7 @@ func.func @_QPtest_proc_dummy_other(%arg0: !fir.boxproc<() -> ()>) {
}
// CHECK-LABEL: define void @_QPtest_proc_dummy_char()
-// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [36 x i8], i64 1, align 1
+// CHECK-AARCH64: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK-X86: %[[VAL_20:.*]] = alloca [32 x i8], i64 1, align 1
// CHECK-PPC: %[[VAL_20:.*]] = alloca [4{{[0-8]+}} x i8], i64 1, align 1
// CHECK: %[[VAL_2:.*]] = alloca { { ptr, i64 } }, i64 1, align 8
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 789bd6249b400..538f08e905f4e 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2047,12 +2047,11 @@ void AArch64FrameLowering::emitPrologue(MachineFunction &MF,
// Find an available register to store value of VG to.
unsigned X15Scratch = AArch64::NoRegister;
const AArch64Subtarget &STI = MF.getSubtarget<AArch64Subtarget>();
- if (llvm::any_of(
- MBB.liveins(),
- [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
- return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
- AArch64::X15, LiveIn.PhysReg);
- })) {
+ if (llvm::any_of(MBB.liveins(),
+ [&STI](const MachineBasicBlock::RegisterMaskPair &LiveIn) {
+ return STI.getRegisterInfo()->isSuperOrSubRegisterEq(
+ AArch64::X15, LiveIn.PhysReg);
+ })) {
X15Scratch = findScratchNonCalleeSaveRegister(&MBB);
assert(X15Scratch != AArch64::NoRegister);
#ifndef NDEBUG
diff --git a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
index 795e6bd1c27fa..b214c0c92742b 100644
--- a/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64ISelLowering.cpp
@@ -7296,7 +7296,7 @@ SDValue AArch64TargetLowering::LowerADJUST_TRAMPOLINE(SDValue Op,
SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SelectionDAG &DAG) const {
SDValue Chain = Op.getOperand(0);
- SDValue Trmp = Op.getOperand(1); // trampoline, 36 bytes
+ SDValue Trmp = Op.getOperand(1); // trampoline, >=32 bytes
SDValue FPtr = Op.getOperand(2); // nested function
SDValue Nest = Op.getOperand(3); // 'nest' parameter value
@@ -7311,7 +7311,7 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue OutChains[5];
const Function *Func =
- cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
+ cast<Function>(cast<SrcValueSDNode>(Op.getOperand(5))->getValue());
CallingConv::ID CC = Func->getCallingConv();
unsigned NestReg;
@@ -7330,15 +7330,15 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue Addr = Trmp;
SDLoc dl(Op);
- OutChains[0] =
- DAG.getStore(Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr,
- MachinePointerInfo(TrmpAddr));
+ OutChains[0] = DAG.getStore(
+ Chain, dl, DAG.getConstant(0x58000080u | NestReg, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(4, dl, MVT::i64));
- OutChains[1] =
- DAG.getStore(Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr,
- MachinePointerInfo(TrmpAddr, 4));
+ OutChains[1] = DAG.getStore(
+ Chain, dl, DAG.getConstant(0x580000b0u | FptrReg, dl, MVT::i32), Addr,
+ MachinePointerInfo(TrmpAddr, 4));
Addr = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
DAG.getConstant(8, dl, MVT::i64));
@@ -7359,11 +7359,11 @@ SDValue AArch64TargetLowering::LowerINIT_TRAMPOLINE(SDValue Op,
SDValue StoreToken = DAG.getNode(ISD::TokenFactor, dl, MVT::Other, OutChains);
SDValue EndOfTrmp = DAG.getNode(ISD::ADD, dl, MVT::i64, Trmp,
- DAG.getConstant(12, dl, MVT::i64));
+ DAG.getConstant(12, dl, MVT::i64));
// Call clear cache on the trampoline instructions.
- return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken,
- Trmp, EndOfTrmp);
+ return DAG.getNode(ISD::CLEAR_CACHE, dl, MVT::Other, StoreToken, Trmp,
+ EndOfTrmp);
}
SDValue AArch64TargetLowering::LowerOperation(SDValue Op,
More information about the llvm-commits mailing list