[llvm] [AArch64][SVE] Avoid extra pop of "FixedObject" with FPAfterSVECalleeSaves (PR #156452)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 2 05:51:12 PDT 2025
https://github.com/MacDue created https://github.com/llvm/llvm-project/pull/156452
Previously, we would pop `FixedObject`-bytes after deallocating the SVE area, then again as part of the "AfterCSRPopSize". This could be seen in the tests `@f6` and `@f9`.
This patch removes the erroneous pop, and refactors `FPAfterSVECalleeSaves` to reuse more of the existing GPR deallocation logic, which allows for post-increments (the last GPR reload is folded into a post-indexed load, e.g. `ldr x28, [sp], #16`).
From 11a11db10ee977687677b9398e29942ec51215bb Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Tue, 2 Sep 2025 08:28:45 +0000
Subject: [PATCH] [AArch64][SVE] Avoid extra pop of "FixedObject" with
FPAfterSVECalleeSaves
Previously, we would pop `FixedObject`-bytes after deallocating the SVE
area, then again as part of the "AfterCSRPopSize". This could be seen
in the tests `@f6` and `@f9`.
This patch removes the erroneous pop, and refactors `FPAfterSVECalleeSaves`
to reuse more of the existing GPR deallocation logic, which allows for
post-increments (post-indexed loads) in the epilogue.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 45 +++++----
.../CodeGen/AArch64/framelayout-sve-win.mir | 30 ++----
llvm/test/CodeGen/AArch64/win-sve.ll | 98 ++++++-------------
3 files changed, 67 insertions(+), 106 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7725fa4f1ccb1..4c4483ea707e5 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2570,14 +2570,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
Subtarget.isTargetWindows() && AFI->getSVECalleeSavedStackSize();
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
- // Assume we can't combine the last pop with the sp restore.
- bool CombineAfterCSRBump = false;
+
+ unsigned ProloguePopSize = PrologueSaveSize;
if (FPAfterSVECalleeSaves) {
+ // With FPAfterSVECalleeSaves ProloguePopSize is the amount of stack that
+ // needs to be popped until we reach the start of the SVE save area. The
+ // "FixedObject" stack occurs after the SVE area and must be popped later.
+ ProloguePopSize -= FixedObject;
AfterCSRPopSize += FixedObject;
- } else if (!CombineSPBump && PrologueSaveSize != 0) {
+ }
+
+ // Assume we can't combine the last pop with the sp restore.
+ if (!CombineSPBump && ProloguePopSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
while (Pop->getOpcode() == TargetOpcode::CFI_INSTRUCTION ||
- AArch64InstrInfo::isSEHInstruction(*Pop))
+ AArch64InstrInfo::isSEHInstruction(*Pop) ||
+ (FPAfterSVECalleeSaves && IsSVECalleeSave(Pop)))
Pop = std::prev(Pop);
// Converting the last ldp to a post-index ldp is valid only if the last
// ldp's offset is 0.
@@ -2587,15 +2595,22 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
// may clobber), convert it to a post-index ldp.
if (OffsetOp.getImm() == 0 && AfterCSRPopSize >= 0) {
convertCalleeSaveRestoreToSPPrePostIncDec(
- MBB, Pop, DL, TII, PrologueSaveSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
- MachineInstr::FrameDestroy, PrologueSaveSize);
+ MBB, Pop, DL, TII, ProloguePopSize, NeedsWinCFI, &HasWinCFI, EmitCFI,
+ MachineInstr::FrameDestroy, ProloguePopSize);
+ } else if (FPAfterSVECalleeSaves) {
+ // If not, and FPAfterSVECalleeSaves is enabled, deallocate callee-save
+ // non-SVE registers to move the stack pointer to the start of the SVE
+ // area.
+ emitFrameOffset(MBB, std::next(Pop), DL, AArch64::SP, AArch64::SP,
+ StackOffset::getFixed(ProloguePopSize), TII,
+ MachineInstr::FrameDestroy, false, NeedsWinCFI,
+ &HasWinCFI);
} else {
- // If not, make sure to emit an add after the last ldp.
+ // Otherwise, make sure to emit an add after the last ldp.
// We're doing this by transferring the size to be restored from the
// adjustment *before* the CSR pops to the adjustment *after* the CSR
// pops.
- AfterCSRPopSize += PrologueSaveSize;
- CombineAfterCSRBump = true;
+ AfterCSRPopSize += ProloguePopSize;
}
}
@@ -2711,16 +2726,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
NeedsWinCFI, &HasWinCFI);
}
- // Deallocate callee-save non-SVE registers.
- emitFrameOffset(MBB, RestoreBegin, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(AFI->getCalleeSavedStackSize()), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
- // Deallocate fixed objects.
- emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
- StackOffset::getFixed(FixedObject), TII,
- MachineInstr::FrameDestroy, false, NeedsWinCFI, &HasWinCFI);
-
// Deallocate callee-save SVE registers.
emitFrameOffset(MBB, RestoreEnd, DL, AArch64::SP, AArch64::SP,
DeallocateAfter, TII, MachineInstr::FrameDestroy, false,
@@ -2840,7 +2845,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
MBB, MBB.getFirstTerminator(), DL, AArch64::SP, AArch64::SP,
StackOffset::getFixed(AfterCSRPopSize), TII, MachineInstr::FrameDestroy,
false, NeedsWinCFI, &HasWinCFI, EmitCFI,
- StackOffset::getFixed(CombineAfterCSRBump ? PrologueSaveSize : 0));
+ StackOffset::getFixed(AfterCSRPopSize - ArgumentStackToRestore));
}
}
diff --git a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
index 5933c5daa67ed..b8302e64f282d 100644
--- a/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
+++ b/llvm/test/CodeGen/AArch64/framelayout-sve-win.mir
@@ -380,10 +380,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
- ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
- ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
- ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
- ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+ ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $p4 = frame-destroy LDR_PXI $sp, 0 :: (load (s16) from %stack.4)
; CHECK-NEXT: frame-destroy SEH_SavePReg 4, 0
; CHECK-NEXT: $p5 = frame-destroy LDR_PXI $sp, 1 :: (load (s16) from %stack.3)
@@ -430,10 +428,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
- ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.1)
- ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
- ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
- ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+ ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.1)
+ ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 0 :: (load (s128) from %stack.4)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 0
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.3)
@@ -557,10 +553,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
; CHECK-NEXT: $x21, $lr = frame-destroy LDPXi $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
; CHECK-NEXT: frame-destroy SEH_SaveRegP 21, 30, 16
- ; CHECK-NEXT: $x19, $x20 = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
- ; CHECK-NEXT: frame-destroy SEH_SaveRegP 19, 20, 0
- ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 32, 0
- ; CHECK-NEXT: frame-destroy SEH_StackAlloc 32
+ ; CHECK-NEXT: early-clobber $sp, $x19, $x20 = frame-destroy LDPXpost $sp, 4 :: (load (s64) from %stack.4), (load (s64) from %stack.5)
+ ; CHECK-NEXT: frame-destroy SEH_SaveRegP_X 19, 20, -32
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.21)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.20)
@@ -745,10 +739,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDXri $fp, 0, 0
; CHECK-NEXT: frame-destroy SEH_SetFP
- ; CHECK-NEXT: $fp, $lr = frame-destroy LDPXi $sp, 0 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
- ; CHECK-NEXT: frame-destroy SEH_SaveFPLR 0
- ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
- ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+ ; CHECK-NEXT: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2 :: (load (s64) from %stack.2), (load (s64) from %stack.3)
+ ; CHECK-NEXT: frame-destroy SEH_SaveFPLR_X -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.19)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 2
; CHECK-NEXT: $z9 = frame-destroy LDR_ZXI $sp, 3 :: (load (s128) from %stack.18)
@@ -869,10 +861,8 @@ body: |
; CHECK-NEXT: frame-destroy SEH_EpilogStart
; CHECK-NEXT: $sp = frame-destroy ADDVL_XXI $sp, 7, implicit $vg
; CHECK-NEXT: frame-destroy SEH_AllocZ 7
- ; CHECK-NEXT: $lr = frame-destroy LDRXui $sp, 0 :: (load (s64) from %stack.6)
- ; CHECK-NEXT: frame-destroy SEH_SaveReg 30, 0
- ; CHECK-NEXT: $sp = frame-destroy ADDXri $sp, 16, 0
- ; CHECK-NEXT: frame-destroy SEH_StackAlloc 16
+ ; CHECK-NEXT: early-clobber $sp, $lr = frame-destroy LDRXpost $sp, 16 :: (load (s64) from %stack.6)
+ ; CHECK-NEXT: frame-destroy SEH_SaveReg_X 30, -16
; CHECK-NEXT: $z8 = frame-destroy LDR_ZXI $sp, 1 :: (load (s128) from %stack.8)
; CHECK-NEXT: frame-destroy SEH_SaveZReg 8, 1
; CHECK-NEXT: $z23 = frame-destroy LDR_ZXI $sp, 2 :: (load (s128) from %stack.7)
diff --git a/llvm/test/CodeGen/AArch64/win-sve.ll b/llvm/test/CodeGen/AArch64/win-sve.ll
index 53ac9344175a3..8446de1a59cf9 100644
--- a/llvm/test/CodeGen/AArch64/win-sve.ll
+++ b/llvm/test/CodeGen/AArch64/win-sve.ll
@@ -75,10 +75,8 @@ define i32 @f(<vscale x 2 x i64> %x) {
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -234,10 +232,8 @@ define void @f2(i64 %n, <vscale x 2 x i64> %x) {
; CHECK-NEXT: .seh_save_fplr 16
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -384,10 +380,8 @@ define void @f3(i64 %n, <vscale x 2 x i64> %x) {
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -538,10 +532,8 @@ define void @f4(i64 %n, <vscale x 2 x i64> %x) {
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -702,10 +694,8 @@ define void @f5(i64 %n, <vscale x 2 x i64> %x) {
; CHECK-NEXT: .seh_save_fplr 16
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -860,10 +850,10 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
; CHECK-NEXT: stur x0, [x8, #16]
; CHECK-NEXT: addvl x8, x29, #18
; CHECK-NEXT: ldr x1, [x8, #32]
-; CHECK-NEXT: .Ltmp0:
+; CHECK-NEXT: .Ltmp0: // EH_LABEL
; CHECK-NEXT: add x0, x19, #0
; CHECK-NEXT: bl g6
-; CHECK-NEXT: .Ltmp1:
+; CHECK-NEXT: .Ltmp1: // EH_LABEL
; CHECK-NEXT: // %bb.1: // %invoke.cont
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: add sp, sp, #64
@@ -872,10 +862,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
; CHECK-NEXT: .seh_save_fplr 16
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -932,8 +920,6 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
; CHECK-NEXT: .seh_save_preg p14, 10
; CHECK-NEXT: ldr p15, [sp, #11, mul vl] // 2-byte Folded Reload
; CHECK-NEXT: .seh_save_preg p15, 11
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: addvl sp, sp, #18
; CHECK-NEXT: .seh_allocz 18
; CHECK-NEXT: add sp, sp, #16
@@ -1024,10 +1010,8 @@ define void @f6(<vscale x 2 x i64> %x, [8 x i64] %pad, i64 %n9) personality ptr
; CHECK-NEXT: .seh_save_fplr 16
; CHECK-NEXT: ldr x28, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x28, 8
-; CHECK-NEXT: ldr x19, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x19, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x19, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x19, 32
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1144,10 +1128,8 @@ define void @f8(<vscale x 2 x i64> %v) {
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
@@ -1196,14 +1178,10 @@ define void @f9(<vscale x 2 x i64> %v, ...) {
; CHECK-NEXT: //APP
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
-; CHECK-NEXT: add sp, sp, #64
-; CHECK-NEXT: .seh_stackalloc 64
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: .seh_allocz 1
; CHECK-NEXT: add sp, sp, #64
@@ -1301,10 +1279,8 @@ define void @f10(i64 %n, <vscale x 2 x i64> %x) "frame-pointer"="all" {
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
; CHECK-NEXT: ldr z8, [sp, #2, mul vl] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 2
; CHECK-NEXT: ldr z9, [sp, #3, mul vl] // 16-byte Folded Reload
@@ -1390,10 +1366,8 @@ define i32 @f11(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
; CHECK-NEXT: //NO_APP
; CHECK-NEXT: str d0, [sp, #8]
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
@@ -1431,10 +1405,8 @@ define i32 @f12(double %d, <vscale x 4 x i32> %vs) "aarch64_pstate_sm_compatible
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: addvl sp, sp, #1
; CHECK-NEXT: .seh_allocz 1
-; CHECK-NEXT: ldr x30, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x30, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x30, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x30, 16
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
@@ -1475,10 +1447,8 @@ define i32 @f13(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
; CHECK-NEXT: .seh_startepilogue
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
@@ -1521,10 +1491,8 @@ define i32 @f14(double %d, <vscale x 4 x i32> %vs) "frame-pointer"="all" {
; CHECK-NEXT: .seh_allocz 1
; CHECK-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_fplr 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #32
-; CHECK-NEXT: .seh_stackalloc 32
+; CHECK-NEXT: ldr x28, [sp], #32 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 32
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
@@ -1572,10 +1540,8 @@ define tailcc void @f15(double %d, <vscale x 4 x i32> %vs, [9 x i64], i32 %i) {
; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Folded Reload
; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x28, [sp] // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg x28, 0
-; CHECK-NEXT: add sp, sp, #16
-; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: ldr x28, [sp], #16 // 8-byte Folded Reload
+; CHECK-NEXT: .seh_save_reg_x x28, 16
; CHECK-NEXT: ldr z8, [sp] // 16-byte Folded Reload
; CHECK-NEXT: .seh_save_zreg z8, 0
; CHECK-NEXT: addvl sp, sp, #1
More information about the llvm-commits
mailing list