[llvm] [AArch64] Fix Windows prologue handling to pair more registers. (PR #170214)
Eli Friedman via llvm-commits
llvm-commits at lists.llvm.org
Fri Dec 5 14:44:54 PST 2025
https://github.com/efriedma-quic updated https://github.com/llvm/llvm-project/pull/170214
>From b3dfbc48d4071e0fdfc17c9a73040b9913b15dc1 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 13:38:02 -0800
Subject: [PATCH 1/4] [AArch64] Fix Windows prologue handling to pair more
registers.
Currently, there's code to suppress register pairing in certain prologue
configurations, but we don't actually need to suppress the pairing itself;
we just need to suppress the formation of
pre-decrement/post-increment instructions.
Pairing saves an instruction in some cases, and in other cases enables
packed unwind. (There's a comment in the code noting we could enable
packed unwind in more cases, but that's not clearly profitable.)
---
.../Target/AArch64/AArch64FrameLowering.cpp | 21 ++--
.../AArch64/AArch64PrologueEpilogue.cpp | 14 ++-
.../CodeGen/AArch64/arm64-windows-calls.ll | 44 ++++----
llvm/test/CodeGen/AArch64/win64_vararg2.ll | 24 ++---
llvm/test/CodeGen/AArch64/wineh-pac.ll | 102 +++++++++---------
.../CodeGen/AArch64/wineh-save-lrpair2.mir | 12 +--
6 files changed, 106 insertions(+), 111 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 7290b3f67c2e3..042099db55824 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1556,7 +1556,6 @@ static bool produceCompactUnwindFrame(const AArch64FrameLowering &AFL,
static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
unsigned SpillCount, unsigned Reg1,
unsigned Reg2, bool NeedsWinCFI,
- bool IsFirst,
const TargetRegisterInfo *TRI) {
// If we are generating register pairs for a Windows function that requires
// EH support, then pair consecutive registers only. There are no unwind
@@ -1582,12 +1581,9 @@ static bool invalidateWindowsRegisterPairing(bool SpillExtendedVolatile,
: false;
// If pairing a GPR with LR, the pair can be described by the save_lrpair
- // opcode. If this is the first register pair, it would end up with a
- // predecrement, but there's no save_lrpair_x opcode, so we can only do this
- // if LR is paired with something else than the first register.
- // The save_lrpair opcode requires the first register to be an odd one.
+ // opcode. The save_lrpair opcode requires the first register to be odd.
if (Reg1 >= AArch64::X19 && Reg1 <= AArch64::X27 &&
- (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR && !IsFirst)
+ (Reg1 - AArch64::X19) % 2 == 0 && Reg2 == AArch64::LR)
return false;
return true;
}
@@ -1600,12 +1596,10 @@ static bool invalidateRegisterPairing(bool SpillExtendedVolatile,
unsigned SpillCount, unsigned Reg1,
unsigned Reg2, bool UsesWinAAPCS,
bool NeedsWinCFI, bool NeedsFrameRecord,
- bool IsFirst,
const TargetRegisterInfo *TRI) {
if (UsesWinAAPCS)
return invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
- Reg1, Reg2, NeedsWinCFI, IsFirst,
- TRI);
+ Reg1, Reg2, NeedsWinCFI, TRI);
// If we need to store the frame record, don't pair any register
// with LR other than FP.
@@ -1775,21 +1769,20 @@ void computeCalleeSaveRegisterPairs(const AArch64FrameLowering &AFL,
// Add the next reg to the pair if it is in the same register class.
if (unsigned(i + RegInc) < Count && !HasCSHazardPadding) {
MCRegister NextReg = CSI[i + RegInc].getReg();
- bool IsFirst = i == FirstReg;
unsigned SpillCount = NeedsWinCFI ? FirstReg - i : i;
switch (RPI.Type) {
case RegPairInfo::GPR:
if (AArch64::GPR64RegClass.contains(NextReg) &&
- !invalidateRegisterPairing(
- SpillExtendedVolatile, SpillCount, RPI.Reg1, NextReg, IsWindows,
- NeedsWinCFI, NeedsFrameRecord, IsFirst, TRI))
+ !invalidateRegisterPairing(SpillExtendedVolatile, SpillCount,
+ RPI.Reg1, NextReg, IsWindows,
+ NeedsWinCFI, NeedsFrameRecord, TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR64:
if (AArch64::FPR64RegClass.contains(NextReg) &&
!invalidateWindowsRegisterPairing(SpillExtendedVolatile, SpillCount,
RPI.Reg1, NextReg, NeedsWinCFI,
- IsFirst, TRI))
+ TRI))
RPI.Reg2 = NextReg;
break;
case RegPairInfo::FPR128:
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index 965585f40571b..c9fca019e4074 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -179,11 +179,23 @@ AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
(void)Success;
assert(Success && "unknown load/store opcode");
+ const auto *TRI = Subtarget.getRegisterInfo();
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
+ //
+ // On Windows, some register pairs involving LR can't be folded because
+ // there isn't a corresponding unwind opcode. (Note that packed unwind expects
+ // a sequence like "sub sp, sp, #16; stp x19, lr, [sp]; sub sp, sp, #16",
+ // but we currently generate "sub sp, sp, #32; stp x19, lr, [sp, #16]". We
+ // could handle that here, but it's not clearly profitable; it saves up to
+ // 4 words of xdata, but it costs 2 instructions.)
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
- CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue()) {
+ CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
+ (NeedsWinCFI &&
+ (NewOpc == AArch64::LDPXpost || NewOpc == AArch64::STPXpre) &&
+ TRI->getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
+ TRI->getEncodingValue(MBBI->getOperand(1).getReg()))) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy) {
diff --git a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll
index 666f1cb7bcf6f..41f00a6c41b3b 100644
--- a/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll
+++ b/llvm/test/CodeGen/AArch64/arm64-windows-calls.ll
@@ -143,10 +143,10 @@ define void @call_copy_pod() {
; CHECK-LABEL: call_copy_pod:
; CHECK: .seh_proc call_copy_pod
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x30, 8
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, Pod
; CHECK-NEXT: add x19, x19, :lo12:Pod
@@ -154,10 +154,10 @@ define void @call_copy_pod() {
; CHECK-NEXT: bl copy_pod
; CHECK-NEXT: stp d0, d1, [x19]
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg_x x19, 16
+; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet
@@ -175,10 +175,8 @@ define void @call_copy_notcxx14aggregate() {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: sub sp, sp, #32
; CHECK-NEXT: .seh_stackalloc 32
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x19, 16
-; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x30, 24
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, NotCXX14Aggregate
; CHECK-NEXT: add x19, x19, :lo12:NotCXX14Aggregate
@@ -188,10 +186,8 @@ define void @call_copy_notcxx14aggregate() {
; CHECK-NEXT: ldp d0, d1, [sp]
; CHECK-NEXT: stp d0, d1, [x19]
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x30, 24
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x19, 16
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: add sp, sp, #32
; CHECK-NEXT: .seh_stackalloc 32
; CHECK-NEXT: .seh_endepilogue
@@ -211,10 +207,10 @@ define void @call_copy_notpod() {
; CHECK-LABEL: call_copy_notpod:
; CHECK: .seh_proc call_copy_notpod
; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: str x19, [sp, #-16]! // 8-byte Folded Spill
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: str x30, [sp, #8] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x30, 8
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 0
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: adrp x19, NotPod
; CHECK-NEXT: add x19, x19, :lo12:NotPod
@@ -222,10 +218,10 @@ define void @call_copy_notpod() {
; CHECK-NEXT: bl copy_notpod
; CHECK-NEXT: stp x0, x1, [x19]
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #8] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x19, [sp], #16 // 8-byte Folded Reload
-; CHECK-NEXT: .seh_save_reg_x x19, 16
+; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
; CHECK-NEXT: .seh_endepilogue
; CHECK-NEXT: ret
; CHECK-NEXT: .seh_endfunclet
diff --git a/llvm/test/CodeGen/AArch64/win64_vararg2.ll b/llvm/test/CodeGen/AArch64/win64_vararg2.ll
index 548e6ac5fc0fe..24e815eb65d7f 100644
--- a/llvm/test/CodeGen/AArch64/win64_vararg2.ll
+++ b/llvm/test/CodeGen/AArch64/win64_vararg2.ll
@@ -9,10 +9,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; CHECK-NEXT: // %bb.0:
; CHECK-NEXT: sub sp, sp, #80
; CHECK-NEXT: .seh_stackalloc 80
-; CHECK-NEXT: str x19, [sp, #16] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x19, 16
-; CHECK-NEXT: str x30, [sp, #24] // 8-byte Spill
-; CHECK-NEXT: .seh_save_reg x30, 24
+; CHECK-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: .seh_endprologue
; CHECK-NEXT: add x8, sp, #40
; CHECK-NEXT: mov w19, w0
@@ -27,10 +25,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; CHECK-NEXT: cmp w19, w0
; CHECK-NEXT: cset w0, ls
; CHECK-NEXT: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x30, 24
-; CHECK-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; CHECK-NEXT: .seh_save_reg x19, 16
+; CHECK-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 16
; CHECK-NEXT: add sp, sp, #80
; CHECK-NEXT: .seh_stackalloc 80
; CHECK-NEXT: .seh_endepilogue
@@ -43,10 +39,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; GISEL-NEXT: // %bb.0:
; GISEL-NEXT: sub sp, sp, #80
; GISEL-NEXT: .seh_stackalloc 80
-; GISEL-NEXT: str x19, [sp, #16] // 8-byte Spill
-; GISEL-NEXT: .seh_save_reg x19, 16
-; GISEL-NEXT: str x30, [sp, #24] // 8-byte Spill
-; GISEL-NEXT: .seh_save_reg x30, 24
+; GISEL-NEXT: stp x19, x30, [sp, #16] // 16-byte Folded Spill
+; GISEL-NEXT: .seh_save_lrpair x19, 16
; GISEL-NEXT: .seh_endprologue
; GISEL-NEXT: add x8, sp, #40
; GISEL-NEXT: mov w19, w0
@@ -61,10 +55,8 @@ define i1 @va_func(i32 %a, i8 %b, i8 %c, ...) {
; GISEL-NEXT: cmp w19, w0
; GISEL-NEXT: cset w0, ls
; GISEL-NEXT: .seh_startepilogue
-; GISEL-NEXT: ldr x30, [sp, #24] // 8-byte Reload
-; GISEL-NEXT: .seh_save_reg x30, 24
-; GISEL-NEXT: ldr x19, [sp, #16] // 8-byte Reload
-; GISEL-NEXT: .seh_save_reg x19, 16
+; GISEL-NEXT: ldp x19, x30, [sp, #16] // 16-byte Folded Reload
+; GISEL-NEXT: .seh_save_lrpair x19, 16
; GISEL-NEXT: add sp, sp, #80
; GISEL-NEXT: .seh_stackalloc 80
; GISEL-NEXT: .seh_endepilogue
diff --git a/llvm/test/CodeGen/AArch64/wineh-pac.ll b/llvm/test/CodeGen/AArch64/wineh-pac.ll
index 797dd10d7e49d..71dcaa3b82651 100644
--- a/llvm/test/CodeGen/AArch64/wineh-pac.ll
+++ b/llvm/test/CodeGen/AArch64/wineh-pac.ll
@@ -1,63 +1,65 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 6
; RUN: llc < %s -mtriple=aarch64-windows | FileCheck %s
define dso_local i32 @func(ptr %g, i32 %a) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" {
+; CHECK-LABEL: func:
+; CHECK: .seh_proc func
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: hint #27
+; CHECK-NEXT: .seh_pac_sign_lr
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: hint #31
+; CHECK-NEXT: .seh_pac_sign_lr
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
entry:
tail call void %g() #2
ret i32 %a
}
+;; For func2, check that the potentially folded autibsp+ret -> retab
+;; is handled correctly - currently we inhibit producing retab here.
+
define dso_local i32 @func2(ptr %g, i32 %a) "sign-return-address"="non-leaf" "sign-return-address-key"="b_key" "target-features"="+v8.3a" {
+; CHECK-LABEL: func2:
+; CHECK: .seh_proc func2
+; CHECK-NEXT: // %bb.0: // %entry
+; CHECK-NEXT: pacibsp
+; CHECK-NEXT: .seh_pac_sign_lr
+; CHECK-NEXT: sub sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: stp x19, x30, [sp] // 16-byte Folded Spill
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: .seh_endprologue
+; CHECK-NEXT: mov w19, w1
+; CHECK-NEXT: blr x0
+; CHECK-NEXT: mov w0, w19
+; CHECK-NEXT: .seh_startepilogue
+; CHECK-NEXT: ldp x19, x30, [sp] // 16-byte Folded Reload
+; CHECK-NEXT: .seh_save_lrpair x19, 0
+; CHECK-NEXT: add sp, sp, #16
+; CHECK-NEXT: .seh_stackalloc 16
+; CHECK-NEXT: autibsp
+; CHECK-NEXT: .seh_pac_sign_lr
+; CHECK-NEXT: .seh_endepilogue
+; CHECK-NEXT: ret
+; CHECK-NEXT: .seh_endfunclet
+; CHECK-NEXT: .seh_endproc
entry:
tail call void %g() #2
ret i32 %a
}
-
-
-; CHECK-LABEL: func:
-; CHECK-NEXT: .seh_proc func
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: hint #27
-; CHECK-NEXT: .seh_pac_sign_lr
-; CHECK-NEXT: str x19, [sp, #-16]!
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: str x30, [sp, #8]
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: .seh_endprologue
-
-; CHECK: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #8]
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x19, [sp], #16
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: hint #31
-; CHECK-NEXT: .seh_pac_sign_lr
-; CHECK-NEXT: .seh_endepilogue
-; CHECK-NEXT: ret
-; CHECK-NEXT: .seh_endfunclet
-; CHECK-NEXT: .seh_endproc
-
-;; For func2, check that the potentially folded autibsp+ret -> retab
-;; is handled correctly - currently we inhibit producing retab here.
-
-; CHECK-LABEL: func2:
-; CHECK-NEXT: .seh_proc func2
-; CHECK-NEXT: // %bb.0:
-; CHECK-NEXT: pacibsp
-; CHECK-NEXT: .seh_pac_sign_lr
-; CHECK-NEXT: str x19, [sp, #-16]!
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: str x30, [sp, #8]
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: .seh_endprologue
-
-; CHECK: .seh_startepilogue
-; CHECK-NEXT: ldr x30, [sp, #8]
-; CHECK-NEXT: .seh_save_reg x30, 8
-; CHECK-NEXT: ldr x19, [sp], #16
-; CHECK-NEXT: .seh_save_reg_x x19, 16
-; CHECK-NEXT: autibsp
-; CHECK-NEXT: .seh_pac_sign_lr
-; CHECK-NEXT: .seh_endepilogue
-; CHECK-NEXT: ret
-; CHECK-NEXT: .seh_endfunclet
-; CHECK-NEXT: .seh_endproc
diff --git a/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir b/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir
index a78ed0694674a..a91340cb534af 100644
--- a/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir
+++ b/llvm/test/CodeGen/AArch64/wineh-save-lrpair2.mir
@@ -1,13 +1,13 @@
# RUN: llc -o - %s -mtriple=aarch64-windows -start-before=prologepilog \
# RUN: -stop-after=prologepilog | FileCheck %s
-# Check that lr isn't paired with a GPR if it's the first pair, as
-# that can't be described as a SEH opcode if combined with predecrement.
+# Check that when LR is paired with a GPR, we don't combine it into a
+# predecrement that can't be described as a SEH opcode.
-# CHECK: early-clobber $sp = frame-setup STRXpre killed $x19, $sp, -16
-# CHECK-NEXT: frame-setup SEH_SaveReg_X 19, -16
-# CHECK-NEXT: frame-setup STRXui killed $lr, $sp, 1
-# CHECK-NEXT: frame-setup SEH_SaveReg 30, 8
+# CHECK: $sp = frame-setup SUBXri $sp, 16, 0
+# CHECK-NEXT: frame-setup SEH_StackAlloc 16
+# CHECK-NEXT: frame-setup STPXi killed $x19, killed $lr, $sp, 0
+# CHECK-NEXT: frame-setup SEH_SaveRegP 19, 30, 0
# CHECK-NEXT: frame-setup SEH_PrologEnd
--- |
>From eaa11b84358f71c319c640859f2fddad788d0370 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Mon, 1 Dec 2025 15:10:37 -0800
Subject: [PATCH 2/4] Fix test.
---
.../COFF/AArch64/arm64-register-variables.ll | 24 +++++++++----------
1 file changed, 12 insertions(+), 12 deletions(-)
diff --git a/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll b/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
index 950594ed105d6..4b000518ea5eb 100644
--- a/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
+++ b/llvm/test/DebugInfo/COFF/AArch64/arm64-register-variables.ll
@@ -21,18 +21,18 @@
; }
-; OBJ: DefRangeRegisterRelSym {
-; OBJ: Kind: S_DEFRANGE_REGISTER_REL (0x1145)
-; OBJ: BaseRegister: ARM64_SP (0x51)
-; OBJ: HasSpilledUDTMember: No
-; OBJ: OffsetInParent: 0
-; OBJ: BasePointerOffset: 12
-; OBJ: LocalVariableAddrRange {
-; OBJ: OffsetStart: .text+0x14
-; OBJ: ISectStart: 0x0
-; OBJ: Range: 0x30
-; OBJ: }
-; OBJ: }
+; OBJ: DefRangeRegisterRelSym {
+; OBJ-NEXT: Kind: S_DEFRANGE_REGISTER_REL (0x1145)
+; OBJ-NEXT: BaseRegister: ARM64_SP (0x51)
+; OBJ-NEXT: HasSpilledUDTMember: No
+; OBJ-NEXT: OffsetInParent: 0
+; OBJ-NEXT: BasePointerOffset: 12
+; OBJ-NEXT: LocalVariableAddrRange {
+; OBJ-NEXT: OffsetStart: .text+0x10
+; OBJ-NEXT: ISectStart: 0x0
+; OBJ-NEXT: Range: 0x2C
+; OBJ-NEXT: }
+; OBJ-NEXT: }
; ModuleID = 't.cpp'
source_filename = "test/DebugInfo/COFF/register-variables-arm64.ll"
>From fa31780dc087ba83d37ef315339c77e6cd9862b6 Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Fri, 5 Dec 2025 12:01:16 -0800
Subject: [PATCH 3/4] Fix variable.
---
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index c9fca019e4074..dd2bbef8639e7 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -179,7 +179,6 @@ AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
(void)Success;
assert(Success && "unknown load/store opcode");
- const auto *TRI = Subtarget.getRegisterInfo();
// If the first store isn't right where we want SP then we can't fold the
// update in so create a normal arithmetic instruction instead.
//
@@ -194,8 +193,8 @@ AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
(NeedsWinCFI &&
(NewOpc == AArch64::LDPXpost || NewOpc == AArch64::STPXpre) &&
- TRI->getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
- TRI->getEncodingValue(MBBI->getOperand(1).getReg()))) {
+ RegInfo->getEncodingValue(MBBI->getOperand(0).getReg()) + 1 !=
+ RegInfo->getEncodingValue(MBBI->getOperand(1).getReg()))) {
// If we are destroying the frame, make sure we add the increment after the
// last frame operation.
if (FrameFlag == MachineInstr::FrameDestroy) {
>From 3b550b99be876457e15f86e030ad7e29762eea8e Mon Sep 17 00:00:00 2001
From: Eli Friedman <efriedma at qti.qualcomm.com>
Date: Fri, 5 Dec 2025 14:44:43 -0800
Subject: [PATCH 4/4] Fix comments about stack bump
We actually do already have code to split the stack bump... but only when optimizing for size.
---
llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp | 10 +++++-----
1 file changed, 5 insertions(+), 5 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
index dd2bbef8639e7..2903a975de0ed 100644
--- a/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
+++ b/llvm/lib/Target/AArch64/AArch64PrologueEpilogue.cpp
@@ -183,11 +183,7 @@ AArch64PrologueEpilogueCommon::convertCalleeSaveRestoreToSPPrePostIncDec(
// update in so create a normal arithmetic instruction instead.
//
// On Windows, some register pairs involving LR can't be folded because
- // there isn't a corresponding unwind opcode. (Note that packed unwind expects
- // a sequence like "sub sp, sp, #16; stp x19, lr, [sp]; sub sp, sp, #16",
- // but we currently generate "sub sp, sp, #32; stp x19, lr, [sp, #16]". We
- // could handle that here, but it's not clearly profitable; it saves up to
- // 4 words of xdata, but it costs 2 instructions.)
+ // there isn't a corresponding unwind opcode.
if (MBBI->getOperand(MBBI->getNumOperands() - 1).getImm() != 0 ||
CSStackSizeInc < MinOffset * (int64_t)Scale.getFixedValue() ||
CSStackSizeInc > MaxOffset * (int64_t)Scale.getFixedValue() ||
@@ -334,6 +330,10 @@ bool AArch64PrologueEpilogueCommon::shouldCombineCSRLocalStackBump(
// (to force a stp with predecrement) to match the packed unwind format,
// provided that there actually are any callee saved registers to merge the
// decrement with.
+ //
+ // Note that for certain paired saves, like "x19, lr", we can't actually
+ // combine the save, but packed unwind still expects a separate stack bump.
+ //
// This is potentially marginally slower, but allows using the packed
// unwind format for functions that both have a local area and callee saved
// registers. Using the packed unwind format notably reduces the size of
More information about the llvm-commits
mailing list