[llvm] [AArch64] Fix frame-pointer offset with hazard padding (PR #118091)
Benjamin Maxwell via llvm-commits
llvm-commits at lists.llvm.org
Fri Nov 29 06:32:58 PST 2024
https://github.com/MacDue updated https://github.com/llvm/llvm-project/pull/118091
>From 122832ef17b0af3e5cef4f813cb395a6a81e1601 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Thu, 28 Nov 2024 15:51:40 +0000
Subject: [PATCH 1/3] [AArch64] Fix frame-pointer offset with hazard padding
The `-aarch64-stack-hazard-size=<val>` option disables register pairing
(as the hazard padding may mean the offset is too large for STP/LDP).
This broke setting the frame-pointer offset, as the code to find the
frame record looked for a (FP, LR) register pair.
This patch resolves this by looking for FP, LR as two unpaired registers
when hazard padding is enabled.
---
.../Target/AArch64/AArch64FrameLowering.cpp | 16 +-
llvm/test/CodeGen/AArch64/stack-hazard.ll | 157 +++++++++++++++---
2 files changed, 144 insertions(+), 29 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d6673969aa3056..d593738cf32414 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3167,11 +3167,21 @@ static void computeCalleeSaveRegisterPairs(
(RPI.isScalable() && RPI.Offset >= -256 && RPI.Offset <= 255)) &&
"Offset out of bounds for LDP/STP immediate");
+ auto isFrameRecord = [&] {
+ if (RPI.isPaired())
+ return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
+ : RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
+ // -aarch64-stack-hazard-size=<val> disables register pairing, so look
+ // for the frame record as two unpaired registers.
+ if (AFI->hasStackHazardSlotIndex())
+ return i > 0 && RPI.Reg1 == AArch64::FP &&
+ CSI[i - 1].getReg() == AArch64::LR;
+ return false;
+ };
+
// Save the offset to frame record so that the FP register can point to the
// innermost frame record (spilled FP and LR registers).
- if (NeedsFrameRecord &&
- ((!IsWindows && RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP) ||
- (IsWindows && RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR)))
+ if (NeedsFrameRecord && isFrameRecord())
AFI->setCalleeSaveBaseToFrameRecordOffset(Offset);
RegPairs.push_back(RPI);
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard.ll b/llvm/test/CodeGen/AArch64/stack-hazard.ll
index 50a2e41f45756d..a4c2b30566a951 100644
--- a/llvm/test/CodeGen/AArch64/stack-hazard.ll
+++ b/llvm/test/CodeGen/AArch64/stack-hazard.ll
@@ -337,19 +337,18 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK64-LABEL: csr_d8_allocd_framepointer:
; CHECK64: // %bb.0: // %entry
; CHECK64-NEXT: sub sp, sp, #176
-; CHECK64-NEXT: str d8, [sp, #80] // 8-byte Folded Spill
+; CHECK64-NEXT: stp d0, d8, [sp, #72] // 8-byte Folded Spill
; CHECK64-NEXT: stp x29, x30, [sp, #152] // 16-byte Folded Spill
-; CHECK64-NEXT: add x29, sp, #80
-; CHECK64-NEXT: .cfi_def_cfa w29, 96
+; CHECK64-NEXT: add x29, sp, #152
+; CHECK64-NEXT: .cfi_def_cfa w29, 24
; CHECK64-NEXT: .cfi_offset w30, -16
; CHECK64-NEXT: .cfi_offset w29, -24
; CHECK64-NEXT: .cfi_offset b8, -96
; CHECK64-NEXT: //APP
; CHECK64-NEXT: //NO_APP
-; CHECK64-NEXT: stur d0, [x29, #-8]
; CHECK64-NEXT: ldr x29, [sp, #152] // 8-byte Folded Reload
-; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: ldr d8, [sp, #80] // 8-byte Folded Reload
; CHECK64-NEXT: add sp, sp, #176
; CHECK64-NEXT: ret
;
@@ -358,17 +357,17 @@ define i32 @csr_d8_allocd_framepointer(double %d) "aarch64_pstate_sm_compatible"
; CHECK1024-NEXT: sub sp, sp, #1056
; CHECK1024-NEXT: str d8, [sp] // 8-byte Folded Spill
; CHECK1024-NEXT: str x29, [sp, #1032] // 8-byte Folded Spill
-; CHECK1024-NEXT: mov x29, sp
+; CHECK1024-NEXT: add x29, sp, #1032
; CHECK1024-NEXT: str x30, [sp, #1040] // 8-byte Folded Spill
; CHECK1024-NEXT: sub sp, sp, #1040
-; CHECK1024-NEXT: .cfi_def_cfa w29, 1056
+; CHECK1024-NEXT: .cfi_def_cfa w29, 24
; CHECK1024-NEXT: .cfi_offset w30, -16
; CHECK1024-NEXT: .cfi_offset w29, -24
; CHECK1024-NEXT: .cfi_offset b8, -1056
; CHECK1024-NEXT: mov w0, wzr
; CHECK1024-NEXT: //APP
; CHECK1024-NEXT: //NO_APP
-; CHECK1024-NEXT: stur d0, [x29, #-8]
+; CHECK1024-NEXT: str d0, [sp, #1032]
; CHECK1024-NEXT: add sp, sp, #1040
; CHECK1024-NEXT: ldr x30, [sp, #1040] // 8-byte Folded Reload
; CHECK1024-NEXT: ldr x29, [sp, #1032] // 8-byte Folded Reload
@@ -2893,8 +2892,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: stp x29, x30, [sp, #128] // 16-byte Folded Spill
; CHECK64-NEXT: stp x9, x20, [sp, #144] // 16-byte Folded Spill
; CHECK64-NEXT: str x19, [sp, #160] // 8-byte Folded Spill
-; CHECK64-NEXT: mov x29, sp
-; CHECK64-NEXT: .cfi_def_cfa w29, 176
+; CHECK64-NEXT: add x29, sp, #128
+; CHECK64-NEXT: .cfi_def_cfa w29, 48
; CHECK64-NEXT: .cfi_offset w19, -16
; CHECK64-NEXT: .cfi_offset w20, -24
; CHECK64-NEXT: .cfi_offset w30, -40
@@ -2913,11 +2912,11 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: mov w20, w0
; CHECK64-NEXT: msub x9, x8, x8, x9
; CHECK64-NEXT: mov sp, x9
-; CHECK64-NEXT: stur x9, [x29, #-80]
-; CHECK64-NEXT: sub x9, x29, #80
-; CHECK64-NEXT: sturh wzr, [x29, #-70]
-; CHECK64-NEXT: stur wzr, [x29, #-68]
-; CHECK64-NEXT: sturh w8, [x29, #-72]
+; CHECK64-NEXT: stur x9, [x29, #-208]
+; CHECK64-NEXT: sub x9, x29, #208
+; CHECK64-NEXT: sturh wzr, [x29, #-198]
+; CHECK64-NEXT: stur wzr, [x29, #-196]
+; CHECK64-NEXT: sturh w8, [x29, #-200]
; CHECK64-NEXT: msr TPIDR2_EL0, x9
; CHECK64-NEXT: .cfi_offset vg, -32
; CHECK64-NEXT: smstop sm
@@ -2926,14 +2925,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK64-NEXT: .cfi_restore vg
; CHECK64-NEXT: smstart za
; CHECK64-NEXT: mrs x8, TPIDR2_EL0
-; CHECK64-NEXT: sub x0, x29, #80
+; CHECK64-NEXT: sub x0, x29, #208
; CHECK64-NEXT: cbnz x8, .LBB33_2
; CHECK64-NEXT: // %bb.1: // %entry
; CHECK64-NEXT: bl __arm_tpidr2_restore
; CHECK64-NEXT: .LBB33_2: // %entry
; CHECK64-NEXT: mov w0, w20
; CHECK64-NEXT: msr TPIDR2_EL0, xzr
-; CHECK64-NEXT: mov sp, x29
+; CHECK64-NEXT: sub sp, x29, #128
; CHECK64-NEXT: .cfi_def_cfa wsp, 176
; CHECK64-NEXT: ldp x20, x19, [sp, #152] // 16-byte Folded Reload
; CHECK64-NEXT: ldr d14, [sp, #8] // 8-byte Folded Reload
@@ -2972,8 +2971,8 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: str x28, [sp, #1112] // 8-byte Folded Spill
; CHECK1024-NEXT: str x20, [sp, #1120] // 8-byte Folded Spill
; CHECK1024-NEXT: str x19, [sp, #1128] // 8-byte Folded Spill
-; CHECK1024-NEXT: mov x29, sp
-; CHECK1024-NEXT: .cfi_def_cfa w29, 1136
+; CHECK1024-NEXT: add x29, sp, #1088
+; CHECK1024-NEXT: .cfi_def_cfa w29, 48
; CHECK1024-NEXT: .cfi_offset w19, -8
; CHECK1024-NEXT: .cfi_offset w20, -16
; CHECK1024-NEXT: .cfi_offset w28, -24
@@ -2993,14 +2992,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: mov w20, w0
; CHECK1024-NEXT: msub x9, x8, x8, x9
; CHECK1024-NEXT: mov sp, x9
-; CHECK1024-NEXT: sub x10, x29, #784
+; CHECK1024-NEXT: sub x10, x29, #1872
; CHECK1024-NEXT: stur x9, [x10, #-256]
-; CHECK1024-NEXT: sub x9, x29, #774
-; CHECK1024-NEXT: sub x10, x29, #772
+; CHECK1024-NEXT: sub x9, x29, #1862
+; CHECK1024-NEXT: sub x10, x29, #1860
; CHECK1024-NEXT: sturh wzr, [x9, #-256]
-; CHECK1024-NEXT: sub x9, x29, #1040
+; CHECK1024-NEXT: sub x9, x29, #2128
; CHECK1024-NEXT: stur wzr, [x10, #-256]
-; CHECK1024-NEXT: sub x10, x29, #776
+; CHECK1024-NEXT: sub x10, x29, #1864
; CHECK1024-NEXT: sturh w8, [x10, #-256]
; CHECK1024-NEXT: msr TPIDR2_EL0, x9
; CHECK1024-NEXT: .cfi_offset vg, -32
@@ -3010,14 +3009,14 @@ define i32 @vastate(i32 %x) "aarch64_inout_za" "aarch64_pstate_sm_enabled" "targ
; CHECK1024-NEXT: .cfi_restore vg
; CHECK1024-NEXT: smstart za
; CHECK1024-NEXT: mrs x8, TPIDR2_EL0
-; CHECK1024-NEXT: sub x0, x29, #1040
+; CHECK1024-NEXT: sub x0, x29, #2128
; CHECK1024-NEXT: cbnz x8, .LBB33_2
; CHECK1024-NEXT: // %bb.1: // %entry
; CHECK1024-NEXT: bl __arm_tpidr2_restore
; CHECK1024-NEXT: .LBB33_2: // %entry
; CHECK1024-NEXT: mov w0, w20
; CHECK1024-NEXT: msr TPIDR2_EL0, xzr
-; CHECK1024-NEXT: mov sp, x29
+; CHECK1024-NEXT: sub sp, x29, #1088
; CHECK1024-NEXT: .cfi_def_cfa wsp, 1136
; CHECK1024-NEXT: ldp d9, d8, [sp, #48] // 16-byte Folded Reload
; CHECK1024-NEXT: ldr x19, [sp, #1128] // 8-byte Folded Reload
@@ -3049,3 +3048,109 @@ entry:
ret i32 %x
}
declare void @other()
+
+declare void @bar(ptr noundef) "aarch64_pstate_sm_compatible"
+
+define i32 @sve_stack_object_and_vla(double %d, i64 %sz) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK0-LABEL: sve_stack_object_and_vla:
+; CHECK0: // %bb.0: // %entry
+; CHECK0-NEXT: stp x29, x30, [sp, #-32]! // 16-byte Folded Spill
+; CHECK0-NEXT: stp x28, x19, [sp, #16] // 16-byte Folded Spill
+; CHECK0-NEXT: mov x29, sp
+; CHECK0-NEXT: addvl sp, sp, #-1
+; CHECK0-NEXT: mov x19, sp
+; CHECK0-NEXT: .cfi_def_cfa w29, 32
+; CHECK0-NEXT: .cfi_offset w19, -8
+; CHECK0-NEXT: .cfi_offset w28, -16
+; CHECK0-NEXT: .cfi_offset w30, -24
+; CHECK0-NEXT: .cfi_offset w29, -32
+; CHECK0-NEXT: lsl x9, x0, #2
+; CHECK0-NEXT: mov x8, sp
+; CHECK0-NEXT: add x9, x9, #15
+; CHECK0-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK0-NEXT: sub x0, x8, x9
+; CHECK0-NEXT: mov sp, x0
+; CHECK0-NEXT: mov z0.s, #0 // =0x0
+; CHECK0-NEXT: ptrue p0.s
+; CHECK0-NEXT: st1w { z0.s }, p0, [x29, #-1, mul vl]
+; CHECK0-NEXT: bl bar
+; CHECK0-NEXT: mov w0, wzr
+; CHECK0-NEXT: mov sp, x29
+; CHECK0-NEXT: ldp x28, x19, [sp, #16] // 16-byte Folded Reload
+; CHECK0-NEXT: ldp x29, x30, [sp], #32 // 16-byte Folded Reload
+; CHECK0-NEXT: ret
+;
+; CHECK64-LABEL: sve_stack_object_and_vla:
+; CHECK64: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #96
+; CHECK64-NEXT: stp x29, x30, [sp, #64] // 16-byte Folded Spill
+; CHECK64-NEXT: add x29, sp, #64
+; CHECK64-NEXT: stp x28, x19, [sp, #80] // 16-byte Folded Spill
+; CHECK64-NEXT: sub sp, sp, #64
+; CHECK64-NEXT: addvl sp, sp, #-1
+; CHECK64-NEXT: mov x19, sp
+; CHECK64-NEXT: .cfi_def_cfa w29, 32
+; CHECK64-NEXT: .cfi_offset w19, -8
+; CHECK64-NEXT: .cfi_offset w28, -16
+; CHECK64-NEXT: .cfi_offset w30, -24
+; CHECK64-NEXT: .cfi_offset w29, -32
+; CHECK64-NEXT: lsl x9, x0, #2
+; CHECK64-NEXT: mov x8, sp
+; CHECK64-NEXT: add x9, x9, #15
+; CHECK64-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK64-NEXT: sub x0, x8, x9
+; CHECK64-NEXT: mov sp, x0
+; CHECK64-NEXT: mov z0.s, #0 // =0x0
+; CHECK64-NEXT: ptrue p0.s
+; CHECK64-NEXT: sub x8, x29, #64
+; CHECK64-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+; CHECK64-NEXT: bl bar
+; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: sub sp, x29, #64
+; CHECK64-NEXT: ldp x28, x19, [sp, #80] // 16-byte Folded Reload
+; CHECK64-NEXT: ldp x29, x30, [sp, #64] // 16-byte Folded Reload
+; CHECK64-NEXT: add sp, sp, #96
+; CHECK64-NEXT: ret
+;
+; CHECK1024-LABEL: sve_stack_object_and_vla:
+; CHECK1024: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1056
+; CHECK1024-NEXT: str x29, [sp, #1024] // 8-byte Folded Spill
+; CHECK1024-NEXT: add x29, sp, #1024
+; CHECK1024-NEXT: str x30, [sp, #1032] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x28, [sp, #1040] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x19, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: sub sp, sp, #1024
+; CHECK1024-NEXT: addvl sp, sp, #-1
+; CHECK1024-NEXT: mov x19, sp
+; CHECK1024-NEXT: .cfi_def_cfa w29, 32
+; CHECK1024-NEXT: .cfi_offset w19, -8
+; CHECK1024-NEXT: .cfi_offset w28, -16
+; CHECK1024-NEXT: .cfi_offset w30, -24
+; CHECK1024-NEXT: .cfi_offset w29, -32
+; CHECK1024-NEXT: lsl x9, x0, #2
+; CHECK1024-NEXT: mov x8, sp
+; CHECK1024-NEXT: add x9, x9, #15
+; CHECK1024-NEXT: and x9, x9, #0xfffffffffffffff0
+; CHECK1024-NEXT: sub x0, x8, x9
+; CHECK1024-NEXT: mov sp, x0
+; CHECK1024-NEXT: mov z0.s, #0 // =0x0
+; CHECK1024-NEXT: ptrue p0.s
+; CHECK1024-NEXT: sub x8, x29, #1024
+; CHECK1024-NEXT: st1w { z0.s }, p0, [x8, #-1, mul vl]
+; CHECK1024-NEXT: bl bar
+; CHECK1024-NEXT: mov w0, wzr
+; CHECK1024-NEXT: sub sp, x29, #1024
+; CHECK1024-NEXT: ldr x19, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x28, [sp, #1040] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x30, [sp, #1032] // 8-byte Folded Reload
+; CHECK1024-NEXT: ldr x29, [sp, #1024] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1056
+; CHECK1024-NEXT: ret
+entry:
+ %a = alloca <vscale x 4 x i32>
+ %b = alloca i32, i64 %sz, align 4
+ store <vscale x 4 x i32> zeroinitializer, ptr %a
+ call void @bar(ptr noundef nonnull %b)
+ ret i32 0
+}
>From 59fca0adbc71b986524dd2c6891adff2a808633e Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 29 Nov 2024 14:05:01 +0000
Subject: [PATCH 2/3] Add test with Windows triple
---
.../CodeGen/AArch64/stack-hazard-windows.ll | 118 ++++++++++++++++++
1 file changed, 118 insertions(+)
create mode 100644 llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
diff --git a/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
new file mode 100644
index 00000000000000..2a034fe5e5290c
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-hazard-windows.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=0 | FileCheck %s --check-prefixes=CHECK0
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=64 | FileCheck %s --check-prefixes=CHECK64
+; RUN: llc < %s -mtriple=aarch64-windows-pc-msvc -aarch64-stack-hazard-size=1024 | FileCheck %s --check-prefixes=CHECK1024
+
+define i32 @fpr_csr_stackobj(double %x) "aarch64_pstate_sm_compatible" "frame-pointer"="all" {
+; CHECK0-LABEL: fpr_csr_stackobj:
+; CHECK0: .seh_proc fpr_csr_stackobj
+; CHECK0-NEXT: // %bb.0: // %entry
+; CHECK0-NEXT: str x23, [sp, #-48]! // 8-byte Folded Spill
+; CHECK0-NEXT: .seh_save_reg_x x23, 48
+; CHECK0-NEXT: stp x29, x30, [sp, #8] // 16-byte Folded Spill
+; CHECK0-NEXT: .seh_save_fplr 8
+; CHECK0-NEXT: stp d9, d10, [sp, #24] // 16-byte Folded Spill
+; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: add x29, sp, #8
+; CHECK0-NEXT: .seh_add_fp 8
+; CHECK0-NEXT: .seh_endprologue
+; CHECK0-NEXT: mov w0, wzr
+; CHECK0-NEXT: //APP
+; CHECK0-NEXT: //NO_APP
+; CHECK0-NEXT: str d0, [x29, #32]
+; CHECK0-NEXT: .seh_startepilogue
+; CHECK0-NEXT: ldp d9, d10, [sp, #24] // 16-byte Folded Reload
+; CHECK0-NEXT: .seh_save_fregp d9, 24
+; CHECK0-NEXT: ldp x29, x30, [sp, #8] // 16-byte Folded Reload
+; CHECK0-NEXT: .seh_save_fplr 8
+; CHECK0-NEXT: ldr x23, [sp], #48 // 8-byte Folded Reload
+; CHECK0-NEXT: .seh_save_reg_x x23, 48
+; CHECK0-NEXT: .seh_endepilogue
+; CHECK0-NEXT: ret
+; CHECK0-NEXT: .seh_endfunclet
+; CHECK0-NEXT: .seh_endproc
+;
+; CHECK64-LABEL: fpr_csr_stackobj:
+; CHECK64: .seh_proc fpr_csr_stackobj
+; CHECK64-NEXT: // %bb.0: // %entry
+; CHECK64-NEXT: sub sp, sp, #192
+; CHECK64-NEXT: .seh_stackalloc 192
+; CHECK64-NEXT: str x23, [sp, #80] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x23, 80
+; CHECK64-NEXT: str x29, [sp, #88] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x29, 88
+; CHECK64-NEXT: str x30, [sp, #96] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_reg x30, 96
+; CHECK64-NEXT: str d9, [sp, #168] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_freg d9, 168
+; CHECK64-NEXT: str d10, [sp, #176] // 8-byte Folded Spill
+; CHECK64-NEXT: .seh_save_freg d10, 176
+; CHECK64-NEXT: add x29, sp, #88
+; CHECK64-NEXT: .seh_add_fp 88
+; CHECK64-NEXT: .seh_endprologue
+; CHECK64-NEXT: mov w0, wzr
+; CHECK64-NEXT: //APP
+; CHECK64-NEXT: //NO_APP
+; CHECK64-NEXT: stur d0, [x29, #-16]
+; CHECK64-NEXT: .seh_startepilogue
+; CHECK64-NEXT: ldr d10, [sp, #176] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_freg d10, 176
+; CHECK64-NEXT: ldr d9, [sp, #168] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_freg d9, 168
+; CHECK64-NEXT: ldr x30, [sp, #96] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x30, 96
+; CHECK64-NEXT: ldr x29, [sp, #88] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x29, 88
+; CHECK64-NEXT: ldr x23, [sp, #80] // 8-byte Folded Reload
+; CHECK64-NEXT: .seh_save_reg x23, 80
+; CHECK64-NEXT: add sp, sp, #192
+; CHECK64-NEXT: .seh_stackalloc 192
+; CHECK64-NEXT: .seh_endepilogue
+; CHECK64-NEXT: ret
+; CHECK64-NEXT: .seh_endfunclet
+; CHECK64-NEXT: .seh_endproc
+;
+; CHECK1024-LABEL: fpr_csr_stackobj:
+; CHECK1024: .seh_proc fpr_csr_stackobj
+; CHECK1024-NEXT: // %bb.0: // %entry
+; CHECK1024-NEXT: sub sp, sp, #1072
+; CHECK1024-NEXT: str x23, [sp] // 8-byte Folded Spill
+; CHECK1024-NEXT: str x29, [sp, #8] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_reg x29, 8
+; CHECK1024-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_reg x30, 16
+; CHECK1024-NEXT: str d9, [sp, #1048] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_freg d9, 1048
+; CHECK1024-NEXT: str d10, [sp, #1056] // 8-byte Folded Spill
+; CHECK1024-NEXT: .seh_save_freg d10, 1056
+; CHECK1024-NEXT: add x29, sp, #8
+; CHECK1024-NEXT: .seh_add_fp 8
+; CHECK1024-NEXT: .seh_endprologue
+; CHECK1024-NEXT: sub sp, sp, #1040
+; CHECK1024-NEXT: mov w0, wzr
+; CHECK1024-NEXT: //APP
+; CHECK1024-NEXT: //NO_APP
+; CHECK1024-NEXT: stur d0, [x29, #-16]
+; CHECK1024-NEXT: .seh_startepilogue
+; CHECK1024-NEXT: add sp, sp, #1040
+; CHECK1024-NEXT: .seh_stackalloc 1040
+; CHECK1024-NEXT: ldr d10, [sp, #1056] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_freg d10, 1056
+; CHECK1024-NEXT: ldr d9, [sp, #1048] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_freg d9, 1048
+; CHECK1024-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_reg x30, 16
+; CHECK1024-NEXT: ldr x29, [sp, #8] // 8-byte Folded Reload
+; CHECK1024-NEXT: .seh_save_reg x29, 8
+; CHECK1024-NEXT: ldr x23, [sp] // 8-byte Folded Reload
+; CHECK1024-NEXT: add sp, sp, #1072
+; CHECK1024-NEXT: .seh_endepilogue
+; CHECK1024-NEXT: ret
+; CHECK1024-NEXT: .seh_endfunclet
+; CHECK1024-NEXT: .seh_endproc
+entry:
+ %a = alloca double
+ tail call void asm sideeffect "", "~{x23},~{d9},~{d10}"()
+ store double %x, ptr %a
+ ret i32 0
+}
>From 1fab93b84538a70ad7f31654cab6aa19a280f1e5 Mon Sep 17 00:00:00 2001
From: Benjamin Maxwell <benjamin.maxwell at arm.com>
Date: Fri, 29 Nov 2024 14:31:58 +0000
Subject: [PATCH 3/3] Fixups
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 15 +++++++++------
1 file changed, 9 insertions(+), 6 deletions(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index d593738cf32414..2d5ae1c1d14dc8 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -3171,12 +3171,15 @@ static void computeCalleeSaveRegisterPairs(
if (RPI.isPaired())
return IsWindows ? RPI.Reg1 == AArch64::FP && RPI.Reg2 == AArch64::LR
: RPI.Reg1 == AArch64::LR && RPI.Reg2 == AArch64::FP;
- // -aarch64-stack-hazard-size=<val> disables register pairing, so look
- // for the frame record as two unpaired registers.
- if (AFI->hasStackHazardSlotIndex())
- return i > 0 && RPI.Reg1 == AArch64::FP &&
- CSI[i - 1].getReg() == AArch64::LR;
- return false;
+ // Otherwise, look for the frame record as two unpaired registers. This is
+ // needed for -aarch64-stack-hazard-size=<val>, which disables register
+ // pairing (as the padding may be too large for the LDP/STP offset). Note:
+ // On Windows, this check works out as current reg == FP, next reg == LR,
+ // and on other platforms current reg == FP, previous reg == LR. This
+ // works out as the correct pre-increment or post-increment offsets
+ // respectively.
+ return i > 0 && RPI.Reg1 == AArch64::FP &&
+ CSI[i - 1].getReg() == AArch64::LR;
};
// Save the offset to frame record so that the FP register can point to the
More information about the llvm-commits
mailing list