[llvm] [RegAllocFast] Don't align stack slots if the stack can't be realigned (PR #153682)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 14 13:58:44 PDT 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-arm
Author: Craig Topper (topperc)
<details>
<summary>Changes</summary>
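This is the fast register allocator (RegAllocFast) equivalent of commit 773771ba382b1fbcf6acccc0046bfe731541a599: when a spill slot's required alignment exceeds the current stack alignment and the stack cannot be realigned, the slot's alignment is clamped to the stack alignment.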
---
Full diff: https://github.com/llvm/llvm-project/pull/153682.diff
4 Files Affected:
- (modified) llvm/lib/CodeGen/RegAllocFast.cpp (+7)
- (modified) llvm/test/CodeGen/ARM/legalize-bitcast.ll (+2-2)
- (modified) llvm/test/CodeGen/Thumb2/mve-vector-spill.ll (+49-49)
- (modified) llvm/test/CodeGen/X86/avx-load-store.ll (+2-2)
``````````diff
diff --git a/llvm/lib/CodeGen/RegAllocFast.cpp b/llvm/lib/CodeGen/RegAllocFast.cpp
index 66a206c9b2344..49764a3836afc 100644
--- a/llvm/lib/CodeGen/RegAllocFast.cpp
+++ b/llvm/lib/CodeGen/RegAllocFast.cpp
@@ -474,6 +474,13 @@ int RegAllocFastImpl::getStackSpaceFor(Register VirtReg) {
const TargetRegisterClass &RC = *MRI->getRegClass(VirtReg);
unsigned Size = TRI->getSpillSize(RC);
Align Alignment = TRI->getSpillAlign(RC);
+
+ const MachineFunction &MF = MRI->getMF();
+ auto &ST = MF.getSubtarget();
+ Align CurrentAlign = ST.getFrameLowering()->getStackAlign();
+ if (Alignment > CurrentAlign && !ST.getRegisterInfo()->canRealignStack(MF))
+ Alignment = CurrentAlign;
+
int FrameIdx = MFI->CreateSpillStackObject(Size, Alignment);
// Assign the slot.
diff --git a/llvm/test/CodeGen/ARM/legalize-bitcast.ll b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
index 5b989a099c815..92b77f57915a4 100644
--- a/llvm/test/CodeGen/ARM/legalize-bitcast.ll
+++ b/llvm/test/CodeGen/ARM/legalize-bitcast.ll
@@ -7,7 +7,7 @@ define i32 @vec_to_int() {
; CHECK-LABEL: vec_to_int:
; CHECK: @ %bb.0: @ %bb.0
; CHECK-NEXT: push {r4}
-; CHECK-NEXT: sub sp, sp, #28
+; CHECK-NEXT: sub sp, sp, #16
; CHECK-NEXT: movw r0, :lower16:vec6_p
; CHECK-NEXT: movt r0, :upper16:vec6_p
; CHECK-NEXT: vld1.8 {d16}, [r0]!
@@ -25,7 +25,7 @@ define i32 @vec_to_int() {
; CHECK-NEXT: vrev32.16 q8, q8
; CHECK-NEXT: vmov.f64 d16, d17
; CHECK-NEXT: vmov.32 r0, d16[0]
-; CHECK-NEXT: add sp, sp, #28
+; CHECK-NEXT: add sp, sp, #16
; CHECK-NEXT: pop {r4}
; CHECK-NEXT: bx lr
bb.0:
diff --git a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
index 0af77c4fc8283..dab0f85e39534 100644
--- a/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
+++ b/llvm/test/CodeGen/Thumb2/mve-vector-spill.ll
@@ -8,15 +8,15 @@ define arm_aapcs_vfpcc void @spill_vector_i32(<4 x i32> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -29,15 +29,15 @@ define arm_aapcs_vfpcc void @spill_vector_i16(<8 x i16> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrh.16 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -50,15 +50,15 @@ define arm_aapcs_vfpcc void @spill_vector_i8(<16 x i8> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrb.8 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -71,15 +71,15 @@ define arm_aapcs_vfpcc void @spill_vector_i64(<2 x i64> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -92,15 +92,15 @@ define arm_aapcs_vfpcc void @spill_vector_f32(<4 x float> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -113,15 +113,15 @@ define arm_aapcs_vfpcc void @spill_vector_f16(<8 x half> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
@@ -134,15 +134,15 @@ define arm_aapcs_vfpcc void @spill_vector_f64(<2 x double> %v, ptr %p) {
; CHECK: @ %bb.0: @ %entry
; CHECK-NEXT: .save {r7, lr}
; CHECK-NEXT: push {r7, lr}
-; CHECK-NEXT: .pad #40
-; CHECK-NEXT: sub sp, #40
-; CHECK-NEXT: str r0, [sp, #12] @ 4-byte Spill
-; CHECK-NEXT: vstrw.32 q0, [sp, #16] @ 16-byte Spill
+; CHECK-NEXT: .pad #24
+; CHECK-NEXT: sub sp, #24
+; CHECK-NEXT: str r0, [sp, #4] @ 4-byte Spill
+; CHECK-NEXT: vstrw.32 q0, [sp, #8] @ 16-byte Spill
; CHECK-NEXT: bl external_function
-; CHECK-NEXT: ldr r0, [sp, #12] @ 4-byte Reload
-; CHECK-NEXT: vldrw.u32 q0, [sp, #16] @ 16-byte Reload
+; CHECK-NEXT: ldr r0, [sp, #4] @ 4-byte Reload
+; CHECK-NEXT: vldrw.u32 q0, [sp, #8] @ 16-byte Reload
; CHECK-NEXT: vstrw.32 q0, [r0]
-; CHECK-NEXT: add sp, #40
+; CHECK-NEXT: add sp, #24
; CHECK-NEXT: pop {r7, pc}
entry:
call void @external_function()
diff --git a/llvm/test/CodeGen/X86/avx-load-store.ll b/llvm/test/CodeGen/X86/avx-load-store.ll
index 3f856d33145d8..cc2cedb6b8b20 100644
--- a/llvm/test/CodeGen/X86/avx-load-store.ll
+++ b/llvm/test/CodeGen/X86/avx-load-store.ll
@@ -34,7 +34,7 @@ define void @test_256_load(ptr nocapture %d, ptr nocapture %f, ptr nocapture %i)
;
; CHECK_O0-LABEL: test_256_load:
; CHECK_O0: # %bb.0: # %entry
-; CHECK_O0-NEXT: subq $184, %rsp
+; CHECK_O0-NEXT: subq $136, %rsp
; CHECK_O0-NEXT: movq %rdi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK_O0-NEXT: movq %rsi, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; CHECK_O0-NEXT: movq %rdx, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
@@ -54,7 +54,7 @@ define void @test_256_load(ptr nocapture %d, ptr nocapture %f, ptr nocapture %i)
; CHECK_O0-NEXT: vmovapd %ymm2, (%rdi)
; CHECK_O0-NEXT: vmovaps %ymm1, (%rsi)
; CHECK_O0-NEXT: vmovdqa %ymm0, (%rdx)
-; CHECK_O0-NEXT: addq $184, %rsp
+; CHECK_O0-NEXT: addq $136, %rsp
; CHECK_O0-NEXT: vzeroupper
; CHECK_O0-NEXT: retq
entry:
``````````
</details>
https://github.com/llvm/llvm-project/pull/153682
More information about the llvm-commits mailing list