[llvm] [AArch64] Fix a corner case with large stack allocation (PR #122038)
via llvm-commits
llvm-commits at lists.llvm.org
Tue Jan 14 20:15:45 PST 2025
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-backend-aarch64
Author: None (ssijaric-nv)
<details>
<summary>Changes</summary>
In the unlikely case where the stack size is greater than 4GB, we may run into the
situation where the local stack size and the callee saved registers stack size
get combined incorrectly when restoring the callee saved registers. This happens
because the stack size in shouldCombineCSRLocalStackBumpInEpilogue is represented
as an 'unsigned', but is passed in as an 'int64_t'. We end up with something like
$fp, $lr = frame-destroy LDPXi $sp, 536870912
This change just makes 'shouldCombineCSRLocalStackBumpInEpilogue' match
'shouldCombineCSRLocalStackBump' where 'StackBumpBytes' is an 'uint64_t'
---
Full diff: https://github.com/llvm/llvm-project/pull/122038.diff
3 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+1-3)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+1-1)
- (added) llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir (+55)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 64dfb1e39485f8..f5c33498c9d499 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
- MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+ MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
return false;
-
if (MBB.empty())
return true;
@@ -2363,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
-
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 20445e63bcb13e..8f84702f4d2baf 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
- unsigned StackBumpBytes) const;
+ uint64_t StackBumpBytes) const;
void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
new file mode 100644
index 00000000000000..4843dfe1d5cffa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
@@ -0,0 +1,55 @@
+# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+ ; ModuleID = 'bug.c'
+ source_filename = "bug.c"
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ ; Function Attrs: mustprogress noinline optnone uwtable
+ define dso_local noundef i32 @_Z4funcv() #0 {
+ entry:
+ %array = alloca [1073741824 x i32], align 4
+ %arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+ store i32 7, ptr %arrayidx, align 4
+ call void @_Z5func2v()
+ %arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+ %0 = load i32, ptr %arrayidx1, align 4
+ ret i32 %0
+ }
+
+ declare void @_Z5func2v() #1
+
+ attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
+ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
+...
+---
+name: _Z4funcv
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+noPhis: true
+isSSA: false
+noVRegs: true
+hasFakeUses: false
+frameInfo:
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ renamable $w8 = MOVi32imm 7
+ STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
+ ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
+ ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
+ ; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp
+ RET_ReallyLR implicit killed $w0
+
+...
``````````
</details>
https://github.com/llvm/llvm-project/pull/122038
More information about the llvm-commits
mailing list