[llvm] [AArch64] Fix a corner case with large stack allocation (PR #122038)
via llvm-commits
llvm-commits at lists.llvm.org
Sun Jan 12 20:38:24 PST 2025
https://github.com/ssijaric-nv updated https://github.com/llvm/llvm-project/pull/122038
>From 386066bd563eac41ccfb6c85336c07a19e90bf66 Mon Sep 17 00:00:00 2001
From: Sanjin Sijaric <ssijaric at nvidia.com>
Date: Thu, 19 Dec 2024 12:34:12 -0800
Subject: [PATCH 1/2] [AArch64] Fix a corner case with large stack allocation
In the unlikely case where the stack size is greater than 4GB, we may run into the
situation where the local stack size and the callee saved registers stack size
get combined incorrectly when restoring the callee saved registers. This happens
because the stack size in shouldCombineCSRLocalStackBumpInEpilogue is represented
as an 'unsigned', but is passed in as an 'int64_t'. We end up with something like
$fp, $lr = frame-destroy LDPXi $sp, 536870912
This change just makes 'shouldCombineCSRLocalStackBumpInEpilogue' match
'shouldCombineCSRLocalStackBump' where 'StackBumpBytes' is an 'uint64_t'
---
.../Target/AArch64/AArch64FrameLowering.cpp | 5 +-
.../lib/Target/AArch64/AArch64FrameLowering.h | 2 +-
.../AArch64/aarch64-large-stack-spbump.mir | 55 +++++++++++++++++++
3 files changed, 58 insertions(+), 4 deletions(-)
create mode 100644 llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 64dfb1e39485f8..f10841b9fdcc77 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
}
bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
- MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+ MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
return false;
-
if (MBB.empty())
return true;
@@ -2363,7 +2362,7 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
-
+ //
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 20445e63bcb13e..8f84702f4d2baf 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
int &MinCSFrameIndex,
int &MaxCSFrameIndex) const;
bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
- unsigned StackBumpBytes) const;
+ uint64_t StackBumpBytes) const;
void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI) const;
void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,
diff --git a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
new file mode 100644
index 00000000000000..4843dfe1d5cffa
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
@@ -0,0 +1,55 @@
+# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+ ; ModuleID = 'bug.c'
+ source_filename = "bug.c"
+ target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+ target triple = "aarch64-unknown-linux-gnu"
+
+ ; Function Attrs: mustprogress noinline optnone uwtable
+ define dso_local noundef i32 @_Z4funcv() #0 {
+ entry:
+ %array = alloca [1073741824 x i32], align 4
+ %arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+ store i32 7, ptr %arrayidx, align 4
+ call void @_Z5func2v()
+ %arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+ %0 = load i32, ptr %arrayidx1, align 4
+ ret i32 %0
+ }
+
+ declare void @_Z5func2v() #1
+
+ attributes #0 = { mustprogress noinline optnone uwtable "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
+ attributes #1 = { "frame-pointer"="non-leaf" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="generic" "target-features"="+fp-armv8,+neon,+outline-atomics,+v8a,-fmv" }
+...
+---
+name: _Z4funcv
+alignment: 4
+legalized: true
+regBankSelected: true
+selected: true
+tracksRegLiveness: true
+noPhis: true
+isSSA: false
+noVRegs: true
+hasFakeUses: false
+frameInfo:
+ maxAlignment: 4
+ adjustsStack: true
+ hasCalls: true
+ maxCallFrameSize: 0
+stack:
+ - { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
+machineFunctionInfo: {}
+body: |
+ bb.1.entry:
+ renamable $w8 = MOVi32imm 7
+ STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
+ ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+ BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
+ ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+ renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
+ ; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp
+ RET_ReallyLR implicit killed $w0
+
+...
>From 80cc1e5636d3ef6e44e587728a4011a5d5e6947b Mon Sep 17 00:00:00 2001
From: Sanjin Sijaric <ssijaric at nvidia.com>
Date: Sun, 12 Jan 2025 11:13:45 -0800
Subject: [PATCH 2/2] Remove the errant comment line
---
llvm/lib/Target/AArch64/AArch64FrameLowering.cpp | 1 -
1 file changed, 1 deletion(-)
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index f10841b9fdcc77..f5c33498c9d499 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -2362,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
}
bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
// Assume we can't combine the last pop with the sp restore.
- //
bool CombineAfterCSRBump = false;
if (!CombineSPBump && PrologueSaveSize != 0) {
MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());
More information about the llvm-commits
mailing list