[llvm] 6789442 - [AArch64] Fix a corner case with large stack allocation (#122038)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jan 18 22:09:29 PST 2025


Author: ssijaric-nv
Date: 2025-01-18T22:09:25-08:00
New Revision: 6789442eb2e1ed92b2157e96e9e9eafed5c53f17

URL: https://github.com/llvm/llvm-project/commit/6789442eb2e1ed92b2157e96e9e9eafed5c53f17
DIFF: https://github.com/llvm/llvm-project/commit/6789442eb2e1ed92b2157e96e9e9eafed5c53f17.diff

LOG: [AArch64] Fix a corner case with large stack allocation (#122038)

In the unlikely case where the stack size is greater than 4GB, we may run into
a situation where the local stack size and the callee-saved registers' stack
size get combined incorrectly when restoring the callee-saved registers. This
happens because the stack size in shouldCombineCSRLocalStackBumpInEpilogue
is represented as an 'unsigned', but is passed in as an 'int64_t', so the value
is truncated to 32 bits before it is checked. We end up with something like:

$fp, $lr = frame-destroy LDPXi $sp, 536870912

This change just makes 'shouldCombineCSRLocalStackBumpInEpilogue' match
'shouldCombineCSRLocalStackBump', where 'StackBumpBytes' is a 'uint64_t'.

Added: 
    llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir

Modified: 
    llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
    llvm/lib/Target/AArch64/AArch64FrameLowering.h

Removed: 
    


################################################################################
diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index 1582d1999ca1dc..eabe64361938b4 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -1195,10 +1195,9 @@ bool AArch64FrameLowering::shouldCombineCSRLocalStackBump(
 }
 
 bool AArch64FrameLowering::shouldCombineCSRLocalStackBumpInEpilogue(
-    MachineBasicBlock &MBB, unsigned StackBumpBytes) const {
+    MachineBasicBlock &MBB, uint64_t StackBumpBytes) const {
   if (!shouldCombineCSRLocalStackBump(*MBB.getParent(), StackBumpBytes))
     return false;
-
   if (MBB.empty())
     return true;
 
@@ -2363,7 +2362,6 @@ void AArch64FrameLowering::emitEpilogue(MachineFunction &MF,
   }
   bool CombineSPBump = shouldCombineCSRLocalStackBumpInEpilogue(MBB, NumBytes);
   // Assume we can't combine the last pop with the sp restore.
-
   bool CombineAfterCSRBump = false;
   if (!CombineSPBump && PrologueSaveSize != 0) {
     MachineBasicBlock::iterator Pop = std::prev(MBB.getFirstTerminator());

diff  --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index 20445e63bcb13e..8f84702f4d2baf 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -146,7 +146,7 @@ class AArch64FrameLowering : public TargetFrameLowering {
                                       int &MinCSFrameIndex,
                                       int &MaxCSFrameIndex) const;
   bool shouldCombineCSRLocalStackBumpInEpilogue(MachineBasicBlock &MBB,
-                                                unsigned StackBumpBytes) const;
+                                                uint64_t StackBumpBytes) const;
   void emitCalleeSavedGPRLocations(MachineBasicBlock &MBB,
                                    MachineBasicBlock::iterator MBBI) const;
   void emitCalleeSavedSVELocations(MachineBasicBlock &MBB,

diff  --git a/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
new file mode 100644
index 00000000000000..f920813f2b42d5
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/aarch64-large-stack-spbump.mir
@@ -0,0 +1,46 @@
+# RUN: llc -mtriple=aarch64 -run-pass=prologepilog %s -o - | FileCheck %s
+--- |
+  define i32 @_Z4funcv() {
+  entry:
+    %array = alloca [1073741824 x i32], align 4
+    %arrayidx = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+    store i32 7, ptr %arrayidx, align 4
+    call void @_Z5func2v()
+    %arrayidx1 = getelementptr inbounds [1073741824 x i32], ptr %array, i64 0, i64 20
+    %0 = load i32, ptr %arrayidx1, align 4
+    ret i32 %0
+  }
+ 
+  declare void @_Z5func2v()
+...
+---
+name:            _Z4funcv
+alignment:       4
+legalized:       true
+regBankSelected: true
+selected:        true
+tracksRegLiveness: true
+noPhis:          true
+isSSA:           false
+noVRegs:         true
+hasFakeUses:     false
+frameInfo:
+  maxAlignment:    4
+  adjustsStack:    true
+  hasCalls:        true
+  maxCallFrameSize: 0
+stack:
+  - { id: 0, name: array, size: 4294967296, alignment: 4, local-offset: -4294967296 }
+machineFunctionInfo: {}
+body:             |
+  bb.1.entry:
+    renamable $w8 = MOVi32imm 7
+    STRWui killed renamable $w8, %stack.0.array, 20 :: (store (s32) into %ir.arrayidx)
+    ADJCALLSTACKDOWN 0, 0, implicit-def $sp, implicit $sp
+    BL @_Z5func2v, csr_aarch64_aapcs, implicit-def dead $lr, implicit $sp
+    ADJCALLSTACKUP 0, 0, implicit-def $sp, implicit $sp
+    renamable $w0 = LDRWui %stack.0.array, 20 :: (dereferenceable load (s32) from %ir.arrayidx1)
+    ; CHECK: early-clobber $sp, $fp, $lr = frame-destroy LDPXpost $sp, 2
+    RET_ReallyLR implicit killed $w0
+
+...


        


More information about the llvm-commits mailing list