[llvm] [AArch64] Round up newly allocated stack size according to -mstack-alignment (PR #100859)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 27 01:08:18 PDT 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-backend-aarch64

Author: Allen (vfdff)

<details>
<summary>Changes</summary>

This first patch only rounds up the newly allocated stack size of a subroutine according to the option -mstack-alignment=n. This assumes that the stack of the parent call is already aligned; otherwise, the new stack may still not be aligned. In that case, the additional option -mstackrealign is needed to force realignment of the stack.

Fix https://github.com/llvm/llvm-project/issues/97268

---
Full diff: https://github.com/llvm/llvm-project/pull/100859.diff


7 Files Affected:

- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+4) 
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+1-3) 
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+7-1) 
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+10-1) 
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+4-2) 
- (added) llvm/test/CodeGen/AArch64/stack-align32.ll (+35) 
- (added) llvm/test/CodeGen/AArch64/stack-align64.ll (+35) 


``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b1b83e27c5592..6ea51c706ef55 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -320,6 +320,10 @@ static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
 static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
 
+AArch64FrameLowering::AArch64FrameLowering(MaybeAlign StackAlignOverride)
+    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), 0,
+                          Align(16), true /*StackRealignable*/) {}
+
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
 /// When Exit block is given, this check is for epilog.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index da315850d6362..829fa5350aabe 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -20,9 +20,7 @@ namespace llvm {
 
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
-  explicit AArch64FrameLowering()
-      : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
-                            true /*StackRealignable*/) {}
+  AArch64FrameLowering(MaybeAlign StackAlignOverride);
 
   void resetCFIToInitialState(MachineBasicBlock &MBB) const override;
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32a355fe38f1c..9ddfb95bb7435 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -93,6 +93,10 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
   if (TuneCPUString.empty())
     TuneCPUString = CPUString;
 
+  // The default 16-byte is the minimal required alignment.
+  if (StackAlignOverride && stackAlignment < *StackAlignOverride)
+    stackAlignment = *StackAlignOverride;
+
   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
   initializeProperties(HasMinSize);
 
@@ -320,6 +324,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                    StringRef TuneCPU, StringRef FS,
                                    const TargetMachine &TM, bool LittleEndian,
+                                   MaybeAlign StackAlignOverride,
                                    unsigned MinSVEVectorSizeInBitsOverride,
                                    unsigned MaxSVEVectorSizeInBitsOverride,
                                    bool IsStreaming, bool IsStreamingCompatible,
@@ -330,10 +335,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
       IsLittle(LittleEndian), IsStreaming(IsStreaming),
       IsStreamingCompatible(IsStreamingCompatible),
+      StackAlignOverride(StackAlignOverride),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
-      TLInfo(TM, *this) {
+      TLInfo(TM, *this), FrameLowering(getStackAlignment()) {
   if (AArch64::isX18ReservedByDefault(TT))
     ReserveXRegister.set(18);
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e585aad2f7a68..53ad0781fc8d6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -81,6 +81,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool IsStreaming;
   bool IsStreamingCompatible;
+
+  /// The minimum alignment known to hold of the stack frame on
+  /// entry to the function and which must be maintained by every function.
+  Align stackAlignment = Align(16);
+  /// Override the stack alignment.
+  MaybeAlign StackAlignOverride;
+
   unsigned MinSVEVectorSizeInBits;
   unsigned MaxSVEVectorSizeInBits;
   unsigned VScaleForTuning = 2;
@@ -89,10 +96,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
-  AArch64FrameLowering FrameLowering;
   AArch64InstrInfo InstrInfo;
   AArch64SelectionDAGInfo TSInfo;
   AArch64TargetLowering TLInfo;
+  AArch64FrameLowering FrameLowering;
 
   /// GlobalISel related APIs.
   std::unique_ptr<CallLowering> CallLoweringInfo;
@@ -118,6 +125,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// of the specified triple.
   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
+                   MaybeAlign StackAlignOverride = Align(16),
                    unsigned MinSVEVectorSizeInBitsOverride = 0,
                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
                    bool IsStreaming = false, bool IsStreamingCompatible = false,
@@ -141,6 +149,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   const AArch64RegisterInfo *getRegisterInfo() const override {
     return &getInstrInfo()->getRegisterInfo();
   }
+  Align getStackAlignment() const { return stackAlignment; }
   const CallLowering *getCallLowering() const override;
   const InlineAsmLowering *getInlineAsmLowering() const override;
   InstructionSelector *getInstructionSelector() const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index bcd677310d124..a188d8802a9fc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -461,8 +461,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = std::make_unique<AArch64Subtarget>(
-        TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
-        MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize);
+        TargetTriple, CPU, TuneCPU, FS, *this, isLittle,
+        MaybeAlign(F.getParent()->getOverrideStackAlignment()),
+        MinSVEVectorSize, MaxSVEVectorSize, IsStreaming, IsStreamingCompatible,
+        HasMinSize);
   }
 
   assert((!IsStreaming || I->hasSME()) && "Expected SME to be available");
diff --git a/llvm/test/CodeGen/AArch64/stack-align32.ll b/llvm/test/CodeGen/AArch64/stack-align32.ll
new file mode 100644
index 0000000000000..0bf98fe8c916f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align32.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0  < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align32() #0 {
+; CHECK-LABEL: func_align32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl other
+; CHECK-NEXT:    .cfi_def_cfa wsp, 32
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+entry:
+  call void @other()
+  ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/AArch64/stack-align64.ll b/llvm/test/CodeGen/AArch64/stack-align64.ll
new file mode 100644
index 0000000000000..21e4ad0269ce4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align64.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0  < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align64() #0 {
+; CHECK-LABEL: func_align64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #48
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl other
+; CHECK-NEXT:    .cfi_def_cfa wsp, 64
+; CHECK-NEXT:    ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+entry:
+  call void @other()
+  ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 64}

``````````

</details>


https://github.com/llvm/llvm-project/pull/100859


More information about the llvm-commits mailing list