[llvm] [AArch64] Roundup newly allocated stack size according -mstack-alignment (PR #100859)

via llvm-commits llvm-commits at lists.llvm.org
Sat Jul 27 01:07:45 PDT 2024


https://github.com/vfdff created https://github.com/llvm/llvm-project/pull/100859

This first patch only rounds up the newly allocated stack size of a function according to the option -mstack-alignment=n. It assumes that the caller's stack is already aligned; otherwise, the new stack may still be misaligned. In that case, the additional option -mstackrealign is needed to force the stack to be realigned.

Fix https://github.com/llvm/llvm-project/issues/97268

>From fa0fefee7ec92711e114bd54283a96f7d9961020 Mon Sep 17 00:00:00 2001
From: zhongyunde 00443407 <zhongyunde at huawei.com>
Date: Mon, 15 Jul 2024 08:52:59 -0400
Subject: [PATCH] [AArch64] Roundup newly allocated stack size according
 -mstack-alignment

This first patch only rounds up the newly allocated stack size of a function
according to the option -mstack-alignment=n. It assumes that the caller's stack
is already aligned; otherwise, the new stack may still be misaligned. In that
case, the additional option -mstackrealign is needed to force the stack to be
realigned.

Fix https://github.com/llvm/llvm-project/issues/97268
---
 .../Target/AArch64/AArch64FrameLowering.cpp   |  4 +++
 .../lib/Target/AArch64/AArch64FrameLowering.h |  4 +--
 llvm/lib/Target/AArch64/AArch64Subtarget.cpp  |  8 ++++-
 llvm/lib/Target/AArch64/AArch64Subtarget.h    | 11 +++++-
 .../Target/AArch64/AArch64TargetMachine.cpp   |  6 ++--
 llvm/test/CodeGen/AArch64/stack-align32.ll    | 35 +++++++++++++++++++
 llvm/test/CodeGen/AArch64/stack-align64.ll    | 35 +++++++++++++++++++
 7 files changed, 96 insertions(+), 7 deletions(-)
 create mode 100644 llvm/test/CodeGen/AArch64/stack-align32.ll
 create mode 100644 llvm/test/CodeGen/AArch64/stack-align64.ll

diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b1b83e27c5592..6ea51c706ef55 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -320,6 +320,10 @@ static bool needsWinCFI(const MachineFunction &MF);
 static StackOffset getSVEStackSize(const MachineFunction &MF);
 static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
 
+AArch64FrameLowering::AArch64FrameLowering(MaybeAlign StackAlignOverride)
+    : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), 0,
+                          Align(16), true /*StackRealignable*/) {}
+
 /// Returns true if a homogeneous prolog or epilog code can be emitted
 /// for the size optimization. If possible, a frame helper call is injected.
 /// When Exit block is given, this check is for epilog.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index da315850d6362..829fa5350aabe 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -20,9 +20,7 @@ namespace llvm {
 
 class AArch64FrameLowering : public TargetFrameLowering {
 public:
-  explicit AArch64FrameLowering()
-      : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
-                            true /*StackRealignable*/) {}
+  AArch64FrameLowering(MaybeAlign StackAlignOverride);
 
   void resetCFIToInitialState(MachineBasicBlock &MBB) const override;
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32a355fe38f1c..9ddfb95bb7435 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -93,6 +93,10 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
   if (TuneCPUString.empty())
     TuneCPUString = CPUString;
 
+  // 16 bytes is the minimum required alignment; only a larger override applies.
+  if (StackAlignOverride && stackAlignment < *StackAlignOverride)
+    stackAlignment = *StackAlignOverride;
+
   ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
   initializeProperties(HasMinSize);
 
@@ -320,6 +324,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
 AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
                                    StringRef TuneCPU, StringRef FS,
                                    const TargetMachine &TM, bool LittleEndian,
+                                   MaybeAlign StackAlignOverride,
                                    unsigned MinSVEVectorSizeInBitsOverride,
                                    unsigned MaxSVEVectorSizeInBitsOverride,
                                    bool IsStreaming, bool IsStreamingCompatible,
@@ -330,10 +335,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
       CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
       IsLittle(LittleEndian), IsStreaming(IsStreaming),
       IsStreamingCompatible(IsStreamingCompatible),
+      StackAlignOverride(StackAlignOverride),
       MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
       MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
       InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
-      TLInfo(TM, *this) {
+      TLInfo(TM, *this), FrameLowering(getStackAlignment()) {
   if (AArch64::isX18ReservedByDefault(TT))
     ReserveXRegister.set(18);
 
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e585aad2f7a68..53ad0781fc8d6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -81,6 +81,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
 
   bool IsStreaming;
   bool IsStreamingCompatible;
+
+  /// The minimum stack frame alignment that is known to hold on entry to
+  /// the function and that every function must maintain.
+  Align stackAlignment = Align(16);
+  /// Override the stack alignment.
+  MaybeAlign StackAlignOverride;
+
   unsigned MinSVEVectorSizeInBits;
   unsigned MaxSVEVectorSizeInBits;
   unsigned VScaleForTuning = 2;
@@ -89,10 +96,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// TargetTriple - What processor and OS we're targeting.
   Triple TargetTriple;
 
-  AArch64FrameLowering FrameLowering;
   AArch64InstrInfo InstrInfo;
   AArch64SelectionDAGInfo TSInfo;
   AArch64TargetLowering TLInfo;
+  AArch64FrameLowering FrameLowering;
 
   /// GlobalISel related APIs.
   std::unique_ptr<CallLowering> CallLoweringInfo;
@@ -118,6 +125,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   /// of the specified triple.
   AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
                    StringRef FS, const TargetMachine &TM, bool LittleEndian,
+                   MaybeAlign StackAlignOverride = Align(16),
                    unsigned MinSVEVectorSizeInBitsOverride = 0,
                    unsigned MaxSVEVectorSizeInBitsOverride = 0,
                    bool IsStreaming = false, bool IsStreamingCompatible = false,
@@ -141,6 +149,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
   const AArch64RegisterInfo *getRegisterInfo() const override {
     return &getInstrInfo()->getRegisterInfo();
   }
+  Align getStackAlignment() const { return stackAlignment; }
   const CallLowering *getCallLowering() const override;
   const InlineAsmLowering *getInlineAsmLowering() const override;
   InstructionSelector *getInstructionSelector() const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index bcd677310d124..a188d8802a9fc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -461,8 +461,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
     // function that reside in TargetOptions.
     resetTargetOptions(F);
     I = std::make_unique<AArch64Subtarget>(
-        TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
-        MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize);
+        TargetTriple, CPU, TuneCPU, FS, *this, isLittle,
+        MaybeAlign(F.getParent()->getOverrideStackAlignment()),
+        MinSVEVectorSize, MaxSVEVectorSize, IsStreaming, IsStreamingCompatible,
+        HasMinSize);
   }
 
   assert((!IsStreaming || I->hasSME()) && "Expected SME to be available");
diff --git a/llvm/test/CodeGen/AArch64/stack-align32.ll b/llvm/test/CodeGen/AArch64/stack-align32.ll
new file mode 100644
index 0000000000000..0bf98fe8c916f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align32.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0  < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align32() #0 {
+; CHECK-LABEL: func_align32:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 32
+; CHECK-NEXT:    stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #16
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl other
+; CHECK-NEXT:    .cfi_def_cfa wsp, 32
+; CHECK-NEXT:    ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #32
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+entry:
+  call void @other()
+  ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/AArch64/stack-align64.ll b/llvm/test/CodeGen/AArch64/stack-align64.ll
new file mode 100644
index 0000000000000..21e4ad0269ce4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align64.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0  < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align64() #0 {
+; CHECK-LABEL: func_align64:
+; CHECK:       // %bb.0: // %entry
+; CHECK-NEXT:    sub sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 64
+; CHECK-NEXT:    stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT:    add x29, sp, #48
+; CHECK-NEXT:    .cfi_def_cfa w29, 16
+; CHECK-NEXT:    .cfi_offset w30, -8
+; CHECK-NEXT:    .cfi_offset w29, -16
+; CHECK-NEXT:    bl other
+; CHECK-NEXT:    .cfi_def_cfa wsp, 64
+; CHECK-NEXT:    ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT:    add sp, sp, #64
+; CHECK-NEXT:    .cfi_def_cfa_offset 0
+; CHECK-NEXT:    .cfi_restore w30
+; CHECK-NEXT:    .cfi_restore w29
+; CHECK-NEXT:    ret
+entry:
+  call void @other()
+  ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 64}



More information about the llvm-commits mailing list