[llvm] [AArch64] Round up newly allocated stack size according to -mstack-alignment (PR #100859)
via llvm-commits
llvm-commits at lists.llvm.org
Sat Jul 27 01:08:18 PDT 2024
llvmbot wrote:
@llvm/pr-subscribers-backend-aarch64
Author: Allen (vfdff)
<details>
<summary>Changes</summary>
This first patch only rounds up the newly allocated stack size of a subroutine according to the option -mstack-alignment=n. It assumes the caller's stack is already aligned; otherwise the new stack may still be misaligned, in which case the additional option -mstackrealign is needed to force realignment of the stack.
Fix https://github.com/llvm/llvm-project/issues/97268
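For context, a minimal sketch of the flow this change relies on (illustrative IR only, not one of the added tests; the function names are made up): Clang lowers -mstack-alignment=n to the "override-stack-alignment" module flag, which getSubtargetImpl now forwards into the AArch64 subtarget so frame allocation is rounded up to n bytes.

```llvm
; Illustrative example, assuming clang forwards -mstack-alignment=32 into the
; "override-stack-alignment" module flag as it does for other targets.

declare void @callee()

define void @caller() "frame-pointer"="non-leaf" {
entry:
  ; With this patch the prologue's SP adjustment for this frame is expected to
  ; be a multiple of 32 rather than of the default 16. If the caller's stack
  ; cannot be assumed to be 32-byte aligned, the "stackrealign" function
  ; attribute (clang -mstackrealign) is still needed to force realignment.
  call void @callee()
  ret void
}

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"override-stack-alignment", i32 32}
```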
---
Full diff: https://github.com/llvm/llvm-project/pull/100859.diff
7 Files Affected:
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.cpp (+4)
- (modified) llvm/lib/Target/AArch64/AArch64FrameLowering.h (+1-3)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.cpp (+7-1)
- (modified) llvm/lib/Target/AArch64/AArch64Subtarget.h (+10-1)
- (modified) llvm/lib/Target/AArch64/AArch64TargetMachine.cpp (+4-2)
- (added) llvm/test/CodeGen/AArch64/stack-align32.ll (+35)
- (added) llvm/test/CodeGen/AArch64/stack-align64.ll (+35)
``````````diff
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
index b1b83e27c5592..6ea51c706ef55 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.cpp
@@ -320,6 +320,10 @@ static bool needsWinCFI(const MachineFunction &MF);
static StackOffset getSVEStackSize(const MachineFunction &MF);
static Register findScratchNonCalleeSaveRegister(MachineBasicBlock *MBB);
+AArch64FrameLowering::AArch64FrameLowering(MaybeAlign StackAlignOverride)
+ : TargetFrameLowering(StackGrowsDown, StackAlignOverride.valueOrOne(), 0,
+ Align(16), true /*StackRealignable*/) {}
+
/// Returns true if a homogeneous prolog or epilog code can be emitted
/// for the size optimization. If possible, a frame helper call is injected.
/// When Exit block is given, this check is for epilog.
diff --git a/llvm/lib/Target/AArch64/AArch64FrameLowering.h b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
index da315850d6362..829fa5350aabe 100644
--- a/llvm/lib/Target/AArch64/AArch64FrameLowering.h
+++ b/llvm/lib/Target/AArch64/AArch64FrameLowering.h
@@ -20,9 +20,7 @@ namespace llvm {
class AArch64FrameLowering : public TargetFrameLowering {
public:
- explicit AArch64FrameLowering()
- : TargetFrameLowering(StackGrowsDown, Align(16), 0, Align(16),
- true /*StackRealignable*/) {}
+ AArch64FrameLowering(MaybeAlign StackAlignOverride);
void resetCFIToInitialState(MachineBasicBlock &MBB) const override;
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
index 32a355fe38f1c..9ddfb95bb7435 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.cpp
@@ -93,6 +93,10 @@ AArch64Subtarget &AArch64Subtarget::initializeSubtargetDependencies(
if (TuneCPUString.empty())
TuneCPUString = CPUString;
+ // The default of 16 bytes is the minimum required alignment.
+ if (StackAlignOverride && stackAlignment < *StackAlignOverride)
+ stackAlignment = *StackAlignOverride;
+
ParseSubtargetFeatures(CPUString, TuneCPUString, FS);
initializeProperties(HasMinSize);
@@ -320,6 +324,7 @@ void AArch64Subtarget::initializeProperties(bool HasMinSize) {
AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
StringRef TuneCPU, StringRef FS,
const TargetMachine &TM, bool LittleEndian,
+ MaybeAlign StackAlignOverride,
unsigned MinSVEVectorSizeInBitsOverride,
unsigned MaxSVEVectorSizeInBitsOverride,
bool IsStreaming, bool IsStreamingCompatible,
@@ -330,10 +335,11 @@ AArch64Subtarget::AArch64Subtarget(const Triple &TT, StringRef CPU,
CustomCallSavedXRegs(AArch64::GPR64commonRegClass.getNumRegs()),
IsLittle(LittleEndian), IsStreaming(IsStreaming),
IsStreamingCompatible(IsStreamingCompatible),
+ StackAlignOverride(StackAlignOverride),
MinSVEVectorSizeInBits(MinSVEVectorSizeInBitsOverride),
MaxSVEVectorSizeInBits(MaxSVEVectorSizeInBitsOverride), TargetTriple(TT),
InstrInfo(initializeSubtargetDependencies(FS, CPU, TuneCPU, HasMinSize)),
- TLInfo(TM, *this) {
+ TLInfo(TM, *this), FrameLowering(getStackAlignment()) {
if (AArch64::isX18ReservedByDefault(TT))
ReserveXRegister.set(18);
diff --git a/llvm/lib/Target/AArch64/AArch64Subtarget.h b/llvm/lib/Target/AArch64/AArch64Subtarget.h
index e585aad2f7a68..53ad0781fc8d6 100644
--- a/llvm/lib/Target/AArch64/AArch64Subtarget.h
+++ b/llvm/lib/Target/AArch64/AArch64Subtarget.h
@@ -81,6 +81,13 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
bool IsStreaming;
bool IsStreamingCompatible;
+
+ /// The minimum alignment known to hold of the stack frame on
+ /// entry to the function and which must be maintained by every function.
+ Align stackAlignment = Align(16);
+ /// Override the stack alignment.
+ MaybeAlign StackAlignOverride;
+
unsigned MinSVEVectorSizeInBits;
unsigned MaxSVEVectorSizeInBits;
unsigned VScaleForTuning = 2;
@@ -89,10 +96,10 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// TargetTriple - What processor and OS we're targeting.
Triple TargetTriple;
- AArch64FrameLowering FrameLowering;
AArch64InstrInfo InstrInfo;
AArch64SelectionDAGInfo TSInfo;
AArch64TargetLowering TLInfo;
+ AArch64FrameLowering FrameLowering;
/// GlobalISel related APIs.
std::unique_ptr<CallLowering> CallLoweringInfo;
@@ -118,6 +125,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
/// of the specified triple.
AArch64Subtarget(const Triple &TT, StringRef CPU, StringRef TuneCPU,
StringRef FS, const TargetMachine &TM, bool LittleEndian,
+ MaybeAlign StackAlignOverride = Align(16),
unsigned MinSVEVectorSizeInBitsOverride = 0,
unsigned MaxSVEVectorSizeInBitsOverride = 0,
bool IsStreaming = false, bool IsStreamingCompatible = false,
@@ -141,6 +149,7 @@ class AArch64Subtarget final : public AArch64GenSubtargetInfo {
const AArch64RegisterInfo *getRegisterInfo() const override {
return &getInstrInfo()->getRegisterInfo();
}
+ Align getStackAlignment() const { return stackAlignment; }
const CallLowering *getCallLowering() const override;
const InlineAsmLowering *getInlineAsmLowering() const override;
InstructionSelector *getInstructionSelector() const override;
diff --git a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
index bcd677310d124..a188d8802a9fc 100644
--- a/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
+++ b/llvm/lib/Target/AArch64/AArch64TargetMachine.cpp
@@ -461,8 +461,10 @@ AArch64TargetMachine::getSubtargetImpl(const Function &F) const {
// function that reside in TargetOptions.
resetTargetOptions(F);
I = std::make_unique<AArch64Subtarget>(
- TargetTriple, CPU, TuneCPU, FS, *this, isLittle, MinSVEVectorSize,
- MaxSVEVectorSize, IsStreaming, IsStreamingCompatible, HasMinSize);
+ TargetTriple, CPU, TuneCPU, FS, *this, isLittle,
+ MaybeAlign(F.getParent()->getOverrideStackAlignment()),
+ MinSVEVectorSize, MaxSVEVectorSize, IsStreaming, IsStreamingCompatible,
+ HasMinSize);
}
assert((!IsStreaming || I->hasSME()) && "Expected SME to be available");
diff --git a/llvm/test/CodeGen/AArch64/stack-align32.ll b/llvm/test/CodeGen/AArch64/stack-align32.ll
new file mode 100644
index 0000000000000..0bf98fe8c916f
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align32.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align32() #0 {
+; CHECK-LABEL: func_align32:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 32
+; CHECK-NEXT: stp x29, x30, [sp, #16] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #16
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: bl other
+; CHECK-NEXT: .cfi_def_cfa wsp, 32
+; CHECK-NEXT: ldp x29, x30, [sp, #16] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #32
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: ret
+entry:
+ call void @other()
+ ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 32}
diff --git a/llvm/test/CodeGen/AArch64/stack-align64.ll b/llvm/test/CodeGen/AArch64/stack-align64.ll
new file mode 100644
index 0000000000000..21e4ad0269ce4
--- /dev/null
+++ b/llvm/test/CodeGen/AArch64/stack-align64.ll
@@ -0,0 +1,35 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -mtriple aarch64-unknown-linux-gnu -O0 < %s | FileCheck %s
+
+target datalayout = "e-m:e-i8:8:32-i16:16:32-i64:64-i128:128-n32:64-S128-Fn32"
+target triple = "aarch64-unknown-linux-gnu"
+
+declare void @other()
+
+define dso_local void @func_align64() #0 {
+; CHECK-LABEL: func_align64:
+; CHECK: // %bb.0: // %entry
+; CHECK-NEXT: sub sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 64
+; CHECK-NEXT: stp x29, x30, [sp, #48] // 16-byte Folded Spill
+; CHECK-NEXT: add x29, sp, #48
+; CHECK-NEXT: .cfi_def_cfa w29, 16
+; CHECK-NEXT: .cfi_offset w30, -8
+; CHECK-NEXT: .cfi_offset w29, -16
+; CHECK-NEXT: bl other
+; CHECK-NEXT: .cfi_def_cfa wsp, 64
+; CHECK-NEXT: ldp x29, x30, [sp, #48] // 16-byte Folded Reload
+; CHECK-NEXT: add sp, sp, #64
+; CHECK-NEXT: .cfi_def_cfa_offset 0
+; CHECK-NEXT: .cfi_restore w30
+; CHECK-NEXT: .cfi_restore w29
+; CHECK-NEXT: ret
+entry:
+ call void @other()
+ ret void
+}
+
+
+attributes #0 = { uwtable "frame-pointer"="non-leaf" }
+!llvm.module.flags = !{!0}
+!0 = !{i32 1, !"override-stack-alignment", i32 64}
``````````
</details>
https://github.com/llvm/llvm-project/pull/100859