[llvm] [llvm][X86] Fix merging of large sp updates (PR #125007)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Jan 29 17:05:30 PST 2025
https://github.com/macurtis-amd created https://github.com/llvm/llvm-project/pull/125007
(No description provided.)
>From 7914b85930c7fc8c852179ef93525d42be683118 Mon Sep 17 00:00:00 2001
From: Matthew Curtis <macurtis at amd.com>
Date: Wed, 29 Jan 2025 14:31:21 -0600
Subject: [PATCH] [llvm][X86] Fix merging of large sp updates
---
llvm/lib/Target/X86/X86ExpandPseudo.cpp | 2 +-
llvm/lib/Target/X86/X86FrameLowering.cpp | 63 +++++++++++--------
llvm/lib/Target/X86/X86FrameLowering.h | 5 +-
.../test/CodeGen/X86/merge-huge-sp-updates.ll | 25 ++++++++
4 files changed, 66 insertions(+), 29 deletions(-)
create mode 100644 llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
diff --git a/llvm/lib/Target/X86/X86ExpandPseudo.cpp b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
index 78db8413e62c9b..ad8c02c1f0d999 100644
--- a/llvm/lib/Target/X86/X86ExpandPseudo.cpp
+++ b/llvm/lib/Target/X86/X86ExpandPseudo.cpp
@@ -284,7 +284,7 @@ bool X86ExpandPseudo::expandMI(MachineBasicBlock &MBB,
// Adjust stack pointer.
int StackAdj = StackAdjust.getImm();
int MaxTCDelta = X86FI->getTCReturnAddrDelta();
- int Offset = 0;
+ int64_t Offset = 0;
assert(MaxTCDelta <= 0 && "MaxTCDelta should never be positive");
// Incoporate the retaddr area.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.cpp b/llvm/lib/Target/X86/X86FrameLowering.cpp
index f8ed75f189a776..9d89bfc7273ff2 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.cpp
+++ b/llvm/lib/Target/X86/X86FrameLowering.cpp
@@ -391,9 +391,9 @@ MachineInstrBuilder X86FrameLowering::BuildStackAdjustment(
return MI;
}
-int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
- MachineBasicBlock::iterator &MBBI,
- bool doMergeWithPrevious) const {
+int64_t X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const {
if ((doMergeWithPrevious && MBBI == MBB.begin()) ||
(!doMergeWithPrevious && MBBI == MBB.end()))
return 0;
@@ -415,27 +415,38 @@ int X86FrameLowering::mergeSPUpdates(MachineBasicBlock &MBB,
if (doMergeWithPrevious && PI != MBB.begin() && PI->isCFIInstruction())
PI = std::prev(PI);
- unsigned Opc = PI->getOpcode();
- int Offset = 0;
-
- if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
- PI->getOperand(0).getReg() == StackPtr) {
- assert(PI->getOperand(1).getReg() == StackPtr);
- Offset = PI->getOperand(2).getImm();
- } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
- PI->getOperand(0).getReg() == StackPtr &&
- PI->getOperand(1).getReg() == StackPtr &&
- PI->getOperand(2).getImm() == 1 &&
- PI->getOperand(3).getReg() == X86::NoRegister &&
- PI->getOperand(5).getReg() == X86::NoRegister) {
- // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
- Offset = PI->getOperand(4).getImm();
- } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
- PI->getOperand(0).getReg() == StackPtr) {
- assert(PI->getOperand(1).getReg() == StackPtr);
- Offset = -PI->getOperand(2).getImm();
- } else
- return 0;
+ int64_t Offset = 0;
+ for (;;) {
+ unsigned Opc = PI->getOpcode();
+
+ if ((Opc == X86::ADD64ri32 || Opc == X86::ADD32ri) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ assert(PI->getOperand(1).getReg() == StackPtr);
+ Offset = PI->getOperand(2).getImm();
+ } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
+ PI->getOperand(0).getReg() == StackPtr &&
+ PI->getOperand(1).getReg() == StackPtr &&
+ PI->getOperand(2).getImm() == 1 &&
+ PI->getOperand(3).getReg() == X86::NoRegister &&
+ PI->getOperand(5).getReg() == X86::NoRegister) {
+ // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
+ Offset = PI->getOperand(4).getImm();
+ } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB32ri) &&
+ PI->getOperand(0).getReg() == StackPtr) {
+ assert(PI->getOperand(1).getReg() == StackPtr);
+ Offset = -PI->getOperand(2).getImm();
+ } else
+ return 0;
+
+ constexpr int64_t Chunk = (1LL << 31) - 1;
+ if (Offset < Chunk)
+ break;
+
+ if (doMergeWithPrevious ? (PI == MBB.begin()) : (PI == MBB.end()))
+ return 0;
+
+ PI = doMergeWithPrevious ? std::prev(PI) : std::next(PI);
+ }
PI = MBB.erase(PI);
if (PI != MBB.end() && PI->isCFIInstruction()) {
@@ -2457,7 +2468,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (HasFP) {
if (X86FI->hasSwiftAsyncContext()) {
// Discard the context.
- int Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
+ int64_t Offset = 16 + mergeSPUpdates(MBB, MBBI, true);
emitSPUpdate(MBB, MBBI, DL, Offset, /*InEpilogue*/ true);
}
// Pop EBP.
@@ -2618,7 +2629,7 @@ void X86FrameLowering::emitEpilogue(MachineFunction &MF,
if (Terminator == MBB.end() || !isTailCallOpcode(Terminator->getOpcode())) {
// Add the return addr area delta back since we are not tail calling.
- int Offset = -1 * X86FI->getTCReturnAddrDelta();
+ int64_t Offset = -1 * X86FI->getTCReturnAddrDelta();
assert(Offset >= 0 && "TCDelta should never be positive");
if (Offset) {
// Check for possible merge with preceding ADD instruction.
diff --git a/llvm/lib/Target/X86/X86FrameLowering.h b/llvm/lib/Target/X86/X86FrameLowering.h
index 02fe8ee02a7e45..79d8ea1b217694 100644
--- a/llvm/lib/Target/X86/X86FrameLowering.h
+++ b/llvm/lib/Target/X86/X86FrameLowering.h
@@ -138,8 +138,9 @@ class X86FrameLowering : public TargetFrameLowering {
/// it is an ADD/SUB/LEA instruction it is deleted argument and the
/// stack adjustment is returned as a positive value for ADD/LEA and
/// a negative for SUB.
- int mergeSPUpdates(MachineBasicBlock &MBB, MachineBasicBlock::iterator &MBBI,
- bool doMergeWithPrevious) const;
+ int64_t mergeSPUpdates(MachineBasicBlock &MBB,
+ MachineBasicBlock::iterator &MBBI,
+ bool doMergeWithPrevious) const;
/// Emit a series of instructions to increment / decrement the stack
/// pointer by a constant value.
diff --git a/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
new file mode 100644
index 00000000000000..4828a7595bbcf9
--- /dev/null
+++ b/llvm/test/CodeGen/X86/merge-huge-sp-updates.ll
@@ -0,0 +1,25 @@
+; RUN: llc < %s -O3 -mtriple=x86_64-linux-unknown -verify-machineinstrs -o %t.s
+; RUN: FileCheck --input-file=%t.s %s
+
+; Check that the stack update after calling bar gets merged into the second add
+; and not the first, which is already at the chunk size limit (0x7FFFFFFF).
+
+define void @foo(ptr %rhs) {
+; CHECK-LABEL: foo
+entry:
+ %lhs = alloca [5 x [5 x [3 x [162 x [161 x [161 x double]]]]]], align 16
+ store ptr %lhs, ptr %rhs, align 8
+ %0 = call i32 @baz()
+ call void @bar(i64 0, i64 0, i64 0, i64 0, i64 0, ptr null, ptr %rhs, ptr null, ptr %rhs)
+; CHECK: call{{.*}}bar
+; CHECK: addq{{.*}}$2147483647, %rsp
+; CHECK: addq{{.*}}$372037585, %rsp
+ ret void
+}
+
+declare void @bar(i64, i64, i64, i64, i64, ptr, ptr, ptr, ptr)
+
+declare i32 @baz()
+
+
+; RUN: llvm-mc -triple x86_64-unknown-unknown %t.s
More information about the llvm-commits mailing list