[llvm] r227458 - [X86] Use single add/sub for large stack offsets
Robert Lougher
rob.lougher at gmail.com
Thu Jan 29 08:18:30 PST 2015
Author: rlougher
Date: Thu Jan 29 10:18:29 2015
New Revision: 227458
URL: http://llvm.org/viewvc/llvm-project?rev=227458&view=rev
Log:
[X86] Use single add/sub for large stack offsets
For large stack offsets the compiler generates multiple immediate mode
sub/add instructions in the prologue/epilogue. This patch makes the
compiler place the final amount to be added/subtracted into a register,
which is then added/substracted with a single operation.
Differential Revision: http://reviews.llvm.org/D7226
Added:
llvm/trunk/test/CodeGen/X86/huge-stack-offset.ll
Modified:
llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=227458&r1=227457&r2=227458&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Thu Jan 29 10:18:29 2015
@@ -82,6 +82,14 @@ static unsigned getADDriOpcode(unsigned
}
}
+static unsigned getSUBrrOpcode(unsigned isLP64) {
+ return isLP64 ? X86::SUB64rr : X86::SUB32rr;
+}
+
+static unsigned getADDrrOpcode(unsigned isLP64) {
+ return isLP64 ? X86::ADD64rr : X86::ADD32rr;
+}
+
static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
if (IsLP64) {
if (isInt<8>(Imm))
@@ -165,6 +173,18 @@ static unsigned findDeadCallerSavedReg(M
return 0;
}
+static bool isEAXLiveIn(MachineFunction &MF) {
+ for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
+ EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
+ unsigned Reg = II->first;
+
+ if (Reg == X86::RAX || Reg == X86::EAX || Reg == X86::AX ||
+ Reg == X86::AH || Reg == X86::AL)
+ return true;
+ }
+
+ return false;
+}
/// emitSPUpdate - Emit a series of instructions to increment / decrement the
/// stack pointer by a constant value.
@@ -187,6 +207,32 @@ void emitSPUpdate(MachineBasicBlock &MBB
DebugLoc DL = MBB.findDebugLoc(MBBI);
while (Offset) {
+ if (Offset > Chunk) {
+ // Rather than emit a long series of instructions for large offsets,
+ // load the offset into a register and do one sub/add
+ unsigned Reg = 0;
+
+ if (isSub && !isEAXLiveIn(*MBB.getParent()))
+ Reg = (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX);
+ else
+ Reg = findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+
+ if (Reg) {
+ Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+ BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
+ .addImm(Offset);
+ Opc = isSub
+ ? getSUBrrOpcode(Is64BitTarget)
+ : getADDrrOpcode(Is64BitTarget);
+ MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+ .addReg(StackPtr)
+ .addReg(Reg);
+ MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+ Offset = 0;
+ continue;
+ }
+ }
+
uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
if (ThisVal == (Is64BitTarget ? 8 : 4)) {
// Use push / pop instead.
@@ -316,19 +362,6 @@ static int mergeSPUpdates(MachineBasicBl
return Offset;
}
-static bool isEAXLiveIn(MachineFunction &MF) {
- for (MachineRegisterInfo::livein_iterator II = MF.getRegInfo().livein_begin(),
- EE = MF.getRegInfo().livein_end(); II != EE; ++II) {
- unsigned Reg = II->first;
-
- if (Reg == X86::EAX || Reg == X86::AX ||
- Reg == X86::AH || Reg == X86::AL)
- return true;
- }
-
- return false;
-}
-
void
X86FrameLowering::emitCalleeSavedFrameMoves(MachineBasicBlock &MBB,
MachineBasicBlock::iterator MBBI,
Added: llvm/trunk/test/CodeGen/X86/huge-stack-offset.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/huge-stack-offset.ll?rev=227458&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/huge-stack-offset.ll (added)
+++ llvm/trunk/test/CodeGen/X86/huge-stack-offset.ll Thu Jan 29 10:18:29 2015
@@ -0,0 +1,59 @@
+; RUN: llc < %s -mtriple=x86_64-linux-unknown | FileCheck %s --check-prefix=CHECK-64
+; RUN: llc < %s -mtriple=i386-linux-unknown | FileCheck %s --check-prefix=CHECK-32
+
+; Test that a large stack offset uses a single add/sub instruction to
+; adjust the stack pointer.
+
+define void @foo() nounwind {
+; CHECK-64-LABEL: foo:
+; CHECK-64: movabsq $50000000{{..}}, %rax
+; CHECK-64-NEXT: subq %rax, %rsp
+; CHECK-64-NOT: subq $2147483647, %rsp
+; CHECK-64: movabsq $50000000{{..}}, [[RAX:%r..]]
+; CHECK-64-NEXT: addq [[RAX]], %rsp
+
+; CHECK-32-LABEL: foo:
+; CHECK-32: movl $50000000{{..}}, %eax
+; CHECK-32-NEXT: subl %eax, %esp
+; CHECK-32-NOT: subl $2147483647, %esp
+; CHECK-32: movl $50000000{{..}}, [[EAX:%e..]]
+; CHECK-32-NEXT: addl [[EAX]], %esp
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret void
+}
+
+; Verify that we do not clobber the return value.
+
+define i32 @foo2() nounwind {
+; CHECK-64-LABEL: foo2:
+; CHECK-64: movl $10, %eax
+; CHECK-64-NOT: movabsq ${{.*}}, %rax
+
+; CHECK-32-LABEL: foo2:
+; CHECK-32: movl $10, %eax
+; CHECK-32-NOT: movl ${{.*}}, %eax
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret i32 10
+}
+
+; Verify that we do not clobber EAX when using inreg attribute
+
+define i32 @foo3(i32 inreg %x) nounwind {
+; CHECK-64-LABEL: foo3:
+; CHECK-64: movabsq $50000000{{..}}, %rax
+; CHECK-64-NEXT: subq %rax, %rsp
+
+; CHECK-32-LABEL: foo3:
+; CHECK-32: subl $2147483647, %esp
+; CHECK-32-NOT: movl ${{.*}}, %eax
+ %1 = alloca [5000000000 x i8], align 16
+ %2 = getelementptr inbounds [5000000000 x i8]* %1, i32 0, i32 0
+ call void @bar(i8* %2)
+ ret i32 %x
+}
+
+declare void @bar(i8*)
More information about the llvm-commits
mailing list