[PATCH] [X86] Use single add/sub for large stack offsets

Robert Lougher rob.lougher at gmail.com
Wed Jan 28 08:29:40 PST 2015


Hi grosbach,

The following code generates an object file which is over 30 MB in size:

```
void bar(char *a);
void foo (void) {
    char b[5000000000000000];
    bar(b);
}
```

The reason is that the code that allocates this space on the stack consists of over 2 million
subtract instructions of the form:

```
    subq    $2147483647, %rsp       # imm = 0x7FFFFFFF
```

Instead of emitting multiple immediate-mode subtracts, the compiler should load the total amount
to be subtracted into a register and perform a single subtraction.

http://reviews.llvm.org/D7226

Files:
  lib/Target/X86/X86FrameLowering.cpp
  test/CodeGen/X86/huge-stack-offset.ll

Index: lib/Target/X86/X86FrameLowering.cpp
===================================================================
--- lib/Target/X86/X86FrameLowering.cpp
+++ lib/Target/X86/X86FrameLowering.cpp
@@ -82,6 +82,14 @@
   }
 }
 
+static unsigned getSUBrrOpcode(unsigned isLP64) {
+  return isLP64 ? X86::SUB64rr : X86::SUB32rr;
+}
+
+static unsigned getADDrrOpcode(unsigned isLP64) {
+  return isLP64 ? X86::ADD64rr : X86::ADD32rr;
+}
+
 static unsigned getANDriOpcode(bool IsLP64, int64_t Imm) {
   if (IsLP64) {
     if (isInt<8>(Imm))
@@ -187,6 +195,28 @@
   DebugLoc DL = MBB.findDebugLoc(MBBI);
 
   while (Offset) {
+    if (Offset > Chunk) {
+      // Rather than emit a long series of instructions for large offsets,
+      // load the offset into a register and do one sub/add
+      unsigned Reg = isSub
+        ? (unsigned)(Is64BitTarget ? X86::RAX : X86::EAX)
+        : findDeadCallerSavedReg(MBB, MBBI, TRI, Is64BitTarget);
+      if (Reg) {
+        Opc = Is64BitTarget ? X86::MOV64ri : X86::MOV32ri;
+        BuildMI(MBB, MBBI, DL, TII.get(Opc), Reg)
+          .addImm(Offset);
+        Opc = isSub
+          ? getSUBrrOpcode(Is64BitTarget)
+          : getADDrrOpcode(Is64BitTarget);
+        MachineInstr *MI = BuildMI(MBB, MBBI, DL, TII.get(Opc), StackPtr)
+          .addReg(StackPtr)
+          .addReg(Reg);
+        MI->getOperand(3).setIsDead(); // The EFLAGS implicit def is dead.
+        Offset = 0;
+        continue;
+      }
+    }
+
     uint64_t ThisVal = (Offset > Chunk) ? Chunk : Offset;
     if (ThisVal == (Is64BitTarget ? 8 : 4)) {
       // Use push / pop instead.
Index: test/CodeGen/X86/huge-stack-offset.ll
===================================================================
--- test/CodeGen/X86/huge-stack-offset.ll
+++ test/CodeGen/X86/huge-stack-offset.ll
@@ -0,0 +1,34 @@
+; RUN: llc < %s -march=x86-64 | FileCheck %s
+
+; Test that a large stack offset uses a single add/sub instruction to
+; adjust the stack pointer.
+
+define void @foo() nounwind {
+; CHECK-LABEL: foo:
+; CHECK:      movabsq $5000000000000008, %rax
+; CHECK-NEXT: subq    %rax, %rsp
+; CHECK-NOT:  subq    $2147483647, %rsp
+; CHECK:      movabsq $5000000000000008, [[RAX:%r..]]
+; CHECK-NEXT: addq    [[RAX]], %rsp
+  %1 = alloca [5000000000000000 x i8], align 16
+  %2 = getelementptr inbounds [5000000000000000 x i8]* %1, i32 0, i32 0
+  call void @bar(i8* %2)
+  ret void
+}
+
+; Verify that we do not clobber the return value.
+
+define i32 @foo2() nounwind {
+; CHECK-LABEL: foo2:
+; CHECK:      movabsq $5000000000000008, %rax
+; CHECK-NEXT: subq    %rax, %rsp
+; CHECK-NOT:  subq    $2147483647, %rsp
+; CHECK:      movl	$10, %eax
+; CHECK-NOT:  movabsq $5000000000000008, %rax
+  %1 = alloca [5000000000000000 x i8], align 16
+  %2 = getelementptr inbounds [5000000000000000 x i8]* %1, i32 0, i32 0
+  call void @bar(i8* %2)
+  ret i32 10
+}
+
+declare void @bar(i8*)

EMAIL PREFERENCES
  http://reviews.llvm.org/settings/panel/emailpreferences/
-------------- next part --------------
A non-text attachment was scrubbed...
Name: D7226.18891.patch
Type: text/x-patch
Size: 2889 bytes
Desc: not available
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20150128/9927031a/attachment.bin>


More information about the llvm-commits mailing list