[llvm] r265623 - Re-commit r265039 "[X86] Merge adjacent stack adjustments in eliminateCallFramePseudoInstr (PR27140)"

Hans Wennborg via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 6 17:05:49 PDT 2016


Author: hans
Date: Wed Apr  6 19:05:49 2016
New Revision: 265623

URL: http://llvm.org/viewvc/llvm-project?rev=265623&view=rev
Log:
Re-commit r265039 "[X86] Merge adjacent stack adjustments in eliminateCallFramePseudoInstr (PR27140)"

Third time's the charm? The previous attempt (r265345) caused ASan test
failures on X86, as broken CFI caused stack traces to not work.

This version of the patch makes sure not to merge with stack adjustments
that have CFI, and to not add merged instructions' offests to the CFI
about to be generated.

This is already covered by the lit tests; I just got the expectations
wrong previously.

Modified:
    llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
    llvm/trunk/test/CodeGen/X86/2006-05-02-InstrSched1.ll
    llvm/trunk/test/CodeGen/X86/fold-push.ll
    llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
    llvm/trunk/test/CodeGen/X86/localescape.ll
    llvm/trunk/test/CodeGen/X86/memset-2.ll
    llvm/trunk/test/CodeGen/X86/movtopush.ll
    llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll
    llvm/trunk/test/CodeGen/X86/push-cfi.ll

Modified: llvm/trunk/lib/Target/X86/X86FrameLowering.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Target/X86/X86FrameLowering.cpp?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/lib/Target/X86/X86FrameLowering.cpp (original)
+++ llvm/trunk/lib/Target/X86/X86FrameLowering.cpp Wed Apr  6 19:05:49 2016
@@ -375,14 +375,25 @@ int X86FrameLowering::mergeSPUpdates(Mac
   unsigned Opc = PI->getOpcode();
   int Offset = 0;
 
+  if (!doMergeWithPrevious && NI != MBB.end() &&
+      NI->getOpcode() == TargetOpcode::CFI_INSTRUCTION) {
+    // Don't merge with the next instruction if it has CFI.
+    return Offset;
+  }
+
   if ((Opc == X86::ADD64ri32 || Opc == X86::ADD64ri8 ||
        Opc == X86::ADD32ri || Opc == X86::ADD32ri8) &&
       PI->getOperand(0).getReg() == StackPtr){
+    assert(PI->getOperand(1).getReg() == StackPtr);
     Offset += PI->getOperand(2).getImm();
     MBB.erase(PI);
     if (!doMergeWithPrevious) MBBI = NI;
   } else if ((Opc == X86::LEA32r || Opc == X86::LEA64_32r) &&
-             PI->getOperand(0).getReg() == StackPtr) {
+             PI->getOperand(0).getReg() == StackPtr &&
+             PI->getOperand(1).getReg() == StackPtr &&
+             PI->getOperand(2).getImm() == 1 &&
+             PI->getOperand(3).getReg() == X86::NoRegister &&
+             PI->getOperand(5).getReg() == X86::NoRegister) {
     // For LEAs we have: def = lea SP, FI, noreg, Offset, noreg.
     Offset += PI->getOperand(4).getImm();
     MBB.erase(PI);
@@ -390,6 +401,7 @@ int X86FrameLowering::mergeSPUpdates(Mac
   } else if ((Opc == X86::SUB64ri32 || Opc == X86::SUB64ri8 ||
               Opc == X86::SUB32ri || Opc == X86::SUB32ri8) &&
              PI->getOperand(0).getReg() == StackPtr) {
+    assert(PI->getOperand(1).getReg() == StackPtr);
     Offset -= PI->getOperand(2).getImm();
     MBB.erase(PI);
     if (!doMergeWithPrevious) MBBI = NI;
@@ -2533,13 +2545,23 @@ eliminateCallFramePseudoInstr(MachineFun
       BuildCFI(MBB, I, DL, 
                MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt));
 
-    if (Amount) {
-      // Add Amount to SP to destroy a frame, and subtract to setup.
-      int Offset = isDestroy ? Amount : -Amount;
-
-      if (!(Fn->optForMinSize() && 
-            adjustStackWithPops(MBB, I, DL, Offset)))
-        BuildStackAdjustment(MBB, I, DL, Offset, /*InEpilogue=*/false);
+    // Add Amount to SP to destroy a frame, or subtract to setup.
+    int64_t StackAdjustment = isDestroy ? Amount : -Amount;
+    int64_t CfaAdjustment = -StackAdjustment;
+
+    if (StackAdjustment) {
+      // Merge with any previous or following adjustment instruction. Note: the
+      // instructions merged with here do not have CFI, so their stack
+      // adjustments do not feed into CfaAdjustment.
+      StackAdjustment += mergeSPUpdates(MBB, I, true);
+      StackAdjustment += mergeSPUpdates(MBB, I, false);
+
+      if (StackAdjustment) {
+        if (!(Fn->optForMinSize() &&
+              adjustStackWithPops(MBB, I, DL, StackAdjustment)))
+          BuildStackAdjustment(MBB, I, DL, StackAdjustment,
+                               /*InEpilogue=*/false);
+      }
     }
 
     if (DwarfCFI && !hasFP(MF)) {
@@ -2549,14 +2571,12 @@ eliminateCallFramePseudoInstr(MachineFun
       // CFI only for EH purposes or for debugging. EH only requires the CFA
       // offset to be correct at each call site, while for debugging we want
       // it to be more precise.
-      int CFAOffset = Amount;
+
       // TODO: When not using precise CFA, we also need to adjust for the
       // InternalAmt here.
-
-      if (CFAOffset) {
-        CFAOffset = isDestroy ? -CFAOffset : CFAOffset;
-        BuildCFI(MBB, I, DL, 
-                 MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset));
+      if (CfaAdjustment) {
+        BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(
+                                 nullptr, CfaAdjustment));
       }
     }
 

Modified: llvm/trunk/test/CodeGen/X86/2006-05-02-InstrSched1.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2006-05-02-InstrSched1.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/2006-05-02-InstrSched1.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2006-05-02-InstrSched1.ll Wed Apr  6 19:05:49 2016
@@ -1,6 +1,6 @@
 ; REQUIRES: asserts
 ; RUN: llc < %s -mtriple=i686-unknown-linux -relocation-model=static -stats 2>&1 | \
-; RUN:   grep asm-printer | grep 15
+; RUN:   grep asm-printer | grep 14
 ;
 ; It's possible to schedule this in 14 instructions by avoiding
 ; callee-save registers, but the scheduler isn't currently that

Modified: llvm/trunk/test/CodeGen/X86/fold-push.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/fold-push.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/fold-push.ll (original)
+++ llvm/trunk/test/CodeGen/X86/fold-push.ll Wed Apr  6 19:05:49 2016
@@ -14,7 +14,7 @@ define void @test(i32 %a, i32 %b) optsiz
 ; SLM: movl (%esp), [[RELOAD:%e..]]
 ; SLM-NEXT: pushl [[RELOAD]]
 ; CHECK: calll
-; CHECK-NEXT: addl $4, %esp
+; CHECK-NEXT: addl $8, %esp
   %c = add i32 %a, %b
   call void @foo(i32 %c)
   call void asm sideeffect "nop", "~{ax},~{bx},~{cx},~{dx},~{bp},~{si},~{di}"()

Modified: llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll (original)
+++ llvm/trunk/test/CodeGen/X86/force-align-stack-alloca.ll Wed Apr  6 19:05:49 2016
@@ -32,15 +32,21 @@ define i64 @g(i32 %i) nounwind {
 ; CHECK:      movl   %{{...}}, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ;
-; Next we set up the memset call, and then undo it.
+; Next we set up the memset call.
 ; CHECK:      subl   $20, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
+; CHECK:      pushl
+; CHECK:      pushl
+; CHECK:      pushl
 ; CHECK:      calll  memset
-; CHECK-NEXT: addl   $32, %esp
+;
+; Deallocating 32 bytes of outgoing call frame for memset and
+; allocating 28 bytes for calling f yields a 4-byte adjustment:
+; CHECK-NEXT: addl   $4, %esp
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ;
-; Next we set up the call to 'f'.
-; CHECK:      subl   $28, %esp
+; And move on to call 'f', and then restore the stack.
+; CHECK:      pushl
 ; CHECK-NOT:         {{[^ ,]*}}, %esp
 ; CHECK:      calll  f
 ; CHECK-NEXT: addl   $32, %esp

Modified: llvm/trunk/test/CodeGen/X86/localescape.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/localescape.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/localescape.ll (original)
+++ llvm/trunk/test/CodeGen/X86/localescape.ll Wed Apr  6 19:05:49 2016
@@ -137,6 +137,5 @@ define void @alloc_func_no_frameaddr() {
 ; X86: movl $13, (%esp)
 ; X86: pushl $0
 ; X86: calll _print_framealloc_from_fp
-; X86: addl    $4, %esp
-; X86: addl    $8, %esp
+; X86: addl    $12, %esp
 ; X86: retl

Modified: llvm/trunk/test/CodeGen/X86/memset-2.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/memset-2.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/memset-2.ll (original)
+++ llvm/trunk/test/CodeGen/X86/memset-2.ll Wed Apr  6 19:05:49 2016
@@ -6,8 +6,7 @@ declare void @llvm.memset.i32(i8*, i8, i
 define fastcc void @t1() nounwind {
 ; CHECK-LABEL: t1:
 ; CHECK:       ## BB#0: ## %entry
-; CHECK-NEXT:    subl $12, %esp
-; CHECK-NEXT:    subl $4, %esp
+; CHECK-NEXT:    subl $16, %esp
 ; CHECK-NEXT:    pushl $188
 ; CHECK-NEXT:    pushl $0
 ; CHECK-NEXT:    pushl $0

Modified: llvm/trunk/test/CodeGen/X86/movtopush.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/movtopush.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/movtopush.ll (original)
+++ llvm/trunk/test/CodeGen/X86/movtopush.ll Wed Apr  6 19:05:49 2016
@@ -2,6 +2,7 @@
 ; RUN: llc < %s -mtriple=i686-windows -no-x86-call-frame-opt | FileCheck %s -check-prefix=NOPUSH
 ; RUN: llc < %s -mtriple=x86_64-windows | FileCheck %s -check-prefix=X64
 ; RUN: llc < %s -mtriple=i686-windows -stackrealign -stack-alignment=32 | FileCheck %s -check-prefix=ALIGNED
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s -check-prefix=LINUX
 
 %class.Class = type { i32 }
 %struct.s = type { i64 }
@@ -12,6 +13,10 @@ declare x86_thiscallcc void @thiscall(%c
 declare void @oneparam(i32 %a)
 declare void @eightparams(i32 %a, i32 %b, i32 %c, i32 %d, i32 %e, i32 %f, i32 %g, i32 %h)
 declare void @struct(%struct.s* byval %a, i32 %b, i32 %c, i32 %d)
+declare void @inalloca(<{ %struct.s }>* inalloca)
+
+declare i8* @llvm.stacksave()
+declare void @llvm.stackrestore(i8*)
 
 ; We should get pushes for x86, even though there is a reserved call frame.
 ; Make sure we don't touch x86-64, and that turning it off works.
@@ -223,8 +228,7 @@ entry:
 ; NORMAL-NEXT: pushl $2
 ; NORMAL-NEXT: pushl $1
 ; NORMAL-NEXT: call
-; NORMAL-NEXT: addl $16, %esp
-; NORMAL-NEXT: subl $20, %esp
+; NORMAL-NEXT: subl $4, %esp
 ; NORMAL-NEXT: movl 20(%esp), [[E1:%e..]]
 ; NORMAL-NEXT: movl 24(%esp), [[E2:%e..]]
 ; NORMAL-NEXT: movl    [[E2]], 4(%esp)
@@ -261,7 +265,7 @@ entry:
 ; NORMAL-NEXT: pushl $2
 ; NORMAL-NEXT: pushl $1
 ; NORMAL-NEXT: calll *16(%esp)
-; NORMAL-NEXT: addl $16, %esp
+; NORMAL-NEXT: addl $24, %esp
 define void @test10() optsize {
   %stack_fptr = alloca void (i32, i32, i32, i32)*
   store void (i32, i32, i32, i32)* @good, void (i32, i32, i32, i32)** %stack_fptr
@@ -314,8 +318,7 @@ entry:
 ; NORMAL-NEXT: pushl    $2
 ; NORMAL-NEXT: pushl    $1
 ; NORMAL-NEXT: calll _good
-; NORMAL-NEXT: addl    $16, %esp
-; NORMAL-NEXT: subl    $20, %esp
+; NORMAL-NEXT: subl    $4, %esp
 ; NORMAL: movl    $8, 16(%esp)
 ; NORMAL-NEXT: movl    $7, 12(%esp)
 ; NORMAL-NEXT: movl    $6, 8(%esp)
@@ -358,3 +361,54 @@ entry:
   call void @good(i32 %val1, i32 %val2, i32 %val3, i32 %add)
   ret i32* %ptr3
 }
+
+; Make sure to fold adjacent stack adjustments.
+; LINUX-LABEL: pr27140:
+; LINUX: subl    $12, %esp
+; LINUX: .cfi_def_cfa_offset 16
+; LINUX-NOT: sub
+; LINUX: pushl   $4
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $3
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $2
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: pushl   $1
+; LINUX: .cfi_adjust_cfa_offset 4
+; LINUX: calll   good
+; LINUX: addl    $28, %esp
+; LINUX: .cfi_adjust_cfa_offset -16
+; LINUX-NOT: add
+; LINUX: retl
+define void @pr27140() optsize {
+entry:
+  tail call void @good(i32 1, i32 2, i32 3, i32 4)
+  ret void
+}
+
+; Check that a stack restore (leal -4(%ebp), %esp) doesn't get merged with a
+; stack adjustment (addl $12, %esp). Just because it's a lea doesn't mean it's
+; simply decreasing the stack pointer.
+; NORMAL-LABEL: test14:
+; NORMAL: calll _B_func
+; NORMAL: leal -4(%ebp), %esp
+; NORMAL-NOT: %esp
+; NORMAL: retl
+%struct.A = type { i32, i32 }
+%struct.B = type { i8 }
+declare x86_thiscallcc %struct.B* @B_ctor(%struct.B* returned, %struct.A* byval)
+declare void @B_func(%struct.B* sret, %struct.B*, i32)
+define void @test14(%struct.A* %a) {
+entry:
+  %ref.tmp = alloca %struct.B, align 1
+  %agg.tmp = alloca i64, align 4
+  %tmpcast = bitcast i64* %agg.tmp to %struct.A*
+  %tmp = alloca %struct.B, align 1
+  %0 = bitcast %struct.A* %a to i64*
+  %1 = load i64, i64* %0, align 4
+  store i64 %1, i64* %agg.tmp, align 4
+  %call = call x86_thiscallcc %struct.B* @B_ctor(%struct.B* %ref.tmp, %struct.A* byval %tmpcast)
+  %2 = getelementptr inbounds %struct.B, %struct.B* %tmp, i32 0, i32 0
+  call void @B_func(%struct.B* sret %tmp, %struct.B* %ref.tmp, i32 1)
+  ret void
+}

Modified: llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/push-cfi-debug.ll Wed Apr  6 19:05:49 2016
@@ -23,7 +23,7 @@ declare x86_stdcallcc void @stdfoo(i32,
 ; CHECK: .cfi_adjust_cfa_offset 4
 ; CHECK: calll stdfoo
 ; CHECK: .cfi_adjust_cfa_offset -8
-; CHECK: addl $8, %esp
+; CHECK: addl $20, %esp
 ; CHECK: .cfi_adjust_cfa_offset -8
 define void @test1() #0 !dbg !4 {
 entry:

Modified: llvm/trunk/test/CodeGen/X86/push-cfi.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/push-cfi.ll?rev=265623&r1=265622&r2=265623&view=diff
==============================================================================
--- llvm/trunk/test/CodeGen/X86/push-cfi.ll (original)
+++ llvm/trunk/test/CodeGen/X86/push-cfi.ll Wed Apr  6 19:05:49 2016
@@ -82,7 +82,7 @@ cleanup:
 ; LINUX-NEXT: Ltmp{{[0-9]+}}:
 ; LINUX-NEXT: .cfi_adjust_cfa_offset 4
 ; LINUX-NEXT: call
-; LINUX-NEXT: addl $16, %esp
+; LINUX-NEXT: addl $28, %esp
 ; LINUX: .cfi_adjust_cfa_offset -16
 ; DARWIN-NOT: .cfi_escape
 ; DARWIN-NOT: pushl




More information about the llvm-commits mailing list