[llvm-commits] [llvm] r85441 - in /llvm/trunk: lib/CodeGen/BranchFolding.cpp test/CodeGen/X86/2008-02-18-TailMergingBug.ll test/CodeGen/X86/2008-05-12-tailmerge-5.ll test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll

Wed Oct 28 15:10:21 PDT 2009

Author: bwilson
Date: Wed Oct 28 17:10:20 2009
New Revision: 85441

URL: http://llvm.org/viewvc/llvm-project?rev=85441&view=rev
Log:
Reimplement the BranchFolding change to avoid tail merging for a 1-instruction
common tail, except when the OptimizeForSize function attribute is present.
Radar 7338114.

Modified:
    llvm/trunk/lib/CodeGen/BranchFolding.cpp
    llvm/trunk/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
    llvm/trunk/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
    llvm/trunk/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll

Modified: llvm/trunk/lib/CodeGen/BranchFolding.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/BranchFolding.cpp?rev=85441&r1=85440&r2=85441&view=diff

==============================================================================

--- llvm/trunk/lib/CodeGen/BranchFolding.cpp (original)
+++ llvm/trunk/lib/CodeGen/BranchFolding.cpp Wed Oct 28 17:10:20 2009
@@ -18,6 +18,7 @@
 
 #define DEBUG_TYPE "branchfolding"
 #include "BranchFolding.h"
+#include "llvm/Function.h"
 #include "llvm/CodeGen/Passes.h"
 #include "llvm/CodeGen/MachineModuleInfo.h"
 #include "llvm/CodeGen/MachineFunctionPass.h"
@@ -465,22 +466,23 @@
        CurMPIter!=B && CurMPIter->first==CurHash;
        --CurMPIter) {
     for (MPIterator I = prior(CurMPIter); I->first==CurHash ; --I) {
-      unsigned CommonTailLen = ComputeCommonTailLength(
-                                        CurMPIter->second,
-                                        I->second,
-                                        TrialBBI1, TrialBBI2);
+      unsigned CommonTailLen = ComputeCommonTailLength(CurMPIter->second,
+                                                       I->second,
+                                                       TrialBBI1, TrialBBI2);
       // If we will have to split a block, there should be at least
-      // minCommonTailLength instructions in common; if not, at worst
-      // we will be replacing a fallthrough into the common tail with a
-      // branch, which at worst breaks even with falling through into
-      // the duplicated common tail, so 1 instruction in common is enough.
-      // We will always pick a block we do not have to split as the common
-      // tail if there is one.
-      // (Empty blocks will get forwarded and need not be considered.)
+      // minCommonTailLength instructions in common.  Otherwise, if we are
+      // optimizing for code size, 1 instruction in common is enough.  At
+      // worst we will be replacing a fallthrough into the common tail with a
+      // branch, which at worst breaks even with falling through into the
+      // duplicated common tail.  We will always pick a block we do not have
+      // to split as the common tail if there is one.  (Empty blocks will get
+      // forwarded and need not be considered.)
+      MachineFunction *MF = CurMPIter->second->getParent();
       if (CommonTailLen >= minCommonTailLength ||
           (CommonTailLen > 0 &&
-           (TrialBBI1==CurMPIter->second->begin() ||
-            TrialBBI2==I->second->begin()))) {
+           MF->getFunction()->hasFnAttr(Attribute::OptimizeForSize) &&
+           (TrialBBI1 == CurMPIter->second->begin() ||
+            TrialBBI2 == I->second->begin()))) {
         if (CommonTailLen > maxCommonTailLength) {
           SameTails.clear();
           maxCommonTailLength = CommonTailLen;

Modified: llvm/trunk/test/CodeGen/X86/2008-02-18-TailMergingBug.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-02-18-TailMergingBug.ll?rev=85441&r1=85440&r2=85441&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-02-18-TailMergingBug.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-02-18-TailMergingBug.ll Wed Oct 28 17:10:20 2009
@@ -3,7 +3,7 @@
 
 @.str = internal constant [48 x i8] c"transformed bounds: (%.2f, %.2f), (%.2f, %.2f)\0A\00"		; <[48 x i8]*> [#uses=1]
 
-define void @minmax(float* %result) nounwind  {
+define void @minmax(float* %result) nounwind optsize {
 entry:
 	%tmp2 = load float* %result, align 4		; <float> [#uses=6]
 	%tmp4 = getelementptr float* %result, i32 2		; <float*> [#uses=5]

Modified: llvm/trunk/test/CodeGen/X86/2008-05-12-tailmerge-5.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/2008-05-12-tailmerge-5.ll?rev=85441&r1=85440&r2=85441&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/2008-05-12-tailmerge-5.ll (original)
+++ llvm/trunk/test/CodeGen/X86/2008-05-12-tailmerge-5.ll Wed Oct 28 17:10:20 2009
@@ -6,7 +6,7 @@
 target triple = "x86_64-apple-darwin8"
 	%struct.BoundaryAlignment = type { [3 x i8], i8, i16, i16, i8, [2 x i8] }
 
-define void @passing2(i64 %str.0, i64 %str.1, i16 signext  %s, i32 %j, i8 signext  %c, i16 signext  %t, i16 signext  %u, i8 signext  %d) nounwind  {
+define void @passing2(i64 %str.0, i64 %str.1, i16 signext  %s, i32 %j, i8 signext  %c, i16 signext  %t, i16 signext  %u, i8 signext  %d) nounwind optsize {
 entry:
 	%str_addr = alloca %struct.BoundaryAlignment		; <%struct.BoundaryAlignment*> [#uses=7]
 	%s_addr = alloca i16		; <i16*> [#uses=1]

Modified: llvm/trunk/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll?rev=85441&r1=85440&r2=85441&view=diff

==============================================================================
--- llvm/trunk/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll (original)
+++ llvm/trunk/test/CodeGen/X86/convert-2-addr-3-addr-inc64.ll Wed Oct 28 17:10:20 2009
@@ -2,7 +2,7 @@
 ; RUN:   grep {asm-printer} | grep {Number of machine instrs printed} | grep 5
 ; RUN: grep {leal	1(\%rsi),} %t
 
-define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind {
+define fastcc zeroext i8 @fullGtU(i32 %i1, i32 %i2) nounwind optsize {
 entry:
   %0 = add i32 %i2, 1           ; <i32> [#uses=1]
   %1 = sext i32 %0 to i64               ; <i64> [#uses=1]