[llvm-commits] [llvm] r83602 - in /llvm/trunk: lib/Transforms/IPO/Inliner.cpp test/Transforms/Inline/nested-inline.ll

Dale Johannesen dalej at apple.com
Thu Oct 8 17:11:32 PDT 2009


Author: johannes
Date: Thu Oct  8 19:11:32 2009
New Revision: 83602

URL: http://llvm.org/viewvc/llvm-project?rev=83602&view=rev
Log:
When considering whether to inline Callee into Caller,
and that will make Caller too big to inline, see if it
might be better to inline Caller into its callers instead.
This situation is described in PR 2973, although I haven't
tried the specific case in SPASS.


Added:
    llvm/trunk/test/Transforms/Inline/nested-inline.ll
Modified:
    llvm/trunk/lib/Transforms/IPO/Inliner.cpp

Modified: llvm/trunk/lib/Transforms/IPO/Inliner.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/Inliner.cpp?rev=83602&r1=83601&r2=83602&view=diff

==============================================================================
--- llvm/trunk/lib/Transforms/IPO/Inliner.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/Inliner.cpp Thu Oct  8 19:11:32 2009
@@ -189,9 +189,9 @@
   
   int Cost = IC.getValue();
   int CurrentThreshold = InlineThreshold;
-  Function *Fn = CS.getCaller();
-  if (Fn && !Fn->isDeclaration() &&
-      Fn->hasFnAttr(Attribute::OptimizeForSize) &&
+  Function *Caller = CS.getCaller();
+  if (Caller && !Caller->isDeclaration() &&
+      Caller->hasFnAttr(Attribute::OptimizeForSize) &&
       InlineLimit.getNumOccurrences() == 0 &&
       InlineThreshold != 50)
     CurrentThreshold = 50;
@@ -203,8 +203,72 @@
     return false;
   }
   
+  // Try to detect the case where the current inlining candidate caller
+  // (call it B) is a static function and is an inlining candidate elsewhere,
+  // and the current candidate callee (call it C) is large enough that
+  // inlining it into B would make B too big to inline later.  In these
+  // circumstances it may be best not to inline C into B, but to inline B
+  // into its callers.
+  if (Caller->hasLocalLinkage()) {
+    int TotalSecondaryCost = 0;
+    bool outerCallsFound = false;
+    bool allOuterCallsWillBeInlined = true;
+    bool someOuterCallWouldNotBeInlined = false;
+    for (Value::use_iterator I = Caller->use_begin(), E =Caller->use_end(); 
+         I != E; ++I) {
+      CallSite CS2 = CallSite::get(*I);
+
+      // If this isn't a call to Caller (it could be some other sort
+      // of reference) skip it.
+      if (CS2.getInstruction() == 0 || CS2.getCalledFunction() != Caller)
+        continue;
+
+      InlineCost IC2 = getInlineCost(CS2);
+      if (IC2.isNever())
+        allOuterCallsWillBeInlined = false;
+      if (IC2.isAlways() || IC2.isNever())
+        continue;
+
+      outerCallsFound = true;
+      int Cost2 = IC2.getValue();
+      int CurrentThreshold2 = InlineThreshold;
+      Function *Caller2 = CS2.getCaller();
+      if (Caller2 && !Caller2->isDeclaration() &&
+          Caller2->hasFnAttr(Attribute::OptimizeForSize) &&
+          InlineThreshold != 50)
+        CurrentThreshold2 = 50;
+
+      float FudgeFactor2 = getInlineFudgeFactor(CS2);
+
+      if (Cost2 >= (int)(CurrentThreshold2 * FudgeFactor2))
+        allOuterCallsWillBeInlined = false;
+
+      // See if we have this case.  The magic 6 is what InlineCost assigns
+      // for the call instruction, which we would be deleting.
+      if (Cost2 < (int)(CurrentThreshold2 * FudgeFactor2) &&
+          Cost2 + Cost - 6 >= (int)(CurrentThreshold2 * FudgeFactor2)) {
+        someOuterCallWouldNotBeInlined = true;
+        TotalSecondaryCost += Cost2;
+      }
+    }
+    // If all outer calls to Caller would get inlined, the cost for the last
+    // one is set very low by getInlineCost, in anticipation that Caller will
+    // be removed entirely.  We did not account for this above unless there
+    // is only one caller of Caller.
+    if (allOuterCallsWillBeInlined && Caller->use_begin() != Caller->use_end())
+      TotalSecondaryCost -= 15000;
+
+    if (outerCallsFound && someOuterCallWouldNotBeInlined && 
+        TotalSecondaryCost < Cost) {
+      DEBUG(errs() << "    NOT Inlining: " << *CS.getInstruction() << 
+           " Cost = " << Cost << 
+           ", outer Cost = " << TotalSecondaryCost << '\n');
+      return false;
+    }
+  }
+
   DEBUG(errs() << "    Inlining: cost=" << Cost
-        << ", Call: " << *CS.getInstruction() << "\n");
+        << ", Call: " << *CS.getInstruction() << '\n');
   return true;
 }
 
@@ -232,7 +296,7 @@
     for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB)
       for (BasicBlock::iterator I = BB->begin(), E = BB->end(); I != E; ++I) {
         CallSite CS = CallSite::get(I);
-        // If this this isn't a call, or it is a call to an intrinsic, it can
+        // If this isn't a call, or it is a call to an intrinsic, it can
         // never be inlined.
         if (CS.getInstruction() == 0 || isa<IntrinsicInst>(I))
           continue;
@@ -279,7 +343,7 @@
       // try to do so.
       if (!shouldInline(CS))
         continue;
-      
+
       Function *Caller = CS.getCaller();
       // Attempt to inline the function...
       if (!InlineCallIfPossible(CS, CG, TD, InlinedArrayAllocas))

Added: llvm/trunk/test/Transforms/Inline/nested-inline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/nested-inline.ll?rev=83602&view=auto

==============================================================================
--- llvm/trunk/test/Transforms/Inline/nested-inline.ll (added)
+++ llvm/trunk/test/Transforms/Inline/nested-inline.ll Thu Oct  8 19:11:32 2009
@@ -0,0 +1,111 @@
+; RUN: opt < %s -inline -S | FileCheck %s
+; Test that bar and bar2 are both inlined throughout and removed.
+@A = weak global i32 0		; <i32*> [#uses=1]
+@B = weak global i32 0		; <i32*> [#uses=1]
+@C = weak global i32 0		; <i32*> [#uses=1]
+
+define fastcc void @foo(i32 %X) {
+entry:
+; CHECK: @foo
+	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
+	%tmp1 = and i32 %X, 1		; <i32> [#uses=1]
+	%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true:		; preds = %entry
+	store i32 1, i32* @A
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4 = and i32 %X, 2		; <i32> [#uses=1]
+	%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0		; <i1> [#uses=1]
+	br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5:		; preds = %cond_next
+	store i32 1, i32* @B
+	br label %cond_next7
+
+cond_next7:		; preds = %cond_true5, %cond_next
+	%tmp10 = and i32 %X, 4		; <i32> [#uses=1]
+	%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0		; <i1> [#uses=1]
+	br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11:		; preds = %cond_next7
+	store i32 1, i32* @C
+	br label %cond_next13
+
+cond_next13:		; preds = %cond_true11, %cond_next7
+	%tmp16 = and i32 %X, 8		; <i32> [#uses=1]
+	%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0		; <i1> [#uses=1]
+	br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17:		; preds = %cond_next13
+	call void @ext( i32* %ALL )
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next13
+	ret void
+}
+
+; CHECK-NOT: @bar
+define internal fastcc void @bar(i32 %X) {
+entry:
+	%ALL = alloca i32, align 4		; <i32*> [#uses=1]
+	%tmp1 = and i32 %X, 1		; <i32> [#uses=1]
+	%tmp1.upgrd.1 = icmp eq i32 %tmp1, 0		; <i1> [#uses=1]
+	br i1 %tmp1.upgrd.1, label %cond_next, label %cond_true
+
+cond_true:		; preds = %entry
+	store i32 1, i32* @A
+	br label %cond_next
+
+cond_next:		; preds = %cond_true, %entry
+	%tmp4 = and i32 %X, 2		; <i32> [#uses=1]
+	%tmp4.upgrd.2 = icmp eq i32 %tmp4, 0		; <i1> [#uses=1]
+	br i1 %tmp4.upgrd.2, label %cond_next7, label %cond_true5
+
+cond_true5:		; preds = %cond_next
+	store i32 1, i32* @B
+	br label %cond_next7
+
+cond_next7:		; preds = %cond_true5, %cond_next
+	%tmp10 = and i32 %X, 4		; <i32> [#uses=1]
+	%tmp10.upgrd.3 = icmp eq i32 %tmp10, 0		; <i1> [#uses=1]
+	br i1 %tmp10.upgrd.3, label %cond_next13, label %cond_true11
+
+cond_true11:		; preds = %cond_next7
+	store i32 1, i32* @C
+	br label %cond_next13
+
+cond_next13:		; preds = %cond_true11, %cond_next7
+	%tmp16 = and i32 %X, 8		; <i32> [#uses=1]
+	%tmp16.upgrd.4 = icmp eq i32 %tmp16, 0		; <i1> [#uses=1]
+	br i1 %tmp16.upgrd.4, label %UnifiedReturnBlock, label %cond_true17
+
+cond_true17:		; preds = %cond_next13
+	call void @foo( i32 %X )
+	ret void
+
+UnifiedReturnBlock:		; preds = %cond_next13
+	ret void
+}
+
+define internal fastcc void @bar2(i32 %X) {
+entry:
+	call void @foo( i32 %X )
+	ret void
+}
+
+declare void @ext(i32*)
+
+define void @test(i32 %X) {
+entry:
+; CHECK: test
+; CHECK-NOT: @bar
+	tail call fastcc void @bar( i32 %X )
+	tail call fastcc void @bar( i32 %X )
+	tail call fastcc void @bar2( i32 %X )
+	tail call fastcc void @bar2( i32 %X )
+	ret void
+; CHECK: ret
+}





More information about the llvm-commits mailing list