[llvm-commits] [llvm] r102798 - in /llvm/trunk: lib/Analysis/InlineCost.cpp test/Transforms/Inline/2009-01-12-RecursiveInline.ll test/Transforms/Inline/noinline-recursive-fn.ll

Fri Apr 30 15:37:22 PDT 2010

Author: lattner
Date: Fri Apr 30 17:37:22 2010
New Revision: 102798

URL: http://llvm.org/viewvc/llvm-project?rev=102798&view=rev
Log:
Dan recently disabled recursive inlining within a function, but we
were still inlining self-recursive functions into other functions.

Inlining a recursive function into itself has the potential to
reduce recursion depth by a factor of 2, whereas inlining a recursive
function into something else reduces recursion depth by exactly
1.  Since inlining a recursive function into something else is a
weird form of loop peeling, turn this off.

The deleted testcase was added by Dale in r62107; since then
we have been leaning towards not inlining recursive stuff ever.  In any
case, if we like inlining recursive stuff, it should be done
within the recursive function itself to get the algorithmic
recursion depth win.


Added:
    llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll
Removed:
    llvm/trunk/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
Modified:
    llvm/trunk/lib/Analysis/InlineCost.cpp

Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=102798&r1=102797&r2=102798&view=diff
==============================================================================

--- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
+++ llvm/trunk/lib/Analysis/InlineCost.cpp Fri Apr 30 17:37:22 2010
@@ -159,10 +159,18 @@
       // it.  This is a hack because we depend on the user marking their local
       // variables as volatile if they are live across a setjmp call, and they
       // probably won't do this in callers.
-      if (Function *F = CS.getCalledFunction())
+      if (Function *F = CS.getCalledFunction()) {
         if (F->isDeclaration() && 
             (F->getName() == "setjmp" || F->getName() == "_setjmp"))
           NeverInline = true;
+       
+        // If this call is to function itself, then the function is recursive.
+        // Inlining it into other functions is a bad idea, because this is
+        // basically just a form of loop peeling, and our metrics aren't useful
+        // for that case.
+        if (F == BB->getParent())
+          NeverInline = true;
+      }
 
       if (!isa<IntrinsicInst>(II) && !callIsSmall(CS.getCalledFunction())) {
         // Each argument to a call takes on average one instruction to set up.

Removed: llvm/trunk/test/Transforms/Inline/2009-01-12-RecursiveInline.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/2009-01-12-RecursiveInline.ll?rev=102797&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/2009-01-12-RecursiveInline.ll (original)
+++ llvm/trunk/test/Transforms/Inline/2009-01-12-RecursiveInline.ll (removed)
@@ -1,92 +0,0 @@
-; RUN: opt < %s -inline -S | grep {call.*fib} | count 4
-; First call to fib from fib is inlined, producing 2 instead of 1, total 3.
-; Second call to fib from fib is not inlined because new body of fib exceeds
-; inlining limit of 200.  Plus call in main = 4 total.
-
-; ModuleID = '<stdin>'
-target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
-target triple = "i386-apple-darwin9.6"
-@"\01LC" = internal constant [5 x i8] c"%ld\0A\00"		; <[5 x i8]*> [#uses=1]
-
-define i32 @fib(i32 %n) nounwind {
-entry:
-	%n_addr = alloca i32		; <i32*> [#uses=4]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %n, i32* %n_addr
-	%1 = load i32* %n_addr, align 4		; <i32> [#uses=1]
-	%2 = icmp ule i32 %1, 1		; <i1> [#uses=1]
-	br i1 %2, label %bb, label %bb1
-
-bb:		; preds = %entry
-	store i32 1, i32* %0, align 4
-	br label %bb2
-
-bb1:		; preds = %entry
-	%3 = load i32* %n_addr, align 4		; <i32> [#uses=1]
-	%4 = sub i32 %3, 2		; <i32> [#uses=1]
-	%5 = call i32 @fib(i32 %4) nounwind		; <i32> [#uses=1]
-	%6 = load i32* %n_addr, align 4		; <i32> [#uses=1]
-	%7 = sub i32 %6, 1		; <i32> [#uses=1]
-	%8 = call i32 @fib(i32 %7) nounwind		; <i32> [#uses=1]
-	%9 = add i32 %5, %8		; <i32> [#uses=1]
-	store i32 %9, i32* %0, align 4
-	br label %bb2
-
-bb2:		; preds = %bb1, %bb
-	%10 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %10, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %bb2
-	%retval3 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval3
-}
-
-define i32 @main(i32 %argc, i8** %argv) nounwind {
-entry:
-	%argc_addr = alloca i32		; <i32*> [#uses=2]
-	%argv_addr = alloca i8**		; <i8***> [#uses=2]
-	%retval = alloca i32		; <i32*> [#uses=2]
-	%N = alloca i32		; <i32*> [#uses=2]
-	%0 = alloca i32		; <i32*> [#uses=2]
-	%iftmp.0 = alloca i32		; <i32*> [#uses=3]
-	%"alloca point" = bitcast i32 0 to i32		; <i32> [#uses=0]
-	store i32 %argc, i32* %argc_addr
-	store i8** %argv, i8*** %argv_addr
-	%1 = load i32* %argc_addr, align 4		; <i32> [#uses=1]
-	%2 = icmp eq i32 %1, 2		; <i1> [#uses=1]
-	br i1 %2, label %bb, label %bb1
-
-bb:		; preds = %entry
-	%3 = load i8*** %argv_addr, align 4		; <i8**> [#uses=1]
-	%4 = getelementptr i8** %3, i32 1		; <i8**> [#uses=1]
-	%5 = load i8** %4, align 4		; <i8*> [#uses=1]
-	%6 = call i32 @atoi(i8* %5) nounwind		; <i32> [#uses=1]
-	store i32 %6, i32* %iftmp.0, align 4
-	br label %bb2
-
-bb1:		; preds = %entry
-	store i32 43, i32* %iftmp.0, align 4
-	br label %bb2
-
-bb2:		; preds = %bb1, %bb
-	%7 = load i32* %iftmp.0, align 4		; <i32> [#uses=1]
-	store i32 %7, i32* %N, align 4
-	%8 = load i32* %N, align 4		; <i32> [#uses=1]
-	%9 = call i32 @fib(i32 %8) nounwind		; <i32> [#uses=1]
-	%10 = call i32 (i8*, ...)* @printf(i8* getelementptr ([5 x i8]* @"\01LC", i32 0, i32 0), i32 %9) nounwind		; <i32> [#uses=0]
-	store i32 0, i32* %0, align 4
-	%11 = load i32* %0, align 4		; <i32> [#uses=1]
-	store i32 %11, i32* %retval, align 4
-	br label %return
-
-return:		; preds = %bb2
-	%retval3 = load i32* %retval		; <i32> [#uses=1]
-	ret i32 %retval3
-}
-
-declare i32 @atoi(i8*)
-
-declare i32 @printf(i8*, ...) nounwind

Added: llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll?rev=102798&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll (added)
+++ llvm/trunk/test/Transforms/Inline/noinline-recursive-fn.ll Fri Apr 30 17:37:22 2010
@@ -0,0 +1,32 @@
+; The inliner should never inline recursive functions into other functions.
+; This effectively is just peeling off the first iteration of a loop, and the
+; inliner heuristics are not set up for this.
+
+; RUN: opt -inline %s -S | grep "call void @foo(i32 42)"
+
+target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64"
+target triple = "x86_64-apple-darwin10.3"
+
+@g = common global i32 0                          ; <i32*> [#uses=1]
+
+define internal void @foo(i32 %x) nounwind ssp {
+entry:
+  %"alloca point" = bitcast i32 0 to i32          ; <i32> [#uses=0]
+  %0 = icmp slt i32 %x, 0                         ; <i1> [#uses=1]
+  br i1 %0, label %return, label %bb
+
+bb:                                               ; preds = %entry
+  %1 = sub nsw i32 %x, 1                          ; <i32> [#uses=1]
+  call void @foo(i32 %1) nounwind ssp
+  volatile store i32 1, i32* @g, align 4
+  ret void
+
+return:                                           ; preds = %entry
+  ret void
+}
+
+define void @bonk() nounwind ssp {
+entry:
+  call void @foo(i32 42) nounwind ssp
+  ret void
+}