[llvm-commits] [llvm] r164207 - in /llvm/trunk: include/llvm/Analysis/InlineCost.h lib/Analysis/InlineCost.cpp test/Transforms/Inline/recurseive.ll

Nick Lewycky nicholas at mxc.ca
Wed Sep 19 02:15:04 PDT 2012


Nadav Rotem wrote:
> Author: nadav
> Date: Wed Sep 19 03:08:04 2012
> New Revision: 164207
>
> URL: http://llvm.org/viewvc/llvm-project?rev=164207&view=rev
> Log:
> Prevent inlining of callees which allocate lots of memory into a recursive caller.
>
> Example:
>
> void foo() {
>   ... foo();   // I'm recursive!
>
>    bar();
> }
>
> bar() {  int a[1000];  // large stack size }
>
> rdar://10853263
>
>
> Added:
>      llvm/trunk/test/Transforms/Inline/recurseive.ll

Typo in test name.

> Modified:
>      llvm/trunk/include/llvm/Analysis/InlineCost.h
>      llvm/trunk/lib/Analysis/InlineCost.cpp
>
> Modified: llvm/trunk/include/llvm/Analysis/InlineCost.h
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/Analysis/InlineCost.h?rev=164207&r1=164206&r2=164207&view=diff
> ==============================================================================
> --- llvm/trunk/include/llvm/Analysis/InlineCost.h (original)
> +++ llvm/trunk/include/llvm/Analysis/InlineCost.h Wed Sep 19 03:08:04 2012
> @@ -36,6 +36,9 @@
>       const int LastCallToStaticBonus = -15000;
>       const int ColdccPenalty = 2000;
>       const int NoreturnPenalty = 10000;
> +    /// Do not inline functions which allocate this many bytes on the stack
> +    /// when the caller is recursive.
> +    const int TotalAllocaSizeRecursiveCaller = 1024;

Fundamentally, I don't like this approach. What does it matter whether 
the function is being inlined recursively or whether we're examining a 
1000-function deep chain where each fn allocates a huge amount of stack 
space?

We should either have a cut-off for "bytes of stack allocated that we 
can't reclaim by stack slot coloring", or else there should be a penalty 
for stack allocations (maybe starting above a given size) that can't be 
optimized away. Recursiveness doesn't enter into it.

If you think I'm mistaken and that recuriveness is a special case, then 
as a thought puzzle, consider what it means when the function is 
recursive but passes entirely different arguments to itself. Do you have 
any interest in analyzing the callee to see if it's a leaf-node in the 
recursion, based on the parameters being passed at the call-site? Is 
this at all different from calling an entirely different function?

Nick

>     }
>
>     /// \brief Represents the cost of inlining a function.
>
> Modified: llvm/trunk/lib/Analysis/InlineCost.cpp
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Analysis/InlineCost.cpp?rev=164207&r1=164206&r2=164207&view=diff
> ==============================================================================
> --- llvm/trunk/lib/Analysis/InlineCost.cpp (original)
> +++ llvm/trunk/lib/Analysis/InlineCost.cpp Wed Sep 19 03:08:04 2012
> @@ -51,9 +51,12 @@
>     int Cost;
>     const bool AlwaysInline;
>
> -  bool IsRecursive;
> +  bool IsCallerRecursive;
> +  bool IsRecursiveCall;
>     bool ExposesReturnsTwice;
>     bool HasDynamicAlloca;
> +  /// Number of bytes allocated statically by the callee.
> +  uint64_t AllocatedSize;
>     unsigned NumInstructions, NumVectorInstructions;
>     int FiftyPercentVectorBonus, TenPercentVectorBonus;
>     int VectorBonus;
> @@ -126,7 +129,8 @@
>     CallAnalyzer(const TargetData *TD, Function&Callee, int Threshold)
>       : TD(TD), F(Callee), Threshold(Threshold), Cost(0),
>         AlwaysInline(F.hasFnAttr(Attribute::AlwaysInline)),
> -      IsRecursive(false), ExposesReturnsTwice(false), HasDynamicAlloca(false),
> +      IsCallerRecursive(false), IsRecursiveCall(false),
> +      ExposesReturnsTwice(false), HasDynamicAlloca(false), AllocatedSize(0),
>         NumInstructions(0), NumVectorInstructions(0),
>         FiftyPercentVectorBonus(0), TenPercentVectorBonus(0), VectorBonus(0),
>         NumConstantArgs(0), NumConstantOffsetPtrArgs(0), NumAllocaArgs(0),
> @@ -269,6 +273,13 @@
>     // FIXME: Check whether inlining will turn a dynamic alloca into a static
>     // alloca, and handle that case.
>
> +  // Accumulate the allocated size.
> +  if (I.isStaticAlloca()) {
> +    Type *Ty = I.getAllocatedType();
> +    AllocatedSize += (TD ? TD->getTypeAllocSize(Ty) :
> +                      Ty->getPrimitiveSizeInBits());
> +  }
> +
>     // We will happily inline static alloca instructions or dynamic alloca
>     // instructions in always-inline situations.
>     if (AlwaysInline || I.isStaticAlloca())
> @@ -625,7 +636,7 @@
>       if (F == CS.getInstruction()->getParent()->getParent()) {
>         // This flag will fully abort the analysis, so don't bother with anything
>         // else.
> -      IsRecursive = true;
> +      IsRecursiveCall = true;
>         return false;
>       }
>
> @@ -712,7 +723,14 @@
>         Cost += InlineConstants::InstrCost;
>
>       // If the visit this instruction detected an uninlinable pattern, abort.
> -    if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
> +    if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
> +      return false;
> +
> +    // If the caller is a recursive function then we don't want to inline
> +    // functions which allocate a lot of stack space because it would increase
> +    // the caller stack usage dramatically.
> +    if (IsCallerRecursive&&
> +        AllocatedSize>  InlineConstants::TotalAllocaSizeRecursiveCaller)
>         return false;
>
>       if (NumVectorInstructions>  NumInstructions/2)
> @@ -831,12 +849,14 @@
>         Cost += InlineConstants::LastCallToStaticBonus;
>
>       // If the instruction after the call, or if the normal destination of the
> -    // invoke is an unreachable instruction, the function is noreturn.  As such,
> -    // there is little point in inlining this unless there is literally zero cost.
> -    if (InvokeInst *II = dyn_cast<InvokeInst>(CS.getInstruction())) {
> +    // invoke is an unreachable instruction, the function is noreturn. As such,
> +    // there is little point in inlining this unless there is literally zero
> +    // cost.
> +    Instruction *Instr = CS.getInstruction();
> +    if (InvokeInst *II = dyn_cast<InvokeInst>(Instr)) {
>         if (isa<UnreachableInst>(II->getNormalDest()->begin()))
>           Threshold = 1;
> -    } else if (isa<UnreachableInst>(++BasicBlock::iterator(CS.getInstruction())))
> +    } else if (isa<UnreachableInst>(++BasicBlock::iterator(Instr)))
>         Threshold = 1;
>
>       // If this function uses the coldcc calling convention, prefer not to inline
> @@ -852,6 +872,20 @@
>     if (F.empty())
>       return true;
>
> +  Function *Caller = CS.getInstruction()->getParent()->getParent();
> +  // Check if the caller function is recursive itself.
> +  for (Value::use_iterator U = Caller->use_begin(), E = Caller->use_end();
> +       U != E; ++U) {
> +    CallSite Site(cast<Value>(*U));
> +    if (!Site)
> +      continue;
> +    Instruction *I = Site.getInstruction();
> +    if (I->getParent()->getParent() == Caller) {
> +      IsCallerRecursive = true;
> +      break;
> +    }
> +  }
> +
>     // Track whether we've seen a return instruction. The first return
>     // instruction is free, as at least one will usually disappear in inlining.
>     bool HasReturn = false;
> @@ -908,9 +942,9 @@
>
>       // We never want to inline functions that contain an indirectbr.  This is
>       // incorrect because all the blockaddress's (in static global initializers
> -    // for example) would be referring to the original function, and this indirect
> -    // jump would jump from the inlined copy of the function into the original
> -    // function which is extremely undefined behavior.
> +    // for example) would be referring to the original function, and this
> +    // indirect jump would jump from the inlined copy of the function into the
> +    // original function which is extremely undefined behavior.
>       // FIXME: This logic isn't really right; we can safely inline functions
>       // with indirectbr's as long as no other function or global references the
>       // blockaddress of a block within the current function.  And as a QOI issue,
> @@ -928,8 +962,16 @@
>       // Analyze the cost of this block. If we blow through the threshold, this
>       // returns false, and we can bail on out.
>       if (!analyzeBlock(BB)) {
> -      if (IsRecursive || ExposesReturnsTwice || HasDynamicAlloca)
> +      if (IsRecursiveCall || ExposesReturnsTwice || HasDynamicAlloca)
>           return false;
> +
> +      // If the caller is a recursive function then we don't want to inline
> +      // functions which allocate a lot of stack space because it would increase
> +      // the caller stack usage dramatically.
> +      if (IsCallerRecursive&&
> +          AllocatedSize>  InlineConstants::TotalAllocaSizeRecursiveCaller)
> +        return false;
> +
>         break;
>       }
>
> @@ -955,7 +997,8 @@
>
>       // If we're unable to select a particular successor, just count all of
>       // them.
> -    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize; ++TIdx)
> +    for (unsigned TIdx = 0, TSize = TI->getNumSuccessors(); TIdx != TSize;
> +         ++TIdx)
>         BBWorklist.insert(TI->getSuccessor(TIdx));
>
>       // If we had any successors at this point, than post-inlining is likely to
> @@ -1003,7 +1046,8 @@
>         Callee->hasFnAttr(Attribute::NoInline) || CS.isNoInline())
>       return llvm::InlineCost::getNever();
>
> -  DEBUG(llvm::dbgs()<<  "      Analyzing call of "<<  Callee->getName()<<  "...\n");
> +  DEBUG(llvm::dbgs()<<  "      Analyzing call of "<<  Callee->getName()
> +<<  "...\n");
>
>     CallAnalyzer CA(TD, *Callee, Threshold);
>     bool ShouldInline = CA.analyzeCall(CS);
>
> Added: llvm/trunk/test/Transforms/Inline/recurseive.ll
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/recurseive.ll?rev=164207&view=auto
> ==============================================================================
> --- llvm/trunk/test/Transforms/Inline/recurseive.ll (added)
> +++ llvm/trunk/test/Transforms/Inline/recurseive.ll Wed Sep 19 03:08:04 2012
> @@ -0,0 +1,38 @@
> +; RUN: opt %s -inline -S | FileCheck %s
> +
> +target datalayout = "e-p:32:32:32-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:32:64-f32:32:32-f64:32:64-v64:64:64-v128:128:128-a0:0:64-f80:128:128"
> +target triple = "i386-apple-darwin10.0"
> +
> +; rdar://10853263
> +
> +; Make sure that the callee is still here.
> +; CHECK: define i32 @callee
> +define i32 @callee(i32 %param) {
> + %yyy = alloca [100000 x i8]
> + %r = bitcast [100000 x i8]* %yyy to i8*
> + call void @foo2(i8* %r)
> + ret i32 4
> +}
> +
> +; CHECK: define i32 @caller
> +; CHECK-NEXT: entry:
> +; CHECK-NOT: alloca
> +; CHECK: ret
> +define i32 @caller(i32 %param) {
> +entry:
> +  %t = call i32 @foo(i32 %param)
> +  %cmp = icmp eq i32 %t, -1
> +  br i1 %cmp, label %exit, label %cont
> +
> +cont:
> +  %r = call i32 @caller(i32 %t)
> +  %f = call i32 @callee(i32 %r)
> +  br label %cont
> +exit:
> +  ret i32 4
> +}
> +
> +declare void @foo2(i8* %in)
> +
> +declare i32 @foo(i32 %param)
> +
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at cs.uiuc.edu
> http://lists.cs.uiuc.edu/mailman/listinfo/llvm-commits
>




More information about the llvm-commits mailing list