[llvm] r304599 - [PartialInlining] Minor cost analysis tuning

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Fri Jun 2 15:08:04 PDT 2017


Author: davidxl
Date: Fri Jun  2 17:08:04 2017
New Revision: 304599

URL: http://llvm.org/viewvc/llvm-project?rev=304599&view=rev
Log:
[PartialInlining] Minor cost analysis tuning

Also added a test option and 2 cost analysis related tests.

Added:
    llvm/trunk/test/Transforms/CodeExtractor/cost.ll
    llvm/trunk/test/Transforms/CodeExtractor/cost_meta.ll
Modified:
    llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp

Modified: llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp?rev=304599&r1=304598&r2=304599&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/PartialInlining.cpp Fri Jun  2 17:08:04 2017
@@ -68,6 +68,10 @@ static cl::opt<int>
                              cl::desc("Relative frequency of outline region to "
                                       "the entry block"));
 
+static cl::opt<unsigned> ExtraOutliningPenalty(
+    "partial-inlining-extra-penalty", cl::init(0), cl::Hidden,
+    cl::desc("A debug option to add additional penalty to the computed one."));
+
 namespace {
 
 struct FunctionOutliningInfo {
@@ -83,7 +87,7 @@ struct FunctionOutliningInfo {
   SmallVector<BasicBlock *, 4> Entries;
   // The return block that is not included in the outlined region.
   BasicBlock *ReturnBlock;
-  // The dominating block of the region ot be outlined.
+  // The dominating block of the region to be outlined.
   BasicBlock *NonReturnBlock;
   // The set of blocks in Entries that that are predecessors to ReturnBlock
   SmallVector<BasicBlock *, 4> ReturnBlockPreds;
@@ -407,11 +411,23 @@ BranchProbability PartialInlinerImpl::ge
   if (hasProfileData(F, OI))
     return OutlineRegionRelFreq;
 
-  // When profile data is not available, we need to be very
-  // conservative in estimating the overall savings. We need to make sure
-  // the outline region relative frequency is not below the threshold
-  // specified by the option.
-  OutlineRegionRelFreq = std::max(OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
+  // When profile data is not available, we need to be conservative in
+  // estimating the overall savings. Static branch prediction can usually
+  // guess the branch direction right (taken/non-taken), but the guessed
+  // branch probability is usually not biased enough. In case when the
+  // outlined region is predicted to be likely, its probability needs
+  // to be made higher (more biased) to not under-estimate the cost of
+  // function outlining. On the other hand, if the outlined region
+  // is predicted to be less likely, the predicted probability is usually
+  // higher than the actual. For instance, the actual probability of the
+  // less likely target is only 5%, but the guessed probability can be
+  // 40%. In the latter case, there is no need for further adjustment.
+  // FIXME: add an option for this.
+  if (OutlineRegionRelFreq < BranchProbability(45, 100))
+    return OutlineRegionRelFreq;
+
+  OutlineRegionRelFreq = std::max(
+      OutlineRegionRelFreq, BranchProbability(OutlineRegionFreqPercent, 100));
 
   return OutlineRegionRelFreq;
 }
@@ -496,6 +512,26 @@ int PartialInlinerImpl::computeBBInlineC
     if (isa<DbgInfoIntrinsic>(I))
       continue;
 
+    switch (I->getOpcode()) {
+    case Instruction::BitCast:
+    case Instruction::PtrToInt:
+    case Instruction::IntToPtr:
+    case Instruction::Alloca:
+      continue;
+    case Instruction::GetElementPtr:
+      if (cast<GetElementPtrInst>(I)->hasAllZeroIndices())
+        continue;
+    default:
+      break;
+    }
+
+    IntrinsicInst *IntrInst = dyn_cast<IntrinsicInst>(I);
+    if (IntrInst) {
+      if (IntrInst->getIntrinsicID() == Intrinsic::lifetime_start ||
+          IntrInst->getIntrinsicID() == Intrinsic::lifetime_end)
+        continue;
+    }
+
     if (CallInst *CI = dyn_cast<CallInst>(I)) {
       InlineCost += getCallsiteCost(CallSite(CI), DL);
       continue;
@@ -519,7 +555,13 @@ std::tuple<int, int, int> PartialInliner
     Function *F, const FunctionOutliningInfo *OI, Function *OutlinedFunction,
     BasicBlock *OutliningCallBB) {
   // First compute the cost of the outlined region 'OI' in the original
-  // function 'F':
+  // function 'F'.
+  // FIXME: The code extractor (outliner) can now do code sinking/hoisting
+  // to reduce outlining cost. The hoisted/sunk code currently does not
+  // incur any runtime cost so it is still OK to compare the outlined
+  // function cost with the outlined region in the original function.
+  // If this ever changes, we will need to introduce a new extractor API
+  // to pass the information.
   int OutlinedRegionCost = 0;
   for (BasicBlock &BB : *F) {
     if (&BB != OI->ReturnBlock &&
@@ -539,11 +581,16 @@ std::tuple<int, int, int> PartialInliner
   for (BasicBlock &BB : *OutlinedFunction) {
     OutlinedFunctionCost += computeBBInlineCost(&BB);
   }
+  // The code extractor introduces a new root and exit stub blocks with
+  // additional unconditional branches. Those branches will be eliminated
+  // later with bb layout. The cost should be adjusted accordingly:
+  OutlinedFunctionCost -= 2 * InlineConstants::InstrCost;
 
   assert(OutlinedFunctionCost >= OutlinedRegionCost &&
          "Outlined function cost should be no less than the outlined region");
-  int OutliningRuntimeOverhead =
-      OutliningFuncCallCost + (OutlinedFunctionCost - OutlinedRegionCost);
+  int OutliningRuntimeOverhead = OutliningFuncCallCost +
+                                 (OutlinedFunctionCost - OutlinedRegionCost) +
+                                 ExtraOutliningPenalty;
 
   return std::make_tuple(OutliningFuncCallCost, OutliningRuntimeOverhead,
                          OutlinedRegionCost);

Added: llvm/trunk/test/Transforms/CodeExtractor/cost.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/cost.ll?rev=304599&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/cost.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/cost.ll Fri Jun  2 17:08:04 2017
@@ -0,0 +1,64 @@
+; RUN: opt -S < %s  -partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
+; RUN: opt -S < %s  -passes=partial-inliner -partial-inlining-extra-penalty=10 | FileCheck %s
+define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
+bb:
+;  ptr != null is predicted to be true 
+  %tmp = icmp ne i32* %arg, null
+  br i1 %tmp, label %bb8, label %bb1
+
+; bb1 is not likely
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+  br label %bb8
+
+bb8:                                              ; preds = %bb1, %bb
+  %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp9
+}
+
+define i32 @outline_region_likely(i32* %arg) local_unnamed_addr {
+bb:
+;  ptr == null is predicted to be false
+  %tmp = icmp eq i32* %arg, null
+  br i1 %tmp, label %bb8, label %bb1
+
+; bb1 is likely
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+  br label %bb8
+
+bb8:                                              ; preds = %bb1, %bb
+  %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp9
+}
+
+declare i32 @foo(i32* %arg)
+
+define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
+; CHECK-LABEL: @dummy_caller
+  %tmp = call i32 @outline_region_notlikely(i32* %arg)
+; CHECK:  call void @outline_region_notlikely.2_bb1
+  %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
+; CHECK: %tmp2 = tail call i32 @outline_region_likely(i32* %arg)
+  ret i32 %tmp
+
+}
+
+; CHECK-LABEL: define internal void @outline_region_notlikely.2_bb1(i32* %arg) {
+; CHECK-NEXT: newFuncRoot:
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}

Added: llvm/trunk/test/Transforms/CodeExtractor/cost_meta.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/CodeExtractor/cost_meta.ll?rev=304599&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/CodeExtractor/cost_meta.ll (added)
+++ llvm/trunk/test/Transforms/CodeExtractor/cost_meta.ll Fri Jun  2 17:08:04 2017
@@ -0,0 +1,41 @@
+; RUN: opt -S < %s  -partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
+; RUN: opt -S < %s  -passes=partial-inliner -partial-inlining-extra-penalty=2000 | FileCheck %s
+define i32 @outline_region_notlikely(i32* %arg) local_unnamed_addr {
+bb:
+;  ptr != null is predicted to be true 
+  %tmp = icmp ne i32* %arg, null
+  br i1 %tmp, label %bb8, label %bb1, !prof !2
+
+; bb1 is not likely
+bb1:                                              ; preds = %bb
+  %tmp2 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp3 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp4 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp5 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp6 = tail call i32 @foo(i32* nonnull %arg)
+  %tmp7 = tail call i32 @foo(i32* nonnull %arg)
+  br label %bb8
+
+bb8:                                              ; preds = %bb1, %bb
+  %tmp9 = phi i32 [ 0, %bb1 ], [ 1, %bb ]
+  ret i32 %tmp9
+}
+
+define i32 @dummy_caller(i32* %arg) local_unnamed_addr {
+; CHECK-LABEL: @dummy_caller
+  %tmp = call i32 @outline_region_notlikely(i32* %arg)
+  ret i32 %tmp
+ }
+
+
+; CHECK-LABEL: define internal void @outline_region_notlikely.1_bb1(i32* %arg) {
+; CHECK-NEXT: newFuncRoot:
+
+declare i32 @foo(i32 * %arg)
+
+!llvm.module.flags = !{!0}
+!llvm.ident = !{!1}
+
+!0 = !{i32 1, !"wchar_size", i32 4}
+!1 = !{!"clang version 5.0.0 (trunk 304489)"}
+!2 = !{!"branch_weights", i32 2000, i32 1}




More information about the llvm-commits mailing list