[llvm] r346108 - [HotColdSplitting] Use TTI to inform outlining threshold

Vedant Kumar via llvm-commits llvm-commits at lists.llvm.org
Sun Nov 4 15:11:58 PST 2018


Author: vedantk
Date: Sun Nov  4 15:11:57 2018
New Revision: 346108

URL: http://llvm.org/viewvc/llvm-project?rev=346108&view=rev
Log:
[HotColdSplitting] Use TTI to inform outlining threshold

Using TargetTransformInfo allows the splitting pass to factor in the
code size cost of instructions as it decides whether or not outlining is
profitable.

This did not regress the overall amount of outlining seen on the handful
of internal frameworks I tested.

Thanks to Jun Bum Lim for suggesting this!

Differential Revision: https://reviews.llvm.org/D53835

Added:
    llvm/trunk/test/Transforms/HotColdSplit/X86/
    llvm/trunk/test/Transforms/HotColdSplit/X86/lit.local.cfg
    llvm/trunk/test/Transforms/HotColdSplit/X86/outline-expensive.ll
Modified:
    llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
    llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll
    llvm/trunk/test/Transforms/HotColdSplit/minsize.ll
    llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll

Modified: llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp?rev=346108&r1=346107&r2=346108&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp (original)
+++ llvm/trunk/lib/Transforms/IPO/HotColdSplitting.cpp Sun Nov  4 15:11:57 2018
@@ -66,10 +66,10 @@ using namespace llvm;
 static cl::opt<bool> EnableStaticAnalyis("hot-cold-static-analysis",
                               cl::init(true), cl::Hidden);
 
-static cl::opt<unsigned> MinOutliningInstCount(
-    "min-outlining-inst-count", cl::init(3), cl::Hidden,
-    cl::desc("Minimum number of instructions needed for a single-block region "
-             "to be an outlining candidate"));
+static cl::opt<int>
+    MinOutliningThreshold("min-outlining-thresh", cl::init(3), cl::Hidden,
+                          cl::desc("Code size threshold for outlining within a "
+                                   "single BB (as a multiple of TCC_Basic)"));
 
 namespace {
 
@@ -135,14 +135,18 @@ static bool mayExtractBlock(const BasicB
   return !BB.hasAddressTaken();
 }
 
-/// Check whether \p BB has at least \p Min non-debug, non-terminator
-/// instructions.
-static bool hasMinimumInstCount(const BasicBlock &BB, unsigned Min) {
-  unsigned Count = 0;
+/// Check whether \p BB is profitable to outline (i.e. its code size cost meets
+/// the threshold set in \p MinOutliningThreshold).
+static bool isProfitableToOutline(const BasicBlock &BB,
+                                  TargetTransformInfo &TTI) {
+  int Cost = 0;
   for (const Instruction &I : BB) {
     if (isa<DbgInfoIntrinsic>(&I) || &I == BB.getTerminator())
       continue;
-    if (++Count >= Min)
+
+    Cost += TTI.getInstructionCost(&I, TargetTransformInfo::TCK_CodeSize);
+
+    if (Cost >= (MinOutliningThreshold * TargetTransformInfo::TCC_Basic))
       return true;
   }
   return false;
@@ -156,8 +160,10 @@ static bool hasMinimumInstCount(const Ba
 ///
 /// Return an empty sequence if the cold region is too small to outline, or if
 /// the cold region has no warm predecessors.
-static BlockSequence
-findMaximalColdRegion(BasicBlock &SinkBB, DominatorTree &DT, PostDomTree &PDT) {
+static BlockSequence findMaximalColdRegion(BasicBlock &SinkBB,
+                                           TargetTransformInfo &TTI,
+                                           DominatorTree &DT,
+                                           PostDomTree &PDT) {
   // The maximal cold region.
   BlockSequence ColdRegion = {};
 
@@ -241,8 +247,7 @@ findMaximalColdRegion(BasicBlock &SinkBB
     ++SuccIt;
   }
 
-  if (ColdRegion.size() == 1 &&
-      !hasMinimumInstCount(*ColdRegion[0], MinOutliningInstCount))
+  if (ColdRegion.size() == 1 && !isProfitableToOutline(*ColdRegion[0], TTI))
     return {};
 
   return ColdRegion;
@@ -251,6 +256,7 @@ findMaximalColdRegion(BasicBlock &SinkBB
 /// Get the largest cold region in \p F.
 static BlockSequence getLargestColdRegion(Function &F, ProfileSummaryInfo &PSI,
                                           BlockFrequencyInfo *BFI,
+                                          TargetTransformInfo &TTI,
                                           DominatorTree &DT, PostDomTree &PDT) {
   // Keep track of the largest cold region.
   BlockSequence LargestColdRegion = {};
@@ -270,7 +276,7 @@ static BlockSequence getLargestColdRegio
     });
 
     // Find a maximal cold region we can outline.
-    BlockSequence ColdRegion = findMaximalColdRegion(BB, DT, PDT);
+    BlockSequence ColdRegion = findMaximalColdRegion(BB, TTI, DT, PDT);
     if (ColdRegion.empty()) {
       LLVM_DEBUG(dbgs() << "  Skipping (block not profitable to extract)\n");
       continue;
@@ -305,7 +311,7 @@ public:
 private:
   bool shouldOutlineFrom(const Function &F) const;
   Function *extractColdRegion(const BlockSequence &Region, DominatorTree &DT,
-                              BlockFrequencyInfo *BFI,
+                              BlockFrequencyInfo *BFI, TargetTransformInfo &TTI,
                               OptimizationRemarkEmitter &ORE, unsigned Count);
   SmallPtrSet<const Function *, 2> OutlinedFunctions;
   ProfileSummaryInfo *PSI;
@@ -365,6 +371,7 @@ bool HotColdSplitting::shouldOutlineFrom
 Function *HotColdSplitting::extractColdRegion(const BlockSequence &Region,
                                               DominatorTree &DT,
                                               BlockFrequencyInfo *BFI,
+                                              TargetTransformInfo &TTI,
                                               OptimizationRemarkEmitter &ORE,
                                               unsigned Count) {
   assert(!Region.empty());
@@ -393,7 +400,7 @@ Function *HotColdSplitting::extractColdR
     CallInst *CI = cast<CallInst>(U);
     CallSite CS(CI);
     NumColdRegionsOutlined++;
-    if (GetTTI(*OutF).useColdCCForColdCall(*OutF)) {
+    if (TTI.useColdCCForColdCall(*OutF)) {
       OutF->setCallingConv(CallingConv::Cold);
       CS.setCallingConv(CallingConv::Cold);
     }
@@ -437,14 +444,15 @@ bool HotColdSplitting::run(Module &M) {
     PostDomTree PDT(F);
     PDT.recalculate(F);
     BlockFrequencyInfo *BFI = GetBFI(F);
+    TargetTransformInfo &TTI = GetTTI(F);
 
-    BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, DT, PDT);
+    BlockSequence ColdRegion = getLargestColdRegion(F, *PSI, BFI, TTI, DT, PDT);
     if (ColdRegion.empty())
       continue;
 
     OptimizationRemarkEmitter &ORE = (*GetORE)(F);
     Function *Outlined =
-        extractColdRegion(ColdRegion, DT, BFI, ORE, /*Count=*/1);
+        extractColdRegion(ColdRegion, DT, BFI, TTI, ORE, /*Count=*/1);
     if (Outlined) {
       OutlinedFunctions.insert(Outlined);
       Changed = true;

Added: llvm/trunk/test/Transforms/HotColdSplit/X86/lit.local.cfg
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/X86/lit.local.cfg?rev=346108&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/X86/lit.local.cfg (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/X86/lit.local.cfg Sun Nov  4 15:11:57 2018
@@ -0,0 +1,3 @@
+if not 'X86' in config.root.targets:
+    config.unsupported = True
+

Added: llvm/trunk/test/Transforms/HotColdSplit/X86/outline-expensive.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/X86/outline-expensive.ll?rev=346108&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/X86/outline-expensive.ll (added)
+++ llvm/trunk/test/Transforms/HotColdSplit/X86/outline-expensive.ll Sun Nov  4 15:11:57 2018
@@ -0,0 +1,25 @@
+; The magic number 6 comes from (1 * TCC_Expensive) + (1 * CostOfCallX86).
+; RUN: opt -hotcoldsplit -min-outlining-thresh=6 -S < %s | FileCheck %s
+
+; Test that we outline even though there are only two cold instructions. TTI
+; should determine that they are expensive in terms of code size.
+
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
+; CHECK-LABEL: @fun
+; CHECK: call void @fun.cold.1
+define void @fun(i32 %x) {
+entry:
+  br i1 undef, label %if.then, label %if.else
+
+if.then:
+  ret void
+
+if.else:
+  %y = sdiv i32 %x, 111
+  call void @sink(i32 %y)
+  ret void
+}
+
+declare void @sink(i32 %x) cold

Modified: llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll?rev=346108&r1=346107&r2=346108&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll (original)
+++ llvm/trunk/test/Transforms/HotColdSplit/do-not-split.ll Sun Nov  4 15:11:57 2018
@@ -1,6 +1,9 @@
 ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
 ; RUN: opt -passes=hotcoldsplit -S < %s | FileCheck %s
 
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
 ; Check that these functions are not split. Outlined functions are called from a
 ; basic block named codeRepl.
 

Modified: llvm/trunk/test/Transforms/HotColdSplit/minsize.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/minsize.ll?rev=346108&r1=346107&r2=346108&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/minsize.ll (original)
+++ llvm/trunk/test/Transforms/HotColdSplit/minsize.ll Sun Nov  4 15:11:57 2018
@@ -1,8 +1,10 @@
 ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
 
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
 ; CHECK-LABEL: @fun
 ; CHECK: call void @fun.cold.1
-
 define void @fun() {
 entry:
   br i1 undef, label %if.then, label %if.else

Modified: llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll?rev=346108&r1=346107&r2=346108&view=diff
==============================================================================
--- llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll (original)
+++ llvm/trunk/test/Transforms/HotColdSplit/split-out-dbg-val-of-arg.ll Sun Nov  4 15:11:57 2018
@@ -1,5 +1,8 @@
 ; RUN: opt -hotcoldsplit -S < %s | FileCheck %s
 
+target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-apple-macosx10.14.0"
+
 ; CHECK-LABEL: define {{.*}}@foo.cold
 ; CHECK-NOT: llvm.dbg.value
 




More information about the llvm-commits mailing list