[llvm] r248777 - [LoopUnswitch] Add block frequency analysis to recognize hot/cold regions

Chen Li via llvm-commits llvm-commits at lists.llvm.org
Mon Sep 28 22:03:33 PDT 2015


Author: chenli
Date: Tue Sep 29 00:03:32 2015
New Revision: 248777

URL: http://llvm.org/viewvc/llvm-project?rev=248777&view=rev
Log:
[LoopUnswitch] Add block frequency analysis to recognize hot/cold regions

Summary: This patch adds block frequency analysis to the LoopUnswitch pass to recognize hot/cold regions. For cold regions the pass only performs trivial unswitches since they do not increase code size, and for hot regions everything works as before. This helps to minimize code growth in cold regions and be more aggressive in hot regions. Currently a loop is considered cold by default when its header frequency is below 1% of the function entry frequency, and the threshold can be adjusted via the -loop-unswitch-coldness-threshold flag. The entire feature is controlled via the -loop-unswitch-with-block-frequency flag and is off by default.

Reviewers: broune, silvas, dnovillo, reames

Subscribers: davidxl, llvm-commits

Differential Revision: http://reviews.llvm.org/D11605

Added:
    llvm/trunk/test/Transforms/LoopUnswitch/cold-loop.ll
Modified:
    llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp

Modified: llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp?rev=248777&r1=248776&r2=248777&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp (original)
+++ llvm/trunk/lib/Transforms/Scalar/LoopUnswitch.cpp Tue Sep 29 00:03:32 2015
@@ -38,6 +38,10 @@
 #include "llvm/Analysis/LoopPass.h"
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
+#include "llvm/Analysis/BlockFrequencyInfoImpl.h"
+#include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/BranchProbabilityInfo.h"
+#include "llvm/Support/BranchProbability.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/Dominators.h"
@@ -71,6 +75,19 @@ static cl::opt<unsigned>
 Threshold("loop-unswitch-threshold", cl::desc("Max loop size to unswitch"),
           cl::init(100), cl::Hidden);
 
+static cl::opt<bool>
+LoopUnswitchWithBlockFrequency("loop-unswitch-with-block-frequency",
+    cl::init(false), cl::Hidden,
+    cl::desc("Enable the use of the block frequency analysis to access PGO "
+             "heuristics to minimize code growth in cold regions."));
+
+static cl::opt<unsigned>
+ColdnessThreshold("loop-unswitch-coldness-threshold", cl::init(1), cl::Hidden,
+    cl::desc("Coldness threshold in percentage. The loop header frequency "
+             "(relative to the entry frequency) is compared with this "
+             "threshold to determine if non-trivial unswitching should be "
+             "enabled."));
+
 namespace {
 
   class LUAnalysisCache {
@@ -155,6 +172,13 @@ namespace {
 
     LUAnalysisCache BranchesInfo;
 
+    bool EnabledPGO;
+
+    // BFI and ColdEntryFreq are only used when PGO and
+    // LoopUnswitchWithBlockFrequency are enabled.
+    BlockFrequencyInfo BFI;
+    BlockFrequency ColdEntryFreq;
+
     bool OptimizeForSize;
     bool redoLoop;
 
@@ -416,6 +440,20 @@ bool LoopUnswitch::runOnLoop(Loop *L, LP
   DT = &getAnalysis<DominatorTreeWrapperPass>().getDomTree();
   currentLoop = L;
   Function *F = currentLoop->getHeader()->getParent();
+
+  EnabledPGO = F->getEntryCount().hasValue();
+
+  if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+    BranchProbabilityInfo BPI(*F, *LI);
+    BFI.calculate(*L->getHeader()->getParent(), BPI, *LI);
+
+    // Use BranchProbability to compute a minimum frequency based on
+    // function entry baseline frequency. Loops with headers below this
+    // frequency are considered as cold.
+    const BranchProbability ColdProb(ColdnessThreshold, 100);
+    ColdEntryFreq = BlockFrequency(BFI.getEntryFreq()) * ColdProb;
+  }
+
   bool Changed = false;
   do {
     assert(currentLoop->isLCSSAForm(*DT));
@@ -468,6 +506,16 @@ bool LoopUnswitch::processCurrentLoop()
       loopHeader->getParent()->hasFnAttribute(Attribute::OptimizeForSize))
     return false;
 
+  if (LoopUnswitchWithBlockFrequency && EnabledPGO) {
+    // Compute the weighted frequency of the hottest block in the
+    // loop (loopHeader in this case since inner loops should be
+    // processed before outer loop). If it is less than ColdEntryFreq,
+    // we should not unswitch.
+    BlockFrequency LoopEntryFreq = BFI.getBlockFreq(loopHeader);
+    if (LoopEntryFreq < ColdEntryFreq)
+      return false;
+  }
+
   // Loop over all of the basic blocks in the loop.  If we find an interior
   // block that is branching on a loop-invariant condition, we can unswitch this
   // loop.

Added: llvm/trunk/test/Transforms/LoopUnswitch/cold-loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/LoopUnswitch/cold-loop.ll?rev=248777&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/LoopUnswitch/cold-loop.ll (added)
+++ llvm/trunk/test/Transforms/LoopUnswitch/cold-loop.ll Tue Sep 29 00:03:32 2015
@@ -0,0 +1,52 @@
+; RUN: opt < %s -loop-unswitch -loop-unswitch-with-block-frequency -S 2>&1 | FileCheck %s
+
+;; trivial condition should be unswitched regardless of coldness.
+define i32 @test1(i1 %cond1, i1 %cond2) !prof !1 {
+  br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+; CHECK: br i1 true, label %continue, label %loop_exit.loopexit
+  br i1 %cond2, label %continue, label %loop_exit  ; trivial condition
+
+continue:
+  call void @some_func1() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+;; cold non-trivial condition should not be unswitched.
+define i32 @test2(i32* %var, i1 %cond1, i1 %cond2) !prof !1 {
+  br i1 %cond1, label %loop_begin, label %loop_exit, !prof !0
+
+loop_begin:
+  store i32 1, i32* %var
+; CHECK: br i1 %cond2, label %continue1, label %continue2
+  br i1 %cond2, label %continue1, label %continue2  ; non-trivial condition
+
+continue1:
+  call void @some_func1() noreturn nounwind
+  br label %joint
+
+continue2:
+  call void @some_func2() noreturn nounwind
+  br label %joint
+
+joint:
+;; unswitching will duplicate these calls.
+  call void @some_func3() noreturn nounwind
+  call void @some_func4() noreturn nounwind
+  br label %loop_begin
+
+loop_exit:
+  ret i32 0
+}
+
+declare void @some_func1() noreturn
+declare void @some_func2() noreturn
+declare void @some_func3() noreturn
+declare void @some_func4() noreturn
+
+!0 = !{!"branch_weights", i32 1, i32 100000000}
+!1 = !{!"function_entry_count", i64 100}




More information about the llvm-commits mailing list