[llvm] r218472 - [MachineSink+PGO] Teach MachineSink to use BlockFrequencyInfo

Bruno Cardoso Lopes bruno.cardoso at gmail.com
Thu Sep 25 16:14:26 PDT 2014


Author: bruno
Date: Thu Sep 25 18:14:26 2014
New Revision: 218472

URL: http://llvm.org/viewvc/llvm-project?rev=218472&view=rev
Log:
[MachineSink+PGO] Teach MachineSink to use BlockFrequencyInfo

Machine Sink uses loop depth information to select between successors BBs to
sink machine instructions into, where BBs within smaller loop depths are
preferable.  This patch adds support for choosing between successors by using
profile information from BlockFrequencyInfo instead, whenever the information
is available.

Tested it under SPEC2006 train (average of 30 runs for each program); ~1.5%
execution speedup in average on x86-64 darwin.

<rdar://problem/18021659>

Added:
    llvm/trunk/test/CodeGen/X86/sink-blockfreq.ll
Modified:
    llvm/trunk/lib/CodeGen/MachineSink.cpp

Modified: llvm/trunk/lib/CodeGen/MachineSink.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineSink.cpp?rev=218472&r1=218471&r2=218472&view=diff
==============================================================================
--- llvm/trunk/lib/CodeGen/MachineSink.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineSink.cpp Thu Sep 25 18:14:26 2014
@@ -21,6 +21,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/CodeGen/MachineBlockFrequencyInfo.h"
 #include "llvm/CodeGen/MachineDominators.h"
 #include "llvm/CodeGen/MachineLoopInfo.h"
 #include "llvm/CodeGen/MachinePostDominators.h"
@@ -41,6 +42,12 @@ SplitEdges("machine-sink-split",
            cl::desc("Split critical edges during machine sinking"),
            cl::init(true), cl::Hidden);
 
+static cl::opt<bool>
+UseBlockFreqInfo("machine-sink-bfi",
+           cl::desc("Use block frequency info to find successors to sink"),
+           cl::init(true), cl::Hidden);
+
+
 STATISTIC(NumSunk,      "Number of machine instructions sunk");
 STATISTIC(NumSplit,     "Number of critical edges split");
 STATISTIC(NumCoalesces, "Number of copies coalesced");
@@ -53,6 +60,7 @@ namespace {
     MachineDominatorTree *DT;      // Machine dominator tree
     MachinePostDominatorTree *PDT; // Machine post dominator tree
     MachineLoopInfo *LI;
+    const MachineBlockFrequencyInfo *MBFI;
     AliasAnalysis *AA;
 
     // Remember which edges have been considered for breaking.
@@ -81,6 +89,8 @@ namespace {
       AU.addPreserved<MachineDominatorTree>();
       AU.addPreserved<MachinePostDominatorTree>();
       AU.addPreserved<MachineLoopInfo>();
+      if (UseBlockFreqInfo)
+        AU.addRequired<MachineBlockFrequencyInfo>();
     }
 
     void releaseMemory() override {
@@ -247,6 +257,7 @@ bool MachineSinking::runOnMachineFunctio
   DT = &getAnalysis<MachineDominatorTree>();
   PDT = &getAnalysis<MachinePostDominatorTree>();
   LI = &getAnalysis<MachineLoopInfo>();
+  MBFI = UseBlockFreqInfo ? &getAnalysis<MachineBlockFrequencyInfo>() : nullptr;
   AA = &getAnalysis<AliasAnalysis>();
 
   bool EverMadeChange = false;
@@ -566,14 +577,20 @@ MachineBasicBlock *MachineSinking::FindS
       }
 
       // Otherwise, we should look at all the successors and decide which one
-      // we should sink to.
-      // We give successors with smaller loop depth higher priority.
-      SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(), MBB->succ_end());
-      // Sort Successors according to their loop depth.
+      // we should sink to. If we have reliable block frequency information
+      // (frequency != 0) available, give successors with smaller frequencies
+      // higher priority, otherwise prioritize smaller loop depths.
+      SmallVector<MachineBasicBlock*, 4> Succs(MBB->succ_begin(),
+                                               MBB->succ_end());
+      // Sort Successors according to their loop depth or block frequency info.
       std::stable_sort(
           Succs.begin(), Succs.end(),
-          [this](const MachineBasicBlock *LHS, const MachineBasicBlock *RHS) {
-            return LI->getLoopDepth(LHS) < LI->getLoopDepth(RHS);
+          [this](const MachineBasicBlock *L, const MachineBasicBlock *R) {
+            uint64_t LHSFreq = MBFI ? MBFI->getBlockFreq(L).getFrequency() : 0;
+            uint64_t RHSFreq = MBFI ? MBFI->getBlockFreq(R).getFrequency() : 0;
+            bool HasBlockFreq = LHSFreq != 0 && RHSFreq != 0;
+            return HasBlockFreq ? LHSFreq < RHSFreq
+                                : LI->getLoopDepth(L) < LI->getLoopDepth(R);
           });
       for (SmallVectorImpl<MachineBasicBlock *>::iterator SI = Succs.begin(),
              E = Succs.end(); SI != E; ++SI) {

Added: llvm/trunk/test/CodeGen/X86/sink-blockfreq.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/sink-blockfreq.ll?rev=218472&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/sink-blockfreq.ll (added)
+++ llvm/trunk/test/CodeGen/X86/sink-blockfreq.ll Thu Sep 25 18:14:26 2014
@@ -0,0 +1,45 @@
+; RUN: llc -disable-machine-licm -machine-sink-bfi=true -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_BFI
+; RUN: llc -disable-machine-licm -machine-sink-bfi=false -mtriple=x86_64-apple-darwin < %s | FileCheck %s -check-prefix=MSINK_NOBFI
+
+; Test that by changing BlockFrequencyInfo we change the order in which
+; machine-sink looks for sucessor blocks. By not using BFI, both G and B
+; have the same loop depth and no instructions is sinked - B is selected but
+; can't be used as to avoid breaking a non profitable critical edge. By using
+; BFI, "mul" is sinked into the less frequent block G.
+define i32 @sink_freqinfo(i32 %a, i32 %b) nounwind uwtable ssp {
+; MSINK_BFI-LABEL: sink_freqinfo
+; MSINK_BFI: jl
+; MSINK_BFI-NEXT: ## BB#
+; MSINK_BFI-NEXT: imull
+
+; MSINK_NOBFI-LABEL: sink_freqinfo
+; MSINK_NOBFI: imull
+; MSINK_NOBFI: jl
+entry:
+  br label %B
+
+B:
+  %ee = phi i32 [ 0, %entry ], [ %inc, %F ]
+  %xx = sub i32 %a, %ee
+  %cond0 = icmp slt i32 %xx, 0
+  br i1 %cond0, label %F, label %exit, !prof !0
+
+F:
+  %inc = add nsw i32 %xx, 2
+  %aa = mul nsw i32 %b, %inc
+  %exitcond = icmp slt i32 %inc, %a
+  br i1 %exitcond, label %B, label %G, !prof !1
+
+G:
+  %ii = add nsw i32 %aa, %a
+  %ll = add i32 %b, 45
+  %exitcond2 = icmp sge i32 %ii, %b
+  br i1 %exitcond2, label %G, label %exit, !prof !2
+
+exit:
+  ret i32 0
+}
+
+!0 = metadata !{metadata !"branch_weights", i32 4, i32 1}
+!1 = metadata !{metadata !"branch_weights", i32 128, i32 1}
+!2 = metadata !{metadata !"branch_weights", i32 1, i32 1}





More information about the llvm-commits mailing list