[llvm] r253414 - Improving edge probabilities computation when choosing the best successor in machine block placement.

Tue Nov 17 16:52:52 PST 2015

Author: conghou
Date: Tue Nov 17 18:52:52 2015
New Revision: 253414

URL: http://llvm.org/viewvc/llvm-project?rev=253414&view=rev
Log:
Improving edge probabilities computation when choosing the best successor in machine block placement.

When looking for the best successor from the outer loop for a block
belonging to an inner loop, the edge probability computation can be
improved so that edges in the inner loop are ignored. For example,
suppose we are building chains for the non-loop part of the following
code, and looking for B1's best successor. Assume the true body is very
hot, then B3 should be the best candidate. However, because of the
existence of the back edge from B1 to B0, the probability from B1 to B3
can be very small, preventing B3 from being its successor. In this patch, when
computing the probability of the edge from B1 to B3, the weight on the
back edge B1->B0 is ignored, so that B1->B3 will have 100% probability.

if (...)
  do {
    B0;
    ... // some branches
    B1;
  } while(...);
else
  B2;
B3;


Differential revision: http://reviews.llvm.org/D10825



Added:
    llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
Modified:
    llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp

Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp?rev=253414&r1=253413&r2=253414&view=diff
==============================================================================

--- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
+++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Tue Nov 17 18:52:52 2015
@@ -389,22 +389,50 @@ MachineBlockPlacement::selectBestSuccess
   uint32_t BestWeight = 0;
   uint32_t WeightScale = 0;
   uint32_t SumWeight = MBPI->getSumForBlock(BB, WeightScale);
-  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+
+  // Adjust sum of weights by excluding weights on edges pointing to blocks that
+  // are either not in BlockFilter or are already in the current chain. Consider
+  // the following CFG:
+  //
+  //     --->A
+  //     |  / \
+  //     | B   C
+  //     |  \ / \
+  //     ----D   E
+  //
+  // Assume A->C is very hot (>90%), and C->D has a 50% probability, then after
+  // A->C is chosen as a fall-through, D won't be selected as a successor of C
+  // due to CFG constraint (the probability of C->D is not greater than
+  // HotProb). If we exclude E that is not in BlockFilter when calculating the
+  // probability of C->D, D will be selected and we will get A C D B as the
+  // layout of this loop.
+  uint32_t AdjustedSumWeight = SumWeight;
+  SmallVector<MachineBasicBlock *, 4> Successors;
   for (MachineBasicBlock *Succ : BB->successors()) {
-    if (BlockFilter && !BlockFilter->count(Succ))
-      continue;
-    BlockChain &SuccChain = *BlockToChain[Succ];
-    if (&SuccChain == &Chain) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Already merged!\n");
-      continue;
-    }
-    if (Succ != *SuccChain.begin()) {
-      DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
-      continue;
+    bool SkipSucc = false;
+    if (BlockFilter && !BlockFilter->count(Succ)) {
+      SkipSucc = true;
+    } else {
+      BlockChain *SuccChain = BlockToChain[Succ];
+      if (SuccChain == &Chain) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ)
+                     << " -> Already merged!\n");
+        SkipSucc = true;
+      } else if (Succ != *SuccChain->begin()) {
+        DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> Mid chain!\n");
+        continue;
+      }
     }
+    if (SkipSucc)
+      AdjustedSumWeight -= MBPI->getEdgeWeight(BB, Succ) / WeightScale;
+    else
+      Successors.push_back(Succ);
+  }
 
+  DEBUG(dbgs() << "Attempting merge from: " << getBlockName(BB) << "\n");
+  for (MachineBasicBlock *Succ : Successors) {
     uint32_t SuccWeight = MBPI->getEdgeWeight(BB, Succ);
-    BranchProbability SuccProb(SuccWeight / WeightScale, SumWeight);
+    BranchProbability SuccProb(SuccWeight / WeightScale, AdjustedSumWeight);
 
     // If we outline optional branches, look whether Succ is unavoidable, i.e.
     // dominates all terminators of the MachineFunction. If it does, other
@@ -432,6 +460,7 @@ MachineBlockPlacement::selectBestSuccess
 
     // Only consider successors which are either "hot", or wouldn't violate
     // any CFG constraints.
+    BlockChain &SuccChain = *BlockToChain[Succ];
     if (SuccChain.LoopPredecessors != 0) {
       if (SuccProb < HotProb) {
         DEBUG(dbgs() << "    " << getBlockName(Succ) << " -> " << SuccProb
@@ -441,8 +470,9 @@ MachineBlockPlacement::selectBestSuccess
 
       // Make sure that a hot successor doesn't have a globally more
       // important predecessor.
+      BranchProbability RealSuccProb(SuccWeight / WeightScale, SumWeight);
       BlockFrequency CandidateEdgeFreq =
-          MBFI->getBlockFreq(BB) * SuccProb * HotProb.getCompl();
+          MBFI->getBlockFreq(BB) * RealSuccProb * HotProb.getCompl();
       bool BadCFGConflict = false;
       for (MachineBasicBlock *Pred : Succ->predecessors()) {
         if (Pred == Succ || (BlockFilter && !BlockFilter->count(Pred)) ||

Added: llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll?rev=253414&view=auto
==============================================================================
--- llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll (added)
+++ llvm/trunk/test/CodeGen/X86/code_placement_ignore_succ_in_inner_loop.ll Tue Nov 17 18:52:52 2015
@@ -0,0 +1,123 @@
+; RUN: llc -mcpu=corei7 -mtriple=x86_64-linux < %s | FileCheck %s -check-prefix=CHECK
+
+define void @foo() {
+; Test that when determining the edge probability from a node in an inner loop
+; to a node in an outer loop, the weights on edges in the inner loop should be
+; ignored if we are building the chain for the outer loop.
+;
+; CHECK-LABEL: foo:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %while.body, label %if.end.1, !prof !1
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.then.1, label %while.cond
+
+if.then.1:
+  call void @d()
+  br label %while.cond
+
+while.cond:
+  %call3 = call zeroext i1 @a()
+  br i1 %call3, label %while.body, label %if.end
+
+if.end.1:
+  call void @d()
+  br label %if.end
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @bar() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its peer loop, the weights on edges in the first loop should be
+; ignored.
+;
+; CHECK-LABEL: bar:
+; CHECK: callq c
+; CHECK: callq b
+
+entry:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !1
+
+if.then:
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.then, label %while.body, !prof !2
+
+while.body:
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %while.body, label %if.end, !prof !2
+
+if.else:
+  call void @b()
+  br label %if.end
+
+if.end:
+  call void @c()
+  ret void
+}
+
+define void @par() {
+; Test that when determining the edge probability from a node in a loop to a
+; node in its outer loop, the weights on edges in the outer loop should be
+; ignored if we are building the chain for the inner loop.
+;
+; CHECK-LABEL: par:
+; CHECK: callq c
+; CHECK: callq d
+; CHECK: callq b
+
+entry:
+  br label %if.cond
+
+if.cond:
+  %call = call zeroext i1 @a()
+  br i1 %call, label %if.then, label %if.else, !prof !3
+
+if.then:
+  call void @b()
+  br label %if.end
+
+if.else:
+  call void @c()
+  %call1 = call zeroext i1 @a()
+  br i1 %call1, label %if.end, label %exit, !prof !4
+
+if.end:
+  call void @d()
+  %call2 = call zeroext i1 @a()
+  br i1 %call2, label %if.cond, label %if.end.2, !prof !2
+
+if.end.2:
+  call void @e()
+  br label %if.cond
+
+exit:
+  ret void
+}
+
+declare zeroext i1 @a()
+declare void @b()
+declare void @c()
+declare void @d()
+declare void @e()
+
+!1 = !{!"branch_weights", i32 10, i32 1}
+!2 = !{!"branch_weights", i32 100, i32 1}
+!3 = !{!"branch_weights", i32 1, i32 100}
+!4 = !{!"branch_weights", i32 1, i32 1}