[llvm] r277187 - Codegen: MachineBlockPlacement Improve probability layout.

Xinliang David Li via llvm-commits llvm-commits at lists.llvm.org
Tue Aug 30 20:43:25 PDT 2016


https://reviews.llvm.org/D22892

David

On Tue, Aug 30, 2016 at 7:44 PM, Philip Reames via llvm-commits <
llvm-commits at lists.llvm.org> wrote:

> Was there a phabricator review for this?  I want to catch up on the
> discussion.
>
>
> On 07/29/2016 11:09 AM, Kyle Butt via llvm-commits wrote:
>
>> Author: iteratee
>> Date: Fri Jul 29 13:09:28 2016
>> New Revision: 277187
>>
>> URL: http://llvm.org/viewvc/llvm-project?rev=277187&view=rev
>> Log:
>> Codegen: MachineBlockPlacement Improve probability layout.
>>
>> The following pattern was being laid out poorly:
>>
>>                A
>>               / \
>>              B   C
>>             / \ / \
>>            D   E   ? (Doesn't matter)
>>
>> Where A->B is far more likely than A->C, and prob(B->D) = prob(B->E)
>>
>> The current algorithm gives:
>> A,B,C,E (D goes on worklist)
>>
>> It does this even if C has a frequency count of 0. This patch
>> adjusts the layout calculation so that if freq(B->E) >> freq(C->E)
>> then we go ahead and lay out E rather than C. Fallthrough half the time
>> is better than fallthrough never, or fallthrough very rarely. The
>> resulting layout is:
>>
>> A,B,E, (C and D are in a worklist)
>>
>> Modified:
>>      llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>>      llvm/trunk/test/CodeGen/X86/block-placement.ll
>>
>> Modified: llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/CodeGen/
>> MachineBlockPlacement.cpp?rev=277187&r1=277186&r2=277187&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp (original)
>> +++ llvm/trunk/lib/CodeGen/MachineBlockPlacement.cpp Fri Jul 29 13:09:28
>> 2016
>> @@ -631,18 +631,46 @@ bool MachineBlockPlacement::hasBetterLay
>>     // BB->Succ. This is equivalent to looking the CFG backward with
>> backward
>>     // edge: Prob(Succ->BB) needs to be >= HotProb in order to be selected
>> (without
>>     // profile data).
>> -
>> +  // ------------------------------------------------------------
>> --------------
>> +  // Case 3: forked diamond
>> +  //       S
>> +  //      / \
>> +  //     /   \
>> +  //   BB    Pred
>> +  //   | \   / |
>> +  //   |  \ /  |
>> +  //   |   X   |
>> +  //   |  / \  |
>> +  //   | /   \ |
>> +  //   S1     S2
>> +  //
>> +  // The current block is BB and edge BB->S1 is now being evaluated.
>> +  // As above S->BB was already selected because
>> +  // prob(S->BB) > prob(S->Pred). Assume that prob(BB->S1) >=
>> prob(BB->S2).
>> +  //
>> +  // topo-order:
>> +  //
>> +  //     S-------|                     ---S
>> +  //     |       |                     |  |
>> +  //  ---BB      |                     |  BB
>> +  //  |          |                     |  |
>> +  //  |  Pred----|                     |  S1----
>> +  //  |  |                             |       |
>> +  //  --(S1 or S2)                     ---Pred--
>> +  //
>> +  // topo-cost = freq(S->Pred) + freq(BB->S1) + freq(BB->S2)
>> +  //    + min(freq(Pred->S1), freq(Pred->S2))
>> +  // Non-topo-order cost:
>> +  // In the worst case, S2 will not get laid out after Pred.
>> +  // non-topo-cost = 2 * freq(S->Pred) + freq(BB->S2).
>> +  // To be conservative, we can assume that min(freq(Pred->S1),
>> freq(Pred->S2))
>> +  // is 0. Then the non topo layout is better when
>> +  // freq(S->Pred) < freq(BB->S1).
>> +  // This is exactly what is checked below.
>> +  // Note there are other shapes that apply (Pred may not be a single
>> block),
>> +  // but they all fit this general pattern.
>>     BranchProbability HotProb = getLayoutSuccessorProbThreshold(BB);
>>   -  // Forward checking. For case 2, SuccProb will be 1.
>> -  if (SuccProb < HotProb) {
>> -    DEBUG(dbgs() << "    Not a candidate: " << getBlockName(Succ) << " "
>> -                 << "Respecting topological ordering because "
>> -                 << "probability is less than prob treshold: "
>> -                 << SuccProb << "\n");
>> -    return true;
>> -  }
>> -
>>     // Make sure that a hot successor doesn't have a globally more
>>     // important predecessor.
>>     BlockFrequency CandidateEdgeFreq = MBFI->getBlockFreq(BB) *
>> RealSuccProb;
>> @@ -653,11 +681,11 @@ bool MachineBlockPlacement::hasBetterLay
>>           (BlockFilter && !BlockFilter->count(Pred)) ||
>>           BlockToChain[Pred] == &Chain)
>>         continue;
>> -    // Do backward checking. For case 1, it is actually redundant check.
>> For
>> -    // case 2 above, we need a backward checking to filter out edges
>> that are
>> -    // not 'strongly' biased. With profile data available, the check is
>> mostly
>> -    // redundant too (when threshold prob is set at 50%) unless S has
>> more than
>> -    // two successors.
>> +    // Do backward checking.
>> +    // For all cases above, we need a backward checking to filter out
>> edges that
>> +    // are not 'strongly' biased. With profile data available, the check
>> is
>> +    // mostly redundant for case 2 (when threshold prob is set at 50%)
>> unless S
>> +    // has more than two successors.
>>       // BB  Pred
>>       //  \ /
>>       //  Succ
>> @@ -666,6 +694,8 @@ bool MachineBlockPlacement::hasBetterLay
>>       //      i.e. freq(BB->Succ) > freq(BB->Succ) * HotProb +
>> freq(Pred->Succ) *
>>       //      HotProb
>>       //      i.e. freq(BB->Succ) * (1 - HotProb) > freq(Pred->Succ) *
>> HotProb
>> +    // Case 1 is covered too, because the first equation reduces to:
>> +    // prob(BB->Succ) > HotProb. (freq(Succ) = freq(BB) for a triangle)
>>       BlockFrequency PredEdgeFreq =
>>           MBFI->getBlockFreq(Pred) * MBPI->getEdgeProbability(Pred, Succ);
>>       if (PredEdgeFreq * HotProb >= CandidateEdgeFreq *
>> HotProb.getCompl()) {
>>
>> Modified: llvm/trunk/test/CodeGen/X86/block-placement.ll
>> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/CodeGen/
>> X86/block-placement.ll?rev=277187&r1=277186&r2=277187&view=diff
>> ============================================================
>> ==================
>> --- llvm/trunk/test/CodeGen/X86/block-placement.ll (original)
>> +++ llvm/trunk/test/CodeGen/X86/block-placement.ll Fri Jul 29 13:09:28
>> 2016
>> @@ -1283,6 +1283,174 @@ exit:
>>     ret void
>>   }
>>   +declare void @a()
>> +declare void @b()
>> +
>> +define void @test_forked_hot_diamond(i32* %a) {
>> +; Test that a hot-branch with probability > 80% followed by a 50/50
>> branch
>> +; will not place the cold predecessor if the probability for the
>> fallthrough
>> +; remains above 80%
>> +; CHECK-LABEL: test_forked_hot_diamond
>> +; CHECK: %entry
>> +; CHECK: %then
>> +; CHECK: %fork1
>> +; CHECK: %else
>> +; CHECK: %fork2
>> +; CHECK: %exit
>> +entry:
>> +  %gep1 = getelementptr i32, i32* %a, i32 1
>> +  %val1 = load i32, i32* %gep1
>> +  %cond1 = icmp ugt i32 %val1, 1
>> +  br i1 %cond1, label %then, label %else, !prof !5
>> +
>> +then:
>> +  call void @hot_function()
>> +  %gep2 = getelementptr i32, i32* %a, i32 2
>> +  %val2 = load i32, i32* %gep2
>> +  %cond2 = icmp ugt i32 %val2, 2
>> +  br i1 %cond2, label %fork1, label %fork2, !prof !8
>> +
>> +else:
>> +  call void @cold_function()
>> +  %gep3 = getelementptr i32, i32* %a, i32 3
>> +  %val3 = load i32, i32* %gep3
>> +  %cond3 = icmp ugt i32 %val3, 3
>> +  br i1 %cond3, label %fork1, label %fork2, !prof !8
>> +
>> +fork1:
>> +  call void @a()
>> +  br label %exit
>> +
>> +fork2:
>> +  call void @b()
>> +  br label %exit
>> +
>> +exit:
>> +  call void @hot_function()
>> +  ret void
>> +}
>> +
>> +define void @test_forked_hot_diamond_gets_cold(i32* %a) {
>> +; Test that a hot-branch with probability > 80% followed by a 50/50
>> branch
>> +; will place the cold predecessor if the probability for the fallthrough
>> +; falls below 80%
>> +; The probability for both branches is 85%. For then2 vs else1
>> +; this results in a compounded probability of 83%.
>> +; Neither then2->fork1 nor then2->fork2 has a large enough relative
>> +; probability to break the CFG.
>> +; Relative probs:
>> +; then2 -> fork1 vs else1 -> fork1 = 71%
>> +; then2 -> fork2 vs else2 -> fork2 = 74%
>> +; CHECK-LABEL: test_forked_hot_diamond_gets_cold
>> +; CHECK: %entry
>> +; CHECK: %then1
>> +; CHECK: %then2
>> +; CHECK: %else1
>> +; CHECK: %fork1
>> +; CHECK: %else2
>> +; CHECK: %fork2
>> +; CHECK: %exit
>> +entry:
>> +  %gep1 = getelementptr i32, i32* %a, i32 1
>> +  %val1 = load i32, i32* %gep1
>> +  %cond1 = icmp ugt i32 %val1, 1
>> +  br i1 %cond1, label %then1, label %else1, !prof !9
>> +
>> +then1:
>> +  call void @hot_function()
>> +  %gep2 = getelementptr i32, i32* %a, i32 2
>> +  %val2 = load i32, i32* %gep2
>> +  %cond2 = icmp ugt i32 %val2, 2
>> +  br i1 %cond2, label %then2, label %else2, !prof !9
>> +
>> +else1:
>> +  call void @cold_function()
>> +  br label %fork1
>> +
>> +then2:
>> +  call void @hot_function()
>> +  %gep3 = getelementptr i32, i32* %a, i32 3
>> +  %val3 = load i32, i32* %gep2
>> +  %cond3 = icmp ugt i32 %val2, 3
>> +  br i1 %cond3, label %fork1, label %fork2, !prof !8
>> +
>> +else2:
>> +  call void @cold_function()
>> +  br label %fork2
>> +
>> +fork1:
>> +  call void @a()
>> +  br label %exit
>> +
>> +fork2:
>> +  call void @b()
>> +  br label %exit
>> +
>> +exit:
>> +  call void @hot_function()
>> +  ret void
>> +}
>> +
>> +define void @test_forked_hot_diamond_stays_hot(i32* %a) {
>> +; Test that a hot-branch with probability > 88.88% (1:8) followed by a
>> 50/50
>> +; branch will not place the cold predecessor as the probability for the
>> +; fallthrough stays above 80%
>> +; (1:8) followed by (1:1) is still (1:4)
>> +; Here we use 90% probability because two in a row
>> +; have a 89 % probability vs the original branch.
>> +; CHECK-LABEL: test_forked_hot_diamond_stays_hot
>> +; CHECK: %entry
>> +; CHECK: %then1
>> +; CHECK: %then2
>> +; CHECK: %fork1
>> +; CHECK: %else1
>> +; CHECK: %else2
>> +; CHECK: %fork2
>> +; CHECK: %exit
>> +entry:
>> +  %gep1 = getelementptr i32, i32* %a, i32 1
>> +  %val1 = load i32, i32* %gep1
>> +  %cond1 = icmp ugt i32 %val1, 1
>> +  br i1 %cond1, label %then1, label %else1, !prof !10
>> +
>> +then1:
>> +  call void @hot_function()
>> +  %gep2 = getelementptr i32, i32* %a, i32 2
>> +  %val2 = load i32, i32* %gep2
>> +  %cond2 = icmp ugt i32 %val2, 2
>> +  br i1 %cond2, label %then2, label %else2, !prof !10
>> +
>> +else1:
>> +  call void @cold_function()
>> +  br label %fork1
>> +
>> +then2:
>> +  call void @hot_function()
>> +  %gep3 = getelementptr i32, i32* %a, i32 3
>> +  %val3 = load i32, i32* %gep2
>> +  %cond3 = icmp ugt i32 %val2, 3
>> +  br i1 %cond3, label %fork1, label %fork2, !prof !8
>> +
>> +else2:
>> +  call void @cold_function()
>> +  br label %fork2
>> +
>> +fork1:
>> +  call void @a()
>> +  br label %exit
>> +
>> +fork2:
>> +  call void @b()
>> +  br label %exit
>> +
>> +exit:
>> +  call void @hot_function()
>> +  ret void
>> +}
>> +
>>   !5 = !{!"branch_weights", i32 84, i32 16}
>>   !6 = !{!"function_entry_count", i32 10}
>>   !7 = !{!"branch_weights", i32 60, i32 40}
>> +!8 = !{!"branch_weights", i32 5001, i32 4999}
>> +!9 = !{!"branch_weights", i32 85, i32 15}
>> +!10 = !{!"branch_weights", i32 90, i32 10}
>>
>>
>> _______________________________________________
>> llvm-commits mailing list
>> llvm-commits at lists.llvm.org
>> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>>
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
>
-------------- next part --------------
An HTML attachment was scrubbed...
URL: <http://lists.llvm.org/pipermail/llvm-commits/attachments/20160830/6e3ac5b0/attachment.html>


More information about the llvm-commits mailing list