[llvm] r295115 - Fix a bug in caller's BFI update code after inlining.

Easwaran Raman via llvm-commits llvm-commits at lists.llvm.org
Tue Feb 14 14:49:28 PST 2017


Author: eraman
Date: Tue Feb 14 16:49:28 2017
New Revision: 295115

URL: http://llvm.org/viewvc/llvm-project?rev=295115&view=rev
Log:
Fix a bug in caller's BFI update code after inlining.

Multiple blocks in the callee can be mapped to a single cloned block
since we prune the callee as we clone it. The existing code
iterates over the value map and clones the block frequency (and
eventually scales the frequencies of the cloned blocks). Value map's
iteration is not deterministic and so the cloned block might get the
frequency of any of the original blocks. The fix is to set the max of
the original frequencies to the cloned block. The first block in the
sequence must have this max frequency and, in the call context,
subsequent blocks must have its frequency.

Differential Revision: https://reviews.llvm.org/D29696

Added:
    llvm/trunk/test/Transforms/Inline/bfi-update.ll
Modified:
    llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp

Modified: llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp?rev=295115&r1=295114&r2=295115&view=diff
==============================================================================
--- llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp (original)
+++ llvm/trunk/lib/Transforms/Utils/InlineFunction.cpp Tue Feb 14 16:49:28 2017
@@ -1411,9 +1411,16 @@ static void updateCallerBFI(BasicBlock *
       continue;
     auto *OrigBB = cast<BasicBlock>(Entry.first);
     auto *ClonedBB = cast<BasicBlock>(Entry.second);
-    ClonedBBs.insert(ClonedBB);
-    CallerBFI->setBlockFreq(ClonedBB,
-                            CalleeBFI->getBlockFreq(OrigBB).getFrequency());
+    uint64_t Freq = CalleeBFI->getBlockFreq(OrigBB).getFrequency();
+    if (!ClonedBBs.insert(ClonedBB).second) {
+      // Multiple blocks in the callee might get mapped to one cloned block in
+      // the caller since we prune the callee as we clone it. When that happens,
+      // we want to use the maximum among the original blocks' frequencies.
+      uint64_t NewFreq = CallerBFI->getBlockFreq(ClonedBB).getFrequency();
+      if (NewFreq > Freq)
+        Freq = NewFreq;
+    }
+    CallerBFI->setBlockFreq(ClonedBB, Freq);
   }
   BasicBlock *EntryClone = cast<BasicBlock>(VMap.lookup(&CalleeEntryBlock));
   CallerBFI->setBlockFreqAndScale(

Added: llvm/trunk/test/Transforms/Inline/bfi-update.ll
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/Transforms/Inline/bfi-update.ll?rev=295115&view=auto
==============================================================================
--- llvm/trunk/test/Transforms/Inline/bfi-update.ll (added)
+++ llvm/trunk/test/Transforms/Inline/bfi-update.ll Tue Feb 14 16:49:28 2017
@@ -0,0 +1,93 @@
+; RUN: opt < %s -passes='require<profile-summary>,cgscc(inline)' -S -inline-threshold=50 -inline-cold-callsite-threshold=0 -hot-callsite-threshold=50 | FileCheck %s
+; This tests incremental updates to caller's BFI as a callee gets inlined.
+; In bottom-up inlining, first c->e inlining is considered and fails because
+; e's size exceeds the threshold of 50. Then a->c inlining is considered and it
+; succeeds. a's BFI is updated incrementally. As c's blocks get pruned, the 
+; block with label cond_false is removed and since the remanining code is
+; straight-line a single block gets cloned into a. This block should get the
+; maximum block frequency among the original blocks in c. If it gets the
+; frequency of the block with label cond_true in @c, its frequency will be
+; 1/10th of function a's entry block frequency, resulting in a callsite count of
+; 2 (since a's entry count is 20) which means that a->e callsite will be
+; considered cold and not inlined. 
+
+ at data = external global i32
+; CHECK-LABEL: define i32 @a(
+define i32 @a(i32 %a1) !prof !21 {
+; CHECK-NOT: call i32 @c
+; CHECK-NOT: call i32 @e
+; CHECK: ret
+entry:
+  %cond = icmp sle i32 %a1, 1
+  %a2 = call i32 @c(i32 1)
+  br label %exit
+exit:
+  ret i32 %a2
+}
+
+declare void @ext();
+
+; CHECK: @c(i32 %c1) !prof [[COUNT1:![0-9]+]]
+define i32 @c(i32 %c1) !prof !23 {
+  call void @ext()
+  %cond = icmp sle i32 %c1, 1
+  br i1 %cond, label %cond_true, label %cond_false, !prof !25
+
+cond_false:
+  br label %exit
+
+cond_true:
+  %c11 = call i32 @e(i32 %c1)
+  br label %exit
+exit:
+  %c12 = phi i32 [ 0, %cond_false], [ %c11, %cond_true ]
+  ret i32 %c12
+}
+
+
+; CHECK: @e(i32 %c1) !prof [[COUNT2:![0-9]+]]
+define i32 @e(i32 %c1) !prof !24 {
+  call void @ext()
+  call void @ext()
+  %cond = icmp sle i32 %c1, 1
+  br i1 %cond, label %cond_true, label %cond_false
+
+cond_false:
+  call void @ext()
+  %c2 = load i32, i32* @data, align 4
+  %c3 = add i32 %c1, %c2
+  %c4 = mul i32 %c3, %c2
+  %c5 = add i32 %c4, %c2
+  %c6 = mul i32 %c5, %c2
+  %c7 = add i32 %c6, %c2
+  %c8 = mul i32 %c7, %c2
+  %c9 = add i32 %c8, %c2
+  %c10 = mul i32 %c9, %c2
+  ret i32 %c10
+
+cond_true:
+  ret i32 0
+}
+
+; CHECK: [[COUNT1]] = !{!"function_entry_count", i64 480}
+; CHECK: [[COUNT2]] = !{!"function_entry_count", i64 80}
+!21 = !{!"function_entry_count", i64 20}
+!23 = !{!"function_entry_count", i64 500}
+!24 = !{!"function_entry_count", i64 100}
+!25 = !{!"branch_weights", i32 1, i32 9}
+
+!llvm.module.flags = !{!1}
+!1 = !{i32 1, !"ProfileSummary", !2}
+!2 = !{!3, !4, !5, !6, !7, !8, !9, !10}
+!3 = !{!"ProfileFormat", !"InstrProf"}
+!4 = !{!"TotalCount", i64 10000}
+!5 = !{!"MaxCount", i64 1000}
+!6 = !{!"MaxInternalCount", i64 1}
+!7 = !{!"MaxFunctionCount", i64 1000}
+!8 = !{!"NumCounts", i64 3}
+!9 = !{!"NumFunctions", i64 3}
+!10 = !{!"DetailedSummary", !11}
+!11 = !{!12, !13, !14}
+!12 = !{i32 10000, i64 1000, i32 1}
+!13 = !{i32 999000, i64 1000, i32 1}
+!14 = !{i32 999999, i64 5, i32 2}




More information about the llvm-commits mailing list