[llvm] Bfi precision (PR #66285)
via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 13 13:27:39 PDT 2023
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-analysis
<details>
<summary>Changes</summary>
I noticed that we often have poor precision in the `BlockFrequencyInfo` results because of how `convertFloatingToInteger` in the algorithm chooses factors.
This is a request for comments for choosing more aggressive factors! Look at changes in `test/Analysis/BlockFrequencyInfo/precision.ll` to get an impression for the poor precision and improvements.
Has this been tried before? Is there history I am not aware of?
Right now this change does not work yet, as it triggers overflows and some instabilities in various places that I am working through. I thought it might be interesting to get some early feedback, though, in case this change is misguided...
--
Full diff: https://github.com/llvm/llvm-project/pull/66285.diff
9 Files Affected:
- (modified) llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp (+9-20)
- (modified) llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll (+10-9)
- (added) llvm/test/Analysis/BlockFrequencyInfo/precision.ll (+43)
- (modified) llvm/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll (+3-3)
- (modified) llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll (+2-3)
- (modified) llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll (+1-1)
- (modified) llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll (+1-1)
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll (+5-5)
<pre>
diff --git a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
index 82b1e3b9eede709..93661d2edeb3083 100644
--- a/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
+++ b/llvm/lib/Analysis/BlockFrequencyInfoImpl.cpp
@@ -481,26 +481,15 @@ void BlockFrequencyInfoImplBase::distributeMass(const BlockNode &Source,
static void convertFloatingToInteger(BlockFrequencyInfoImplBase &BFI,
const Scaled64 &Min, const Scaled64 &Max) {
- // Scale the Factor to a size that creates integers. Ideally, integers would
- // be scaled so that Max == UINT64_MAX so that they can be best
- // differentiated. However, in the presence of large frequency values, small
- // frequencies are scaled down to 1, making it impossible to differentiate
- // small, unequal numbers. When the spread between Min and Max frequencies
- // fits well within MaxBits, we make the scale be at least 8.
- const unsigned MaxBits = 64;
- const unsigned SpreadBits = (Max / Min).lg();
- Scaled64 ScalingFactor;
- if (SpreadBits <= MaxBits - 3) {
- // If the values are small enough, make the scaling factor at least 8 to
- // allow distinguishing small values.
- ScalingFactor = Min.inverse();
- ScalingFactor <<= 3;
- } else {
- // If the values need more than MaxBits to be represented, saturate small
- // frequency values down to 1 by using a scaling factor that benefits large
- // frequency values.
- ScalingFactor = Scaled64(1, MaxBits) / Max;
- }
+ // Scale the Factor to a size that creates integers. If possible scale
+ // integers so that Max == UINT64_MAX so that they can be best differentiated.
+ // It is possible that the range between min and max cannot be accurately
+ // represented in a 64bit integer without either losing precision for small
+ // values (so small unequal numbers all map to 1) or saturating big numbers,
+ // losing precision for big numbers (so unequal big numbers may map to
+ // UINT64_MAX). We choose to lose precision for small numbers.
+ const unsigned MaxBits = sizeof(Scaled64::DigitsType) * CHAR_BIT;
+ Scaled64 ScalingFactor = Scaled64(1, MaxBits) / Max;
// Translate the floats to integers.
LLVM_DEBUG(dbgs() << "float-to-int: min = " << Min << ", max = " << Max
diff --git a/llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll b/llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
index 41226a1cdfbaf32..fadb47bd256b772 100644
--- a/llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
+++ b/llvm/test/Analysis/BlockFrequencyInfo/loops_with_profile_info.ll
@@ -59,7 +59,7 @@ declare i32 @printf(i8*, ...)
; CHECK: Printing analysis {{.*}} for function 'main':
; CHECK-NEXT: block-frequency-info: main
-define i32 @main() {
+define i32 @main() !prof !6 {
entry:
%retval = alloca i32, align 4
%i = alloca i32, align 4
@@ -93,7 +93,7 @@ for.cond4: ; preds = %for.inc, %for.body3
%cmp5 = icmp slt i32 %2, 100
br i1 %cmp5, label %for.body6, label %for.end, !prof !3
-; CHECK: - for.body6: float = 500000.5, int = 4000004
+; CHECK: - for.body6: {{.*}}, count = 1000000
for.body6: ; preds = %for.cond4
call void @bar()
br label %for.inc
@@ -143,7 +143,7 @@ for.cond16: ; preds = %for.inc19, %for.bod
%cmp17 = icmp slt i32 %8, 10000
br i1 %cmp17, label %for.body18, label %for.end21, !prof !4
-; CHECK: - for.body18: float = 499999.9, int = 3999998
+; CHECK: - for.body18: {{.*}}, count = 1000000
for.body18: ; preds = %for.cond16
call void @bar()
br label %for.inc19
@@ -175,7 +175,7 @@ for.cond26: ; preds = %for.inc29, %for.end
%cmp27 = icmp slt i32 %12, 1000000
br i1 %cmp27, label %for.body28, label %for.end31, !prof !5
-; CHECK: - for.body28: float = 499995.2, int = 3999961
+; CHECK: - for.body28: {{.*}}, count = 1000224
for.body28: ; preds = %for.cond26
call void @bar()
br label %for.inc29
@@ -197,8 +197,9 @@ for.end31: ; preds = %for.cond26
!llvm.ident = !{!0}
!0 = !{!"clang version 3.7.0 (trunk 232635) (llvm/trunk 232636)"}
-!1 = !{!"branch_weights", i32 101, i32 2}
-!2 = !{!"branch_weights", i32 10001, i32 101}
-!3 = !{!"branch_weights", i32 1000001, i32 10001}
-!4 = !{!"branch_weights", i32 1000001, i32 101}
-!5 = !{!"branch_weights", i32 1000001, i32 2}
+!1 = !{!"branch_weights", i32 100, i32 1}
+!2 = !{!"branch_weights", i32 10000, i32 100}
+!3 = !{!"branch_weights", i32 1000000, i32 10000}
+!4 = !{!"branch_weights", i32 1000000, i32 100}
+!5 = !{!"branch_weights", i32 1000000, i32 1}
+!6 = !{!"function_entry_count", i32 1}
diff --git a/llvm/test/Analysis/BlockFrequencyInfo/precision.ll b/llvm/test/Analysis/BlockFrequencyInfo/precision.ll
new file mode 100644
index 000000000000000..7408d002d065d5b
--- /dev/null
+++ b/llvm/test/Analysis/BlockFrequencyInfo/precision.ll
@@ -0,0 +1,43 @@
+; RUN: opt < %s -disable-output -passes="print<block-freq>" 2>&1 | FileCheck %s
+; Sanity check precision for small-ish min/max spread.
+
+@g = global i32 0
+
+; CHECK-LABEL: block-frequency-info: func0
+; CHECK: - entry: float = 1.0, {{.*}}, count = 1000
+; CHECK: - cmp0_true: float = 0.4, {{.*}}, count = 400
+; CHECK: - cmp0_false: float = 0.6, {{.*}}, count = 600
+; CHECK: - cmp1_true: float = 0.1, {{.*}}, count = 100
+; CHECK: - cmp1_false: float = 0.3, {{.*}}, count = 300
+; CHECK: - join: float = 1.0, {{.*}}, count = 1000
+
+define void @func0(i32 %a0, i32 %a1) !prof !0 {
+entry:
+ %cmp0 = icmp ne i32 %a0, 0
+ br i1 %cmp0, label %cmp0_true, label %cmp0_false, !prof !1
+
+cmp0_true:
+ store volatile i32 1, ptr @g
+ %cmp1 = icmp ne i32 %a1, 0
+ br i1 %cmp1, label %cmp1_true, label %cmp1_false, !prof !2
+
+cmp0_false:
+ store volatile i32 2, ptr @g
+ br label %join
+
+cmp1_true:
+ store volatile i32 3, ptr @g
+ br label %join
+
+cmp1_false:
+ store volatile i32 4, ptr @g
+ br label %join
+
+join:
+ store volatile i32 5, ptr @g
+ ret void
+}
+
+!0 = !{!"function_entry_count", i64 1000}
+!1 = !{!"branch_weights", i32 400, i32 600}
+!2 = !{!"branch_weights", i32 1, i32 3}
diff --git a/llvm/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll b/llvm/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
index 63568456d0e58c8..f1eb19dbfdbea42 100644
--- a/llvm/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
+++ b/llvm/test/Transforms/CodeExtractor/MultipleExitBranchProb.ll
@@ -17,8 +17,8 @@ return: ; preds = %entry
define internal i32 @dummyCaller(i1 %cond) !prof !1 {
entry:
-%val = call i32 @inlinedFunc(i1 %cond)
-ret i32 %val
+ %val = call i32 @inlinedFunc(i1 %cond)
+ ret i32 %val
; CHECK-LABEL: @dummyCaller
; CHECK: call
@@ -31,4 +31,4 @@ ret i32 %val
!2 = !{!"branch_weights", i32 5, i32 5}
!3 = !{!"branch_weights", i32 4, i32 1}
-; CHECK: [[COUNT1]] = !{!"branch_weights", i32 31, i32 8}
+; CHECK: [[COUNT1]] = !{!"branch_weights", i32 858993459, i32 214748365}
diff --git a/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll b/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll
index 69e84e942de65bf..64d7c229ff4e52c 100644
--- a/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll
+++ b/llvm/test/Transforms/ConstantHoisting/AArch64/consthoist-unreachable.ll
@@ -10,14 +10,13 @@
define void @c() {
; CHECK-LABEL: @c(
; CHECK-NEXT: entry:
+; CHECK-NEXT: [[CONST:%.*]] = bitcast i32 1232131 to i32
; CHECK-NEXT: [[TOBOOL:%.*]] = icmp ne i16 0, 0
; CHECK-NEXT: br i1 undef, label [[LBL1_US:%.*]], label [[ENTRY_ENTRY_SPLIT_CRIT_EDGE:%.*]]
; CHECK: entry.entry.split_crit_edge:
-; CHECK-NEXT: [[CONST:%.*]] = bitcast i32 1232131 to i32
; CHECK-NEXT: br label [[LBL1:%.*]]
; CHECK: lbl1.us:
-; CHECK-NEXT: [[CONST1:%.*]] = bitcast i32 1232131 to i32
-; CHECK-NEXT: store i32 [[CONST1]], ptr @c.a, align 1
+; CHECK-NEXT: store i32 [[CONST]], ptr @c.a, align 1
; CHECK-NEXT: br label [[FOR_COND4:%.*]]
; CHECK: lbl1:
; CHECK-NEXT: store i32 [[CONST]], ptr @c.a, align 1
diff --git a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
index b1fc96ea77ed034..4f413a50837dd69 100644
--- a/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
+++ b/llvm/test/Transforms/LoopVectorize/X86/no_fpmath_with_hotness.ll
@@ -108,5 +108,5 @@ attributes #0 = { nounwind }
isOptimized: true, flags: "-O2",
splitDebugFilename: "abc.debug", emissionKind: 2)
!29 = !{!"function_entry_count", i64 3}
-!30 = !{!"branch_weights", i32 99, i32 1}
+!30 = !{!"branch_weights", i32 10000, i32 1}
!31 = !{!"branch_weights", i32 1, i32 99}
diff --git a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
index ed107b10dcd9874..4da1d099645bee2 100644
--- a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
+++ b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info-2.ll
@@ -198,5 +198,5 @@ attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "l
!55 = distinct !{!55, !43}
!56 = !{!"function_entry_count", i64 3}
!57 = !{!"function_entry_count", i64 50}
-!58 = !{!"branch_weights", i32 99, i32 1}
+!58 = !{!"branch_weights", i32 10000, i32 1}
!59 = !{!"branch_weights", i32 1, i32 99}
diff --git a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
index 30d11a12c79c4bc..4b7b714a2562800 100644
--- a/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
+++ b/llvm/test/Transforms/LoopVectorize/diag-with-hotness-info.ll
@@ -209,5 +209,5 @@ attributes #0 = { norecurse nounwind ssp uwtable "disable-tail-calls"="false" "l
!55 = distinct !{!55, !43}
!56 = !{!"function_entry_count", i64 3}
!57 = !{!"function_entry_count", i64 50}
-!58 = !{!"branch_weights", i32 99, i32 1}
+!58 = !{!"branch_weights", i32 10000, i32 1}
!59 = !{!"branch_weights", i32 1, i32 99}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll
index 19e83649723d642..105494942d383d5 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update-2.ll
@@ -14,8 +14,8 @@ T1: ; preds = %0
%v1 = call i32 @f1(), !prof !12
%cond3 = icmp eq i32 %v1, 412
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1)
-;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+;; The distribution factor -9223372036854775808 stands for 53.85%, which is from 7/6+7.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -9223372036854775808)
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg !13
;; Probe 7 has two copies, since they don't share the same inline context, they are not
;; considered sharing samples, thus their distribution factors are not fixed up.
@@ -29,8 +29,8 @@ T1: ; preds = %0
Merge: ; preds = %0
%v2 = call i32 @f2(), !prof !12
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1)
-;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 8513881922462547968)
+;; The distribution factor -9223372036854775808 stands for 46.25%, which is from 6/6+7.
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -9223372036854775808)
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 8513881922462547968), !dbg !13
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 7, i32 0, i64 -1)
call void @llvm.pseudoprobe(i64 6699318081062747564, i64 7, i32 0, i64 -1), !dbg !18
@@ -77,4 +77,4 @@ attributes #0 = { inaccessiblememonly nounwind willreturn }
!16 = distinct !DILocation(line: 10, column: 11, scope: !17)
!17 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646551)
!18 = !DILocation(line: 53, column: 3, scope: !15, inlinedAt: !19)
-!19 = !DILocation(line: 12, column: 3, scope: !4)
\ No newline at end of file
+!19 = !DILocation(line: 12, column: 3, scope: !4)
</pre>
</details>
https://github.com/llvm/llvm-project/pull/66285
More information about the llvm-commits
mailing list