[clang] [llvm] [PseudoProbe] Mix and reorder block and call probe ID in lexical order (PR #75092)
Lei Wang via cfe-commits
cfe-commits at lists.llvm.org
Thu Feb 29 19:37:00 PST 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092
>From e6f735a336f87659ee3236fc795ad4b7107dff4d Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 10 Dec 2023 18:30:42 -0800
Subject: [PATCH 1/5] [PseudoProbe] Mix and reorder block and call probe ID in
lexical order
---
clang/test/CodeGen/pseudo-probe-emit.c | 8 +++----
.../llvm/Transforms/IPO/SampleProfileProbe.h | 3 +--
.../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++-----------
.../Inputs/pseudo-probe-profile.prof | 8 +++----
.../Inputs/pseudo-probe-update.prof | 8 +++----
.../SampleProfile/pseudo-probe-dangle.ll | 12 +++++-----
.../pseudo-probe-discriminator.ll | 6 ++---
.../pseudo-probe-profile-metadata-2.ll | 15 ++++++-------
.../SampleProfile/pseudo-probe-profile.ll | 22 +++++++++----------
.../SampleProfile/pseudo-probe-update.ll | 11 +++++-----
.../SampleProfile/pseudo-probe-verify.ll | 16 +++++++-------
11 files changed, 59 insertions(+), 69 deletions(-)
diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index c7a3f7e6d5b02b..360f831e842945 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -10,9 +10,9 @@ void foo(int x) {
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
if (x == 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
- bar();
+ bar(); // probe id : 3
else
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
- go();
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+ go(); // probe id : 5
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1)
}
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0f2729a9462de2..69b87adf105fd1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -82,8 +82,7 @@ class SampleProfileProber {
uint32_t getBlockId(const BasicBlock *BB) const;
uint32_t getCallsiteId(const Instruction *Call) const;
void computeCFGHash();
- void computeProbeIdForBlocks();
- void computeProbeIdForCallsites();
+ void computeProbeId();
Function *F;
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 090e5560483edb..975c1bc2347aa4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func,
BlockProbeIds.clear();
CallProbeIds.clear();
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
- computeProbeIdForBlocks();
- computeProbeIdForCallsites();
+ computeProbeId();
computeCFGHash();
}
@@ -207,7 +206,10 @@ void SampleProfileProber::computeCFGHash() {
<< ", Hash = " << FunctionHash << "\n");
}
-void SampleProfileProber::computeProbeIdForBlocks() {
+void SampleProfileProber::computeProbeId() {
+ LLVMContext &Ctx = F->getContext();
+ Module *M = F->getParent();
+
DenseSet<BasicBlock *> KnownColdBlocks;
computeEHOnlyBlocks(*F, KnownColdBlocks);
// Insert pseudo probe to non-cold blocks only. This will reduce IR size as
@@ -216,18 +218,9 @@ void SampleProfileProber::computeProbeIdForBlocks() {
++LastProbeId;
if (!KnownColdBlocks.contains(&BB))
BlockProbeIds[&BB] = LastProbeId;
- }
-}
-void SampleProfileProber::computeProbeIdForCallsites() {
- LLVMContext &Ctx = F->getContext();
- Module *M = F->getParent();
-
- for (auto &BB : *F) {
for (auto &I : BB) {
- if (!isa<CallBase>(I))
- continue;
- if (isa<IntrinsicInst>(&I))
+ if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I))
continue;
// The current implementation uses the lower 16 bits of the discriminator
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
index ba4c6117dc96ab..d3847946b94033 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
@@ -1,8 +1,8 @@
foo:3200:13
1: 13
2: 7
- 3: 6
- 4: 13
- 5: 7 _Z3barv:2 _Z3foov:5
- 6: 6 _Z3barv:4 _Z3foov:2
+ 4: 6
+ 6: 13
+ 3: 7 _Z3barv:2 _Z3foov:5
+ 5: 6 _Z3barv:4 _Z3foov:2
!CFGChecksum: 563022570642068
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
index 62f9bd5992e735..213bf0b6f81cc4 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -1,8 +1,8 @@
foo:3200:13
1: 13
2: 7
- 3: 6
- 4: 13
- 5: 7
- 6: 6
+ 4: 6
+ 6: 13
+ 7: 7
+ 9: 6
!CFGChecksum: 844530426352218
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
index 4647a34fc2f620..f0b6fdf62d9696 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
@@ -23,21 +23,21 @@ Merge:
; JT-LABEL-NO: T
; JT-LABEL-NO: F
; JT-LABEL: Merge
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3
-; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2
-; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; ASM-NOT: .pseudoprobe 6699318081062747564 4
; ASM-NOT: .pseudoprobe 6699318081062747564 3
-; ASM-NOT: .pseudoprobe 6699318081062747564 2
-; ASM: .pseudoprobe 6699318081062747564 4 0 0
+; ASM: .pseudoprobe 6699318081062747564 5 0 0
ret i32 %call
}
;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too.
; MIR-tail: bb.0
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
-; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2
; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3
-; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
+; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4
+; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
index 62f0737875aec3..97b0ed600ad106 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
@@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra
; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
-
+
; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]])
-; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575)
+; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559)
; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]])
-; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583)
+; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567)
; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
index 148f3ede5ab48a..379dcfcab338d9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
@@ -29,7 +29,7 @@ if.else:
br label %return
return:
- call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1)
+ call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1)
%1 = load i32, ptr %retval, align 4
ret i32 %1
}
@@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"}
!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
!10 = !{!"function_entry_count", i64 14}
!11 = !{!"branch_weights", i32 100, i32 0}
-;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 1.0.
-!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575)
+;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 1.0.
+!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559)
!13 = distinct !DILocation(line: 10, column: 11, scope: !12)
-;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 0.
-!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775)
+;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 0.
+!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759)
!15 = distinct !DILocation(line: 10, column: 11, scope: !14)
!16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 474b6668b0a7a7..867a49dbaed2ee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -22,12 +22,12 @@ if.then:
if.else:
; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
call void %f(i32 2)
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
store i32 2, ptr %retval, align 4
br label %return
return:
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
%1 = load i32, ptr %retval, align 4
ret i32 %1
}
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
+;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe
+;; with an index of 3.
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695)
+; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe
;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
-; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe
-;; with an index of 6.
; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
!llvm.module.flags = !{!9, !10}
@@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '5'
+;YAML-NEXT: - ProbeId: '3'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '6'
+;YAML-NEXT: - ProbeId: '5'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '3'
+;YAML-NEXT: - ProbeId: '4'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '4'
+;YAML-NEXT: - ProbeId: '6'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
index 992afedd14f75f..217b61970933dc 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -14,15 +14,15 @@ T1:
%v1 = call i32 @f1()
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080)
%cond3 = icmp eq i32 %v1, 412
br label %Merge
F1:
; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
%v2 = call i32 @f2()
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968)
br label %Merge
Merge:
@@ -30,11 +30,11 @@ Merge:
%B = phi i32 [%v1, %T1], [%v2, %F1]
br i1 %A, label %T2, label %F2
T2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1)
call void @f3()
ret i32 %B
F2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1)
ret i32 %B
}
@@ -42,4 +42,3 @@ F2:
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
attributes #0 = {"use-sample-profile"}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
index f70e5189ab1293..b622cfbd6634ef 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -4,7 +4,7 @@
; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
; VERIFY: Function foo:
-; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00
+; VERIFY-DAG: Probe 5 previous factor 1.00 current factor 5.00
; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00
declare void @foo2() nounwind
@@ -27,15 +27,15 @@ bb7.preheader:
bb10:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
%indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
%tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
@@ -50,14 +50,14 @@ bb10:
br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
bb24:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
ret void
}
;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe
;; with an index of 6 and a scale of -1%.
; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
-; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
>From 5e6b16f041101d4db38748bc9bfc3ec60536f73c Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 10 Jan 2024 10:10:34 -0800
Subject: [PATCH 2/5] Detect the order and add a summary flag to the profile
---
llvm/include/llvm/ProfileData/SampleProf.h | 10 ++-
.../llvm/ProfileData/SampleProfReader.h | 3 +
llvm/lib/ProfileData/SampleProf.cpp | 1 +
llvm/lib/ProfileData/SampleProfReader.cpp | 5 ++
llvm/lib/ProfileData/SampleProfWriter.cpp | 4 ++
llvm/lib/Transforms/IPO/SampleProfile.cpp | 8 +++
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 62 +++++++++++++++++++
7 files changed, 91 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 8ac84d4b933f20..a5ac05c9cdff75 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagIsPreInlined means this profile contains ShouldBeInlined
/// contexts thus this is CS preinliner computed.
SecFlagIsPreInlined = (1 << 4),
+ /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the
+ /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of
+ /// the order that callsite probe IDs are always after the BB probe IDs.
+ SecFlagIsMixedProbeOrder = (1 << 5),
};
enum class SecFuncMetadataFlags : uint32_t {
@@ -466,7 +470,7 @@ struct SampleContextFrame {
LineLocation Location;
SampleContextFrame() : Location(0, 0) {}
-
+
SampleContextFrame(FunctionId Func, LineLocation Location)
: Func(Func), Location(Location) {}
@@ -527,7 +531,7 @@ class SampleContext {
: Func(Name), State(UnknownContext), Attributes(ContextNone) {
assert(!Name.empty() && "Name is empty");
}
-
+
SampleContext(FunctionId Func)
: Func(Func), State(UnknownContext), Attributes(ContextNone) {}
@@ -1178,6 +1182,8 @@ class FunctionSamples {
static bool ProfileIsProbeBased;
+ static bool ProfileIsMixedProbeOrder;
+
static bool ProfileIsCS;
static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdbd..6551f9374d4aa7 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,6 +525,9 @@ class SampleProfileReader {
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
+ /// Whether profiles are in mixed BB and callsite probe order.
+ bool ProfileIsMixedProbeOrder = false;
+
/// Whether function profiles are context-sensitive flat profiles.
bool ProfileIsCS = false;
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47b..830b83a25a3e42 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
namespace llvm {
namespace sampleprof {
bool FunctionSamples::ProfileIsProbeBased = false;
+bool FunctionSamples::ProfileIsMixedProbeOrder = false;
bool FunctionSamples::ProfileIsCS = false;
bool FunctionSamples::ProfileIsPreInlined = false;
bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 98d0aa794529c5..6476f6cb7ca141 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+ FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
+ true;
break;
case SecNameTable: {
bool FixedLengthMD5 =
@@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
Flags.append("preInlined,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+ Flags.append("mixed-probe-order,");
break;
case SecFuncOffsetTable:
if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 625e523f13cec0..4a708c94dac518 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
+ if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
+ FunctionSamples::ProfileIsMixedProbeOrder)
+ addSectionFlag(SecProfSummary,
+ SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index ffc26f1a0a972d..a8a2697aa6a7ed 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -2173,6 +2173,14 @@ bool SampleProfileLoader::doInitialization(Module &M,
DS_Warning));
return false;
}
+
+ if (!FunctionSamples::ProfileIsMixedProbeOrder) {
+ const char *Msg =
+ "Pseudo-probe-based profile is on an old version ID order which "
+ "could cause profile mismatch(performance regression)";
+ Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
+ DS_Warning));
+ }
}
if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5aa44108f96605..4771dda88d89c0 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -513,6 +513,66 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
return ProfileMap.Create(Context);
}
+// Use a heuristic to determine probe order by checking callsite insertion
+// position relative to the BB probes. Sort all the BB probes is in a list, for
+// each calliste, compute "ratio = insert position / length of the list". For
+// the old order, the probe ids for BB should be all before(smaller than) the
+// probe ids for callsite, this ratio should be equal to or close to 1.
+bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
+ // Use flattned profile to maximize the callsites in the profile.
+ SampleProfileMap flattenProfile;
+ ProfileConverter::flattenProfile(Profiles, flattenProfile);
+
+ uint32_t PossibleOldOrderNum = 0;
+ uint32_t PossibleNewOrderNum = 0;
+
+ for (const auto &I : flattenProfile) {
+ const FunctionSamples &FS = I.second;
+ // Skip small functions whose profile order are likely random.
+ if (FS.getBodySamples().size() < 10)
+ continue;
+
+ std::set<uint32_t> PossibleBBProbeIDs;
+ std::set<uint32_t> CallsiteIDs;
+ for (const auto &I : FS.getBodySamples()) {
+ if (I.second.getCallTargets().empty())
+ PossibleBBProbeIDs.insert(I.first.LineOffset);
+ else
+ CallsiteIDs.insert(I.first.LineOffset);
+ }
+
+ if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
+ continue;
+
+ uint32_t DistanceToBeginSum = 0;
+ for (const auto &ID : CallsiteIDs)
+ DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
+ PossibleBBProbeIDs.upper_bound(ID));
+ uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
+
+ // Note that PossibleBBProbeIDs could contains some callsite probe id, the
+ // ratio is not exactly 1 for the old order, hence use a smaller threshold
+ // to determine.
+ if ((float)DistanceToBeginSum / LengthSum > 0.8)
+ PossibleOldOrderNum++;
+ else
+ PossibleNewOrderNum++;
+ }
+ return PossibleNewOrderNum >= PossibleOldOrderNum;
+}
+
+void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) {
+ if (FunctionSamples::ProfileIsProbeBased) {
+ FunctionSamples::ProfileIsMixedProbeOrder = true;
+ if (!checkProbeIDIsInMixedOrder(Profiles)) {
+ WithColor::warning()
+ << "Pseudo-probe-based profile is on an old version ID order which "
+ "could cause profile mismatch(performance regression)\n";
+ FunctionSamples::ProfileIsMixedProbeOrder = false;
+ }
+ }
+}
+
void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
@@ -535,6 +595,7 @@ void ProfileGenerator::postProcessProfiles() {
trimColdProfiles(ProfileMap, ColdCountThreshold);
filterAmbiguousProfile(ProfileMap);
calculateAndShowDensity(ProfileMap);
+ warnOrWriteProbeOrderFlag(ProfileMap);
}
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
@@ -1068,6 +1129,7 @@ void CSProfileGenerator::postProcessProfiles() {
FunctionSamples::ProfileIsCS = false;
}
filterAmbiguousProfile(ProfileMap);
+ warnOrWriteProbeOrderFlag(ProfileMap);
}
void ProfileGeneratorBase::computeSummaryAndThreshold(
>From 6fe180efa591e8282d7d9e72c6a7645ce34b2b74 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 28 Feb 2024 21:26:13 -0800
Subject: [PATCH 3/5] Revert "Detect the order and add a summary flag to the
profile"
This reverts commit 28b0374b93f2072d03804ff1d8a8b2fd81c5a218.
---
llvm/include/llvm/ProfileData/SampleProf.h | 10 +--
.../llvm/ProfileData/SampleProfReader.h | 3 -
llvm/lib/ProfileData/SampleProf.cpp | 1 -
llvm/lib/ProfileData/SampleProfReader.cpp | 5 --
llvm/lib/ProfileData/SampleProfWriter.cpp | 4 --
llvm/lib/Transforms/IPO/SampleProfile.cpp | 10 +--
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 62 -------------------
7 files changed, 3 insertions(+), 92 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index a5ac05c9cdff75..8ac84d4b933f20 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,10 +201,6 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagIsPreInlined means this profile contains ShouldBeInlined
/// contexts thus this is CS preinliner computed.
SecFlagIsPreInlined = (1 << 4),
- /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the
- /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of
- /// the order that callsite probe IDs are always after the BB probe IDs.
- SecFlagIsMixedProbeOrder = (1 << 5),
};
enum class SecFuncMetadataFlags : uint32_t {
@@ -470,7 +466,7 @@ struct SampleContextFrame {
LineLocation Location;
SampleContextFrame() : Location(0, 0) {}
-
+
SampleContextFrame(FunctionId Func, LineLocation Location)
: Func(Func), Location(Location) {}
@@ -531,7 +527,7 @@ class SampleContext {
: Func(Name), State(UnknownContext), Attributes(ContextNone) {
assert(!Name.empty() && "Name is empty");
}
-
+
SampleContext(FunctionId Func)
: Func(Func), State(UnknownContext), Attributes(ContextNone) {}
@@ -1182,8 +1178,6 @@ class FunctionSamples {
static bool ProfileIsProbeBased;
- static bool ProfileIsMixedProbeOrder;
-
static bool ProfileIsCS;
static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 6551f9374d4aa7..9e8f543909cdbd 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,9 +525,6 @@ class SampleProfileReader {
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
- /// Whether profiles are in mixed BB and callsite probe order.
- bool ProfileIsMixedProbeOrder = false;
-
/// Whether function profiles are context-sensitive flat profiles.
bool ProfileIsCS = false;
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 830b83a25a3e42..59fa71899ed47b 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,7 +41,6 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
namespace llvm {
namespace sampleprof {
bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsMixedProbeOrder = false;
bool FunctionSamples::ProfileIsCS = false;
bool FunctionSamples::ProfileIsPreInlined = false;
bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6476f6cb7ca141..98d0aa794529c5 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,9 +704,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
- if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
- FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
- true;
break;
case SecNameTable: {
bool FixedLengthMD5 =
@@ -1372,8 +1369,6 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
Flags.append("preInlined,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
- if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
- Flags.append("mixed-probe-order,");
break;
case SecFuncOffsetTable:
if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 4a708c94dac518..625e523f13cec0 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,10 +437,6 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
- if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
- FunctionSamples::ProfileIsMixedProbeOrder)
- addSectionFlag(SecProfSummary,
- SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a8a2697aa6a7ed..402920d3a9a2d8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1190,7 +1190,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2173,14 +2173,6 @@ bool SampleProfileLoader::doInitialization(Module &M,
DS_Warning));
return false;
}
-
- if (!FunctionSamples::ProfileIsMixedProbeOrder) {
- const char *Msg =
- "Pseudo-probe-based profile is on an old version ID order which "
- "could cause profile mismatch(performance regression)";
- Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
- DS_Warning));
- }
}
if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 4771dda88d89c0..5aa44108f96605 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -513,66 +513,6 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
return ProfileMap.Create(Context);
}
-// Use a heuristic to determine probe order by checking callsite insertion
-// position relative to the BB probes. Sort all the BB probes is in a list, for
-// each calliste, compute "ratio = insert position / length of the list". For
-// the old order, the probe ids for BB should be all before(smaller than) the
-// probe ids for callsite, this ratio should be equal to or close to 1.
-bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
- // Use flattned profile to maximize the callsites in the profile.
- SampleProfileMap flattenProfile;
- ProfileConverter::flattenProfile(Profiles, flattenProfile);
-
- uint32_t PossibleOldOrderNum = 0;
- uint32_t PossibleNewOrderNum = 0;
-
- for (const auto &I : flattenProfile) {
- const FunctionSamples &FS = I.second;
- // Skip small functions whose profile order are likely random.
- if (FS.getBodySamples().size() < 10)
- continue;
-
- std::set<uint32_t> PossibleBBProbeIDs;
- std::set<uint32_t> CallsiteIDs;
- for (const auto &I : FS.getBodySamples()) {
- if (I.second.getCallTargets().empty())
- PossibleBBProbeIDs.insert(I.first.LineOffset);
- else
- CallsiteIDs.insert(I.first.LineOffset);
- }
-
- if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
- continue;
-
- uint32_t DistanceToBeginSum = 0;
- for (const auto &ID : CallsiteIDs)
- DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
- PossibleBBProbeIDs.upper_bound(ID));
- uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
-
- // Note that PossibleBBProbeIDs could contains some callsite probe id, the
- // ratio is not exactly 1 for the old order, hence use a smaller threshold
- // to determine.
- if ((float)DistanceToBeginSum / LengthSum > 0.8)
- PossibleOldOrderNum++;
- else
- PossibleNewOrderNum++;
- }
- return PossibleNewOrderNum >= PossibleOldOrderNum;
-}
-
-void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) {
- if (FunctionSamples::ProfileIsProbeBased) {
- FunctionSamples::ProfileIsMixedProbeOrder = true;
- if (!checkProbeIDIsInMixedOrder(Profiles)) {
- WithColor::warning()
- << "Pseudo-probe-based profile is on an old version ID order which "
- "could cause profile mismatch(performance regression)\n";
- FunctionSamples::ProfileIsMixedProbeOrder = false;
- }
- }
-}
-
void ProfileGenerator::generateProfile() {
collectProfiledFunctions();
@@ -595,7 +535,6 @@ void ProfileGenerator::postProcessProfiles() {
trimColdProfiles(ProfileMap, ColdCountThreshold);
filterAmbiguousProfile(ProfileMap);
calculateAndShowDensity(ProfileMap);
- warnOrWriteProbeOrderFlag(ProfileMap);
}
void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
@@ -1129,7 +1068,6 @@ void CSProfileGenerator::postProcessProfiles() {
FunctionSamples::ProfileIsCS = false;
}
filterAmbiguousProfile(ProfileMap);
- warnOrWriteProbeOrderFlag(ProfileMap);
}
void ProfileGeneratorBase::computeSummaryAndThreshold(
>From f25b13f071aab68bcc9ad3d98fdde871670a739c Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 29 Feb 2024 18:04:37 -0800
Subject: [PATCH 4/5] Error out if the checksum mismatch is high
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 84 ++++++++++++++++++++++-
1 file changed, 83 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 402920d3a9a2d8..b599645cbc7df3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -234,6 +234,25 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
cl::desc(
"Skip relative hotness check for ICP up to given number of targets."));
+static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
+ "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100),
+ cl::desc("For checksum-mismatch error check, skip checking the function "
+ "that the num of its hot(on average) blocks is smaller than the "
+ "given number."));
+
+static cl::opt<unsigned> ChecksumMismatchNumFuncSkip(
+ "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(100),
+ cl::desc(
+ "For checksum-mismatch error check, skip the check if the number of "
+ "total selected function is smaller than the given number."));
+
+static cl::opt<unsigned> ChecksumMismatchErrorThreshold(
+ "checksum-mismatch-error-threshold", cl::Hidden, cl::init(90),
+ cl::desc(
+ "For checksum-mismatch error check, error out if the percentage of "
+ "function mismatched-checksum is higher than the given percentage "
+ "threshold"));
+
static cl::opt<bool> CallsitePrioritizedInline(
"sample-profile-prioritized-inline", cl::Hidden,
@@ -630,6 +649,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
void generateMDProfMetadata(Function &F);
+ bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI,
+ const SampleProfileMap &Profiles);
/// Map from function name to Function *. Used to find the function from
/// the function name. If the function name contains suffix, additional
@@ -1190,7 +1211,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2184,6 +2205,61 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
+// Note that this is a module-level check. Even if one module is errored out,
+// the entire build will be errored out. However, the user could make big
+// changes to functions in single module but those changes might not be
+// performance significant to the whole binary. Therefore, we use a conservative
+// approach to make sure we only error out if it globally impacts the binary
+// performance. To achieve this, we use heuristics to select a reasonable
+// big set of functions that are supposed to be globally performance
+// significant, only compute and check the mismatch within those functions. The
+// function selection is based on two criteria: 1) The function is "hot" enough,
+// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
+// The num of function is large enough which is tuned by the
+// ChecksumMismatchNumFuncSkip flag.
+bool SampleProfileLoader::errorIfHighChecksumMismatch(
+ Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
+ assert(FunctionSamples::ProfileIsProbeBased &&
+ "Only support for probe-based profile");
+ uint64_t TotalSelectedFunc = 0;
+ uint64_t NumMismatchedFunc = 0;
+ for (const auto &I : Profiles) {
+ const auto &FS = I.second;
+ const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+ if (!FuncDesc)
+ continue;
+
+ // We want to select a set of functions that are globally performance
+ // significant, in other words, if those functions profiles are
+ // checksum-mismatched and dropped, the whole binary will likely be
+ // impacted, so here we use a hotness-based threshold to control the
+ // selection.
+ if (FS.getTotalSamples() >=
+ ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
+ continue;
+
+ TotalSelectedFunc++;
+ if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
+ NumMismatchedFunc++;
+ }
+ // Make sure that the num of selected function is not too small to distinguish
+ // from the user's benign changes.
+ if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip)
+ return false;
+
+ // Finally check the mismatch percentage against the threshold.
+ if (NumMismatchedFunc * 100 >=
+ TotalSelectedFunc * ChecksumMismatchErrorThreshold) {
+ auto &Ctx = M.getContext();
+ const char *Msg =
+ "The FDO profile is too old and will cause big performance regression, "
+ "please drop the profile and re-collect a new one.";
+ Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
+ return true;
+ }
+ return false;
+}
+
void SampleProfileMatcher::findIRAnchors(
const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
// For inlined code, recover the original callsite and callee by finding the
@@ -2715,6 +2791,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummary::PSK_Sample);
PSI->refresh();
}
+
+ // Error out if the profile checksum mismatch is too high.
+ if (FunctionSamples::ProfileIsProbeBased &&
+ errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles()))
+ return false;
+
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
>From 6641409c4ee90807e48023ea071be016f52c11ff Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 29 Feb 2024 19:36:13 -0800
Subject: [PATCH 5/5] Revert "Error out if the checksum mismatch is high"
This reverts commit f25b13f071aab68bcc9ad3d98fdde871670a739c.
---
llvm/lib/Transforms/IPO/SampleProfile.cpp | 84 +----------------------
1 file changed, 1 insertion(+), 83 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index b599645cbc7df3..402920d3a9a2d8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -234,25 +234,6 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
cl::desc(
"Skip relative hotness check for ICP up to given number of targets."));
-static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
- "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100),
- cl::desc("For checksum-mismatch error check, skip checking the function "
- "that the num of its hot(on average) blocks is smaller than the "
- "given number."));
-
-static cl::opt<unsigned> ChecksumMismatchNumFuncSkip(
- "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(100),
- cl::desc(
- "For checksum-mismatch error check, skip the check if the number of "
- "total selected function is smaller than the given number."));
-
-static cl::opt<unsigned> ChecksumMismatchErrorThreshold(
- "checksum-mismatch-error-threshold", cl::Hidden, cl::init(90),
- cl::desc(
- "For checksum-mismatch error check, error out if the percentage of "
- "function mismatched-checksum is higher than the given percentage "
- "threshold"));
-
static cl::opt<bool> CallsitePrioritizedInline(
"sample-profile-prioritized-inline", cl::Hidden,
@@ -649,8 +630,6 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
void generateMDProfMetadata(Function &F);
- bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI,
- const SampleProfileMap &Profiles);
/// Map from function name to Function *. Used to find the function from
/// the function name. If the function name contains suffix, additional
@@ -1211,7 +1190,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2205,61 +2184,6 @@ bool SampleProfileLoader::doInitialization(Module &M,
return true;
}
-// Note that this is a module-level check. Even if one module is errored out,
-// the entire build will be errored out. However, the user could make big
-// changes to functions in single module but those changes might not be
-// performance significant to the whole binary. Therefore, we use a conservative
-// approach to make sure we only error out if it globally impacts the binary
-// performance. To achieve this, we use heuristics to select a reasonable
-// big set of functions that are supposed to be globally performance
-// significant, only compute and check the mismatch within those functions. The
-// function selection is based on two criteria: 1) The function is "hot" enough,
-// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
-// The num of function is large enough which is tuned by the
-// ChecksumMismatchNumFuncSkip flag.
-bool SampleProfileLoader::errorIfHighChecksumMismatch(
- Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
- assert(FunctionSamples::ProfileIsProbeBased &&
- "Only support for probe-based profile");
- uint64_t TotalSelectedFunc = 0;
- uint64_t NumMismatchedFunc = 0;
- for (const auto &I : Profiles) {
- const auto &FS = I.second;
- const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
- if (!FuncDesc)
- continue;
-
- // We want to select a set of functions that are globally performance
- // significant, in other words, if those functions profiles are
- // checksum-mismatched and dropped, the whole binary will likely be
- // impacted, so here we use a hotness-based threshold to control the
- // selection.
- if (FS.getTotalSamples() >=
- ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
- continue;
-
- TotalSelectedFunc++;
- if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
- NumMismatchedFunc++;
- }
- // Make sure that the num of selected function is not too small to distinguish
- // from the user's benign changes.
- if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip)
- return false;
-
- // Finally check the mismatch percentage against the threshold.
- if (NumMismatchedFunc * 100 >=
- TotalSelectedFunc * ChecksumMismatchErrorThreshold) {
- auto &Ctx = M.getContext();
- const char *Msg =
- "The FDO profile is too old and will cause big performance regression, "
- "please drop the profile and re-collect a new one.";
- Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
- return true;
- }
- return false;
-}
-
void SampleProfileMatcher::findIRAnchors(
const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
// For inlined code, recover the original callsite and callee by finding the
@@ -2791,12 +2715,6 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
ProfileSummary::PSK_Sample);
PSI->refresh();
}
-
- // Error out if the profile checksum mismatch is too high.
- if (FunctionSamples::ProfileIsProbeBased &&
- errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles()))
- return false;
-
// Compute the total number of samples collected in this profile.
for (const auto &I : Reader->getProfiles())
TotalCollectedSamples += I.second.getTotalSamples();
More information about the cfe-commits
mailing list