[clang] [llvm] [PseudoProbe] Mix and reorder block and call probe ID in lexical order (PR #75092)
Lei Wang via cfe-commits
cfe-commits at lists.llvm.org
Wed Jan 10 11:30:28 PST 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092
>From ccfee2d0c5399b03b53ef79a4645ab0d10efeafd Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 10 Dec 2023 18:30:42 -0800
Subject: [PATCH 1/2] [PseudoProbe] Mix and reorder block and call probe ID in
lexical order
---
clang/test/CodeGen/pseudo-probe-emit.c | 8 +++----
.../llvm/Transforms/IPO/SampleProfileProbe.h | 3 +--
.../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++-----------
.../Inputs/pseudo-probe-profile.prof | 8 +++----
.../Inputs/pseudo-probe-update.prof | 8 +++----
.../SampleProfile/pseudo-probe-dangle.ll | 12 +++++-----
.../pseudo-probe-discriminator.ll | 6 ++---
.../pseudo-probe-profile-metadata-2.ll | 15 ++++++-------
.../SampleProfile/pseudo-probe-profile.ll | 22 +++++++++----------
.../SampleProfile/pseudo-probe-update.ll | 11 +++++-----
.../SampleProfile/pseudo-probe-verify.ll | 16 +++++++-------
11 files changed, 59 insertions(+), 69 deletions(-)
diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index c7a3f7e6d5b02b..360f831e842945 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -10,9 +10,9 @@ void foo(int x) {
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
if (x == 0)
// CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
- bar();
+ bar(); // probe id : 3
else
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
- go();
- // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+ go(); // probe id : 5
+ // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1)
}
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0f2729a9462de2..69b87adf105fd1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -82,8 +82,7 @@ class SampleProfileProber {
uint32_t getBlockId(const BasicBlock *BB) const;
uint32_t getCallsiteId(const Instruction *Call) const;
void computeCFGHash();
- void computeProbeIdForBlocks();
- void computeProbeIdForCallsites();
+ void computeProbeId();
Function *F;
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 8f0b12d0cfedfc..6c6bb18bcb3c9d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func,
BlockProbeIds.clear();
CallProbeIds.clear();
LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
- computeProbeIdForBlocks();
- computeProbeIdForCallsites();
+ computeProbeId();
computeCFGHash();
}
@@ -209,7 +208,10 @@ void SampleProfileProber::computeCFGHash() {
<< ", Hash = " << FunctionHash << "\n");
}
-void SampleProfileProber::computeProbeIdForBlocks() {
+void SampleProfileProber::computeProbeId() {
+ LLVMContext &Ctx = F->getContext();
+ Module *M = F->getParent();
+
DenseSet<BasicBlock *> KnownColdBlocks;
computeEHOnlyBlocks(*F, KnownColdBlocks);
// Insert pseudo probe to non-cold blocks only. This will reduce IR size as
@@ -218,18 +220,9 @@ void SampleProfileProber::computeProbeIdForBlocks() {
++LastProbeId;
if (!KnownColdBlocks.contains(&BB))
BlockProbeIds[&BB] = LastProbeId;
- }
-}
-void SampleProfileProber::computeProbeIdForCallsites() {
- LLVMContext &Ctx = F->getContext();
- Module *M = F->getParent();
-
- for (auto &BB : *F) {
for (auto &I : BB) {
- if (!isa<CallBase>(I))
- continue;
- if (isa<IntrinsicInst>(&I))
+ if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I))
continue;
// The current implementation uses the lower 16 bits of the discriminator
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
index ba4c6117dc96ab..d3847946b94033 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
@@ -1,8 +1,8 @@
foo:3200:13
1: 13
2: 7
- 3: 6
- 4: 13
- 5: 7 _Z3barv:2 _Z3foov:5
- 6: 6 _Z3barv:4 _Z3foov:2
+ 4: 6
+ 6: 13
+ 3: 7 _Z3barv:2 _Z3foov:5
+ 5: 6 _Z3barv:4 _Z3foov:2
!CFGChecksum: 563022570642068
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
index 62f9bd5992e735..213bf0b6f81cc4 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -1,8 +1,8 @@
foo:3200:13
1: 13
2: 7
- 3: 6
- 4: 13
- 5: 7
- 6: 6
+ 4: 6
+ 6: 13
+ 7: 7
+ 9: 6
!CFGChecksum: 844530426352218
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
index 4647a34fc2f620..f0b6fdf62d9696 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
@@ -23,21 +23,21 @@ Merge:
; JT-LABEL-NO: T
; JT-LABEL-NO: F
; JT-LABEL: Merge
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4
; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3
-; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2
-; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; ASM-NOT: .pseudoprobe 6699318081062747564 4
; ASM-NOT: .pseudoprobe 6699318081062747564 3
-; ASM-NOT: .pseudoprobe 6699318081062747564 2
-; ASM: .pseudoprobe 6699318081062747564 4 0 0
+; ASM: .pseudoprobe 6699318081062747564 5 0 0
ret i32 %call
}
;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too.
; MIR-tail: bb.0
; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
-; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2
; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3
-; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
+; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4
+; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0
define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
index 62f0737875aec3..97b0ed600ad106 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
@@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra
; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
-
+
; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]])
-; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575)
+; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559)
; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]])
-; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583)
+; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567)
; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
index 148f3ede5ab48a..379dcfcab338d9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
@@ -29,7 +29,7 @@ if.else:
br label %return
return:
- call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1)
+ call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1)
%1 = load i32, ptr %retval, align 4
ret i32 %1
}
@@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"}
!9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
!10 = !{!"function_entry_count", i64 14}
!11 = !{!"branch_weights", i32 100, i32 0}
-;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 1.0.
-!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575)
+;; A discriminator of 186646559 which is 0xB20001F in hexadecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 1.0.
+!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559)
!13 = distinct !DILocation(line: 10, column: 11, scope: !12)
-;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 0.
-!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775)
+;; A discriminator of 134217759 which is 0x800001F in hexadecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 0.
+!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759)
!15 = distinct !DILocation(line: 10, column: 11, scope: !14)
!16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 474b6668b0a7a7..867a49dbaed2ee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -22,12 +22,12 @@ if.then:
if.else:
; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
call void %f(i32 2)
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
store i32 2, ptr %retval, align 4
br label %return
return:
- ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+ ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
%1 = load i32, ptr %retval, align 4
ret i32 %1
}
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
+;; A discriminator of 119537695 which is 0x720001f in hexadecimal, stands for an indirect call probe
+;; with an index of 3.
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695)
+; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe
;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
-; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe
-;; with an index of 6.
; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
!llvm.module.flags = !{!9, !10}
@@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '7'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '5'
+;YAML-NEXT: - ProbeId: '3'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '6'
+;YAML-NEXT: - ProbeId: '5'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '6'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '3'
+;YAML-NEXT: - ProbeId: '4'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
@@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"}
;YAML-NEXT: - String: 'Applied '
;YAML-NEXT: - NumSamples: '13'
;YAML-NEXT: - String: ' samples from profile (ProbeId='
-;YAML-NEXT: - ProbeId: '4'
+;YAML-NEXT: - ProbeId: '6'
;YAML-NEXT: - String: ', Factor='
;YAML-NEXT: - Factor: '1.000000e+00'
;YAML-NEXT: - String: ', OriginalSamples='
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
index 992afedd14f75f..217b61970933dc 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -14,15 +14,15 @@ T1:
%v1 = call i32 @f1()
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080)
%cond3 = icmp eq i32 %v1, 412
br label %Merge
F1:
; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
%v2 = call i32 @f2()
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968)
br label %Merge
Merge:
@@ -30,11 +30,11 @@ Merge:
%B = phi i32 [%v1, %T1], [%v2, %F1]
br i1 %A, label %T2, label %F2
T2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1)
call void @f3()
ret i32 %B
F2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1)
ret i32 %B
}
@@ -42,4 +42,3 @@ F2:
; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
attributes #0 = {"use-sample-profile"}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
index f70e5189ab1293..b622cfbd6634ef 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -4,7 +4,7 @@
; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
; VERIFY: Function foo:
-; VERIFY-DAG: Probe 6 previous factor 1.00 current factor 5.00
+; VERIFY-DAG: Probe 5 previous factor 1.00 current factor 5.00
; VERIFY-DAG: Probe 4 previous factor 1.00 current factor 5.00
declare void @foo2() nounwind
@@ -27,15 +27,15 @@ bb7.preheader:
bb10:
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
%indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
%tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
@@ -50,14 +50,14 @@ bb10:
br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
bb24:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
ret void
}
;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe
;; with an index of 6 and a scale of -1%.
; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
-; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575)
!llvm.dbg.cu = !{!0}
!llvm.module.flags = !{!9, !10}
>From 6ff88479ab47186372ce59a2eb13d420a53e3d80 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 10 Jan 2024 10:10:34 -0800
Subject: [PATCH 2/2] Detect the order and add a summary flag to the profile
---
llvm/include/llvm/ProfileData/SampleProf.h | 10 +++-
.../llvm/ProfileData/SampleProfReader.h | 3 +
llvm/lib/ProfileData/SampleProf.cpp | 1 +
llvm/lib/ProfileData/SampleProfReader.cpp | 5 ++
llvm/lib/ProfileData/SampleProfWriter.cpp | 4 ++
llvm/lib/Transforms/IPO/SampleProfile.cpp | 10 +++-
llvm/tools/llvm-profgen/ProfileGenerator.cpp | 58 +++++++++++++++++++
7 files changed, 88 insertions(+), 3 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 66aaf602d0e1d9..4e5191ef076a52 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t {
/// SecFlagIsPreInlined means this profile contains ShouldBeInlined
/// contexts thus this is CS preinliner computed.
SecFlagIsPreInlined = (1 << 4),
+ /// SecFlagIsMixedProbeOrder means that in a pseudo-probe based profile, the
+ /// callsite and BB probe IDs are mixed and sorted in lexical order instead of
+ /// the order in which callsite probe IDs always come after the BB probe IDs.
+ SecFlagIsMixedProbeOrder = (1 << 5),
};
enum class SecFuncMetadataFlags : uint32_t {
@@ -466,7 +470,7 @@ struct SampleContextFrame {
LineLocation Location;
SampleContextFrame() : Location(0, 0) {}
-
+
SampleContextFrame(FunctionId Func, LineLocation Location)
: Func(Func), Location(Location) {}
@@ -527,7 +531,7 @@ class SampleContext {
: Func(Name), State(UnknownContext), Attributes(ContextNone) {
assert(!Name.empty() && "Name is empty");
}
-
+
SampleContext(FunctionId Func)
: Func(Func), State(UnknownContext), Attributes(ContextNone) {}
@@ -1178,6 +1182,8 @@ class FunctionSamples {
static bool ProfileIsProbeBased;
+ static bool ProfileIsMixedProbeOrder;
+
static bool ProfileIsCS;
static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdbd..6551f9374d4aa7 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,6 +525,9 @@ class SampleProfileReader {
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
+ /// Whether profiles are in mixed BB and callsite probe order.
+ bool ProfileIsMixedProbeOrder = false;
+
/// Whether function profiles are context-sensitive flat profiles.
bool ProfileIsCS = false;
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47b..830b83a25a3e42 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
namespace llvm {
namespace sampleprof {
bool FunctionSamples::ProfileIsProbeBased = false;
+bool FunctionSamples::ProfileIsMixedProbeOrder = false;
bool FunctionSamples::ProfileIsCS = false;
bool FunctionSamples::ProfileIsPreInlined = false;
bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 98d0aa794529c5..6476f6cb7ca141 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
FunctionSamples::ProfileIsFS = ProfileIsFS = true;
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+ FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
+ true;
break;
case SecNameTable: {
bool FixedLengthMD5 =
@@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
Flags.append("preInlined,");
if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
Flags.append("fs-discriminator,");
+ if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+ Flags.append("mixed-probe-order,");
break;
case SecFuncOffsetTable:
if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 625e523f13cec0..4a708c94dac518 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
+ if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
+ FunctionSamples::ProfileIsMixedProbeOrder)
+ addSectionFlag(SecProfSummary,
+ SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2fd8668d15e200..2baf7f47aaea7d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1129,7 +1129,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
continue;
-
+
Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
@@ -2112,6 +2112,14 @@ bool SampleProfileLoader::doInitialization(Module &M,
DS_Warning));
return false;
}
+
+ if (!FunctionSamples::ProfileIsMixedProbeOrder) {
+ const char *Msg =
+ "Pseudo-probe-based profile is on an old version ID order which "
+ "could cause profile mismatch(performance regression)";
+ Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
+ DS_Warning));
+ }
}
if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index c4028e6b132871..dbc1ea79691c87 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -559,6 +559,54 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
}
}
+// Use a heuristic to determine the probe order by checking where each callsite
+// ID would be inserted among the BB probe IDs. Sort all the BB probe IDs; for
+// each callsite, compute "ratio = insert position / number of BB probe IDs".
+// In the old order, all BB probe IDs come before (are smaller than) the
+// callsite probe IDs, so this ratio should be equal to or close to 1.
+inline bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
+ // Use the flattened profile to maximize the callsites in the profile.
+ SampleProfileMap flattenProfile;
+ ProfileConverter::flattenProfile(Profiles, flattenProfile);
+
+ uint32_t PossibleOldOrderNum = 0;
+ uint32_t PossibleNewOrderNum = 0;
+
+ for (const auto &I : flattenProfile) {
+ const FunctionSamples &FS = I.second;
+ // Skip small functions whose profile order is likely random.
+ if (FS.getBodySamples().size() < 10)
+ continue;
+
+ std::set<uint32_t> PossibleBBProbeIDs;
+ std::set<uint32_t> CallsiteIDs;
+ for (const auto &I : FS.getBodySamples()) {
+ if (I.second.getCallTargets().empty())
+ PossibleBBProbeIDs.insert(I.first.LineOffset);
+ else
+ CallsiteIDs.insert(I.first.LineOffset);
+ }
+
+ if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
+ continue;
+
+ uint32_t DistanceToBeginSum = 0;
+ for (const auto &ID : CallsiteIDs)
+ DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
+ PossibleBBProbeIDs.upper_bound(ID));
+ uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
+
+ // Note that PossibleBBProbeIDs could contain some callsite probe IDs, so the
+ // ratio is not exactly 1 for the old order; hence a smaller threshold is
+ // used to decide.
+ if ((float)DistanceToBeginSum / LengthSum > 0.8)
+ PossibleOldOrderNum++;
+ else
+ PossibleNewOrderNum++;
+ }
+ return PossibleNewOrderNum >= PossibleOldOrderNum;
+}
+
void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
const BranchSample &BranchCounters) {
for (const auto &Entry : BranchCounters) {
@@ -1024,6 +1072,16 @@ void CSProfileGenerator::postProcessProfiles() {
CSConverter.convertCSProfiles();
FunctionSamples::ProfileIsCS = false;
}
+
+ if (FunctionSamples::ProfileIsProbeBased) {
+ FunctionSamples::ProfileIsMixedProbeOrder = true;
+ if (!checkProbeIDIsInMixedOrder(ProfileMap)) {
+ WithColor::warning()
+ << "Pseudo-probe-based profile is on an old version ID order which "
+ "could cause profile mismatch(performance regression)\n";
+ FunctionSamples::ProfileIsMixedProbeOrder = false;
+ }
+ }
}
void ProfileGeneratorBase::computeSummaryAndThreshold(
More information about the cfe-commits
mailing list