[clang] [llvm] [PseudoProbe] Mix and reorder block and call probe ID in lexical order (PR #75092)

Lei Wang via cfe-commits cfe-commits at lists.llvm.org
Wed Jan 10 11:30:28 PST 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092

>From ccfee2d0c5399b03b53ef79a4645ab0d10efeafd Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 10 Dec 2023 18:30:42 -0800
Subject: [PATCH 1/2] [PseudoProbe] Mix and reorder block and call probe ID in
 lexical order

---
 clang/test/CodeGen/pseudo-probe-emit.c        |  8 +++----
 .../llvm/Transforms/IPO/SampleProfileProbe.h  |  3 +--
 .../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++-----------
 .../Inputs/pseudo-probe-profile.prof          |  8 +++----
 .../Inputs/pseudo-probe-update.prof           |  8 +++----
 .../SampleProfile/pseudo-probe-dangle.ll      | 12 +++++-----
 .../pseudo-probe-discriminator.ll             |  6 ++---
 .../pseudo-probe-profile-metadata-2.ll        | 15 ++++++-------
 .../SampleProfile/pseudo-probe-profile.ll     | 22 +++++++++----------
 .../SampleProfile/pseudo-probe-update.ll      | 11 +++++-----
 .../SampleProfile/pseudo-probe-verify.ll      | 16 +++++++-------
 11 files changed, 59 insertions(+), 69 deletions(-)

diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index c7a3f7e6d5b02b..360f831e842945 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -10,9 +10,9 @@ void foo(int x) {
   // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
     // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
-    bar();
+    bar();  // probe id : 3
   else
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
-    go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+    go(); // probe id : 5
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1)
 }
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0f2729a9462de2..69b87adf105fd1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -82,8 +82,7 @@ class SampleProfileProber {
   uint32_t getBlockId(const BasicBlock *BB) const;
   uint32_t getCallsiteId(const Instruction *Call) const;
   void computeCFGHash();
-  void computeProbeIdForBlocks();
-  void computeProbeIdForCallsites();
+  void computeProbeId();
 
   Function *F;
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 8f0b12d0cfedfc..6c6bb18bcb3c9d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func,
   BlockProbeIds.clear();
   CallProbeIds.clear();
   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
-  computeProbeIdForBlocks();
-  computeProbeIdForCallsites();
+  computeProbeId();
   computeCFGHash();
 }
 
@@ -209,7 +208,10 @@ void SampleProfileProber::computeCFGHash() {
                     << ", Hash = " << FunctionHash << "\n");
 }
 
-void SampleProfileProber::computeProbeIdForBlocks() {
+void SampleProfileProber::computeProbeId() {
+  LLVMContext &Ctx = F->getContext();
+  Module *M = F->getParent();
+
   DenseSet<BasicBlock *> KnownColdBlocks;
   computeEHOnlyBlocks(*F, KnownColdBlocks);
   // Insert pseudo probe to non-cold blocks only. This will reduce IR size as
@@ -218,18 +220,9 @@ void SampleProfileProber::computeProbeIdForBlocks() {
     ++LastProbeId;
     if (!KnownColdBlocks.contains(&BB))
       BlockProbeIds[&BB] = LastProbeId;
-  }
-}
 
-void SampleProfileProber::computeProbeIdForCallsites() {
-  LLVMContext &Ctx = F->getContext();
-  Module *M = F->getParent();
-
-  for (auto &BB : *F) {
     for (auto &I : BB) {
-      if (!isa<CallBase>(I))
-        continue;
-      if (isa<IntrinsicInst>(&I))
+      if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I))
         continue;
 
       // The current implementation uses the lower 16 bits of the discriminator
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
index ba4c6117dc96ab..d3847946b94033 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
@@ -1,8 +1,8 @@
 foo:3200:13
  1: 13
  2: 7
- 3: 6
- 4: 13
- 5: 7 _Z3barv:2 _Z3foov:5
- 6: 6 _Z3barv:4 _Z3foov:2
+ 4: 6
+ 6: 13
+ 3: 7 _Z3barv:2 _Z3foov:5
+ 5: 6 _Z3barv:4 _Z3foov:2
  !CFGChecksum: 563022570642068
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
index 62f9bd5992e735..213bf0b6f81cc4 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -1,8 +1,8 @@
 foo:3200:13
  1: 13
  2: 7
- 3: 6
- 4: 13
- 5: 7
- 6: 6
+ 4: 6
+ 6: 13
+ 7: 7
+ 9: 6
  !CFGChecksum: 844530426352218
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
index 4647a34fc2f620..f0b6fdf62d9696 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
@@ -23,21 +23,21 @@ Merge:
 ; JT-LABEL-NO: T
 ; JT-LABEL-NO: F
 ; JT-LABEL: Merge
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4
 ; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3
-; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2
-; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; ASM-NOT: .pseudoprobe	6699318081062747564 4
 ; ASM-NOT: .pseudoprobe	6699318081062747564 3
-; ASM-NOT: .pseudoprobe	6699318081062747564 2
-; ASM: .pseudoprobe	6699318081062747564 4 0 0
+; ASM: .pseudoprobe	6699318081062747564 5 0 0
 	ret i32 %call
 }
 
 ;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too.
 ; MIR-tail: bb.0
 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
-; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2
 ; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3
-; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
+; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4
+; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0
 
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
index 62f0737875aec3..97b0ed600ad106 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
@@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra
 ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
 ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
 
-           
+
 ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]])
-; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575)
+; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559)
 ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]])
-; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583)
+; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567)
 ; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
 ; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
index 148f3ede5ab48a..379dcfcab338d9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
@@ -29,7 +29,7 @@ if.else:
   br label %return
 
 return:
-  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1)
   %1 = load i32, ptr %retval, align 4
   ret i32 %1
 }
@@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"}
 !9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
 !10 = !{!"function_entry_count", i64 14}
 !11 = !{!"branch_weights", i32 100, i32 0}
-;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 1.0.
-!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575)
+;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 1.0.
+!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559)
 !13 = distinct !DILocation(line: 10, column: 11, scope: !12)
-;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 0.
-!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775)
+;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 0.
+!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759)
 !15 = distinct !DILocation(line: 10, column: 11, scope: !14)
 !16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 474b6668b0a7a7..867a49dbaed2ee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -22,12 +22,12 @@ if.then:
 if.else:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
   call void %f(i32 2)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
   store i32 2, ptr %retval, align 4
   br label %return
 
 return:
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
   %1 = load i32, ptr %retval, align 4
   ret i32 %1
 }
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
 
 ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
 ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
+;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe
+;; with an index of 3.
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695)
+; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
 ;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe
 ;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
-; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe
-;; with an index of 6.
 ; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
 ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
 
 !llvm.module.flags = !{!9, !10}
@@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '5'
+;YAML-NEXT:    - ProbeId:         '3'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '6'
+;YAML-NEXT:    - ProbeId:         '5'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '3'
+;YAML-NEXT:    - ProbeId:         '4'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '4'
+;YAML-NEXT:    - ProbeId:         '6'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
index 992afedd14f75f..217b61970933dc 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -14,15 +14,15 @@ T1:
 	%v1 = call i32 @f1()
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
 ;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080)
     %cond3 = icmp eq i32 %v1, 412
 	br label %Merge
 F1:
 ; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
 	%v2 = call i32 @f2()
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
 ;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968)
 	br label %Merge
 Merge:
 
@@ -30,11 +30,11 @@ Merge:
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	br i1 %A, label %T2, label %F2
 T2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1)
 	call void @f3()
 	ret i32 %B
 F2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1)
 	ret i32 %B
 }
 
@@ -42,4 +42,3 @@ F2:
 ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
 
 attributes #0 = {"use-sample-profile"}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
index f70e5189ab1293..b622cfbd6634ef 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -4,7 +4,7 @@
 
 ; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
 ; VERIFY: Function foo:
-; VERIFY-DAG: Probe 6	previous factor 1.00	current factor 5.00
+; VERIFY-DAG: Probe 5	previous factor 1.00	current factor 5.00
 ; VERIFY-DAG: Probe 4	previous factor 1.00	current factor 5.00
 
 declare void @foo2() nounwind
@@ -27,15 +27,15 @@ bb7.preheader:
 
 bb10:
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
   %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
   %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
@@ -50,14 +50,14 @@ bb10:
   br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
 
 bb24:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
   ret void
 }
 
 ;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe
 ;; with an index of 6 and a scale of -1%.
 ; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
-; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}

>From 6ff88479ab47186372ce59a2eb13d420a53e3d80 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 10 Jan 2024 10:10:34 -0800
Subject: [PATCH 2/2] Detect the order and add a summary flag to the profile

---
 llvm/include/llvm/ProfileData/SampleProf.h    | 10 +++-
 .../llvm/ProfileData/SampleProfReader.h       |  3 +
 llvm/lib/ProfileData/SampleProf.cpp           |  1 +
 llvm/lib/ProfileData/SampleProfReader.cpp     |  5 ++
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  4 ++
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 10 +++-
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 58 +++++++++++++++++++
 7 files changed, 88 insertions(+), 3 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 66aaf602d0e1d9..4e5191ef076a52 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t {
   /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
   /// contexts thus this is CS preinliner computed.
   SecFlagIsPreInlined = (1 << 4),
+  /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the
+  /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of
+  /// the order that callsite probe IDs are always after the BB probe IDs.
+  SecFlagIsMixedProbeOrder = (1 << 5),
 };
 
 enum class SecFuncMetadataFlags : uint32_t {
@@ -466,7 +470,7 @@ struct SampleContextFrame {
   LineLocation Location;
 
   SampleContextFrame() : Location(0, 0) {}
-  
+
   SampleContextFrame(FunctionId Func, LineLocation Location)
       : Func(Func), Location(Location) {}
 
@@ -527,7 +531,7 @@ class SampleContext {
       : Func(Name), State(UnknownContext), Attributes(ContextNone) {
         assert(!Name.empty() && "Name is empty");
       }
-  
+
   SampleContext(FunctionId Func)
       : Func(Func), State(UnknownContext), Attributes(ContextNone) {}
 
@@ -1178,6 +1182,8 @@ class FunctionSamples {
 
   static bool ProfileIsProbeBased;
 
+  static bool ProfileIsMixedProbeOrder;
+
   static bool ProfileIsCS;
 
   static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdbd..6551f9374d4aa7 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,6 +525,9 @@ class SampleProfileReader {
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
+  /// Whether profiles are in mixed BB and callsite probe order.
+  bool ProfileIsMixedProbeOrder = false;
+
   /// Whether function profiles are context-sensitive flat profiles.
   bool ProfileIsCS = false;
 
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47b..830b83a25a3e42 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
 namespace llvm {
 namespace sampleprof {
 bool FunctionSamples::ProfileIsProbeBased = false;
+bool FunctionSamples::ProfileIsMixedProbeOrder = false;
 bool FunctionSamples::ProfileIsCS = false;
 bool FunctionSamples::ProfileIsPreInlined = false;
 bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 98d0aa794529c5..6476f6cb7ca141 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+      FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
+          true;
     break;
   case SecNameTable: {
     bool FixedLengthMD5 =
@@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
       Flags.append("preInlined,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags.append("fs-discriminator,");
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+      Flags.append("mixed-probe-order,");
     break;
   case SecFuncOffsetTable:
     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 625e523f13cec0..4a708c94dac518 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
   if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
+      FunctionSamples::ProfileIsMixedProbeOrder)
+    addSectionFlag(SecProfSummary,
+                   SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
 
   uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
   switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2fd8668d15e200..2baf7f47aaea7d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1129,7 +1129,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
         CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
     if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
       continue;
-    
+
     Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
     // Add to the import list only when it's defined out of module.
     if (!Func || Func->isDeclaration())
@@ -2112,6 +2112,14 @@ bool SampleProfileLoader::doInitialization(Module &M,
                                                DS_Warning));
       return false;
     }
+
+    if (!FunctionSamples::ProfileIsMixedProbeOrder) {
+      const char *Msg =
+          "Pseudo-probe-based profile is on an old version ID order which "
+          "could cause profile mismatch(performance regression)";
+      Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
+                                               DS_Warning));
+    }
   }
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index c4028e6b132871..dbc1ea79691c87 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -559,6 +559,54 @@ void ProfileGenerator::populateBodySamplesWithProbesForAllFunctions(
   }
 }
 
+// Use a heuristic to determine probe order by checking callsite insertion
+// position relative to the BB probes. Sort all the BB probes is in a list, for
+// each calliste, compute "ratio = insert position / length of the list". For
+// the old order, the probe ids for BB should be all before(smaller than) the
+// probe ids for callsite, this ratio should be equal to or close to 1.
+inline bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
+  // Use flattned profile to maximize the callsites in the profile.
+  SampleProfileMap flattenProfile;
+  ProfileConverter::flattenProfile(Profiles, flattenProfile);
+
+  uint32_t PossibleOldOrderNum = 0;
+  uint32_t PossibleNewOrderNum = 0;
+
+  for (const auto &I : flattenProfile) {
+    const FunctionSamples &FS = I.second;
+    // Skip small functions whose profile order are likely random.
+    if (FS.getBodySamples().size() < 10)
+      continue;
+
+    std::set<uint32_t> PossibleBBProbeIDs;
+    std::set<uint32_t> CallsiteIDs;
+    for (const auto &I : FS.getBodySamples()) {
+      if (I.second.getCallTargets().empty())
+        PossibleBBProbeIDs.insert(I.first.LineOffset);
+      else
+        CallsiteIDs.insert(I.first.LineOffset);
+    }
+
+    if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
+      continue;
+
+    uint32_t DistanceToBeginSum = 0;
+    for (const auto &ID : CallsiteIDs)
+      DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
+                                          PossibleBBProbeIDs.upper_bound(ID));
+    uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
+
+    // Note that PossibleBBProbeIDs could contains some callsite probe id, the
+    // ratio is not exactly 1 for the old order, hence use a smaller threshold
+    // to determine.
+    if ((float)DistanceToBeginSum / LengthSum > 0.8)
+      PossibleOldOrderNum++;
+    else
+      PossibleNewOrderNum++;
+  }
+  return PossibleNewOrderNum >= PossibleOldOrderNum;
+}
+
 void ProfileGenerator::populateBoundarySamplesWithProbesForAllFunctions(
     const BranchSample &BranchCounters) {
   for (const auto &Entry : BranchCounters) {
@@ -1024,6 +1072,16 @@ void CSProfileGenerator::postProcessProfiles() {
     CSConverter.convertCSProfiles();
     FunctionSamples::ProfileIsCS = false;
   }
+
+  if (FunctionSamples::ProfileIsProbeBased) {
+    FunctionSamples::ProfileIsMixedProbeOrder = true;
+    if (!checkProbeIDIsInMixedOrder(ProfileMap)) {
+      WithColor::warning()
+          << "Pseudo-probe-based profile is on an old version ID order which "
+             "could cause profile mismatch(performance regression)\n";
+      FunctionSamples::ProfileIsMixedProbeOrder = false;
+    }
+  }
 }
 
 void ProfileGeneratorBase::computeSummaryAndThreshold(



More information about the cfe-commits mailing list