[clang] [llvm] [PseudoProbe] Mix and reorder block and call probe ID in lexical order (PR #75092)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Thu Feb 29 19:19:55 PST 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/75092

>From e6f735a336f87659ee3236fc795ad4b7107dff4d Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 10 Dec 2023 18:30:42 -0800
Subject: [PATCH 1/4] [PseudoProbe] Mix and reorder block and call probe ID in
 lexical order

---
 clang/test/CodeGen/pseudo-probe-emit.c        |  8 +++----
 .../llvm/Transforms/IPO/SampleProfileProbe.h  |  3 +--
 .../lib/Transforms/IPO/SampleProfileProbe.cpp | 19 +++++-----------
 .../Inputs/pseudo-probe-profile.prof          |  8 +++----
 .../Inputs/pseudo-probe-update.prof           |  8 +++----
 .../SampleProfile/pseudo-probe-dangle.ll      | 12 +++++-----
 .../pseudo-probe-discriminator.ll             |  6 ++---
 .../pseudo-probe-profile-metadata-2.ll        | 15 ++++++-------
 .../SampleProfile/pseudo-probe-profile.ll     | 22 +++++++++----------
 .../SampleProfile/pseudo-probe-update.ll      | 11 +++++-----
 .../SampleProfile/pseudo-probe-verify.ll      | 16 +++++++-------
 11 files changed, 59 insertions(+), 69 deletions(-)

diff --git a/clang/test/CodeGen/pseudo-probe-emit.c b/clang/test/CodeGen/pseudo-probe-emit.c
index c7a3f7e6d5b02b..360f831e842945 100644
--- a/clang/test/CodeGen/pseudo-probe-emit.c
+++ b/clang/test/CodeGen/pseudo-probe-emit.c
@@ -10,9 +10,9 @@ void foo(int x) {
   // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 1, i32 0, i64 -1)
   if (x == 0)
     // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 2, i32 0, i64 -1)
-    bar();
+    bar();  // probe id : 3
   else
-    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 3, i32 0, i64 -1)
-    go();
-  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+    // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 4, i32 0, i64 -1)
+    go(); // probe id : 5
+  // CHECK: call void @llvm.pseudoprobe(i64 [[#GUID]], i64 6, i32 0, i64 -1)
 }
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0f2729a9462de2..69b87adf105fd1 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -82,8 +82,7 @@ class SampleProfileProber {
   uint32_t getBlockId(const BasicBlock *BB) const;
   uint32_t getCallsiteId(const Instruction *Call) const;
   void computeCFGHash();
-  void computeProbeIdForBlocks();
-  void computeProbeIdForCallsites();
+  void computeProbeId();
 
   Function *F;
 
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 090e5560483edb..975c1bc2347aa4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -173,8 +173,7 @@ SampleProfileProber::SampleProfileProber(Function &Func,
   BlockProbeIds.clear();
   CallProbeIds.clear();
   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
-  computeProbeIdForBlocks();
-  computeProbeIdForCallsites();
+  computeProbeId();
   computeCFGHash();
 }
 
@@ -207,7 +206,10 @@ void SampleProfileProber::computeCFGHash() {
                     << ", Hash = " << FunctionHash << "\n");
 }
 
-void SampleProfileProber::computeProbeIdForBlocks() {
+void SampleProfileProber::computeProbeId() {
+  LLVMContext &Ctx = F->getContext();
+  Module *M = F->getParent();
+
   DenseSet<BasicBlock *> KnownColdBlocks;
   computeEHOnlyBlocks(*F, KnownColdBlocks);
   // Insert pseudo probe to non-cold blocks only. This will reduce IR size as
@@ -216,18 +218,9 @@ void SampleProfileProber::computeProbeIdForBlocks() {
     ++LastProbeId;
     if (!KnownColdBlocks.contains(&BB))
       BlockProbeIds[&BB] = LastProbeId;
-  }
-}
 
-void SampleProfileProber::computeProbeIdForCallsites() {
-  LLVMContext &Ctx = F->getContext();
-  Module *M = F->getParent();
-
-  for (auto &BB : *F) {
     for (auto &I : BB) {
-      if (!isa<CallBase>(I))
-        continue;
-      if (isa<IntrinsicInst>(&I))
+      if (!isa<CallBase>(I) || isa<IntrinsicInst>(&I))
         continue;
 
       // The current implementation uses the lower 16 bits of the discriminator
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
index ba4c6117dc96ab..d3847946b94033 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile.prof
@@ -1,8 +1,8 @@
 foo:3200:13
  1: 13
  2: 7
- 3: 6
- 4: 13
- 5: 7 _Z3barv:2 _Z3foov:5
- 6: 6 _Z3barv:4 _Z3foov:2
+ 4: 6
+ 6: 13
+ 3: 7 _Z3barv:2 _Z3foov:5
+ 5: 6 _Z3barv:4 _Z3foov:2
  !CFGChecksum: 563022570642068
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
index 62f9bd5992e735..213bf0b6f81cc4 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-update.prof
@@ -1,8 +1,8 @@
 foo:3200:13
  1: 13
  2: 7
- 3: 6
- 4: 13
- 5: 7
- 6: 6
+ 4: 6
+ 6: 13
+ 7: 7
+ 9: 6
  !CFGChecksum: 844530426352218
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
index 4647a34fc2f620..f0b6fdf62d9696 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-dangle.ll
@@ -23,21 +23,21 @@ Merge:
 ; JT-LABEL-NO: T
 ; JT-LABEL-NO: F
 ; JT-LABEL: Merge
+; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4
 ; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3
-; JT-NOT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2
-; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+; JT: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; ASM-NOT: .pseudoprobe	6699318081062747564 4
 ; ASM-NOT: .pseudoprobe	6699318081062747564 3
-; ASM-NOT: .pseudoprobe	6699318081062747564 2
-; ASM: .pseudoprobe	6699318081062747564 4 0 0
+; ASM: .pseudoprobe	6699318081062747564 5 0 0
 	ret i32 %call
 }
 
 ;; Check block T and F are gone, and their probes (probe 2 and 3) are gone too.
 ; MIR-tail: bb.0
 ; MIR-tail: PSEUDO_PROBE [[#GUID:]], 1, 0, 0
-; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 2
 ; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 3
-; MIR-tail: PSEUDO_PROBE [[#GUID:]], 4, 0, 0
+; MIR-tail-NOT: PSEUDO_PROBE [[#GUID:]], 4
+; MIR-tail: PSEUDO_PROBE [[#GUID:]], 5, 0, 0
 
 
 define i32 @test(i32 %a, i32 %b, i32 %c) {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
index 62f0737875aec3..97b0ed600ad106 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-discriminator.ll
@@ -62,10 +62,10 @@ attributes #1 = { "disable-tail-calls"="false" "less-precise-fpmad"="false" "fra
 ; DEBUG: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
 ; DEBUG: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
 
-           
+
 ; PROBE: ![[CALL1]] = !DILocation(line: 4, column: 3, scope: ![[CALL1BLOCK:[0-9]+]])
-; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646575)
+; PROBE: ![[CALL1BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646559)
 ; PROBE: ![[CALL2]] = !DILocation(line: 4, column: 9, scope: ![[CALL2BLOCK:[0-9]+]])
-; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646583)
+; PROBE: ![[CALL2BLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 186646567)
 ; PROBE: ![[INST]] = !DILocation(line: 4, column: 15, scope: ![[INSTBLOCK:[0-9]+]])
 ; PROBE: ![[INSTBLOCK]] = !DILexicalBlockFile({{.*}} discriminator: 4)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
index 148f3ede5ab48a..379dcfcab338d9 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-metadata-2.ll
@@ -29,7 +29,7 @@ if.else:
   br label %return
 
 return:
-  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1)
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 6, i32 0, i64 -1)
   %1 = load i32, ptr %retval, align 4
   ret i32 %1
 }
@@ -55,13 +55,12 @@ attributes #0 = {"use-sample-profile"}
 !9 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !5, isOptimized: false, runtimeVersion: 0, emissionKind: NoDebug)
 !10 = !{!"function_entry_count", i64 14}
 !11 = !{!"branch_weights", i32 100, i32 0}
-;; A discriminator of 186646575 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 1.0.
-!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646575)
+;; A discriminator of 186646559 which is 0xB20001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 1.0.
+!12 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 186646559)
 !13 = distinct !DILocation(line: 10, column: 11, scope: !12)
-;; A discriminator of 134217775 which is 0x6f80057 in hexdecimal, stands for an indirect call probe
-;; with an index of 5 and probe factor of 0.
-!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217775)
+;; A discriminator of 134217759 which is 0x800001F in hexdecimal, stands for an indirect call probe
+;; with an index of 3 and probe factor of 0.
+!14 = !DILexicalBlockFile(scope: !4, file: !5, discriminator: 134217759)
 !15 = distinct !DILocation(line: 10, column: 11, scope: !14)
 !16 = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
index 474b6668b0a7a7..867a49dbaed2ee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile.ll
@@ -22,12 +22,12 @@ if.then:
 if.else:
   ; CHECK: call {{.*}}, !dbg ![[#PROBE2:]], !prof ![[PROF2:[0-9]+]]
   call void %f(i32 2)
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
   store i32 2, ptr %retval, align 4
   br label %return
 
 return:
-  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
+  ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
   %1 = load i32, ptr %retval, align 4
   ret i32 %1
 }
@@ -36,14 +36,14 @@ attributes #0 = {"use-sample-profile"}
 
 ; CHECK: ![[PD1]] = !{!"branch_weights", i32 8, i32 7}
 ; CHECK: ![[#PROBE1]] = !DILocation(line: 0, scope: ![[#SCOPE1:]])
+;; A discriminator of 119537695 which is 0x720001f in hexdecimal, stands for an indirect call probe
+;; with an index of 3.
+; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537695)
+; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
 ;; A discriminator of 119537711 which is 0x720002f in hexdecimal, stands for an indirect call probe
 ;; with an index of 5.
-; CHECK: ![[#SCOPE1]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
-; CHECK: ![[PROF1]] = !{!"VP", i32 0, i64 7, i64 9191153033785521275, i64 5, i64 -1069303473483922844, i64 2}
-;; A discriminator of 119537719 which is 0x7200037 in hexdecimal, stands for an indirect call probe
-;; with an index of 6.
 ; CHECK: ![[#PROBE2]] = !DILocation(line: 0, scope: ![[#SCOPE2:]])
-; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537719)
+; CHECK: ![[#SCOPE2]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 119537711)
 ; CHECK: ![[PROF2]] = !{!"VP", i32 0, i64 6, i64 -1069303473483922844, i64 4, i64 9191153033785521275, i64 2}
 
 !llvm.module.flags = !{!9, !10}
@@ -83,7 +83,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '7'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '5'
+;YAML-NEXT:    - ProbeId:         '3'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -113,7 +113,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '6'
+;YAML-NEXT:    - ProbeId:         '5'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -128,7 +128,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '6'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '3'
+;YAML-NEXT:    - ProbeId:         '4'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
@@ -143,7 +143,7 @@ attributes #0 = {"use-sample-profile"}
 ;YAML-NEXT:    - String:          'Applied '
 ;YAML-NEXT:    - NumSamples:      '13'
 ;YAML-NEXT:    - String:          ' samples from profile (ProbeId='
-;YAML-NEXT:    - ProbeId:         '4'
+;YAML-NEXT:    - ProbeId:         '6'
 ;YAML-NEXT:    - String:          ', Factor='
 ;YAML-NEXT:    - Factor:          '1.000000e+00'
 ;YAML-NEXT:    - String:          ', OriginalSamples='
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
index 992afedd14f75f..217b61970933dc 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-update.ll
@@ -14,15 +14,15 @@ T1:
 	%v1 = call i32 @f1()
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
 ;; The distribution factor -8513881372706734080 stands for 53.85%, whic is from 7/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -8513881372706734080)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -8513881372706734080)
     %cond3 = icmp eq i32 %v1, 412
 	br label %Merge
 F1:
 ; CHECK: %v2 = call i32 @f2(), !prof ![[#PROF2:]]
 	%v2 = call i32 @f2()
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 3, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
 ;; The distribution factor 8513881922462547968 stands for 46.25%, which is from 6/6+7.
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 8513881922462547968)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 8513881922462547968)
 	br label %Merge
 Merge:
 
@@ -30,11 +30,11 @@ Merge:
 	%B = phi i32 [%v1, %T1], [%v2, %F1]
 	br i1 %A, label %T2, label %F2
 T2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 7, i32 0, i64 -1)
 	call void @f3()
 	ret i32 %B
 F2:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 9, i32 0, i64 -1)
 	ret i32 %B
 }
 
@@ -42,4 +42,3 @@ F2:
 ; CHECK: ![[#PROF2]] = !{!"branch_weights", i32 6}
 
 attributes #0 = {"use-sample-profile"}
-
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
index f70e5189ab1293..b622cfbd6634ef 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-verify.ll
@@ -4,7 +4,7 @@
 
 ; VERIFY: *** Pseudo Probe Verification After LoopFullUnrollPass ***
 ; VERIFY: Function foo:
-; VERIFY-DAG: Probe 6	previous factor 1.00	current factor 5.00
+; VERIFY-DAG: Probe 5	previous factor 1.00	current factor 5.00
 ; VERIFY-DAG: Probe 4	previous factor 1.00	current factor 5.00
 
 declare void @foo2() nounwind
@@ -27,15 +27,15 @@ bb7.preheader:
 
 bb10:
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 4, i32 0, i64 -1)
-; CHECK: call void @foo2(), !dbg ![[#PROBE6:]] 
+; CHECK: call void @foo2(), !dbg ![[#PROBE6:]]
 ; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 2, i32 0, i64 -1)
   %indvars.iv = phi i64 [ 0, %bb7.preheader ], [ %indvars.iv.next, %bb10 ]
   %tmp1.14 = phi i32 [ %tmp1.06, %bb7.preheader ], [ %spec.select, %bb10 ]
@@ -50,14 +50,14 @@ bb10:
   br i1 %exitcond.not, label %bb3.loopexit, label %bb10, !llvm.loop !13
 
 bb24:
-; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 5, i32 0, i64 -1)
+; CHECK: call void @llvm.pseudoprobe(i64 [[#GUID:]], i64 6, i32 0, i64 -1)
   ret void
 }
 
 ;; A discriminator of 186646583 which is 0xb200037 in hexdecimal, stands for a direct call probe
 ;; with an index of 6 and a scale of -1%.
 ; CHECK: ![[#PROBE6]] = !DILocation(line: 2, column: 20, scope: ![[#SCOPE:]])
-; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646583)
+; CHECK: ![[#SCOPE]] = !DILexicalBlockFile(scope: ![[#]], file: ![[#]], discriminator: 186646575)
 
 !llvm.dbg.cu = !{!0}
 !llvm.module.flags = !{!9, !10}

>From 5e6b16f041101d4db38748bc9bfc3ec60536f73c Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 10 Jan 2024 10:10:34 -0800
Subject: [PATCH 2/4] Detect the order and add a summary flag to the profile

---
 llvm/include/llvm/ProfileData/SampleProf.h    | 10 ++-
 .../llvm/ProfileData/SampleProfReader.h       |  3 +
 llvm/lib/ProfileData/SampleProf.cpp           |  1 +
 llvm/lib/ProfileData/SampleProfReader.cpp     |  5 ++
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  4 ++
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |  8 +++
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 62 +++++++++++++++++++
 7 files changed, 91 insertions(+), 2 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 8ac84d4b933f20..a5ac05c9cdff75 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,6 +201,10 @@ enum class SecProfSummaryFlags : uint32_t {
   /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
   /// contexts thus this is CS preinliner computed.
   SecFlagIsPreInlined = (1 << 4),
+  /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the
+  /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of
+  /// the order that callsite probe IDs are always after the BB probe IDs.
+  SecFlagIsMixedProbeOrder = (1 << 5),
 };
 
 enum class SecFuncMetadataFlags : uint32_t {
@@ -466,7 +470,7 @@ struct SampleContextFrame {
   LineLocation Location;
 
   SampleContextFrame() : Location(0, 0) {}
-  
+
   SampleContextFrame(FunctionId Func, LineLocation Location)
       : Func(Func), Location(Location) {}
 
@@ -527,7 +531,7 @@ class SampleContext {
       : Func(Name), State(UnknownContext), Attributes(ContextNone) {
         assert(!Name.empty() && "Name is empty");
       }
-  
+
   SampleContext(FunctionId Func)
       : Func(Func), State(UnknownContext), Attributes(ContextNone) {}
 
@@ -1178,6 +1182,8 @@ class FunctionSamples {
 
   static bool ProfileIsProbeBased;
 
+  static bool ProfileIsMixedProbeOrder;
+
   static bool ProfileIsCS;
 
   static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 9e8f543909cdbd..6551f9374d4aa7 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,6 +525,9 @@ class SampleProfileReader {
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
+  /// Whether profiles are in mixed BB and callsite probe order.
+  bool ProfileIsMixedProbeOrder = false;
+
   /// Whether function profiles are context-sensitive flat profiles.
   bool ProfileIsCS = false;
 
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 59fa71899ed47b..830b83a25a3e42 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,6 +41,7 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
 namespace llvm {
 namespace sampleprof {
 bool FunctionSamples::ProfileIsProbeBased = false;
+bool FunctionSamples::ProfileIsMixedProbeOrder = false;
 bool FunctionSamples::ProfileIsCS = false;
 bool FunctionSamples::ProfileIsPreInlined = false;
 bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 98d0aa794529c5..6476f6cb7ca141 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,6 +704,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+      FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
+          true;
     break;
   case SecNameTable: {
     bool FixedLengthMD5 =
@@ -1369,6 +1372,8 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
       Flags.append("preInlined,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags.append("fs-discriminator,");
+    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
+      Flags.append("mixed-probe-order,");
     break;
   case SecFuncOffsetTable:
     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 625e523f13cec0..4a708c94dac518 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,6 +437,10 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
   if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
+  if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
+      FunctionSamples::ProfileIsMixedProbeOrder)
+    addSectionFlag(SecProfSummary,
+                   SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
 
   uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
   switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index ffc26f1a0a972d..a8a2697aa6a7ed 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -2173,6 +2173,14 @@ bool SampleProfileLoader::doInitialization(Module &M,
                                                DS_Warning));
       return false;
     }
+
+    if (!FunctionSamples::ProfileIsMixedProbeOrder) {
+      const char *Msg =
+          "Pseudo-probe-based profile is on an old version ID order which "
+          "could cause profile mismatch(performance regression)";
+      Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
+                                               DS_Warning));
+    }
   }
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 5aa44108f96605..4771dda88d89c0 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -513,6 +513,66 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
   return ProfileMap.Create(Context);
 }
 
+// Use a heuristic to determine probe order by checking callsite insertion
+// position relative to the BB probes. Sort all the BB probes is in a list, for
+// each calliste, compute "ratio = insert position / length of the list". For
+// the old order, the probe ids for BB should be all before(smaller than) the
+// probe ids for callsite, this ratio should be equal to or close to 1.
+bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
+  // Use flattned profile to maximize the callsites in the profile.
+  SampleProfileMap flattenProfile;
+  ProfileConverter::flattenProfile(Profiles, flattenProfile);
+
+  uint32_t PossibleOldOrderNum = 0;
+  uint32_t PossibleNewOrderNum = 0;
+
+  for (const auto &I : flattenProfile) {
+    const FunctionSamples &FS = I.second;
+    // Skip small functions whose profile order are likely random.
+    if (FS.getBodySamples().size() < 10)
+      continue;
+
+    std::set<uint32_t> PossibleBBProbeIDs;
+    std::set<uint32_t> CallsiteIDs;
+    for (const auto &I : FS.getBodySamples()) {
+      if (I.second.getCallTargets().empty())
+        PossibleBBProbeIDs.insert(I.first.LineOffset);
+      else
+        CallsiteIDs.insert(I.first.LineOffset);
+    }
+
+    if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
+      continue;
+
+    uint32_t DistanceToBeginSum = 0;
+    for (const auto &ID : CallsiteIDs)
+      DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
+                                          PossibleBBProbeIDs.upper_bound(ID));
+    uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
+
+    // Note that PossibleBBProbeIDs could contains some callsite probe id, the
+    // ratio is not exactly 1 for the old order, hence use a smaller threshold
+    // to determine.
+    if ((float)DistanceToBeginSum / LengthSum > 0.8)
+      PossibleOldOrderNum++;
+    else
+      PossibleNewOrderNum++;
+  }
+  return PossibleNewOrderNum >= PossibleOldOrderNum;
+}
+
+void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) {
+  if (FunctionSamples::ProfileIsProbeBased) {
+    FunctionSamples::ProfileIsMixedProbeOrder = true;
+    if (!checkProbeIDIsInMixedOrder(Profiles)) {
+      WithColor::warning()
+          << "Pseudo-probe-based profile is on an old version ID order which "
+             "could cause profile mismatch(performance regression)\n";
+      FunctionSamples::ProfileIsMixedProbeOrder = false;
+    }
+  }
+}
+
 void ProfileGenerator::generateProfile() {
   collectProfiledFunctions();
 
@@ -535,6 +595,7 @@ void ProfileGenerator::postProcessProfiles() {
   trimColdProfiles(ProfileMap, ColdCountThreshold);
   filterAmbiguousProfile(ProfileMap);
   calculateAndShowDensity(ProfileMap);
+  warnOrWriteProbeOrderFlag(ProfileMap);
 }
 
 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
@@ -1068,6 +1129,7 @@ void CSProfileGenerator::postProcessProfiles() {
     FunctionSamples::ProfileIsCS = false;
   }
   filterAmbiguousProfile(ProfileMap);
+  warnOrWriteProbeOrderFlag(ProfileMap);
 }
 
 void ProfileGeneratorBase::computeSummaryAndThreshold(

>From 6fe180efa591e8282d7d9e72c6a7645ce34b2b74 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 28 Feb 2024 21:26:13 -0800
Subject: [PATCH 3/4] Revert "Detect the order and add a summary flag to the
 profile"

This reverts commit 28b0374b93f2072d03804ff1d8a8b2fd81c5a218.
---
 llvm/include/llvm/ProfileData/SampleProf.h    | 10 +--
 .../llvm/ProfileData/SampleProfReader.h       |  3 -
 llvm/lib/ProfileData/SampleProf.cpp           |  1 -
 llvm/lib/ProfileData/SampleProfReader.cpp     |  5 --
 llvm/lib/ProfileData/SampleProfWriter.cpp     |  4 --
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 10 +--
 llvm/tools/llvm-profgen/ProfileGenerator.cpp  | 62 -------------------
 7 files changed, 3 insertions(+), 92 deletions(-)

diff --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index a5ac05c9cdff75..8ac84d4b933f20 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -201,10 +201,6 @@ enum class SecProfSummaryFlags : uint32_t {
   /// SecFlagIsPreInlined means this profile contains ShouldBeInlined
   /// contexts thus this is CS preinliner computed.
   SecFlagIsPreInlined = (1 << 4),
-  /// SecFlagIsMixedProbeOrder means in a pseude-probe based profile, the
-  /// callsite and BB probe IDs are mixed and sorted in lexcial order instead of
-  /// the order that callsite probe IDs are always after the BB probe IDs.
-  SecFlagIsMixedProbeOrder = (1 << 5),
 };
 
 enum class SecFuncMetadataFlags : uint32_t {
@@ -470,7 +466,7 @@ struct SampleContextFrame {
   LineLocation Location;
 
   SampleContextFrame() : Location(0, 0) {}
-
+  
   SampleContextFrame(FunctionId Func, LineLocation Location)
       : Func(Func), Location(Location) {}
 
@@ -531,7 +527,7 @@ class SampleContext {
       : Func(Name), State(UnknownContext), Attributes(ContextNone) {
         assert(!Name.empty() && "Name is empty");
       }
-
+  
   SampleContext(FunctionId Func)
       : Func(Func), State(UnknownContext), Attributes(ContextNone) {}
 
@@ -1182,8 +1178,6 @@ class FunctionSamples {
 
   static bool ProfileIsProbeBased;
 
-  static bool ProfileIsMixedProbeOrder;
-
   static bool ProfileIsCS;
 
   static bool ProfileIsPreInlined;
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 6551f9374d4aa7..9e8f543909cdbd 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -525,9 +525,6 @@ class SampleProfileReader {
   /// \brief Whether samples are collected based on pseudo probes.
   bool ProfileIsProbeBased = false;
 
-  /// Whether profiles are in mixed BB and callsite probe order.
-  bool ProfileIsMixedProbeOrder = false;
-
   /// Whether function profiles are context-sensitive flat profiles.
   bool ProfileIsCS = false;
 
diff --git a/llvm/lib/ProfileData/SampleProf.cpp b/llvm/lib/ProfileData/SampleProf.cpp
index 830b83a25a3e42..59fa71899ed47b 100644
--- a/llvm/lib/ProfileData/SampleProf.cpp
+++ b/llvm/lib/ProfileData/SampleProf.cpp
@@ -41,7 +41,6 @@ static cl::opt<bool> GenerateMergedBaseProfiles(
 namespace llvm {
 namespace sampleprof {
 bool FunctionSamples::ProfileIsProbeBased = false;
-bool FunctionSamples::ProfileIsMixedProbeOrder = false;
 bool FunctionSamples::ProfileIsCS = false;
 bool FunctionSamples::ProfileIsPreInlined = false;
 bool FunctionSamples::UseMD5 = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 6476f6cb7ca141..98d0aa794529c5 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -704,9 +704,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
       FunctionSamples::ProfileIsPreInlined = ProfileIsPreInlined = true;
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       FunctionSamples::ProfileIsFS = ProfileIsFS = true;
-    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
-      FunctionSamples::ProfileIsMixedProbeOrder = ProfileIsMixedProbeOrder =
-          true;
     break;
   case SecNameTable: {
     bool FixedLengthMD5 =
@@ -1372,8 +1369,6 @@ static std::string getSecFlagsStr(const SecHdrTableEntry &Entry) {
       Flags.append("preInlined,");
     if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagFSDiscriminator))
       Flags.append("fs-discriminator,");
-    if (hasSecFlag(Entry, SecProfSummaryFlags::SecFlagIsMixedProbeOrder))
-      Flags.append("mixed-probe-order,");
     break;
   case SecFuncOffsetTable:
     if (hasSecFlag(Entry, SecFuncOffsetFlags::SecFlagOrdered))
diff --git a/llvm/lib/ProfileData/SampleProfWriter.cpp b/llvm/lib/ProfileData/SampleProfWriter.cpp
index 4a708c94dac518..625e523f13cec0 100644
--- a/llvm/lib/ProfileData/SampleProfWriter.cpp
+++ b/llvm/lib/ProfileData/SampleProfWriter.cpp
@@ -437,10 +437,6 @@ std::error_code SampleProfileWriterExtBinaryBase::writeOneSection(
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagIsPreInlined);
   if (Type == SecProfSummary && FunctionSamples::ProfileIsFS)
     addSectionFlag(SecProfSummary, SecProfSummaryFlags::SecFlagFSDiscriminator);
-  if (Type == SecProfSummary && FunctionSamples::ProfileIsProbeBased &&
-      FunctionSamples::ProfileIsMixedProbeOrder)
-    addSectionFlag(SecProfSummary,
-                   SecProfSummaryFlags::SecFlagIsMixedProbeOrder);
 
   uint64_t SectionStart = markSectionStart(Type, LayoutIdx);
   switch (Type) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index a8a2697aa6a7ed..402920d3a9a2d8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -1190,7 +1190,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
         CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
     if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
       continue;
-
+    
     Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
     // Add to the import list only when it's defined out of module.
     if (!Func || Func->isDeclaration())
@@ -2173,14 +2173,6 @@ bool SampleProfileLoader::doInitialization(Module &M,
                                                DS_Warning));
       return false;
     }
-
-    if (!FunctionSamples::ProfileIsMixedProbeOrder) {
-      const char *Msg =
-          "Pseudo-probe-based profile is on an old version ID order which "
-          "could cause profile mismatch(performance regression)";
-      Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg,
-                                               DS_Warning));
-    }
   }
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 4771dda88d89c0..5aa44108f96605 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -513,66 +513,6 @@ ProfileGenerator::getTopLevelFunctionProfile(FunctionId FuncName) {
   return ProfileMap.Create(Context);
 }
 
-// Use a heuristic to determine probe order by checking callsite insertion
-// position relative to the BB probes. Sort all the BB probes is in a list, for
-// each calliste, compute "ratio = insert position / length of the list". For
-// the old order, the probe ids for BB should be all before(smaller than) the
-// probe ids for callsite, this ratio should be equal to or close to 1.
-bool checkProbeIDIsInMixedOrder(const SampleProfileMap &Profiles) {
-  // Use flattned profile to maximize the callsites in the profile.
-  SampleProfileMap flattenProfile;
-  ProfileConverter::flattenProfile(Profiles, flattenProfile);
-
-  uint32_t PossibleOldOrderNum = 0;
-  uint32_t PossibleNewOrderNum = 0;
-
-  for (const auto &I : flattenProfile) {
-    const FunctionSamples &FS = I.second;
-    // Skip small functions whose profile order are likely random.
-    if (FS.getBodySamples().size() < 10)
-      continue;
-
-    std::set<uint32_t> PossibleBBProbeIDs;
-    std::set<uint32_t> CallsiteIDs;
-    for (const auto &I : FS.getBodySamples()) {
-      if (I.second.getCallTargets().empty())
-        PossibleBBProbeIDs.insert(I.first.LineOffset);
-      else
-        CallsiteIDs.insert(I.first.LineOffset);
-    }
-
-    if (PossibleBBProbeIDs.empty() || CallsiteIDs.empty())
-      continue;
-
-    uint32_t DistanceToBeginSum = 0;
-    for (const auto &ID : CallsiteIDs)
-      DistanceToBeginSum += std::distance(PossibleBBProbeIDs.begin(),
-                                          PossibleBBProbeIDs.upper_bound(ID));
-    uint32_t LengthSum = PossibleBBProbeIDs.size() * CallsiteIDs.size();
-
-    // Note that PossibleBBProbeIDs could contains some callsite probe id, the
-    // ratio is not exactly 1 for the old order, hence use a smaller threshold
-    // to determine.
-    if ((float)DistanceToBeginSum / LengthSum > 0.8)
-      PossibleOldOrderNum++;
-    else
-      PossibleNewOrderNum++;
-  }
-  return PossibleNewOrderNum >= PossibleOldOrderNum;
-}
-
-void warnOrWriteProbeOrderFlag(const SampleProfileMap &Profiles) {
-  if (FunctionSamples::ProfileIsProbeBased) {
-    FunctionSamples::ProfileIsMixedProbeOrder = true;
-    if (!checkProbeIDIsInMixedOrder(Profiles)) {
-      WithColor::warning()
-          << "Pseudo-probe-based profile is on an old version ID order which "
-             "could cause profile mismatch(performance regression)\n";
-      FunctionSamples::ProfileIsMixedProbeOrder = false;
-    }
-  }
-}
-
 void ProfileGenerator::generateProfile() {
   collectProfiledFunctions();
 
@@ -595,7 +535,6 @@ void ProfileGenerator::postProcessProfiles() {
   trimColdProfiles(ProfileMap, ColdCountThreshold);
   filterAmbiguousProfile(ProfileMap);
   calculateAndShowDensity(ProfileMap);
-  warnOrWriteProbeOrderFlag(ProfileMap);
 }
 
 void ProfileGenerator::trimColdProfiles(const SampleProfileMap &Profiles,
@@ -1129,7 +1068,6 @@ void CSProfileGenerator::postProcessProfiles() {
     FunctionSamples::ProfileIsCS = false;
   }
   filterAmbiguousProfile(ProfileMap);
-  warnOrWriteProbeOrderFlag(ProfileMap);
 }
 
 void ProfileGeneratorBase::computeSummaryAndThreshold(

>From f25b13f071aab68bcc9ad3d98fdde871670a739c Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 29 Feb 2024 18:04:37 -0800
Subject: [PATCH 4/4] Error out if the checksum mismatch is high

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp | 84 ++++++++++++++++++++++-
 1 file changed, 83 insertions(+), 1 deletion(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 402920d3a9a2d8..b599645cbc7df3 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -234,6 +234,25 @@ static cl::opt<unsigned> ProfileICPRelativeHotnessSkip(
     cl::desc(
         "Skip relative hotness check for ICP up to given number of targets."));
 
+static cl::opt<unsigned> ChecksumMismatchFuncHotBlockSkip(
+    "checksum-mismatch-func-hot-block-skip", cl::Hidden, cl::init(100),
+    cl::desc("For checksum-mismatch error check, skip checking the function "
+             "that the num of its hot(on average) blocks is smaller than the "
+             "given number."));
+
+static cl::opt<unsigned> ChecksumMismatchNumFuncSkip(
+    "checksum-mismatch-num-func-skip", cl::Hidden, cl::init(100),
+    cl::desc(
+        "For checksum-mismatch error check, skip the check if the number of "
+        "total selected function is smaller than the given number."));
+
+static cl::opt<unsigned> ChecksumMismatchErrorThreshold(
+    "checksum-mismatch-error-threshold", cl::Hidden, cl::init(90),
+    cl::desc(
+        "For checksum-mismatch error check, error out if the percentage of "
+        "function mismatched-checksum is higher than the given percentage "
+        "threshold"));
+
 static cl::opt<bool> CallsitePrioritizedInline(
     "sample-profile-prioritized-inline", cl::Hidden,
 
@@ -630,6 +649,8 @@ class SampleProfileLoader final : public SampleProfileLoaderBaseImpl<Function> {
   std::vector<Function *> buildFunctionOrder(Module &M, LazyCallGraph &CG);
   std::unique_ptr<ProfiledCallGraph> buildProfiledCallGraph(Module &M);
   void generateMDProfMetadata(Function &F);
+  bool errorIfHighChecksumMismatch(Module &M, ProfileSummaryInfo *PSI,
+                                   const SampleProfileMap &Profiles);
 
   /// Map from function name to Function *. Used to find the function from
   /// the function name. If the function name contains suffix, additional
@@ -1190,7 +1211,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
         CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
     if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
       continue;
-    
+
     Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
     // Add to the import list only when it's defined out of module.
     if (!Func || Func->isDeclaration())
@@ -2184,6 +2205,61 @@ bool SampleProfileLoader::doInitialization(Module &M,
   return true;
 }
 
+// Note that this is a module-level check. Even if one module is errored out,
+// the entire build will be errored out. However, the user could make big
+// changes to functions in single module but those changes might not be
+// performance significant to the whole binary. Therefore, we use a conservative
+// approach to make sure we only error out if it globally impacts the binary
+// performance. To achieve this, we use heuristics to select a reasonable
+// big set of functions that are supposed to be globally performance
+// significant, only compute and check the mismatch within those functions. The
+// function selection is based on two criteria: 1) The function is "hot" enough,
+// which is tuned by a hotness-based flag(ChecksumMismatchFuncHotBlockSkip). 2)
+// The num of function is large enough which is tuned by the
+// ChecksumMismatchNumFuncSkip flag.
+bool SampleProfileLoader::errorIfHighChecksumMismatch(
+    Module &M, ProfileSummaryInfo *PSI, const SampleProfileMap &Profiles) {
+  assert(FunctionSamples::ProfileIsProbeBased &&
+         "Only support for probe-based profile");
+  uint64_t TotalSelectedFunc = 0;
+  uint64_t NumMismatchedFunc = 0;
+  for (const auto &I : Profiles) {
+    const auto &FS = I.second;
+    const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+    if (!FuncDesc)
+      continue;
+
+    // We want to select a set of functions that are globally performance
+    // significant, in other words, if those functions profiles are
+    // checksum-mismatched and dropped, the whole binary will likely be
+    // impacted, so here we use a hotness-based threshold to control the
+    // selection.
+    if (FS.getTotalSamples() >=
+        ChecksumMismatchFuncHotBlockSkip * PSI->getOrCompHotCountThreshold())
+      continue;
+
+    TotalSelectedFunc++;
+    if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS))
+      NumMismatchedFunc++;
+  }
+  // Make sure that the num of selected function is not too small to distinguish
+  // from the user's benign changes.
+  if (TotalSelectedFunc < ChecksumMismatchNumFuncSkip)
+    return false;
+
+  // Finally check the mismatch percentage against the threshold.
+  if (NumMismatchedFunc * 100 >=
+      TotalSelectedFunc * ChecksumMismatchErrorThreshold) {
+    auto &Ctx = M.getContext();
+    const char *Msg =
+        "The FDO profile is too old and will cause big performance regression, "
+        "please drop the profile and re-collect a new one.";
+    Ctx.diagnose(DiagnosticInfoSampleProfile(M.getModuleIdentifier(), Msg));
+    return true;
+  }
+  return false;
+}
+
 void SampleProfileMatcher::findIRAnchors(
     const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
   // For inlined code, recover the original callsite and callee by finding the
@@ -2715,6 +2791,12 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
                         ProfileSummary::PSK_Sample);
     PSI->refresh();
   }
+
+  // Error out if the profile checksum mismatch is too high.
+  if (FunctionSamples::ProfileIsProbeBased &&
+      errorIfHighChecksumMismatch(M, PSI, Reader->getProfiles()))
+    return false;
+
   // Compute the total number of samples collected in this profile.
   for (const auto &I : Reader->getProfiles())
     TotalCollectedSamples += I.second.getTotalSamples();



More information about the llvm-commits mailing list