[llvm] [PseudoProbe] Extend to skip instrumenting probe into the dests of invoke (PR #79919)

via llvm-commits llvm-commits at lists.llvm.org
Mon Jan 29 16:30:19 PST 2024


llvmbot wrote:


<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-llvm-transforms

Author: Lei Wang (wlei-llvm)

<details>
<summary>Changes</summary>

As before we only skip instrumenting probe of `unwind`(`KnownColdBlock`) block, this PR extends to skip the both EH flow from `invoke`, i.e. also skip the `normal` dest. For more contexts: when doing call-to-invoke conversion, the block is split by the `invoke` and two extra blocks(`normal` and `unwind`) are added. With this PR, the instrumentation is the same as the one before the call-to-invoke conversion. 
One significant benefit is this can help mitigate the "unstable IR" issue(https://discourse.llvm.org/t/ipo-for-linkonce-odr-functions/69404), the two versions now are on the same probe instrumentation, expected to be the same checksum. 
To achieve the same checksum, some tweaks is needed:
-  Now it also skips incrementing the probe ID for the skipped probe.
-  The checksum is also computed based on the CFG that skips the EH edges.

We observed this fixes ~5% mismatched samples.

As it changes the way computing the checksum, there could be backwards compatibility checksum mismatch issue, but it should be small scope and also hopefully the profile matching will mitigate this.


---
Full diff: https://github.com/llvm/llvm-project/pull/79919.diff


5 Files Affected:

- (modified) llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h (+5-2) 
- (modified) llvm/lib/Transforms/IPO/SampleProfileProbe.cpp (+38-11) 
- (modified) llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll (+2-2) 
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll (+1-1) 
- (added) llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll (+155) 


``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
index 0f2729a9462de2..0db7b2dcc36600 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileProbe.h
@@ -81,8 +81,11 @@ class SampleProfileProber {
   uint64_t getFunctionHash() const { return FunctionHash; }
   uint32_t getBlockId(const BasicBlock *BB) const;
   uint32_t getCallsiteId(const Instruction *Call) const;
-  void computeCFGHash();
-  void computeProbeIdForBlocks();
+  void findInvokeNormalDests(DenseSet<BasicBlock *> &InvokeNormalDests);
+  void computeCFGHash(const DenseSet<BasicBlock *> &InvokeNormalDests,
+                      const DenseSet<BasicBlock *> &KnownColdBlocks);
+  void computeProbeIdForBlocks(const DenseSet<BasicBlock *> &InvokeNormalDests,
+                               const DenseSet<BasicBlock *> &KnownColdBlocks);
   void computeProbeIdForCallsites();
 
   Function *F;
diff --git a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
index 090e5560483edb..9263909d4eafbf 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileProbe.cpp
@@ -173,24 +173,51 @@ SampleProfileProber::SampleProfileProber(Function &Func,
   BlockProbeIds.clear();
   CallProbeIds.clear();
   LastProbeId = (uint32_t)PseudoProbeReservedId::Last;
-  computeProbeIdForBlocks();
+
+  DenseSet<BasicBlock *> InvokeNormalDests;
+  findInvokeNormalDests(InvokeNormalDests);
+  DenseSet<BasicBlock *> KnownColdBlocks;
+  computeEHOnlyBlocks(*F, KnownColdBlocks);
+
+  computeProbeIdForBlocks(InvokeNormalDests, KnownColdBlocks);
   computeProbeIdForCallsites();
-  computeCFGHash();
+  computeCFGHash(InvokeNormalDests, KnownColdBlocks);
+}
+
+void SampleProfileProber::findInvokeNormalDests(
+    DenseSet<BasicBlock *> &InvokeNormalDests) {
+  for (auto &BB : *F) {
+    auto *TI = BB.getTerminator();
+    if (auto *II = dyn_cast<InvokeInst>(TI))
+      InvokeNormalDests.insert(II->getNormalDest());
+  }
 }
 
 // Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index
 // value of each BB in the CFG. The higher 32 bits record the number of edges
 // preceded by the number of indirect calls.
 // This is derived from FuncPGOInstrumentation<Edge, BBInfo>::computeCFGHash().
-void SampleProfileProber::computeCFGHash() {
+void SampleProfileProber::computeCFGHash(
+    const DenseSet<BasicBlock *> &InvokeNormalDests,
+    const DenseSet<BasicBlock *> &KnownColdBlocks) {
   std::vector<uint8_t> Indexes;
   JamCRC JC;
   for (auto &BB : *F) {
-    for (BasicBlock *Succ : successors(&BB)) {
+    // Skip the EH flow blocks.
+    if (InvokeNormalDests.contains(&BB) || KnownColdBlocks.contains(&BB))
+      continue;
+
+    // Find the original successors by skipping the EH flow succs.
+    auto *Succ = &BB;
+    auto *TI = Succ->getTerminator();
+    while (auto *II = dyn_cast<InvokeInst>(TI)) {
+      Succ = II->getNormalDest();
+      TI = Succ->getTerminator();
+    }
+
       auto Index = getBlockId(Succ);
       for (int J = 0; J < 4; J++)
         Indexes.push_back((uint8_t)(Index >> (J * 8)));
-    }
   }
 
   JC.update(Indexes);
@@ -207,15 +234,15 @@ void SampleProfileProber::computeCFGHash() {
                     << ", Hash = " << FunctionHash << "\n");
 }
 
-void SampleProfileProber::computeProbeIdForBlocks() {
-  DenseSet<BasicBlock *> KnownColdBlocks;
-  computeEHOnlyBlocks(*F, KnownColdBlocks);
+void SampleProfileProber::computeProbeIdForBlocks(
+    const DenseSet<BasicBlock *> &InvokeNormalDests,
+    const DenseSet<BasicBlock *> &KnownColdBlocks) {
   // Insert pseudo probe to non-cold blocks only. This will reduce IR size as
   // well as the binary size while retaining the profile quality.
   for (auto &BB : *F) {
-    ++LastProbeId;
-    if (!KnownColdBlocks.contains(&BB))
-      BlockProbeIds[&BB] = LastProbeId;
+      if (InvokeNormalDests.contains(&BB) || KnownColdBlocks.contains(&BB))
+        continue;
+      BlockProbeIds[&BB] = ++LastProbeId;
   }
 }
 
diff --git a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll
index 21dd8c0fe92414..f915aaccc06e17 100644
--- a/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll
+++ b/llvm/test/ThinLTO/X86/pseudo-probe-desc-import.ll
@@ -12,8 +12,8 @@
 ; RUN: llvm-lto -thinlto-action=import %t3.bc -thinlto-index=%t3.index.bc -o /dev/null 2>&1 | FileCheck %s  --check-prefix=WARN
 
 
-; CHECK-NOT: {i64 6699318081062747564, i64 4294967295, !"foo"
-; CHECK: !{i64 -2624081020897602054, i64 281479271677951, !"main"
+; CHECK-NOT: {i64 6699318081062747564, i64 [[#]], !"foo"
+; CHECK: !{i64 -2624081020897602054, i64 [[#]], !"main"
 
 ; WARN: warning: Pseudo-probe ignored: source module '{{.*}}' is compiled with -fpseudo-probe-for-profiling while destination module '{{.*}}' is not
 
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll
index 697ef44fb7ed71..9954914bca4380 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-eh.ll
@@ -18,7 +18,7 @@ entry:
           to label %ret unwind label %lpad
 
 ret:
-; CHECK: call void @llvm.pseudoprobe
+; CHECK-NOT: call void @llvm.pseudoprobe
   ret void
 
 lpad:                                             ; preds = %entry
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll
new file mode 100644
index 00000000000000..f132f864e59458
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-invoke.ll
@@ -0,0 +1,155 @@
+; REQUIRES: x86_64-linux
+; RUN: opt < %s -passes=pseudo-probe -S -o - | FileCheck %s
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+$__clang_call_terminate = comdat any
+
+ at x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: mustprogress noinline nounwind uwtable
+define dso_local void @_Z3barv() #0 personality ptr @__gxx_personality_v0 !dbg !14 {
+entry:
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 1
+  %0 = load volatile i32, ptr @x, align 4, !dbg !17, !tbaa !19
+  %tobool = icmp ne i32 %0, 0, !dbg !17
+  br i1 %tobool, label %if.then, label %if.else, !dbg !23
+
+if.then:                                          ; preds = %entry
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 2
+  invoke void @_Z3foov()
+          to label %invoke.cont unwind label %terminate.lpad, !dbg !24
+
+invoke.cont:                                      ; preds = %if.then
+; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
+  invoke void @_Z3bazv()
+          to label %invoke.cont1 unwind label %terminate.lpad, !dbg !26
+
+invoke.cont1:                                     ; preds = %invoke.cont
+; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
+  br label %if.end, !dbg !27
+
+if.else:                                          ; preds = %entry
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 3
+  invoke void @_Z3foov()
+          to label %invoke.cont2 unwind label %terminate.lpad, !dbg !28
+
+invoke.cont2:                                     ; preds = %if.else
+; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
+  br label %if.end
+
+if.end:                                           ; preds = %invoke.cont2, %invoke.cont1
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 4
+  invoke void @_Z3foov()
+          to label %invoke.cont3 unwind label %terminate.lpad, !dbg !29
+
+invoke.cont3:                                     ; preds = %if.end
+; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
+  %1 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !19
+  %tobool4 = icmp ne i32 %1, 0, !dbg !30
+  br i1 %tobool4, label %if.then5, label %if.end6, !dbg !32
+
+if.then5:                                         ; preds = %invoke.cont3
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 5
+  %2 = load volatile i32, ptr @x, align 4, !dbg !33, !tbaa !19
+  %inc = add nsw i32 %2, 1, !dbg !33
+  store volatile i32 %inc, ptr @x, align 4, !dbg !33, !tbaa !19
+  br label %if.end6, !dbg !35
+
+if.end6:                                          ; preds = %if.then5, %invoke.cont3
+; CHECK: call void @llvm.pseudoprobe(i64 -1069303473483922844, i64 6
+  ret void, !dbg !36
+
+terminate.lpad:                                   ; preds = %if.end, %if.else, %invoke.cont, %if.then
+; CHECK-NOT: call void @llvm.pseudoprobe(i64 -1069303473483922844,
+  %3 = landingpad { ptr, i32 }
+          catch ptr null, !dbg !24
+  %4 = extractvalue { ptr, i32 } %3, 0, !dbg !24
+  call void @__clang_call_terminate(ptr %4) #3, !dbg !24
+  unreachable, !dbg !24
+}
+
+; Function Attrs: mustprogress noinline nounwind uwtable
+define dso_local void @_Z3foov() #0 !dbg !37 {
+entry:
+  ret void, !dbg !38
+}
+
+declare i32 @__gxx_personality_v0(...)
+
+; Function Attrs: noinline noreturn nounwind uwtable
+define linkonce_odr hidden void @__clang_call_terminate(ptr noundef %0) #1 comdat {
+  %2 = call ptr @__cxa_begin_catch(ptr %0) #4
+  call void @_ZSt9terminatev() #3
+  unreachable
+}
+
+declare ptr @__cxa_begin_catch(ptr)
+
+declare void @_ZSt9terminatev()
+
+; Function Attrs: mustprogress noinline nounwind uwtable
+define dso_local void @_Z3bazv() #0 !dbg !39 {
+entry:
+  ret void, !dbg !40
+}
+
+; CHECK: ![[#]] = !{i64 -3270123626113159616, i64 18891622278, !"_Z3bazv"}
+
+attributes #0 = { mustprogress noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #1 = { noinline noreturn nounwind uwtable "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #2 = { mustprogress noinline norecurse nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { noreturn nounwind }
+attributes #4 = { nounwind }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12}
+!llvm.ident = !{!13}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_14, file: !3, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test.cpp", directory: "/home", checksumkind: CSK_MD5, checksum: "a4c7b0392f3fd9c8ebb85065159dbb02")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{!"clang version 19.0.0"}
+!14 = distinct !DISubprogram(name: "bar", linkageName: "_Z3barv", scope: !3, file: !3, line: 4, type: !15, scopeLine: 4, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!15 = !DISubroutineType(types: !16)
+!16 = !{null}
+!17 = !DILocation(line: 5, column: 6, scope: !18)
+!18 = distinct !DILexicalBlock(scope: !14, file: !3, line: 5, column: 6)
+!19 = !{!20, !20, i64 0}
+!20 = !{!"int", !21, i64 0}
+!21 = !{!"omnipotent char", !22, i64 0}
+!22 = !{!"Simple C++ TBAA"}
+!23 = !DILocation(line: 5, column: 6, scope: !14)
+!24 = !DILocation(line: 6, column: 5, scope: !25)
+!25 = distinct !DILexicalBlock(scope: !18, file: !3, line: 5, column: 9)
+!26 = !DILocation(line: 7, column: 5, scope: !25)
+!27 = !DILocation(line: 8, column: 3, scope: !25)
+!28 = !DILocation(line: 9, column: 5, scope: !18)
+!29 = !DILocation(line: 11, column: 3, scope: !14)
+!30 = !DILocation(line: 12, column: 6, scope: !31)
+!31 = distinct !DILexicalBlock(scope: !14, file: !3, line: 12, column: 6)
+!32 = !DILocation(line: 12, column: 6, scope: !14)
+!33 = !DILocation(line: 13, column: 5, scope: !34)
+!34 = distinct !DILexicalBlock(scope: !31, file: !3, line: 12, column: 9)
+!35 = !DILocation(line: 14, column: 5, scope: !34)
+!36 = !DILocation(line: 17, column: 1, scope: !14)
+!37 = distinct !DISubprogram(name: "foo", linkageName: "_Z3foov", scope: !3, file: !3, line: 19, type: !15, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!38 = !DILocation(line: 19, column: 13, scope: !37)
+!39 = distinct !DISubprogram(name: "baz", linkageName: "_Z3bazv", scope: !3, file: !3, line: 18, type: !15, scopeLine: 18, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!40 = !DILocation(line: 18, column: 13, scope: !39)
+!41 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 22, type: !42, scopeLine: 22, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!42 = !DISubroutineType(types: !43)
+!43 = !{!6}
+!44 = !DILocation(line: 23, column: 3, scope: !41)
+!45 = !DILocation(line: 24, column: 1, scope: !41)

``````````

</details>


https://github.com/llvm/llvm-project/pull/79919


More information about the llvm-commits mailing list