[llvm] [SamplePGO] Support -salvage-stale-profile without probes too (PR #86116)

Krzysztof Pszeniczny via llvm-commits llvm-commits at lists.llvm.org
Fri Mar 22 04:59:45 PDT 2024

https://github.com/amharc updated https://github.com/llvm/llvm-project/pull/86116

>From c3b155a761a75a7b8eed6ca5102ae95a9b99cda1 Mon Sep 17 00:00:00 2001
From: Krzysztof Pszeniczny <kpszeniczny at google.com>
Date: Wed, 20 Mar 2024 16:15:05 +0100
Subject: [PATCH 1/4] [SamplePGO] Support -salvage-stale-profile without probes

Currently -salvage-stale-profile is a no-op if the profile is not
probe-based. We observed that it can help for regular, non-probe-
based profiles too: some of our internal benchmarks show 0.2-0.3%
QPS improvement.

There seems to be no good reason to limit this flag to only work
for probe-based profiles.
 llvm/lib/Transforms/IPO/SampleProfile.cpp     |   8 +-
 .../non-probe-stale-profile-matching.prof     |  22 ++
 .../non-probe-stale-profile-matching.ll       | 242 ++++++++++++++++++
 3 files changed, 268 insertions(+), 4 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 9a8040bc4b064e..8a007358b35676 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -2411,10 +2411,10 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   if (ReportProfileStaleness || PersistProfileStaleness)
     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
-  // Run profile matching for checksum mismatched profile, currently only
-  // support for pseudo-probe.
-  if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
-      !ProbeManager->profileIsValid(F, *FSFlattened)) {
+  // Run profile matching for checksum mismatched profile. For probe-based
+  // profiles, run matching only when the current profile is not valid.
+  if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased ||
+                              !ProbeManager->profileIsValid(F, *FSFlattened))) {
     // The matching result will be saved to IRToProfileLocationMap, create a new
     // map for each function.
     auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
new file mode 100644
index 00000000000000..35eb1761f38b20
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
@@ -0,0 +1,22 @@
+ 1: 0
+ 2: 614
+ 3: 614
+ 4: 603
+ 5: 0
+ 7: 631 bar:631
+ 9: 636 bar:636
+ 6: foo:1900
+  1: 614
+  2: 554
+  3: 59
+  4: 614
+  6: 59 bar:59
+ 8: foo:1859
+  1: 589
+  2: 526
+  3: 63
+  4: 618
+  6: 63 bar:65
+ 1: 2401
diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
new file mode 100644
index 00000000000000..1f17b3446d7717
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
@@ -0,0 +1,242 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/non-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl 2>&1 | FileCheck %s
+; CHECK: Run stale profile matching for main
+; CHECK: Location is matched from 1 to 1
+; CHECK: Location is matched from 2 to 2
+; CHECK: Location is matched from 4 to 4
+; CHECK: Location is matched from 6 to 6
+; CHECK: Location is matched from 7 to 7
+; CHECK: Location is matched from 8 to 8
+; CHECK: Location is matched from 10 to 10
+; CHECK: Callsite with callee:foo is matched from 12 to 6
+; CHECK: Location is rematched backwards from 7 to 1
+; CHECK: Location is rematched backwards from 8 to 2
+; CHECK: Location is rematched backwards from 10 to 4
+; CHECK: Callsite with callee:bar is matched from 13 to 7
+; CHECK: Callsite with callee:foo is matched from 14 to 8
+; CHECK: Callsite with callee:bar is matched from 15 to 9
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+@x = internal global i32 1, align 4, !dbg !0
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() local_unnamed_addr #0 !dbg !49 !prof !55 {
+  call void @llvm.dbg.value(metadata i32 0, metadata !53, metadata !DIExpression()), !dbg !57
+  br label %1, !dbg !61
+1:                                                ; preds = %17, %0
+  %2 = phi i32 [ 0, %0 ], [ %28, %17 ]
+  call void @llvm.dbg.value(metadata i32 %2, metadata !53, metadata !DIExpression()), !dbg !57
+  %3 = load volatile i32, ptr @x, align 4, !dbg !65, !tbaa !66
+  call void @llvm.dbg.value(metadata i32 %2, metadata !70, metadata !DIExpression()), !dbg !76
+  call void @llvm.dbg.value(metadata i32 %3, metadata !75, metadata !DIExpression()), !dbg !76
+  %4 = urem i32 %2, 10, !dbg !81
+  %5 = icmp eq i32 %4, 0, !dbg !81
+  %6 = zext i1 %5 to i32, !dbg !82
+  %7 = add nsw i32 %3, %6, !dbg !82
+  %8 = call i32 @bar(i32 noundef %7) #4, !dbg !83, !prof !84
+  %9 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !66
+  %10 = add nsw i32 %9, %8, !dbg !86
+  store volatile i32 %10, ptr @x, align 4, !dbg !86, !tbaa !66
+  %11 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !66
+  %12 = call i32 @bar(i32 noundef %11) #4, !dbg !88, !prof !90
+  %13 = load volatile i32, ptr @x, align 4, !dbg !91, !tbaa !66
+  %14 = add nsw i32 %13, %12, !dbg !91
+  store volatile i32 %14, ptr @x, align 4, !dbg !91, !tbaa !66
+  %15 = load volatile i32, ptr @x, align 4, !dbg !92, !tbaa !66
+  %16 = icmp slt i32 %15, 0, !dbg !94
+  br i1 %16, label %30, label %17, !dbg !95, !prof !96
+17:                                               ; preds = %1
+  %18 = load volatile i32, ptr @x, align 4, !dbg !98, !tbaa !66
+  call void @llvm.dbg.value(metadata i32 %2, metadata !70, metadata !DIExpression()), !dbg !99
+  call void @llvm.dbg.value(metadata i32 %18, metadata !75, metadata !DIExpression()), !dbg !99
+  %19 = zext i1 %5 to i32, !dbg !103
+  %20 = add nsw i32 %18, %19, !dbg !103
+  %21 = call i32 @bar(i32 noundef %20) #4, !dbg !104, !prof !84
+  %22 = load volatile i32, ptr @x, align 4, !dbg !106, !tbaa !66
+  %23 = add nsw i32 %22, %21, !dbg !106
+  store volatile i32 %23, ptr @x, align 4, !dbg !106, !tbaa !66
+  %24 = load volatile i32, ptr @x, align 4, !dbg !107, !tbaa !66
+  %25 = call i32 @bar(i32 noundef %24) #4, !dbg !108, !prof !90
+  %26 = load volatile i32, ptr @x, align 4, !dbg !110, !tbaa !66
+  %27 = add nsw i32 %26, %25, !dbg !110
+  store volatile i32 %27, ptr @x, align 4, !dbg !110, !tbaa !66
+  %28 = add nuw nsw i32 %2, 1, !dbg !112
+  call void @llvm.dbg.value(metadata i32 %28, metadata !53, metadata !DIExpression()), !dbg !57
+  %29 = icmp eq i32 %28, 1000000, !dbg !114
+  br i1 %29, label %30, label %1, !dbg !61, !prof !116, !llvm.loop !117
+30:                                               ; preds = %17, %1
+  %31 = phi i32 [ 1, %1 ], [ 0, %17 ]
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !121
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !122
+  ret i32 %31, !dbg !122
+; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
+; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
+define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_addr #3 !dbg !123 !prof !128 {
+  call void @llvm.dbg.value(metadata i32 %0, metadata !127, metadata !DIExpression()), !dbg !129
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !130
+  %2 = add nsw i32 %0, 1, !dbg !131
+  ret i32 %2, !dbg !132
+attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
+attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #4 = { nounwind }
+!llvm.dbg.cu = !{!2, !7}
+!llvm.module.flags = !{!9, !10, !11, !12, !13, !14, !15, !16, !17}
+!llvm.ident = !{!46, !46}
+!llvm.pseudo_probe_desc = !{!47, !48}
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "file1.c", directory: "/home/", checksumkind: CSK_MD5, checksum: "03dfbda098b1285e8c9837f184747483")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 18.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!8 = !DIFile(filename: "file2.c", directory: "/home", checksumkind: CSK_MD5, checksum: "bd15f56a5a8604ec94485c3dbf9d0cdd")
+!9 = !{i32 7, !"Dwarf Version", i32 5}
+!10 = !{i32 2, !"Debug Info Version", i32 3}
+!11 = !{i32 1, !"wchar_size", i32 4}
+!12 = !{i32 8, !"PIC Level", i32 2}
+!13 = !{i32 7, !"PIE Level", i32 2}
+!14 = !{i32 7, !"uwtable", i32 2}
+!15 = !{i32 7, !"frame-pointer", i32 2}
+!16 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
+!17 = !{i32 1, !"ProfileSummary", !18}
+!18 = !{!19, !20, !21, !22, !23, !24, !25, !26, !27, !28}
+!19 = !{!"ProfileFormat", !"SampleProfile"}
+!20 = !{!"TotalCount", i64 9258}
+!21 = !{!"MaxCount", i64 2401}
+!22 = !{!"MaxInternalCount", i64 0}
+!23 = !{!"MaxFunctionCount", i64 2401}
+!24 = !{!"NumCounts", i64 18}
+!25 = !{!"NumFunctions", i64 2}
+!26 = !{!"IsPartialProfile", i64 0}
+!27 = !{!"PartialProfileRatio", double 0.000000e+00}
+!28 = !{!"DetailedSummary", !29}
+!29 = !{!30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45}
+!30 = !{i32 10000, i64 2401, i32 1}
+!31 = !{i32 100000, i64 2401, i32 1}
+!32 = !{i32 200000, i64 2401, i32 1}
+!33 = !{i32 300000, i64 636, i32 2}
+!34 = !{i32 400000, i64 618, i32 4}
+!35 = !{i32 500000, i64 614, i32 8}
+!36 = !{i32 600000, i64 614, i32 8}
+!37 = !{i32 700000, i64 614, i32 8}
+!38 = !{i32 800000, i64 589, i32 10}
+!39 = !{i32 900000, i64 554, i32 11}
+!40 = !{i32 950000, i64 526, i32 12}
+!41 = !{i32 990000, i64 59, i32 16}
+!42 = !{i32 999000, i64 59, i32 16}
+!43 = !{i32 999900, i64 59, i32 16}
+!44 = !{i32 999990, i64 59, i32 16}
+!45 = !{i32 999999, i64 59, i32 16}
+!46 = !{!"clang version 18.0.0"}
+!47 = !{i64 6699318081062747564, i64 563022570642068, !"foo"}
+!48 = !{i64 -2624081020897602054, i64 1126124211449298, !"main"}
+!49 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !50, scopeLine: 12, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !52)
+!50 = !DISubroutineType(types: !51)
+!51 = !{!6}
+!52 = !{!53}
+!53 = !DILocalVariable(name: "i", scope: !54, file: !3, line: 13, type: !6)
+!54 = distinct !DILexicalBlock(scope: !49, file: !3, line: 13, column: 3)
+!55 = !{!"function_entry_count", i64 608}
+!56 = !DILocation(line: 13, column: 11, scope: !54)
+!57 = !DILocation(line: 0, scope: !54)
+!58 = !DILocation(line: 13, column: 18, scope: !59)
+!59 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 0)
+!60 = distinct !DILexicalBlock(scope: !54, file: !3, line: 13, column: 3)
+!61 = !DILocation(line: 13, column: 3, scope: !62)
+!62 = !DILexicalBlockFile(scope: !54, file: !3, discriminator: 2)
+!63 = !DILocation(line: 14, column: 15, scope: !64)
+!64 = distinct !DILexicalBlock(scope: !60, file: !3, line: 13, column: 40)
+!65 = !DILocation(line: 14, column: 18, scope: !64)
+!66 = !{!67, !67, i64 0}
+!67 = !{!"int", !68, i64 0}
+!68 = !{!"omnipotent char", !69, i64 0}
+!69 = !{!"Simple C/C++ TBAA"}
+!70 = !DILocalVariable(name: "i", arg: 1, scope: !71, file: !3, line: 6, type: !6)
+!71 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !72, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !74)
+!72 = !DISubroutineType(types: !73)
+!73 = !{!6, !6, !6}
+!74 = !{!70, !75}
+!75 = !DILocalVariable(name: "p", arg: 2, scope: !71, file: !3, line: 6, type: !6)
+!76 = !DILocation(line: 0, scope: !71, inlinedAt: !77)
+!77 = distinct !DILocation(line: 14, column: 11, scope: !78)
+!78 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646631)
+!79 = !DILocation(line: 7, column: 6, scope: !80, inlinedAt: !77)
+!80 = distinct !DILexicalBlock(scope: !71, file: !3, line: 7, column: 6)
+!81 = !DILocation(line: 7, column: 8, scope: !80, inlinedAt: !77)
+!82 = !DILocation(line: 7, column: 6, scope: !71, inlinedAt: !77)
+!83 = !DILocation(line: 0, scope: !80, inlinedAt: !77)
+!84 = !{!"branch_weights", i64 614}
+!85 = !DILocation(line: 9, column: 1, scope: !71, inlinedAt: !77)
+!86 = !DILocation(line: 14, column: 8, scope: !64)
+!87 = !DILocation(line: 15, column: 15, scope: !64)
+!88 = !DILocation(line: 15, column: 11, scope: !89)
+!89 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646639)
+!90 = !{!"branch_weights", i32 614}
+!91 = !DILocation(line: 15, column: 8, scope: !64)
+!92 = !DILocation(line: 16, column: 9, scope: !93)
+!93 = distinct !DILexicalBlock(scope: !64, file: !3, line: 16, column: 9)
+!94 = !DILocation(line: 16, column: 11, scope: !93)
+!95 = !DILocation(line: 16, column: 9, scope: !64)
+!96 = !{!"branch_weights", i32 0, i32 614}
+!97 = !DILocation(line: 18, column: 15, scope: !64)
+!98 = !DILocation(line: 18, column: 18, scope: !64)
+!99 = !DILocation(line: 0, scope: !71, inlinedAt: !100)
+!100 = distinct !DILocation(line: 18, column: 11, scope: !101)
+!101 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646647)
+!102 = !DILocation(line: 7, column: 6, scope: !80, inlinedAt: !100)
+!103 = !DILocation(line: 7, column: 6, scope: !71, inlinedAt: !100)
+!104 = !DILocation(line: 0, scope: !80, inlinedAt: !100)
+!105 = !DILocation(line: 9, column: 1, scope: !71, inlinedAt: !100)
+!106 = !DILocation(line: 18, column: 8, scope: !64)
+!107 = !DILocation(line: 19, column: 15, scope: !64)
+!108 = !DILocation(line: 19, column: 11, scope: !109)
+!109 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646655)
+!110 = !DILocation(line: 19, column: 8, scope: !64)
+!111 = !DILocation(line: 13, column: 36, scope: !59)
+!112 = !DILocation(line: 13, column: 36, scope: !113)
+!113 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 6)
+!114 = !DILocation(line: 13, column: 20, scope: !115)
+!115 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 2)
+!116 = !{!"branch_weights", i32 608, i32 614}
+!117 = distinct !{!117, !118, !119, !120}
+!118 = !DILocation(line: 13, column: 3, scope: !54)
+!119 = !DILocation(line: 20, column: 3, scope: !54)
+!120 = !{!"llvm.loop.mustprogress"}
+!121 = !DILocation(line: 0, scope: !49)
+!122 = !DILocation(line: 22, column: 1, scope: !49)
+!123 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 1, type: !124, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !126)
+!124 = !DISubroutineType(types: !125)
+!125 = !{!6, !6}
+!126 = !{!127}
+!127 = !DILocalVariable(name: "x", arg: 1, scope: !123, file: !8, line: 1, type: !6)
+!128 = !{!"function_entry_count", i64 2401}
+!129 = !DILocation(line: 0, scope: !123)
+!130 = !DILocation(line: 5, column: 10, scope: !123)
+!131 = !DILocation(line: 5, column: 12, scope: !123)
+!132 = !DILocation(line: 5, column: 3, scope: !123)

>From 78c4008bff495b4a55c20b151fde7e3987230de4 Mon Sep 17 00:00:00 2001
From: Krzysztof Pszeniczny <kpszeniczny at google.com>
Date: Thu, 21 Mar 2024 17:40:30 +0100
Subject: [PATCH 2/4] fix the test

I somehow didn't have assertions enabled in my build before, so the test
was skipped. Currently the only IR anchors used for non-probe-based
profiles are callsite-based, so the other messages aren't actually emitted.
 .../non-probe-stale-profile-matching.ll       | 19 ++++---------------
 1 file changed, 4 insertions(+), 15 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
index 1f17b3446d7717..de010f53171eef 100644
--- a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
@@ -5,21 +5,10 @@
 ; CHECK: Run stale profile matching for main
-; CHECK: Location is matched from 1 to 1
-; CHECK: Location is matched from 2 to 2
-; CHECK: Location is matched from 4 to 4
-; CHECK: Location is matched from 6 to 6
-; CHECK: Location is matched from 7 to 7
-; CHECK: Location is matched from 8 to 8
-; CHECK: Location is matched from 10 to 10
-; CHECK: Callsite with callee:foo is matched from 12 to 6
-; CHECK: Location is rematched backwards from 7 to 1
-; CHECK: Location is rematched backwards from 8 to 2
-; CHECK: Location is rematched backwards from 10 to 4
-; CHECK: Callsite with callee:bar is matched from 13 to 7
-; CHECK: Callsite with callee:foo is matched from 14 to 8
-; CHECK: Callsite with callee:bar is matched from 15 to 9
+; CHECK: Callsite with callee:foo is matched from 3.12 to 6
+; CHECK: Callsite with callee:bar is matched from 4.13 to 7
+; CHECK: Callsite with callee:foo is matched from 7.14 to 8
+; CHECK: Callsite with callee:bar is matched from 8.15 to 9
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"

>From e477cd58a999e2f545cbc07fe020e1950d73dc6c Mon Sep 17 00:00:00 2001
From: Krzysztof Pszeniczny <kpszeniczny at google.com>
Date: Fri, 22 Mar 2024 12:53:58 +0100
Subject: [PATCH 3/4] update test

 .../non-probe-stale-profile-matching.prof     |  43 +-
 .../non-probe-stale-profile-matching.ll       | 430 +++++++++---------
 2 files changed, 236 insertions(+), 237 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
index 35eb1761f38b20..8e988515be8ed5 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/non-probe-stale-profile-matching.prof
@@ -1,22 +1,23 @@
+ 0: 0
  1: 0
- 2: 614
- 3: 614
- 4: 603
- 5: 0
- 7: 631 bar:631
- 9: 636 bar:636
- 6: foo:1900
-  1: 614
-  2: 554
-  3: 59
-  4: 614
-  6: 59 bar:59
- 8: foo:1859
-  1: 589
-  2: 526
-  3: 63
-  4: 618
-  6: 63 bar:65
- 1: 2401
+ 1.1: 47663
+ 1.2: 51871
+ 2: 48723
+ 3: 48723 bar:49018
+ 4: 49087
+ 5: 51871 bar:49588
+ 7: 0
+ 2: foo:1479916
+  1: 47663
+  1.1: 46683 bar:43238
+  2: 4519 bar:4932
+  3: 48723
+ 4: foo:1505537
+  1: 48604
+  1.1: 46965 bar:44479
+  2: 4613 bar:4967
+  3: 49087
+ 0: 194449
+ 1: 194449
diff --git a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
index de010f53171eef..b666511f40b549 100644
--- a/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/non-probe-stale-profile-matching.ll
@@ -2,230 +2,228 @@
 ; REQUIRES: asserts
 ; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/non-probe-stale-profile-matching.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl 2>&1 | FileCheck %s
+; The profiled source code:
+;  volatile int x = 1;
+;  __attribute__((noinline)) int bar(int p) {
+;    return p;
+;  }
+;  __attribute__((always_inline)) int foo(int i, int p) {
+;    if (i % 10) return  bar(p);
+;    else return bar(p + 1);
+;  }
+;  int main() {
+;    for (int i = 0; i < 1000 * 1000; i++) {
+;       x += foo(i, x);
+;       x += bar(x);
+;       x += foo(i, x);
+;       x += bar(x);
+;    }
+;  }
+; The source code for the current build:
+;  volatile int x = 1;
+;  __attribute__((noinline)) int bar(int p) {
+;    return p;
+;  }
+;  __attribute__((always_inline)) int foo(int i, int p) {
+;    if (i % 10) return  bar(p);
+;    else return bar(p + 1);
+;  }
+;  int main() {
+;    if (x == 0)          // code change
+;      return 0;          // code change
+;    for (int i = 0; i < 1000 * 1000; i++) {
+;       x += foo(i, x);
+;       x += bar(x);
+;       if (i < 0)        // code change
+;         return 0;       // code change
+;       x += foo(i, x);
+;       x += bar(x);
+;    }
+;  }
+; CHECK: Run stale profile matching for bar
+; CHECK: Run stale profile matching for foo
+; CHECK: Callsite with callee:bar is matched from 1.1 to 1.1
+; CHECK: Callsite with callee:bar is matched from 2 to 2
 ; CHECK: Run stale profile matching for main
+; CHECK: Callsite with callee:foo is matched from 4 to 2
+; CHECK: Callsite with callee:bar is matched from 5 to 3
+; CHECK: Callsite with callee:foo is matched from 8 to 4
+; CHECK: Callsite with callee:bar is matched from 9 to 5
-; CHECK: Callsite with callee:foo is matched from 3.12 to 6
-; CHECK: Callsite with callee:bar is matched from 4.13 to 7
-; CHECK: Callsite with callee:foo is matched from 7.14 to 8
-; CHECK: Callsite with callee:bar is matched from 8.15 to 9
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
-@x = internal global i32 1, align 4, !dbg !0
+@x = dso_local global i32 1, align 4
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() local_unnamed_addr #0 !dbg !49 !prof !55 {
-  call void @llvm.dbg.value(metadata i32 0, metadata !53, metadata !DIExpression()), !dbg !57
-  br label %1, !dbg !61
-1:                                                ; preds = %17, %0
-  %2 = phi i32 [ 0, %0 ], [ %28, %17 ]
-  call void @llvm.dbg.value(metadata i32 %2, metadata !53, metadata !DIExpression()), !dbg !57
-  %3 = load volatile i32, ptr @x, align 4, !dbg !65, !tbaa !66
-  call void @llvm.dbg.value(metadata i32 %2, metadata !70, metadata !DIExpression()), !dbg !76
-  call void @llvm.dbg.value(metadata i32 %3, metadata !75, metadata !DIExpression()), !dbg !76
-  %4 = urem i32 %2, 10, !dbg !81
-  %5 = icmp eq i32 %4, 0, !dbg !81
-  %6 = zext i1 %5 to i32, !dbg !82
-  %7 = add nsw i32 %3, %6, !dbg !82
-  %8 = call i32 @bar(i32 noundef %7) #4, !dbg !83, !prof !84
-  %9 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !66
-  %10 = add nsw i32 %9, %8, !dbg !86
-  store volatile i32 %10, ptr @x, align 4, !dbg !86, !tbaa !66
-  %11 = load volatile i32, ptr @x, align 4, !dbg !87, !tbaa !66
-  %12 = call i32 @bar(i32 noundef %11) #4, !dbg !88, !prof !90
-  %13 = load volatile i32, ptr @x, align 4, !dbg !91, !tbaa !66
-  %14 = add nsw i32 %13, %12, !dbg !91
-  store volatile i32 %14, ptr @x, align 4, !dbg !91, !tbaa !66
-  %15 = load volatile i32, ptr @x, align 4, !dbg !92, !tbaa !66
-  %16 = icmp slt i32 %15, 0, !dbg !94
-  br i1 %16, label %30, label %17, !dbg !95, !prof !96
-17:                                               ; preds = %1
-  %18 = load volatile i32, ptr @x, align 4, !dbg !98, !tbaa !66
-  call void @llvm.dbg.value(metadata i32 %2, metadata !70, metadata !DIExpression()), !dbg !99
-  call void @llvm.dbg.value(metadata i32 %18, metadata !75, metadata !DIExpression()), !dbg !99
-  %19 = zext i1 %5 to i32, !dbg !103
-  %20 = add nsw i32 %18, %19, !dbg !103
-  %21 = call i32 @bar(i32 noundef %20) #4, !dbg !104, !prof !84
-  %22 = load volatile i32, ptr @x, align 4, !dbg !106, !tbaa !66
-  %23 = add nsw i32 %22, %21, !dbg !106
-  store volatile i32 %23, ptr @x, align 4, !dbg !106, !tbaa !66
-  %24 = load volatile i32, ptr @x, align 4, !dbg !107, !tbaa !66
-  %25 = call i32 @bar(i32 noundef %24) #4, !dbg !108, !prof !90
-  %26 = load volatile i32, ptr @x, align 4, !dbg !110, !tbaa !66
-  %27 = add nsw i32 %26, %25, !dbg !110
-  store volatile i32 %27, ptr @x, align 4, !dbg !110, !tbaa !66
-  %28 = add nuw nsw i32 %2, 1, !dbg !112
-  call void @llvm.dbg.value(metadata i32 %28, metadata !53, metadata !DIExpression()), !dbg !57
-  %29 = icmp eq i32 %28, 1000000, !dbg !114
-  br i1 %29, label %30, label %1, !dbg !61, !prof !116, !llvm.loop !117
-30:                                               ; preds = %17, %1
-  %31 = phi i32 [ 1, %1 ], [ 0, %17 ]
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !121
-  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !122
-  ret i32 %31, !dbg !122
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %p) #0 !dbg !9 {
+  ret i32 %p, !dbg !13
-; Function Attrs: nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #1
-; Function Attrs: nocallback nofree nosync nounwind speculatable willreturn memory(none)
-declare void @llvm.dbg.value(metadata, metadata, metadata) #2
+; Function Attrs: alwaysinline nounwind uwtable
+define dso_local i32 @foo(i32 noundef %i, i32 noundef %p) #1 !dbg !14 {
+  %rem = srem i32 %i, 10, !dbg !15
+  %tobool = icmp ne i32 %rem, 0, !dbg !15
+  br i1 %tobool, label %if.then, label %if.else, !dbg !16
+if.then:                                          ; preds = %entry
+  %call = call i32 @bar(i32 noundef %p), !dbg !17
+  br label %return, !dbg !19
+if.else:                                          ; preds = %entry
+  %add = add nsw i32 %p, 1, !dbg !20
+  %call1 = call i32 @bar(i32 noundef %add), !dbg !21
+  br label %return, !dbg !22
+return:                                           ; preds = %if.else, %if.then
+  %retval.0 = phi i32 [ %call, %if.then ], [ %call1, %if.else ], !dbg !23
+  ret i32 %retval.0, !dbg !24
-; Function Attrs: mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable
-define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_addr #3 !dbg !123 !prof !128 {
-  call void @llvm.dbg.value(metadata i32 %0, metadata !127, metadata !DIExpression()), !dbg !129
-  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !130
-  %2 = add nsw i32 %0, 1, !dbg !131
-  ret i32 %2, !dbg !132
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #2 !dbg !25 {
+  %0 = load volatile i32, ptr @x, align 4, !dbg !26, !tbaa !27
+  %cmp = icmp eq i32 %0, 0, !dbg !31
+  br i1 %cmp, label %if.then, label %if.end, !dbg !26
+if.then:                                          ; preds = %entry
+  br label %for.end, !dbg !32
+if.end:                                           ; preds = %entry
+  br label %for.cond, !dbg !33
+for.cond:                                         ; preds = %if.end6, %if.end
+  %i.0 = phi i32 [ 0, %if.end ], [ %inc, %if.end6 ], !dbg !34
+  %cmp1 = icmp slt i32 %i.0, 1000000, !dbg !35
+  br i1 %cmp1, label %for.body, label %for.cond.cleanup, !dbg !37
+for.cond.cleanup:                                 ; preds = %for.cond
+  br label %cleanup, !dbg !38
+for.body:                                         ; preds = %for.cond
+  %1 = load volatile i32, ptr @x, align 4, !dbg !40, !tbaa !27
+  %call = call i32 @foo(i32 noundef %i.0, i32 noundef %1), !dbg !41
+  %2 = load volatile i32, ptr @x, align 4, !dbg !42, !tbaa !27
+  %add = add nsw i32 %2, %call, !dbg !42
+  store volatile i32 %add, ptr @x, align 4, !dbg !42, !tbaa !27
+  %3 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !27
+  %call2 = call i32 @bar(i32 noundef %3), !dbg !44
+  %4 = load volatile i32, ptr @x, align 4, !dbg !45, !tbaa !27
+  %add3 = add nsw i32 %4, %call2, !dbg !45
+  store volatile i32 %add3, ptr @x, align 4, !dbg !45, !tbaa !27
+  br i1 false, label %if.then5, label %if.end6, !dbg !46
+if.then5:                                         ; preds = %for.body
+  br label %cleanup, !dbg !47
+if.end6:                                          ; preds = %for.body
+  %5 = load volatile i32, ptr @x, align 4, !dbg !48, !tbaa !27
+  %call7 = call i32 @foo(i32 noundef %i.0, i32 noundef %5), !dbg !49
+  %6 = load volatile i32, ptr @x, align 4, !dbg !50, !tbaa !27
+  %add8 = add nsw i32 %6, %call7, !dbg !50
+  store volatile i32 %add8, ptr @x, align 4, !dbg !50, !tbaa !27
+  %7 = load volatile i32, ptr @x, align 4, !dbg !51, !tbaa !27
+  %call9 = call i32 @bar(i32 noundef %7), !dbg !52
+  %8 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !27
+  %add10 = add nsw i32 %8, %call9, !dbg !53
+  store volatile i32 %add10, ptr @x, align 4, !dbg !53, !tbaa !27
+  %inc = add nsw i32 %i.0, 1, !dbg !54
+  br label %for.cond, !dbg !56, !llvm.loop !57
+cleanup:                                          ; preds = %if.then5, %for.cond.cleanup
+  br label %for.end
+for.end:                                          ; preds = %cleanup, %if.then
+  ret i32 0, !dbg !61
-attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
-attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #4 = { nounwind }
-!llvm.dbg.cu = !{!2, !7}
-!llvm.module.flags = !{!9, !10, !11, !12, !13, !14, !15, !16, !17}
-!llvm.ident = !{!46, !46}
-!llvm.pseudo_probe_desc = !{!47, !48}
-!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
-!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 2, type: !5, isLocal: false, isDefinition: true)
-!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 18.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
-!3 = !DIFile(filename: "file1.c", directory: "/home/", checksumkind: CSK_MD5, checksum: "03dfbda098b1285e8c9837f184747483")
-!4 = !{!0}
-!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
-!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 18.0.0 ", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
-!8 = !DIFile(filename: "file2.c", directory: "/home", checksumkind: CSK_MD5, checksum: "bd15f56a5a8604ec94485c3dbf9d0cdd")
-!9 = !{i32 7, !"Dwarf Version", i32 5}
-!10 = !{i32 2, !"Debug Info Version", i32 3}
-!11 = !{i32 1, !"wchar_size", i32 4}
-!12 = !{i32 8, !"PIC Level", i32 2}
-!13 = !{i32 7, !"PIE Level", i32 2}
-!14 = !{i32 7, !"uwtable", i32 2}
-!15 = !{i32 7, !"frame-pointer", i32 2}
-!16 = !{i32 1, !"EnableSplitLTOUnit", i32 0}
-!17 = !{i32 1, !"ProfileSummary", !18}
-!18 = !{!19, !20, !21, !22, !23, !24, !25, !26, !27, !28}
-!19 = !{!"ProfileFormat", !"SampleProfile"}
-!20 = !{!"TotalCount", i64 9258}
-!21 = !{!"MaxCount", i64 2401}
-!22 = !{!"MaxInternalCount", i64 0}
-!23 = !{!"MaxFunctionCount", i64 2401}
-!24 = !{!"NumCounts", i64 18}
-!25 = !{!"NumFunctions", i64 2}
-!26 = !{!"IsPartialProfile", i64 0}
-!27 = !{!"PartialProfileRatio", double 0.000000e+00}
-!28 = !{!"DetailedSummary", !29}
-!29 = !{!30, !31, !32, !33, !34, !35, !36, !37, !38, !39, !40, !41, !42, !43, !44, !45}
-!30 = !{i32 10000, i64 2401, i32 1}
-!31 = !{i32 100000, i64 2401, i32 1}
-!32 = !{i32 200000, i64 2401, i32 1}
-!33 = !{i32 300000, i64 636, i32 2}
-!34 = !{i32 400000, i64 618, i32 4}
-!35 = !{i32 500000, i64 614, i32 8}
-!36 = !{i32 600000, i64 614, i32 8}
-!37 = !{i32 700000, i64 614, i32 8}
-!38 = !{i32 800000, i64 589, i32 10}
-!39 = !{i32 900000, i64 554, i32 11}
-!40 = !{i32 950000, i64 526, i32 12}
-!41 = !{i32 990000, i64 59, i32 16}
-!42 = !{i32 999000, i64 59, i32 16}
-!43 = !{i32 999900, i64 59, i32 16}
-!44 = !{i32 999990, i64 59, i32 16}
-!45 = !{i32 999999, i64 59, i32 16}
-!46 = !{!"clang version 18.0.0"}
-!47 = !{i64 6699318081062747564, i64 563022570642068, !"foo"}
-!48 = !{i64 -2624081020897602054, i64 1126124211449298, !"main"}
-!49 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !50, scopeLine: 12, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !52)
-!50 = !DISubroutineType(types: !51)
-!51 = !{!6}
-!52 = !{!53}
-!53 = !DILocalVariable(name: "i", scope: !54, file: !3, line: 13, type: !6)
-!54 = distinct !DILexicalBlock(scope: !49, file: !3, line: 13, column: 3)
-!55 = !{!"function_entry_count", i64 608}
-!56 = !DILocation(line: 13, column: 11, scope: !54)
-!57 = !DILocation(line: 0, scope: !54)
-!58 = !DILocation(line: 13, column: 18, scope: !59)
-!59 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 0)
-!60 = distinct !DILexicalBlock(scope: !54, file: !3, line: 13, column: 3)
-!61 = !DILocation(line: 13, column: 3, scope: !62)
-!62 = !DILexicalBlockFile(scope: !54, file: !3, discriminator: 2)
-!63 = !DILocation(line: 14, column: 15, scope: !64)
-!64 = distinct !DILexicalBlock(scope: !60, file: !3, line: 13, column: 40)
-!65 = !DILocation(line: 14, column: 18, scope: !64)
-!66 = !{!67, !67, i64 0}
-!67 = !{!"int", !68, i64 0}
-!68 = !{!"omnipotent char", !69, i64 0}
-!69 = !{!"Simple C/C++ TBAA"}
-!70 = !DILocalVariable(name: "i", arg: 1, scope: !71, file: !3, line: 6, type: !6)
-!71 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 6, type: !72, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !74)
-!72 = !DISubroutineType(types: !73)
-!73 = !{!6, !6, !6}
-!74 = !{!70, !75}
-!75 = !DILocalVariable(name: "p", arg: 2, scope: !71, file: !3, line: 6, type: !6)
-!76 = !DILocation(line: 0, scope: !71, inlinedAt: !77)
-!77 = distinct !DILocation(line: 14, column: 11, scope: !78)
-!78 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646631)
-!79 = !DILocation(line: 7, column: 6, scope: !80, inlinedAt: !77)
-!80 = distinct !DILexicalBlock(scope: !71, file: !3, line: 7, column: 6)
-!81 = !DILocation(line: 7, column: 8, scope: !80, inlinedAt: !77)
-!82 = !DILocation(line: 7, column: 6, scope: !71, inlinedAt: !77)
-!83 = !DILocation(line: 0, scope: !80, inlinedAt: !77)
-!84 = !{!"branch_weights", i64 614}
-!85 = !DILocation(line: 9, column: 1, scope: !71, inlinedAt: !77)
-!86 = !DILocation(line: 14, column: 8, scope: !64)
-!87 = !DILocation(line: 15, column: 15, scope: !64)
-!88 = !DILocation(line: 15, column: 11, scope: !89)
-!89 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646639)
-!90 = !{!"branch_weights", i32 614}
-!91 = !DILocation(line: 15, column: 8, scope: !64)
-!92 = !DILocation(line: 16, column: 9, scope: !93)
-!93 = distinct !DILexicalBlock(scope: !64, file: !3, line: 16, column: 9)
-!94 = !DILocation(line: 16, column: 11, scope: !93)
-!95 = !DILocation(line: 16, column: 9, scope: !64)
-!96 = !{!"branch_weights", i32 0, i32 614}
-!97 = !DILocation(line: 18, column: 15, scope: !64)
-!98 = !DILocation(line: 18, column: 18, scope: !64)
-!99 = !DILocation(line: 0, scope: !71, inlinedAt: !100)
-!100 = distinct !DILocation(line: 18, column: 11, scope: !101)
-!101 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646647)
-!102 = !DILocation(line: 7, column: 6, scope: !80, inlinedAt: !100)
-!103 = !DILocation(line: 7, column: 6, scope: !71, inlinedAt: !100)
-!104 = !DILocation(line: 0, scope: !80, inlinedAt: !100)
-!105 = !DILocation(line: 9, column: 1, scope: !71, inlinedAt: !100)
-!106 = !DILocation(line: 18, column: 8, scope: !64)
-!107 = !DILocation(line: 19, column: 15, scope: !64)
-!108 = !DILocation(line: 19, column: 11, scope: !109)
-!109 = !DILexicalBlockFile(scope: !64, file: !3, discriminator: 186646655)
-!110 = !DILocation(line: 19, column: 8, scope: !64)
-!111 = !DILocation(line: 13, column: 36, scope: !59)
-!112 = !DILocation(line: 13, column: 36, scope: !113)
-!113 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 6)
-!114 = !DILocation(line: 13, column: 20, scope: !115)
-!115 = !DILexicalBlockFile(scope: !60, file: !3, discriminator: 2)
-!116 = !{!"branch_weights", i32 608, i32 614}
-!117 = distinct !{!117, !118, !119, !120}
-!118 = !DILocation(line: 13, column: 3, scope: !54)
-!119 = !DILocation(line: 20, column: 3, scope: !54)
-!120 = !{!"llvm.loop.mustprogress"}
-!121 = !DILocation(line: 0, scope: !49)
-!122 = !DILocation(line: 22, column: 1, scope: !49)
-!123 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 1, type: !124, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !126)
-!124 = !DISubroutineType(types: !125)
-!125 = !{!6, !6}
-!126 = !{!127}
-!127 = !DILocalVariable(name: "x", arg: 1, scope: !123, file: !8, line: 1, type: !6)
-!128 = !{!"function_entry_count", i64 2401}
-!129 = !DILocation(line: 0, scope: !123)
-!130 = !DILocation(line: 5, column: 10, scope: !123)
-!131 = !DILocation(line: 5, column: 12, scope: !123)
-!132 = !DILocation(line: 5, column: 3, scope: !123)
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { alwaysinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7}
+!llvm.ident = !{!8}
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0git", isOptimized: true, runtimeVersion: 0, emissionKind: LineTablesOnly, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "path")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{!"clang version 19.0.0git"}
+!9 = distinct !DISubprogram(name: "bar", scope: !10, file: !10, line: 2, type: !11, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!10 = !DIFile(filename: "test.c", directory: "path")
+!11 = !DISubroutineType(types: !12)
+!12 = !{}
+!13 = !DILocation(line: 3, column: 3, scope: !9)
+!14 = distinct !DISubprogram(name: "foo", scope: !10, file: !10, line: 6, type: !11, scopeLine: 6, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!15 = !DILocation(line: 7, column: 9, scope: !14)
+!16 = !DILocation(line: 7, column: 7, scope: !14)
+!17 = !DILocation(line: 7, column: 23, scope: !18)
+!18 = !DILexicalBlockFile(scope: !14, file: !10, discriminator: 2)
+!19 = !DILocation(line: 7, column: 15, scope: !18)
+!20 = !DILocation(line: 8, column: 21, scope: !14)
+!21 = !DILocation(line: 8, column: 15, scope: !14)
+!22 = !DILocation(line: 8, column: 8, scope: !14)
+!23 = !DILocation(line: 0, scope: !14)
+!24 = !DILocation(line: 9, column: 1, scope: !14)
+!25 = distinct !DISubprogram(name: "main", scope: !10, file: !10, line: 11, type: !11, scopeLine: 11, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0)
+!26 = !DILocation(line: 12, column: 7, scope: !25)
+!27 = !{!28, !28, i64 0}
+!28 = !{!"int", !29, i64 0}
+!29 = !{!"omnipotent char", !30, i64 0}
+!30 = !{!"Simple C/C++ TBAA"}
+!31 = !DILocation(line: 12, column: 9, scope: !25)
+!32 = !DILocation(line: 13, column: 5, scope: !25)
+!33 = !DILocation(line: 14, column: 8, scope: !25)
+!34 = !DILocation(line: 14, scope: !25)
+!35 = !DILocation(line: 14, column: 21, scope: !36)
+!36 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 2)
+!37 = !DILocation(line: 14, column: 3, scope: !36)
+!38 = !DILocation(line: 14, column: 3, scope: !39)
+!39 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 4)
+!40 = !DILocation(line: 15, column: 18, scope: !25)
+!41 = !DILocation(line: 15, column: 11, scope: !25)
+!42 = !DILocation(line: 15, column: 8, scope: !25)
+!43 = !DILocation(line: 16, column: 15, scope: !25)
+!44 = !DILocation(line: 16, column: 11, scope: !25)
+!45 = !DILocation(line: 16, column: 8, scope: !25)
+!46 = !DILocation(line: 17, column: 10, scope: !25)
+!47 = !DILocation(line: 18, column: 8, scope: !25)
+!48 = !DILocation(line: 19, column: 18, scope: !25)
+!49 = !DILocation(line: 19, column: 11, scope: !25)
+!50 = !DILocation(line: 19, column: 8, scope: !25)
+!51 = !DILocation(line: 20, column: 15, scope: !25)
+!52 = !DILocation(line: 20, column: 11, scope: !25)
+!53 = !DILocation(line: 20, column: 8, scope: !25)
+!54 = !DILocation(line: 14, column: 37, scope: !55)
+!55 = !DILexicalBlockFile(scope: !25, file: !10, discriminator: 6)
+!56 = !DILocation(line: 14, column: 3, scope: !55)
+!57 = distinct !{!57, !58, !59, !60}
+!58 = !DILocation(line: 14, column: 3, scope: !25)
+!59 = !DILocation(line: 21, column: 3, scope: !25)
+!60 = !{!"llvm.loop.mustprogress"}
+!61 = !DILocation(line: 22, column: 1, scope: !25)

>From a80609a43b1d53881aa26760bc7b21ed1a26ee6f Mon Sep 17 00:00:00 2001
From: Krzysztof Pszeniczny <kpszeniczny at google.com>
Date: Fri, 22 Mar 2024 12:55:47 +0100
Subject: [PATCH 4/4] update comment

 llvm/lib/Transforms/IPO/SampleProfile.cpp | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 8a007358b35676..fd781f0addea0d 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -2411,8 +2411,8 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   if (ReportProfileStaleness || PersistProfileStaleness)
     recordCallsiteMatchStates(F, IRAnchors, ProfileAnchors, nullptr);
-  // Run profile matching for checksum mismatched profile. For probe-based
-  // profiles, run matching only when the current profile is not valid.
+  // For probe-based profiles, run matching only when the current profile is not
+  // valid.
   if (SalvageStaleProfile && (!FunctionSamples::ProfileIsProbeBased ||
                               !ProbeManager->profileIsValid(F, *FSFlattened))) {
     // The matching result will be saved to IRToProfileLocationMap, create a new

More information about the llvm-commits mailing list