[llvm] f8bab38 - [CSSPGO] Fix the issue of missing callee profile matches (#85715)

via llvm-commits llvm-commits at lists.llvm.org
Wed Mar 27 22:27:26 PDT 2024


Author: Lei Wang
Date: 2024-03-27T22:27:22-07:00
New Revision: f8bab38b6dd02f2cd3acc28521d0ccb3186ef616

URL: https://github.com/llvm/llvm-project/commit/f8bab38b6dd02f2cd3acc28521d0ccb3186ef616
DIFF: https://github.com/llvm/llvm-project/commit/f8bab38b6dd02f2cd3acc28521d0ccb3186ef616.diff

LOG: [CSSPGO] Fix the issue of missing callee profile matches (#85715)

Two fixes related to the callee/inlinee profile:

1. Fix the bug that the matching results are missing to distribute to
the callee profiles (should be pass-by-reference).
2. Narrow imported function matching to checksum mismatched functions. 

More context: before we run matchings for all imported functions even
checksums are matched, however, after we fix 1), we got a regression,
it's likely due to the matching is not no-op for checksum matched
function, so we want to make it consistent to only run matching for
checksum mismatched (imported)functions. Since the
metadata(pseudo_probe_desc) are dropped for imported function, we
leverage the function attribute mechanism and add a new function
attribute(`profile-checksum-mismatch`) to transfer the info from
pre-link to post-link.

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof
    llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll

Modified: 
    llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
    llvm/lib/Transforms/IPO/SampleProfile.cpp
    llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 66814d39527301..bd7496a799c579 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -86,9 +86,12 @@ template <> struct IRTraits<BasicBlock> {
 // SampleProfileProber.
 class PseudoProbeManager {
   DenseMap<uint64_t, PseudoProbeDescriptor> GUIDToProbeDescMap;
+  const ThinOrFullLTOPhase LTOPhase;
 
 public:
-  PseudoProbeManager(const Module &M) {
+  PseudoProbeManager(const Module &M,
+                     ThinOrFullLTOPhase LTOPhase = ThinOrFullLTOPhase::None)
+      : LTOPhase(LTOPhase) {
     if (NamedMDNode *FuncInfo =
             M.getNamedMetadata(PseudoProbeDescMetadataName)) {
       for (const auto *Operand : FuncInfo->operands()) {
@@ -126,17 +129,15 @@ class PseudoProbeManager {
 
   bool profileIsValid(const Function &F, const FunctionSamples &Samples) const {
     const auto *Desc = getDesc(F);
-    if (!Desc) {
-      LLVM_DEBUG(dbgs() << "Probe descriptor missing for Function "
-                        << F.getName() << "\n");
-      return false;
-    }
-    if (Desc->getFunctionHash() != Samples.getFunctionHash()) {
-      LLVM_DEBUG(dbgs() << "Hash mismatch for Function " << F.getName()
-                        << "\n");
-      return false;
-    }
-    return true;
+    assert((LTOPhase != ThinOrFullLTOPhase::ThinLTOPostLink || !Desc ||
+            profileIsHashMismatched(*Desc, Samples) ==
+                F.hasFnAttribute("profile-checksum-mismatch")) &&
+           "In post-link, profile checksum matching state doesn't match "
+           "function 'profile-checksum-mismatch' attribute.");
+    // The desc for import function is unavailable. Check the function attribute
+    // for mismatch.
+    return (!Desc && !F.hasFnAttribute("profile-checksum-mismatch")) ||
+           (Desc && !profileIsHashMismatched(*Desc, Samples));
   }
 };
 

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2cbef8a7ae611d..7545a92c114ef2 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -453,6 +453,7 @@ class SampleProfileMatcher {
   Module &M;
   SampleProfileReader &Reader;
   const PseudoProbeManager *ProbeManager;
+  const ThinOrFullLTOPhase LTOPhase;
   SampleProfileMap FlattenedProfiles;
   // For each function, the matcher generates a map, of which each entry is a
   // mapping from the source location of current build to the source location in
@@ -504,8 +505,9 @@ class SampleProfileMatcher {
 
 public:
   SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
-                       const PseudoProbeManager *ProbeManager)
-      : M(M), Reader(Reader), ProbeManager(ProbeManager){};
+                       const PseudoProbeManager *ProbeManager,
+                       ThinOrFullLTOPhase LTOPhase)
+      : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
   void runOnModule();
   void clearMatchingData() {
     // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
@@ -521,7 +523,7 @@ class SampleProfileMatcher {
       return &It->second;
     return nullptr;
   }
-  void runOnFunction(const Function &F);
+  void runOnFunction(Function &F);
   void findIRAnchors(const Function &F,
                      std::map<LineLocation, StringRef> &IRAnchors);
   void findProfileAnchors(
@@ -1911,15 +1913,22 @@ bool SampleProfileLoader::emitAnnotations(Function &F) {
   bool Changed = false;
 
   if (FunctionSamples::ProfileIsProbeBased) {
-    if (!ProbeManager->profileIsValid(F, *Samples)) {
+    LLVM_DEBUG({
+      if (!ProbeManager->getDesc(F))
+        dbgs() << "Probe descriptor missing for Function " << F.getName()
+               << "\n";
+    });
+
+    if (ProbeManager->profileIsValid(F, *Samples)) {
+      ++NumMatchedProfile;
+    } else {
+      ++NumMismatchedProfile;
       LLVM_DEBUG(
           dbgs() << "Profile is invalid due to CFG mismatch for Function "
                  << F.getName() << "\n");
-      ++NumMismatchedProfile;
       if (!SalvageStaleProfile)
         return false;
     }
-    ++NumMatchedProfile;
   } else {
     if (getFunctionLoc(F) == 0)
       return false;
@@ -2185,7 +2194,7 @@ bool SampleProfileLoader::doInitialization(Module &M,
 
   // Load pseudo probe descriptors for probe-based function samples.
   if (Reader->profileIsProbeBased()) {
-    ProbeManager = std::make_unique<PseudoProbeManager>(M);
+    ProbeManager = std::make_unique<PseudoProbeManager>(M, LTOPhase);
     if (!ProbeManager->moduleIsProbed(M)) {
       const char *Msg =
           "Pseudo-probe-based profile requires SampleProfileProbePass";
@@ -2197,8 +2206,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
 
   if (ReportProfileStaleness || PersistProfileStaleness ||
       SalvageStaleProfile) {
-    MatchingManager =
-        std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
+    MatchingManager = std::make_unique<SampleProfileMatcher>(
+        M, *Reader, ProbeManager.get(), LTOPhase);
   }
 
   return true;
@@ -2452,7 +2461,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
   }
 }
 
-void SampleProfileMatcher::runOnFunction(const Function &F) {
+void SampleProfileMatcher::runOnFunction(Function &F) {
   // We need to use flattened function samples for matching.
   // Unlike IR, which includes all callsites from the source code, the callsites
   // in profile only show up when they are hit by samples, i,e. the profile
@@ -2481,8 +2490,16 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   // support for pseudo-probe.
   if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
       !ProbeManager->profileIsValid(F, *FSFlattened)) {
-    // The matching result will be saved to IRToProfileLocationMap, create a new
-    // map for each function.
+    // For imported functions, the checksum metadata(pseudo_probe_desc) are
+    // dropped, so we leverage function attribute(profile-checksum-mismatch) to
+    // transfer the info: add the attribute during pre-link phase and check it
+    // during post-link phase(see "profileIsValid").
+    if (FunctionSamples::ProfileIsProbeBased &&
+        LTOPhase == ThinOrFullLTOPhase::ThinLTOPreLink)
+      F.addFnAttr("profile-checksum-mismatch");
+
+    // The matching result will be saved to IRToProfileLocationMap, create a
+    // new map for each function.
     auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
                             IRToProfileLocationMap);
@@ -2758,8 +2775,9 @@ void SampleProfileMatcher::distributeIRToProfileLocationMap(
     FS.setIRToProfileLocationMap(&(ProfileMappings->second));
   }
 
-  for (auto &Inlinees : FS.getCallsiteSamples()) {
-    for (auto FS : Inlinees.second) {
+  for (auto &Callees :
+       const_cast<CallsiteSampleMap &>(FS.getCallsiteSamples())) {
+    for (auto &FS : Callees.second) {
       distributeIRToProfileLocationMap(FS.second);
     }
   }

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof
new file mode 100644
index 00000000000000..76a8fc9d19a85d
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-callee-profile-mismatch.prof
@@ -0,0 +1,16 @@
+main:252:0
+ 1: 0
+ 2: 50
+ 5: 50
+ 7: bar:102
+  1: 51
+  2: baz:51
+   1: 51
+   !CFGChecksum: 4294967295
+   !Attributes: 3
+  !CFGChecksum: 281479271677951
+  !Attributes: 2
+ !CFGChecksum: 281582081721716
+bar:1:1
+ 1: 1
+ !CFGChecksum: 281479271677951

diff  --git a/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll
new file mode 100644
index 00000000000000..df56b55dcdf3c0
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/csspgo-profile-checksum-mismatch-attr.ll
@@ -0,0 +1,67 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof -pass-remarks=inline  -S -o %t 2>&1 | FileCheck %s --check-prefix=INLINE
+; RUN: FileCheck %s < %t
+; RUN: FileCheck %s < %t --check-prefix=MERGE
+
+
+; Make sure bar is inlined into main for attr merging verification.
+; INLINE: 'bar' inlined into 'main'
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @baz() #0 {
+entry:
+  ret i32 0
+}
+
+define i32 @bar() #0 !dbg !11 {
+; CHECK: define {{.*}} @bar() {{.*}} #[[#BAR_ATTR:]] !
+entry:
+  %call = call i32 @baz()
+  ret i32 0
+}
+
+define i32 @main() #0 {
+; MERGE: define {{.*}} @main() {{.*}} #[[#MAIN_ATTR:]] !
+entry:
+  br label %for.cond
+
+for.cond:                                         ; preds = %for.cond, %entry
+  %call = call i32 @bar(), !dbg !14
+  br label %for.cond
+}
+
+; CHECK: attributes #[[#BAR_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
+
+; Verify the attribute is not merged into the caller.
+; MERGE-NOT: attributes #[[#MAIN_ATTR]] = {{{.*}} "profile-checksum-mismatch" {{.*}}}
+
+attributes #0 = { "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!7}
+!llvm.pseudo_probe_desc = !{!8, !9, !10}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home", checksumkind: CSK_MD5, checksum: "0df0c950a93a603a7d13f0a9d4623642")
+!2 = !{!3}
+!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
+!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 2, !"Debug Info Version", i32 3}
+!8 = !{i64 7546896869197086323, i64 4294967295, !"baz"}
+!9 = !{i64 -2012135647395072713, i64 281530612780802, !"bar"}
+!10 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
+!11 = distinct !DISubprogram(name: "bar", scope: !1, file: !1, line: 5, type: !12, scopeLine: 5, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
+!12 = distinct !DISubroutineType(types: !13)
+!13 = !{}
+!14 = !DILocation(line: 15, column: 10, scope: !15)
+!15 = !DILexicalBlockFile(scope: !16, file: !1, discriminator: 186646591)
+!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 14, column: 40)
+!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 14, column: 3)
+!18 = distinct !DILexicalBlock(scope: !19, file: !1, line: 14, column: 3)
+!19 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 12, type: !20, scopeLine: 13, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !13)
+!20 = !DISubroutineType(types: !13)

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll
new file mode 100644
index 00000000000000..e00b737cae4e85
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll
@@ -0,0 +1,63 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-callee-profile-mismatch.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-impl  -pass-remarks=inline 2>&1 | FileCheck %s
+
+
+; CHECK: Run stale profile matching for bar
+; CHECK: Callsite with callee:baz is matched from 4 to 2
+; CHECK: 'baz' inlined into 'main' to match profiling context with (cost=always): preinliner at callsite bar:3:8.4 @ main:3:10.7
+
+; CHECK: Probe descriptor missing for Function bar
+; CHECK: Profile is invalid due to CFG mismatch for Function bar
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+define i32 @main() #0 {
+  %1 = call i32 @bar(), !dbg !13
+  ret i32 0
+}
+
+define available_externally i32 @bar() #1 !dbg !21 {
+  %1 = call i32 @baz(), !dbg !23
+  ret i32 0
+}
+
+define available_externally i32 @baz() #0 !dbg !25 {
+  ret i32 0
+}
+
+attributes #0 = { "use-sample-profile" }
+attributes #1 = { "profile-checksum-mismatch" "use-sample-profile" }
+
+!llvm.dbg.cu = !{!0, !7, !9}
+!llvm.module.flags = !{!11}
+!llvm.pseudo_probe_desc = !{!12}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !2, splitDebugInlining: false, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "7220f1a2d70ff869f1a6ab7958e3c393")
+!2 = !{!3}
+!3 = !DIGlobalVariableExpression(var: !4, expr: !DIExpression())
+!4 = distinct !DIGlobalVariable(name: "x", scope: !0, file: !1, line: 2, type: !5, isLocal: false, isDefinition: true)
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = distinct !DICompileUnit(language: DW_LANG_C11, file: !8, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!8 = !DIFile(filename: "test1.v1.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "76696bd6bfe16a9f227fe03cfdb6a82c")
+!9 = distinct !DICompileUnit(language: DW_LANG_C11, file: !10, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, nameTableKind: None)
+!10 = !DIFile(filename: "test2.c", directory: "/home/test", checksumkind: CSK_MD5, checksum: "553093afc026f9c73562eb3b0c5b7532")
+!11 = !{i32 2, !"Debug Info Version", i32 3}
+!12 = !{i64 -2624081020897602054, i64 281582081721716, !"main"}
+!13 = !DILocation(line: 8, column: 10, scope: !14)
+!14 = !DILexicalBlockFile(scope: !15, file: !1, discriminator: 186646591)
+!15 = distinct !DILexicalBlock(scope: !16, file: !1, line: 7, column: 40)
+!16 = distinct !DILexicalBlock(scope: !17, file: !1, line: 7, column: 3)
+!17 = distinct !DILexicalBlock(scope: !18, file: !1, line: 7, column: 3)
+!18 = distinct !DISubprogram(name: "main", scope: !1, file: !1, line: 5, type: !19, scopeLine: 6, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !20)
+!19 = distinct !DISubroutineType(types: !20)
+!20 = !{}
+!21 = distinct !DISubprogram(name: "bar", scope: !8, file: !8, line: 3, type: !22, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !7, retainedNodes: !20)
+!22 = !DISubroutineType(types: !20)
+!23 = !DILocation(line: 6, column: 8, scope: !24)
+!24 = !DILexicalBlockFile(scope: !21, file: !8, discriminator: 186646567)
+!25 = distinct !DISubprogram(name: "baz", scope: !10, file: !10, line: 1, type: !22, scopeLine: 1, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !9, retainedNodes: !20)

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll
index 55225b415d4abc..270beee4ebc2bd 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll
@@ -106,7 +106,7 @@ define available_externally dso_local i32 @bar(i32 noundef %0) local_unnamed_add
   ret i32 %2, !dbg !132
 }
 
-attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #0 = { nounwind uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" "profile-checksum-mismatch"}
 attributes #1 = { nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
 attributes #2 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
 attributes #3 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "disable-tail-calls"="true" "frame-pointer"="all" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
index 89477ea5fecf1e..29877fb22a2c2e 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
@@ -48,6 +48,8 @@
 ;    }
 ;  }
 
+; Verify not running profile matching for checksum matched function.
+; CHECK-NOT: Run stale profile matching for bar
 
 ; CHECK: Run stale profile matching for main
 


        


More information about the llvm-commits mailing list