[llvm] d6a0585 - [SampleFDO] Compute and report profile staleness metrics

via llvm-commits llvm-commits at lists.llvm.org
Wed Oct 26 21:08:21 PDT 2022


Author: wlei
Date: 2022-10-26T21:06:52-07:00
New Revision: d6a0585dd1b8cc82e827e3218fe3b077d33c3ed1

URL: https://github.com/llvm/llvm-project/commit/d6a0585dd1b8cc82e827e3218fe3b077d33c3ed1
DIFF: https://github.com/llvm/llvm-project/commit/d6a0585dd1b8cc82e827e3218fe3b077d33c3ed1.diff

LOG: [SampleFDO] Compute and report profile staleness metrics

When a profile is stale, profile mismatches can occur and the mismatched samples are discarded. We would therefore like to compute mismatch metrics that quantify how stale the profile is, so that users can be advised to refresh the profile when the numbers are high.

Two sets of metrics are introduced here:

 - (Num_of_mismatched_funchash/Total_profiled_funchash), (Samples_of_mismatched_func_hash / Samples_of_profiled_function) : This leverages the FunctionSamples checksum attribute, which is a pseudo-probe feature. When the source code's CFG changes, the function checksum changes as well, and the sample loader later discards all of that function's samples; this metric shows the percentage of samples discarded for this reason.
 - (Num_of_mismatched_callsite/Total_profiled_callsite), (Samples_of_mismatched_callsite / Samples_of_profiled_callsite) : This shows how many callsite locations are mismatched; a callsite location mismatch affects inlining, which is highly correlated with performance. It goes through all the callsite locations in the IR and the profile, uses the call target name to match them, and reports the number of samples in the profile that do not match an IR callsite.

This is implemented in a new class (SampleProfileMatcher) under a switch ("--report-profile-staleness"); we plan to extend it with a fuzzy profile matching feature in the future.

Reviewed By: hoy, wenlei, davidxl

Differential Revision: https://reviews.llvm.org/D136627

Added: 
    llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
    llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch.prof
    llvm/test/Transforms/SampleProfile/profile-mismatch.ll
    llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll

Modified: 
    llvm/include/llvm/ProfileData/SampleProf.h
    llvm/lib/Transforms/IPO/SampleProfile.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/ProfileData/SampleProf.h b/llvm/include/llvm/ProfileData/SampleProf.h
index 240db259fb5ca..93aa16cd79845 100644
--- a/llvm/include/llvm/ProfileData/SampleProf.h
+++ b/llvm/include/llvm/ProfileData/SampleProf.h
@@ -303,6 +303,13 @@ struct LineLocation {
   uint32_t Discriminator;
 };
 
+struct LineLocationHash {
+  uint64_t operator()(const LineLocation &Loc) const {
+    return std::hash<std::uint64_t>{}((((uint64_t)Loc.LineOffset) << 32) |
+                                      Loc.Discriminator);
+  }
+};
+
 raw_ostream &operator<<(raw_ostream &OS, const LineLocation &Loc);
 
 /// Representation of a single sample record.

diff  --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index bbdbabb4e1e79..fc2ceae939196 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -129,6 +129,10 @@ static cl::opt<std::string> SampleProfileRemappingFile(
     "sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
     cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
 
+static cl::opt<bool> ReportProfileStaleness(
+    "report-profile-staleness", cl::Hidden, cl::init(false),
+    cl::desc("Compute and report stale profile statistical metrics."));
+
 static cl::opt<bool> ProfileSampleAccurate(
     "profile-sample-accurate", cl::Hidden, cl::init(false),
     cl::desc("If the sample profile is accurate, we will mark all un-sampled "
@@ -414,6 +418,30 @@ using CandidateQueue =
     PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
                   CandidateComparer>;
 
+// Sample profile matching - fuzzy match.
+class SampleProfileMatcher {
+  Module &M;
+  SampleProfileReader &Reader;
+  const PseudoProbeManager *ProbeManager;
+
+  // Profile mismatching statistics.
+  uint64_t TotalProfiledCallsite = 0;
+  uint64_t NumMismatchedCallsite = 0;
+  uint64_t MismatchedCallsiteSamples = 0;
+  uint64_t TotalCallsiteSamples = 0;
+  uint64_t TotalProfiledFunc = 0;
+  uint64_t NumMismatchedFuncHash = 0;
+  uint64_t MismatchedFuncHashSamples = 0;
+  uint64_t TotalFuncHashSamples = 0;
+
+public:
+  SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
+                       const PseudoProbeManager *ProbeManager)
+      : M(M), Reader(Reader), ProbeManager(ProbeManager) {}
+  void detectProfileMismatch();
+  void detectProfileMismatch(const Function &F, const FunctionSamples &FS);
+};
+
 /// Sample profile pass.
 ///
 /// This pass reads profile data from the file specified by
@@ -543,6 +571,9 @@ class SampleProfileLoader final
   // A pseudo probe helper to correlate the imported sample counts.
   std::unique_ptr<PseudoProbeManager> ProbeManager;
 
+  // A helper to implement the sample profile matching algorithm.
+  std::unique_ptr<SampleProfileMatcher> MatchingManager;
+
 private:
   const char *getAnnotatedRemarkPassName() const {
     return AnnotatedPassName.c_str();
@@ -2010,9 +2041,129 @@ bool SampleProfileLoader::doInitialization(Module &M,
     }
   }
 
+  if (ReportProfileStaleness) {
+    MatchingManager =
+        std::make_unique<SampleProfileMatcher>(M, *Reader, ProbeManager.get());
+  }
+
   return true;
 }
 
+void SampleProfileMatcher::detectProfileMismatch(const Function &F,
+                                                 const FunctionSamples &FS) {
+  if (FunctionSamples::ProfileIsProbeBased) {
+    uint64_t Count = FS.getTotalSamples();
+    TotalFuncHashSamples += Count;
+    TotalProfiledFunc++;
+    if (!ProbeManager->profileIsValid(F, FS)) {
+      MismatchedFuncHashSamples += Count;
+      NumMismatchedFuncHash++;
+      return;
+    }
+  }
+
+  std::unordered_set<LineLocation, LineLocationHash> MatchedCallsiteLocs;
+
+  // Go through all the callsites on the IR and flag the callsite if the target
+  // name is the same as the one in the profile.
+  for (auto &BB : F) {
+    for (auto &I : BB.getInstList()) {
+      if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
+        continue;
+
+      const auto *CB = dyn_cast<CallBase>(&I);
+      if (auto &DLoc = I.getDebugLoc()) {
+        LineLocation IRCallsite = FunctionSamples::getCallSiteIdentifier(DLoc);
+
+        StringRef CalleeName;
+        if (Function *Callee = CB->getCalledFunction())
+          CalleeName = Callee->getName();
+
+        const auto CTM = FS.findCallTargetMapAt(IRCallsite);
+        const auto CallsiteFS = FS.findFunctionSamplesMapAt(IRCallsite);
+
+        // Indirect call case.
+        if (CalleeName.empty()) {
+          // An indirect call has no CalleeName, so conservatively treat it as
+          // matched if the profile has any call target or callsite sample at
+          // this location. This avoids many false positives: otherwise all
+          // indirect call samples would be reported as mismatches.
+          if ((CTM && !CTM->empty()) || (CallsiteFS && !CallsiteFS->empty()))
+            MatchedCallsiteLocs.insert(IRCallsite);
+        } else {
+          // Check if the call target name is matched for direct call case.
+          if ((CTM && CTM->count(CalleeName)) ||
+              (CallsiteFS && CallsiteFS->count(CalleeName)))
+            MatchedCallsiteLocs.insert(IRCallsite);
+        }
+      }
+    }
+  }
+
+  auto isInvalidLineOffset = [](uint32_t LineOffset) {
+    return LineOffset & 0x8000;
+  };
+
+  // Check whether any callsites in the profile do not match an IR callsite;
+  // the samples of such callsites will be discarded.
+  for (auto &I : FS.getBodySamples()) {
+    const LineLocation &Loc = I.first;
+    if (isInvalidLineOffset(Loc.LineOffset))
+      continue;
+
+    uint64_t Count = I.second.getSamples();
+    if (!I.second.getCallTargets().empty()) {
+      TotalCallsiteSamples += Count;
+      TotalProfiledCallsite++;
+      if (!MatchedCallsiteLocs.count(Loc)) {
+        MismatchedCallsiteSamples += Count;
+        NumMismatchedCallsite++;
+      }
+    }
+  }
+
+  for (auto &I : FS.getCallsiteSamples()) {
+    const LineLocation &Loc = I.first;
+    if (isInvalidLineOffset(Loc.LineOffset))
+      continue;
+
+    uint64_t Count = 0;
+    for (auto &FM : I.second) {
+      Count += FM.second.getTotalSamples();
+    }
+    TotalCallsiteSamples += Count;
+    TotalProfiledCallsite++;
+    if (!MatchedCallsiteLocs.count(Loc)) {
+      MismatchedCallsiteSamples += Count;
+      NumMismatchedCallsite++;
+    }
+  }
+}
+
+void SampleProfileMatcher::detectProfileMismatch() {
+  for (auto &F : M) {
+    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+      continue;
+    FunctionSamples *FS = Reader.getSamplesFor(F);
+    if (!FS)
+      continue;
+    detectProfileMismatch(F, *FS);
+  }
+
+  if (FunctionSamples::ProfileIsProbeBased) {
+    errs() << "(" << NumMismatchedFuncHash << "/" << TotalProfiledFunc << ")"
+           << " of functions' profile are invalid and "
+           << " (" << MismatchedFuncHashSamples << "/" << TotalFuncHashSamples
+           << ")"
+           << " of samples are discarded due to function hash mismatch.\n";
+  }
+  errs() << "(" << NumMismatchedCallsite << "/" << TotalProfiledCallsite << ")"
+         << " of callsites' profile are invalid and "
+         << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+         << ")"
+         << " of samples are discarded due to callsite location mismatch.\n";
+}
+
 bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
                                       ProfileSummaryInfo *_PSI, CallGraph *CG) {
   GUIDToFuncNameMapper Mapper(M, *Reader, GUIDToFuncNameMap);
@@ -2057,6 +2208,9 @@ bool SampleProfileLoader::runOnModule(Module &M, ModuleAnalysisManager *AM,
   assert(SymbolMap.count(StringRef()) == 0 &&
          "No empty StringRef should be added in SymbolMap");
 
+  if (ReportProfileStaleness)
+    MatchingManager->detectProfileMismatch();
+
   bool retval = false;
   for (auto *F : buildFunctionOrder(M, CG)) {
     assert(!F->isDeclaration());

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
new file mode 100644
index 0000000000000..0bb17b2f8f6e4
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -0,0 +1,14 @@
+main:30:0
+ 0: 0
+ 1.1: 0
+ 3: 10 matched:10
+ 4: 10
+ 5: 10 bar_mismatch:10
+ 8: 0
+ 7: foo:10
+  1: 5
+  2: 5
+bar:10:10
+ 1: 10
+matched:10:10
+ 1: 10

diff  --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch.prof
new file mode 100644
index 0000000000000..5dc266941b2f2
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-profile-mismatch.prof
@@ -0,0 +1,14 @@
+main:30:0
+ 1: 0
+ 12: 10 matched:10
+ 20: 10 bar:10
+ 13: foo_mismatch:10
+  1: 10
+  !CFGChecksum: 4294967295
+ !CFGChecksum: 844635331715433
+bar:10:10
+ 1: 10
+ !CFGChecksum: 42949671295
+matched:10:10
+ 1: 10
+ !CFGChecksum: 4294967295

diff  --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
new file mode 100644
index 0000000000000..68f8e19b10701
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -0,0 +1,197 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-mismatch.prof -report-profile-staleness  -S 2>%t
+; RUN: FileCheck %s --input-file %t
+
+; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @foo(i32 noundef %x) #0 !dbg !12 {
+entry:
+  %y = alloca i32, align 4
+  call void @llvm.dbg.value(metadata i32 %x, metadata !16, metadata !DIExpression()), !dbg !18
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %y), !dbg !19
+  call void @llvm.dbg.declare(metadata ptr %y, metadata !17, metadata !DIExpression()), !dbg !20
+  %add = add nsw i32 %x, 1, !dbg !21
+  store volatile i32 %add, ptr %y, align 4, !dbg !20, !tbaa !22
+  %y.0. = load volatile i32, ptr %y, align 4, !dbg !26, !tbaa !22
+  %add1 = add nsw i32 %y.0., 1, !dbg !27
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %y), !dbg !28
+  ret i32 %add1, !dbg !29
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #3 !dbg !30 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !32, metadata !DIExpression()), !dbg !33
+  %add = add nsw i32 %x, 2, !dbg !34
+  ret i32 %add, !dbg !35
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @matched(i32 noundef %x) #3 !dbg !36 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !38, metadata !DIExpression()), !dbg !39
+  %add = add nsw i32 %x, 3, !dbg !40
+  ret i32 %add, !dbg !41
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #0 !dbg !42 {
+entry:
+  call void @llvm.dbg.value(metadata i32 0, metadata !46, metadata !DIExpression()), !dbg !52
+  br label %for.cond, !dbg !53
+
+for.cond:                                         ; preds = %for.cond.cleanup3, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.cond.cleanup3 ], !dbg !52
+  call void @llvm.dbg.value(metadata i32 %i.0, metadata !46, metadata !DIExpression()), !dbg !52
+  %cmp = icmp ult i32 %i.0, 1000, !dbg !54
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !56
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  ret i32 0, !dbg !58
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.dbg.value(metadata i32 0, metadata !48, metadata !DIExpression()), !dbg !59
+  br label %for.cond1, !dbg !60
+
+for.cond1:                                        ; preds = %for.body4, %for.body
+  %a.0 = phi i32 [ 0, %for.body ], [ %inc, %for.body4 ], !dbg !59
+  call void @llvm.dbg.value(metadata i32 %a.0, metadata !48, metadata !DIExpression()), !dbg !59
+  %cmp2 = icmp ult i32 %a.0, 10000, !dbg !61
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3, !dbg !64
+
+for.cond.cleanup3:                                ; preds = %for.cond1
+  %inc8 = add nuw nsw i32 %i.0, 1, !dbg !66
+  call void @llvm.dbg.value(metadata i32 %inc8, metadata !46, metadata !DIExpression()), !dbg !52
+  br label %for.cond, !dbg !68, !llvm.loop !69
+
+for.body4:                                        ; preds = %for.cond1
+  %0 = load volatile i32, ptr @x, align 4, !dbg !73, !tbaa !22
+  %call = call i32 @matched(i32 noundef %0), !dbg !75
+  store volatile i32 %call, ptr @x, align 4, !dbg !76, !tbaa !22
+  %1 = load volatile i32, ptr @x, align 4, !dbg !77, !tbaa !22
+  %call5 = call i32 @foo(i32 noundef %1), !dbg !78
+  store volatile i32 %call5, ptr @x, align 4, !dbg !79, !tbaa !22
+  %2 = load volatile i32, ptr @x, align 4, !dbg !80, !tbaa !22
+  %call6 = call i32 @bar(i32 noundef %2), !dbg !81
+  store volatile i32 %call6, ptr @x, align 4, !dbg !82, !tbaa !22
+  %inc = add nuw nsw i32 %a.0, 1, !dbg !83
+  call void @llvm.dbg.value(metadata i32 %inc, metadata !48, metadata !DIExpression()), !dbg !59
+  br label %for.cond1, !dbg !85, !llvm.loop !86
+}
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) #4
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { noinline nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #4 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10}
+!llvm.ident = !{!11}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "clang", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "test.c", directory: "test")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 7, !"uwtable", i32 2}
+!11 = !{!""}
+!12 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 2, type: !13, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !15)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!6, !6}
+!15 = !{!16, !17}
+!16 = !DILocalVariable(name: "x", arg: 1, scope: !12, file: !3, line: 2, type: !6)
+!17 = !DILocalVariable(name: "y", scope: !12, file: !3, line: 3, type: !5)
+!18 = !DILocation(line: 0, scope: !12)
+!19 = !DILocation(line: 3, column: 3, scope: !12)
+!20 = !DILocation(line: 3, column: 16, scope: !12)
+!21 = !DILocation(line: 3, column: 22, scope: !12)
+!22 = !{!23, !23, i64 0}
+!23 = !{!"int", !24, i64 0}
+!24 = !{!"omnipotent char", !25, i64 0}
+!25 = !{!"Simple C/C++ TBAA"}
+!26 = !DILocation(line: 4, column: 10, scope: !12)
+!27 = !DILocation(line: 4, column: 12, scope: !12)
+!28 = !DILocation(line: 5, column: 1, scope: !12)
+!29 = !DILocation(line: 4, column: 3, scope: !12)
+!30 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 7, type: !13, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !31)
+!31 = !{!32}
+!32 = !DILocalVariable(name: "x", arg: 1, scope: !30, file: !3, line: 7, type: !6)
+!33 = !DILocation(line: 0, scope: !30)
+!34 = !DILocation(line: 8, column: 12, scope: !30)
+!35 = !DILocation(line: 8, column: 3, scope: !30)
+!36 = distinct !DISubprogram(name: "matched", scope: !3, file: !3, line: 11, type: !13, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !37)
+!37 = !{!38}
+!38 = !DILocalVariable(name: "x", arg: 1, scope: !36, file: !3, line: 11, type: !6)
+!39 = !DILocation(line: 0, scope: !36)
+!40 = !DILocation(line: 12, column: 12, scope: !36)
+!41 = !DILocation(line: 12, column: 3, scope: !36)
+!42 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 15, type: !43, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !45)
+!43 = !DISubroutineType(types: !44)
+!44 = !{!6}
+!45 = !{!46, !48}
+!46 = !DILocalVariable(name: "i", scope: !47, file: !3, line: 16, type: !6)
+!47 = distinct !DILexicalBlock(scope: !42, file: !3, line: 16, column: 3)
+!48 = !DILocalVariable(name: "a", scope: !49, file: !3, line: 17, type: !6)
+!49 = distinct !DILexicalBlock(scope: !50, file: !3, line: 17, column: 5)
+!50 = distinct !DILexicalBlock(scope: !51, file: !3, line: 16, column: 34)
+!51 = distinct !DILexicalBlock(scope: !47, file: !3, line: 16, column: 3)
+!52 = !DILocation(line: 0, scope: !47)
+!53 = !DILocation(line: 16, column: 8, scope: !47)
+!54 = !DILocation(line: 16, column: 21, scope: !55)
+!55 = !DILexicalBlockFile(scope: !51, file: !3, discriminator: 2)
+!56 = !DILocation(line: 16, column: 3, scope: !57)
+!57 = !DILexicalBlockFile(scope: !47, file: !3, discriminator: 2)
+!58 = !DILocation(line: 23, column: 1, scope: !42)
+!59 = !DILocation(line: 0, scope: !49)
+!60 = !DILocation(line: 17, column: 10, scope: !49)
+!61 = !DILocation(line: 17, column: 23, scope: !62)
+!62 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 2)
+!63 = distinct !DILexicalBlock(scope: !49, file: !3, line: 17, column: 5)
+!64 = !DILocation(line: 17, column: 5, scope: !65)
+!65 = !DILexicalBlockFile(scope: !49, file: !3, discriminator: 2)
+!66 = !DILocation(line: 16, column: 30, scope: !67)
+!67 = !DILexicalBlockFile(scope: !51, file: !3, discriminator: 4)
+!68 = !DILocation(line: 16, column: 3, scope: !67)
+!69 = distinct !{!69, !70, !71, !72}
+!70 = !DILocation(line: 16, column: 3, scope: !47)
+!71 = !DILocation(line: 22, column: 3, scope: !47)
+!72 = !{!"llvm.loop.mustprogress"}
+!73 = !DILocation(line: 18, column: 19, scope: !74)
+!74 = distinct !DILexicalBlock(scope: !63, file: !3, line: 17, column: 37)
+!75 = !DILocation(line: 18, column: 11, scope: !74)
+!76 = !DILocation(line: 18, column: 9, scope: !74)
+!77 = !DILocation(line: 19, column: 15, scope: !74)
+!78 = !DILocation(line: 19, column: 11, scope: !74)
+!79 = !DILocation(line: 19, column: 9, scope: !74)
+!80 = !DILocation(line: 20, column: 15, scope: !74)
+!81 = !DILocation(line: 20, column: 11, scope: !74)
+!82 = !DILocation(line: 20, column: 9, scope: !74)
+!83 = !DILocation(line: 17, column: 33, scope: !84)
+!84 = !DILexicalBlockFile(scope: !63, file: !3, discriminator: 4)
+!85 = !DILocation(line: 17, column: 5, scope: !84)
+!86 = distinct !{!86, !87, !88, !72}
+!87 = !DILocation(line: 17, column: 5, scope: !49)
+!88 = !DILocation(line: 21, column: 5, scope: !49)

diff  --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
new file mode 100644
index 0000000000000..0e88dea3cdd02
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -0,0 +1,233 @@
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-profile-mismatch.prof -report-profile-staleness -S 2>%t
+; RUN: FileCheck %s --input-file %t
+
+; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @foo(i32 noundef %x) #0 !dbg !16 {
+entry:
+  %y = alloca i32, align 4
+  call void @llvm.dbg.value(metadata i32 %x, metadata !20, metadata !DIExpression()), !dbg !22
+  call void @llvm.lifetime.start.p0(i64 4, ptr nonnull %y), !dbg !23
+  call void @llvm.dbg.declare(metadata ptr %y, metadata !21, metadata !DIExpression()), !dbg !24
+  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg !25
+  %add = add nsw i32 %x, 1, !dbg !26
+  store volatile i32 %add, ptr %y, align 4, !dbg !24, !tbaa !27
+  %y.0. = load volatile i32, ptr %y, align 4, !dbg !31, !tbaa !27
+  %add1 = add nsw i32 %y.0., 1, !dbg !32
+  call void @llvm.lifetime.end.p0(i64 4, ptr nonnull %y), !dbg !33
+  ret i32 %add1, !dbg !34
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: argmemonly mustprogress nocallback nofree nosync nounwind willreturn
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #3 !dbg !35 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !37, metadata !DIExpression()), !dbg !38
+  call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !39
+  %add = add nsw i32 %x, 2, !dbg !40
+  ret i32 %add, !dbg !41
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @matched(i32 noundef %x) #3 !dbg !42 {
+entry:
+  call void @llvm.dbg.value(metadata i32 %x, metadata !44, metadata !DIExpression()), !dbg !45
+  call void @llvm.pseudoprobe(i64 -5844448289301669773, i64 1, i32 0, i64 -1), !dbg !46
+  %add = add nsw i32 %x, 3, !dbg !47
+  ret i32 %add, !dbg !48
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #0 !dbg !49 {
+entry:
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !59
+  call void @llvm.dbg.value(metadata i32 0, metadata !53, metadata !DIExpression()), !dbg !60
+  br label %for.cond, !dbg !61
+
+for.cond:                                         ; preds = %for.cond.cleanup3, %entry
+  %i.0 = phi i32 [ 0, %entry ], [ %inc8, %for.cond.cleanup3 ], !dbg !60
+  call void @llvm.dbg.value(metadata i32 %i.0, metadata !53, metadata !DIExpression()), !dbg !60
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !62
+  %cmp = icmp ult i32 %i.0, 1000, !dbg !64
+  br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !65
+
+for.cond.cleanup:                                 ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !67
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 11, i32 0, i64 -1), !dbg !68
+  ret i32 0, !dbg !68
+
+for.body:                                         ; preds = %for.cond
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !69
+  call void @llvm.dbg.value(metadata i32 0, metadata !55, metadata !DIExpression()), !dbg !70
+  br label %for.cond1, !dbg !71
+
+for.cond1:                                        ; preds = %for.body4, %for.body
+  %a.0 = phi i32 [ 0, %for.body ], [ %inc, %for.body4 ], !dbg !70
+  call void @llvm.dbg.value(metadata i32 %a.0, metadata !55, metadata !DIExpression()), !dbg !70
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 5, i32 0, i64 -1), !dbg !72
+  %cmp2 = icmp ult i32 %a.0, 10000, !dbg !75
+  br i1 %cmp2, label %for.body4, label %for.cond.cleanup3, !dbg !76
+
+for.cond.cleanup3:                                ; preds = %for.cond1
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !67
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 9, i32 0, i64 -1), !dbg !78
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 10, i32 0, i64 -1), !dbg !79
+  %inc8 = add nuw nsw i32 %i.0, 1, !dbg !79
+  call void @llvm.dbg.value(metadata i32 %inc8, metadata !53, metadata !DIExpression()), !dbg !60
+  br label %for.cond, !dbg !81, !llvm.loop !82
+
+for.body4:                                        ; preds = %for.cond1
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !86
+  %0 = load volatile i32, ptr @x, align 4, !dbg !86, !tbaa !27
+  %call = call i32 @matched(i32 noundef %0), !dbg !88
+  store volatile i32 %call, ptr @x, align 4, !dbg !90, !tbaa !27
+  %1 = load volatile i32, ptr @x, align 4, !dbg !91, !tbaa !27
+  %call5 = call i32 @foo(i32 noundef %1), !dbg !92
+  store volatile i32 %call5, ptr @x, align 4, !dbg !94, !tbaa !27
+  %2 = load volatile i32, ptr @x, align 4, !dbg !95, !tbaa !27
+  %call6 = call i32 @bar(i32 noundef %2), !dbg !96
+  store volatile i32 %call6, ptr @x, align 4, !dbg !98, !tbaa !27
+  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg !99
+  %inc = add nuw nsw i32 %a.0, 1, !dbg !99
+  call void @llvm.dbg.value(metadata i32 %inc, metadata !55, metadata !DIExpression()), !dbg !70
+  br label %for.cond1, !dbg !101, !llvm.loop !102
+}
+
+; Function Attrs: inaccessiblememonly mustprogress nocallback nofree nosync nounwind willreturn
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4
+
+; Function Attrs: nocallback nofree nosync nounwind readnone speculatable willreturn
+declare void @llvm.dbg.value(metadata, metadata, metadata) #5
+
+attributes #0 = { nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind readnone speculatable willreturn }
+attributes #2 = { argmemonly mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #3 = { noinline nounwind uwtable "frame-pointer"="none" "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #4 = { inaccessiblememonly mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #5 = { nocallback nofree nosync nounwind readnone speculatable willreturn }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10}
+!llvm.ident = !{!11}
+!llvm.pseudo_probe_desc = !{!12, !13, !14, !15}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C99, file: !3, producer: "", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!3 = !DIFile(filename: "test.c", directory: "")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 7, !"uwtable", i32 2}
+!11 = !{!""}
+!12 = !{i64 6699318081062747564, i64 4294967295, !"foo"}
+!13 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!14 = !{i64 -5844448289301669773, i64 4294967295, !"matched"}
+!15 = !{i64 -2624081020897602054, i64 844635331715433, !"main"}
+!16 = distinct !DISubprogram(name: "foo", scope: !3, file: !3, line: 2, type: !17, scopeLine: 2, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !19)
+!17 = !DISubroutineType(types: !18)
+!18 = !{!6, !6}
+!19 = !{!20, !21}
+!20 = !DILocalVariable(name: "x", arg: 1, scope: !16, file: !3, line: 2, type: !6)
+!21 = !DILocalVariable(name: "y", scope: !16, file: !3, line: 3, type: !5)
+!22 = !DILocation(line: 0, scope: !16)
+!23 = !DILocation(line: 3, column: 3, scope: !16)
+!24 = !DILocation(line: 3, column: 16, scope: !16)
+!25 = !DILocation(line: 3, column: 20, scope: !16)
+!26 = !DILocation(line: 3, column: 22, scope: !16)
+!27 = !{!28, !28, i64 0}
+!28 = !{!"int", !29, i64 0}
+!29 = !{!"omnipotent char", !30, i64 0}
+!30 = !{!"Simple C/C++ TBAA"}
+!31 = !DILocation(line: 4, column: 10, scope: !16)
+!32 = !DILocation(line: 4, column: 12, scope: !16)
+!33 = !DILocation(line: 5, column: 1, scope: !16)
+!34 = !DILocation(line: 4, column: 3, scope: !16)
+!35 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 7, type: !17, scopeLine: 7, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !36)
+!36 = !{!37}
+!37 = !DILocalVariable(name: "x", arg: 1, scope: !35, file: !3, line: 7, type: !6)
+!38 = !DILocation(line: 0, scope: !35)
+!39 = !DILocation(line: 8, column: 10, scope: !35)
+!40 = !DILocation(line: 8, column: 12, scope: !35)
+!41 = !DILocation(line: 8, column: 3, scope: !35)
+!42 = distinct !DISubprogram(name: "matched", scope: !3, file: !3, line: 11, type: !17, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !43)
+!43 = !{!44}
+!44 = !DILocalVariable(name: "x", arg: 1, scope: !42, file: !3, line: 11, type: !6)
+!45 = !DILocation(line: 0, scope: !42)
+!46 = !DILocation(line: 12, column: 10, scope: !42)
+!47 = !DILocation(line: 12, column: 12, scope: !42)
+!48 = !DILocation(line: 12, column: 3, scope: !42)
+!49 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 15, type: !50, scopeLine: 15, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !52)
+!50 = !DISubroutineType(types: !51)
+!51 = !{!6}
+!52 = !{!53, !55}
+!53 = !DILocalVariable(name: "i", scope: !54, file: !3, line: 16, type: !6)
+!54 = distinct !DILexicalBlock(scope: !49, file: !3, line: 16, column: 3)
+!55 = !DILocalVariable(name: "a", scope: !56, file: !3, line: 17, type: !6)
+!56 = distinct !DILexicalBlock(scope: !57, file: !3, line: 17, column: 5)
+!57 = distinct !DILexicalBlock(scope: !58, file: !3, line: 16, column: 34)
+!58 = distinct !DILexicalBlock(scope: !54, file: !3, line: 16, column: 3)
+!59 = !DILocation(line: 16, column: 12, scope: !54)
+!60 = !DILocation(line: 0, scope: !54)
+!61 = !DILocation(line: 16, column: 8, scope: !54)
+!62 = !DILocation(line: 16, column: 19, scope: !63)
+!63 = !DILexicalBlockFile(scope: !58, file: !3, discriminator: 2)
+!64 = !DILocation(line: 16, column: 21, scope: !63)
+!65 = !DILocation(line: 16, column: 3, scope: !66)
+!66 = !DILexicalBlockFile(scope: !54, file: !3, discriminator: 2)
+!67 = !DILocation(line: 0, scope: !49)
+!68 = !DILocation(line: 23, column: 1, scope: !49)
+!69 = !DILocation(line: 17, column: 14, scope: !56)
+!70 = !DILocation(line: 0, scope: !56)
+!71 = !DILocation(line: 17, column: 10, scope: !56)
+!72 = !DILocation(line: 17, column: 21, scope: !73)
+!73 = !DILexicalBlockFile(scope: !74, file: !3, discriminator: 2)
+!74 = distinct !DILexicalBlock(scope: !56, file: !3, line: 17, column: 5)
+!75 = !DILocation(line: 17, column: 23, scope: !73)
+!76 = !DILocation(line: 17, column: 5, scope: !77)
+!77 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 2)
+!78 = !DILocation(line: 22, column: 3, scope: !57)
+!79 = !DILocation(line: 16, column: 30, scope: !80)
+!80 = !DILexicalBlockFile(scope: !58, file: !3, discriminator: 4)
+!81 = !DILocation(line: 16, column: 3, scope: !80)
+!82 = distinct !{!82, !83, !84, !85}
+!83 = !DILocation(line: 16, column: 3, scope: !54)
+!84 = !DILocation(line: 22, column: 3, scope: !54)
+!85 = !{!"llvm.loop.mustprogress"}
+!86 = !DILocation(line: 18, column: 19, scope: !87)
+!87 = distinct !DILexicalBlock(scope: !74, file: !3, line: 17, column: 37)
+!88 = !DILocation(line: 18, column: 11, scope: !89)
+!89 = !DILexicalBlockFile(scope: !87, file: !3, discriminator: 186646631)
+!90 = !DILocation(line: 18, column: 9, scope: !87)
+!91 = !DILocation(line: 19, column: 15, scope: !87)
+!92 = !DILocation(line: 19, column: 11, scope: !93)
+!93 = !DILexicalBlockFile(scope: !87, file: !3, discriminator: 186646639)
+!94 = !DILocation(line: 19, column: 9, scope: !87)
+!95 = !DILocation(line: 20, column: 15, scope: !87)
+!96 = !DILocation(line: 20, column: 11, scope: !97)
+!97 = !DILexicalBlockFile(scope: !87, file: !3, discriminator: 186646647)
+!98 = !DILocation(line: 20, column: 9, scope: !87)
+!99 = !DILocation(line: 17, column: 33, scope: !100)
+!100 = !DILexicalBlockFile(scope: !74, file: !3, discriminator: 4)
+!101 = !DILocation(line: 17, column: 5, scope: !100)
+!102 = distinct !{!102, !103, !104, !85}
+!103 = !DILocation(line: 17, column: 5, scope: !56)
+!104 = !DILocation(line: 21, column: 5, scope: !56)


        


More information about the llvm-commits mailing list