[flang-commits] [flang] [libcxx] [libcxxabi] [libc] [llvm] [clang] [compiler-rt] [clang-tools-extra] [lld] [CSSPGO] Compute and report post-match profile staleness (PR #79090)

Lei Wang via flang-commits flang-commits at lists.llvm.org
Tue Jan 23 10:07:13 PST 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/79090

>From 54784e26f33178efd21b0289a1f673d66ea26cc3 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 22 Jan 2024 19:16:26 -0800
Subject: [PATCH] [CSSPGO] Support post-match profile staleness metrics

---
 llvm/lib/Transforms/IPO/SampleProfile.cpp     | 440 +++++++++++-------
 .../Inputs/profile-mismatch.prof              |   7 +-
 .../SampleProfile/profile-mismatch.ll         |  12 +-
 .../pseudo-probe-profile-mismatch-thinlto.ll  |   6 +-
 .../pseudo-probe-profile-mismatch.ll          |  76 +--
 5 files changed, 324 insertions(+), 217 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 2fd8668d15e200f..a7170faa65dc07c 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -433,12 +433,19 @@ using CandidateQueue =
     PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
                   CandidateComparer>;
 
+using IRAnchorMap = std::map<LineLocation, StringRef>;
+using ProfileAnchorMap = std::map<LineLocation, std::unordered_set<FunctionId>>;
+
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
   Module &M;
   SampleProfileReader &Reader;
   const PseudoProbeManager *ProbeManager;
   SampleProfileMap FlattenedProfiles;
+
+  std::unordered_map<const Function *, IRAnchorMap> FuncIRAnchors;
+  std::unordered_map<const Function *, ProfileAnchorMap> FuncProfileAnchors;
+
   // For each function, the matcher generates a map, of which each entry is a
   // mapping from the source location of current build to the source location in
   // the profile.
@@ -448,6 +455,8 @@ class SampleProfileMatcher {
   uint64_t TotalProfiledCallsites = 0;
   uint64_t NumMismatchedCallsites = 0;
   uint64_t MismatchedCallsiteSamples = 0;
+  uint64_t PostMatchNumMismatchedCallsites = 0;
+  uint64_t PostMatchMismatchedCallsiteSamples = 0;
   uint64_t TotalCallsiteSamples = 0;
   uint64_t TotalProfiledFunc = 0;
   uint64_t NumMismatchedFuncHash = 0;
@@ -474,24 +483,22 @@ class SampleProfileMatcher {
     return nullptr;
   }
   void runOnFunction(const Function &F);
-  void findIRAnchors(const Function &F,
-                     std::map<LineLocation, StringRef> &IRAnchors);
-  void findProfileAnchors(
+  void findFuncAnchors();
+  void UpdateIRAnchors();
+  void findIRAnchors(const Function &F, IRAnchorMap &IRAnchors);
+  void findProfileAnchors(const FunctionSamples &FS,
+                          ProfileAnchorMap &ProfileAnchors);
+  void countMismatchedHashSamples(const FunctionSamples &FS);
+  void countProfileMismatches(bool IsPreMatch);
+  void countMismatchedHashes(const Function &F, const FunctionSamples &FS);
+  void countMismatchedCallsites(
+      const Function &F,
+      StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+      uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const;
+  void countMismatchedCallsiteSamples(
       const FunctionSamples &FS,
-      std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors);
-  void countMismatchedSamples(const FunctionSamples &FS);
-  void countProfileMismatches(
-      const Function &F, const FunctionSamples &FS,
-      const std::map<LineLocation, StringRef> &IRAnchors,
-      const std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors);
-  void countProfileCallsiteMismatches(
-      const FunctionSamples &FS,
-      const std::map<LineLocation, StringRef> &IRAnchors,
-      const std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors,
-      uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites);
+      StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+      uint64_t &FuncMismatchedCallsiteSamples) const;
   LocToLocMap &getIRToProfileLocationMap(const Function &F) {
     auto Ret = FuncMappings.try_emplace(
         FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
@@ -499,11 +506,10 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
-  void runStaleProfileMatching(
-      const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-      const std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors,
-      LocToLocMap &IRToProfileLocationMap);
+  void runStaleProfileMatching();
+  void runStaleProfileMatching(const Function &F, const IRAnchorMap &IRAnchors,
+                               const ProfileAnchorMap &ProfileAnchors,
+                               LocToLocMap &IRToProfileLocationMap);
 };
 
 /// Sample profile pass.
@@ -1129,7 +1135,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
         CalleeSample->getContext().hasAttribute(ContextShouldBeInlined);
     if (!PreInline && CalleeSample->getHeadSamplesEstimate() < Threshold)
       continue;
-    
+
     Function *Func = SymbolMap.lookup(CalleeSample->getFunction());
     // Add to the import list only when it's defined out of module.
     if (!Func || Func->isDeclaration())
@@ -2123,8 +2129,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
   return true;
 }
 
-void SampleProfileMatcher::findIRAnchors(
-    const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
+void SampleProfileMatcher::findIRAnchors(const Function &F,
+                                         IRAnchorMap &IRAnchors) {
   // For inlined code, recover the original callsite and callee by finding the
   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -2190,7 +2196,8 @@ void SampleProfileMatcher::findIRAnchors(
   }
 }
 
-void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
+void SampleProfileMatcher::countMismatchedHashSamples(
+    const FunctionSamples &FS) {
   const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
   // Skip the function that is external or renamed.
   if (!FuncDesc)
@@ -2202,96 +2209,11 @@ void SampleProfileMatcher::countMismatchedSamples(const FunctionSamples &FS) {
   }
   for (const auto &I : FS.getCallsiteSamples())
     for (const auto &CS : I.second)
-      countMismatchedSamples(CS.second);
-}
-
-void SampleProfileMatcher::countProfileMismatches(
-    const Function &F, const FunctionSamples &FS,
-    const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors) {
-  [[maybe_unused]] bool IsFuncHashMismatch = false;
-  if (FunctionSamples::ProfileIsProbeBased) {
-    TotalFuncHashSamples += FS.getTotalSamples();
-    TotalProfiledFunc++;
-    const auto *FuncDesc = ProbeManager->getDesc(F);
-    if (FuncDesc) {
-      if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
-        NumMismatchedFuncHash++;
-        IsFuncHashMismatch = true;
-      }
-      countMismatchedSamples(FS);
-    }
-  }
-
-  uint64_t FuncMismatchedCallsites = 0;
-  uint64_t FuncProfiledCallsites = 0;
-  countProfileCallsiteMismatches(FS, IRAnchors, ProfileAnchors,
-                                 FuncMismatchedCallsites,
-                                 FuncProfiledCallsites);
-  TotalProfiledCallsites += FuncProfiledCallsites;
-  NumMismatchedCallsites += FuncMismatchedCallsites;
-  LLVM_DEBUG({
-    if (FunctionSamples::ProfileIsProbeBased && !IsFuncHashMismatch &&
-        FuncMismatchedCallsites)
-      dbgs() << "Function checksum is matched but there are "
-             << FuncMismatchedCallsites << "/" << FuncProfiledCallsites
-             << " mismatched callsites.\n";
-  });
+      countMismatchedHashSamples(CS.second);
 }
 
-void SampleProfileMatcher::countProfileCallsiteMismatches(
-    const FunctionSamples &FS,
-    const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors,
-    uint64_t &FuncMismatchedCallsites, uint64_t &FuncProfiledCallsites) {
-
-  // Check if there are any callsites in the profile that does not match to any
-  // IR callsites, those callsite samples will be discarded.
-  for (const auto &I : ProfileAnchors) {
-    const auto &Loc = I.first;
-    const auto &Callees = I.second;
-    assert(!Callees.empty() && "Callees should not be empty");
-
-    StringRef IRCalleeName;
-    const auto &IR = IRAnchors.find(Loc);
-    if (IR != IRAnchors.end())
-      IRCalleeName = IR->second;
-
-    // Compute number of samples in the original profile.
-    uint64_t CallsiteSamples = 0;
-    if (auto CTM = FS.findCallTargetMapAt(Loc)) {
-      for (const auto &I : *CTM)
-        CallsiteSamples += I.second;
-    }
-    const auto *FSMap = FS.findFunctionSamplesMapAt(Loc);
-    if (FSMap) {
-      for (const auto &I : *FSMap)
-        CallsiteSamples += I.second.getTotalSamples();
-    }
-
-    bool CallsiteIsMatched = false;
-    // Since indirect call does not have CalleeName, check conservatively if
-    // callsite in the profile is a callsite location. This is to reduce num of
-    // false positive since otherwise all the indirect call samples will be
-    // reported as mismatching.
-    if (IRCalleeName == UnknownIndirectCallee)
-      CallsiteIsMatched = true;
-    else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
-      CallsiteIsMatched = true;
-
-    FuncProfiledCallsites++;
-    TotalCallsiteSamples += CallsiteSamples;
-    if (!CallsiteIsMatched) {
-      FuncMismatchedCallsites++;
-      MismatchedCallsiteSamples += CallsiteSamples;
-    }
-  }
-}
-
-void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
-                                              std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
+void SampleProfileMatcher::findProfileAnchors(
+    const FunctionSamples &FS, ProfileAnchorMap &ProfileAnchors) {
   auto isInvalidLineOffset = [](uint32_t LineOffset) {
     return LineOffset & 0x8000;
   };
@@ -2338,10 +2260,8 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
 //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
 void SampleProfileMatcher::runStaleProfileMatching(
-    const Function &F,
-    const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors,
+    const Function &F, const IRAnchorMap &IRAnchors,
+    const ProfileAnchorMap &ProfileAnchors,
     LocToLocMap &IRToProfileLocationMap) {
   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
                     << "\n");
@@ -2422,59 +2342,226 @@ void SampleProfileMatcher::runStaleProfileMatching(
   }
 }
 
-void SampleProfileMatcher::runOnFunction(const Function &F) {
-  // We need to use flattened function samples for matching.
-  // Unlike IR, which includes all callsites from the source code, the callsites
-  // in profile only show up when they are hit by samples, i,e. the profile
-  // callsites in one context may differ from those in another context. To get
-  // the maximum number of callsites, we merge the function profiles from all
-  // contexts, aka, the flattened profile to find profile anchors.
-  const auto *FSFlattened = getFlattenedSamplesFor(F);
-  if (!FSFlattened)
-    return;
+void SampleProfileMatcher::runStaleProfileMatching() {
+  for (const auto &F : M) {
+    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+      continue;
+    const auto *FSFlattened = getFlattenedSamplesFor(F);
+    if (!FSFlattened)
+      continue;
+    auto IR = FuncIRAnchors.find(&F);
+    auto P = FuncProfileAnchors.find(&F);
+    if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+      continue;
 
-  // Anchors for IR. It's a map from IR location to callee name, callee name is
-  // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
-  // for unknown indrect callee name.
-  std::map<LineLocation, StringRef> IRAnchors;
-  findIRAnchors(F, IRAnchors);
-  // Anchors for profile. It's a map from callsite location to a set of callee
-  // name.
-  std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
-  findProfileAnchors(*FSFlattened, ProfileAnchors);
-
-  // Detect profile mismatch for profile staleness metrics report.
-  // Skip reporting the metrics for imported functions.
-  if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
-      (ReportProfileStaleness || PersistProfileStaleness)) {
-    // Use top-level nested FS for counting profile mismatch metrics since
-    // currently once a callsite is mismatched, all its children profiles are
-    // dropped.
-    if (const auto *FS = Reader.getSamplesFor(F))
-      countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
+    // Run profile matching for checksum mismatched profile, currently only
+    // support for pseudo-probe.
+    if (FunctionSamples::ProfileIsProbeBased &&
+        !ProbeManager->profileIsValid(F, *FSFlattened)) {
+      runStaleProfileMatching(F, IR->second, P->second,
+                              getIRToProfileLocationMap(F));
+    }
   }
 
-  // Run profile matching for checksum mismatched profile, currently only
-  // support for pseudo-probe.
-  if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
-      !ProbeManager->profileIsValid(F, *FSFlattened)) {
-    // The matching result will be saved to IRToProfileLocationMap, create a new
-    // map for each function.
-    runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            getIRToProfileLocationMap(F));
-  }
+  distributeIRToProfileLocationMap();
 }
 
-void SampleProfileMatcher::runOnModule() {
+void SampleProfileMatcher::findFuncAnchors() {
   ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
                                    FunctionSamples::ProfileIsCS);
-  for (auto &F : M) {
+  for (const auto &F : M) {
     if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
       continue;
-    runOnFunction(F);
+    // We need to use flattened function samples for matching.
+    // Unlike IR, which includes all callsites from the source code, the
+    // callsites in profile only show up when they are hit by samples, i.e. the
+    // profile callsites in one context may differ from those in another
+    // context. To get the maximum number of callsites, we merge the function
+    // profiles from all contexts, aka, the flattened profile to find profile
+    // anchors.
+    const auto *FSFlattened = getFlattenedSamplesFor(F);
+    if (!FSFlattened)
+      continue;
+
+    // Anchors for IR. It's a map from IR location to callee name, callee name
+    // is empty for non-call instruction and use a dummy
+    // name(UnknownIndirectCallee) for unknown indirect callee name.
+    auto IR = FuncIRAnchors.emplace(&F, IRAnchorMap());
+    findIRAnchors(F, IR.first->second);
+
+    // Anchors for profile. It's a map from callsite location to a set of callee
+    // name.
+    auto P = FuncProfileAnchors.emplace(&F, ProfileAnchorMap());
+    findProfileAnchors(*FSFlattened, P.first->second);
+  }
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+    const FunctionSamples &FS,
+    StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+    uint64_t &FuncMismatchedCallsiteSamples) const {
+  auto It = FuncToMismatchCallsites.find(FS.getFuncName());
+  // Skip it if no mismatched callsite or this is an external function.
+  if (It == FuncToMismatchCallsites.end() || It->second.empty())
+    return;
+  const auto &MismatchCallsites = It->second;
+  for (const auto &I : FS.getBodySamples()) {
+    if (MismatchCallsites.count(I.first))
+      FuncMismatchedCallsiteSamples += I.second.getSamples();
+  }
+
+  for (const auto &I : FS.getCallsiteSamples()) {
+    const auto &Loc = I.first;
+    if (MismatchCallsites.count(Loc)) {
+      for (const auto &CS : I.second)
+        FuncMismatchedCallsiteSamples += CS.second.getTotalSamples();
+      continue;
+    }
+
+    // count mismatched samples for inlined samples.
+    for (const auto &CS : I.second)
+      countMismatchedCallsiteSamples(CS.second, FuncToMismatchCallsites,
+                                     FuncMismatchedCallsiteSamples);
+  }
+}
+
+void SampleProfileMatcher::countMismatchedCallsites(
+    const Function &F,
+    StringMap<std::set<LineLocation>> &FuncToMismatchCallsites,
+    uint64_t &FuncProfiledCallsites, uint64_t &FuncMismatchedCallsites) const {
+  auto IR = FuncIRAnchors.find(&F);
+  auto P = FuncProfileAnchors.find(&F);
+  if (IR == FuncIRAnchors.end() || P == FuncProfileAnchors.end())
+    return;
+  const auto &IRAnchors = IR->second;
+  const auto &ProfileAnchors = P->second;
+
+  auto &MismatchCallsites =
+      FuncToMismatchCallsites[FunctionSamples::getCanonicalFnName(F.getName())];
+
+  // Check if there are any callsites in the profile that does not match to any
+  // IR callsites, those callsite samples will be discarded.
+  for (const auto &I : ProfileAnchors) {
+    const auto &Loc = I.first;
+    const auto &Callees = I.second;
+    assert(!Callees.empty() && "Callees should not be empty");
+
+    StringRef IRCalleeName;
+    const auto &IR = IRAnchors.find(Loc);
+    if (IR != IRAnchors.end())
+      IRCalleeName = IR->second;
+    bool CallsiteIsMatched = false;
+    // Since indirect call does not have CalleeName, check conservatively if
+    // callsite in the profile is a callsite location. This is to reduce num of
+    // false positive since otherwise all the indirect call samples will be
+    // reported as mismatching.
+    if (IRCalleeName == UnknownIndirectCallee)
+      CallsiteIsMatched = true;
+    else if (Callees.count(FunctionId(IRCalleeName)))
+      CallsiteIsMatched = true;
+
+    FuncProfiledCallsites++;
+    if (!CallsiteIsMatched) {
+      FuncMismatchedCallsites++;
+      MismatchCallsites.insert(Loc);
+    }
+  }
+}
+
+void SampleProfileMatcher::countMismatchedHashes(const Function &F,
+                                                 const FunctionSamples &FS) {
+  if (!FunctionSamples::ProfileIsProbeBased)
+    return;
+  const auto *FuncDesc = ProbeManager->getDesc(F);
+  if (FuncDesc) {
+    if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+      NumMismatchedFuncHash++;
+    }
+    countMismatchedHashSamples(FS);
+  }
+}
+
+void SampleProfileMatcher::UpdateIRAnchors() {
+  for (auto &I : FuncIRAnchors) {
+    const auto *F = I.first;
+    auto &IRAnchors = I.second;
+    const auto Mapping =
+        FuncMappings.find(FunctionSamples::getCanonicalFnName(F->getName()));
+    if (Mapping == FuncMappings.end())
+      continue;
+    IRAnchorMap UpdatedIRAnchors;
+    const auto &LocToLocMapping = Mapping->second;
+    for (const auto L : LocToLocMapping) {
+      UpdatedIRAnchors[L.second] = IRAnchors[L.first];
+      IRAnchors.erase(L.first);
+    }
+
+    for (const auto &IR : UpdatedIRAnchors) {
+      IRAnchors[IR.first] = IR.second;
+    }
+  }
+}
+
+void SampleProfileMatcher::countProfileMismatches(bool IsPreMatch) {
+  if (!ReportProfileStaleness && !PersistProfileStaleness)
+    return;
+
+  if (!IsPreMatch) {
+    // Use the profile matching results to update the IR anchors.
+    UpdateIRAnchors();
+  }
+
+  uint64_t UnusedCounter = 0;
+  uint64_t *TotalProfiledCallsitesPtr =
+      IsPreMatch ? &TotalProfiledCallsites : &UnusedCounter;
+  uint64_t *NumMismatchedCallsitesPtr =
+      IsPreMatch ? &NumMismatchedCallsites : &PostMatchNumMismatchedCallsites;
+  uint64_t *MismatchedCallsiteSamplesPtr =
+      IsPreMatch ? &MismatchedCallsiteSamples
+                 : &PostMatchMismatchedCallsiteSamples;
+
+  auto SkipFunctionForReport = [](const Function &F) {
+    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+      return true;
+    // Skip reporting the metrics for imported functions.
+    if (GlobalValue::isAvailableExternallyLinkage(F.getLinkage()))
+      return true;
+    return false;
+  };
+
+  StringMap<std::set<LineLocation>> FuncToMismatchCallsites;
+  for (const auto &F : M) {
+    if (SkipFunctionForReport(F))
+      continue;
+    const auto *FS = Reader.getSamplesFor(F);
+    if (FS && IsPreMatch) {
+      // Only count the total function metrics once in pre-match time.
+      TotalFuncHashSamples += FS->getTotalSamples();
+      TotalProfiledFunc++;
+      countMismatchedHashes(F, *FS);
+    }
+    countMismatchedCallsites(F, FuncToMismatchCallsites,
+                             *TotalProfiledCallsitesPtr,
+                             *NumMismatchedCallsitesPtr);
+  }
+
+  for (const auto &F : M) {
+    if (SkipFunctionForReport(F))
+      continue;
+    if (const auto *FS = Reader.getSamplesFor(F))
+      countMismatchedCallsiteSamples(*FS, FuncToMismatchCallsites,
+                                     *MismatchedCallsiteSamplesPtr);
+  }
+}
+
+void SampleProfileMatcher::runOnModule() {
+  findFuncAnchors();
+  countProfileMismatches(true);
+
+  if (SalvageStaleProfile) {
+    runStaleProfileMatching();
+    countProfileMismatches(false);
   }
-  if (SalvageStaleProfile)
-    distributeIRToProfileLocationMap();
 
   if (ReportProfileStaleness) {
     if (FunctionSamples::ProfileIsProbeBased) {
@@ -2487,9 +2574,18 @@ void SampleProfileMatcher::runOnModule() {
     errs() << "(" << NumMismatchedCallsites << "/" << TotalProfiledCallsites
            << ")"
            << " of callsites' profile are invalid and "
-           << "(" << MismatchedCallsiteSamples << "/" << TotalCallsiteSamples
+           << "(" << MismatchedCallsiteSamples << "/" << TotalFuncHashSamples
            << ")"
            << " of samples are discarded due to callsite location mismatch.\n";
+    if (SalvageStaleProfile) {
+      errs() << "(" << PostMatchNumMismatchedCallsites << "/"
+             << TotalProfiledCallsites << ")"
+             << " of callsites' profile are invalid and "
+             << "(" << PostMatchMismatchedCallsiteSamples << "/"
+             << TotalFuncHashSamples << ")"
+             << " of samples are discarded due to callsite location mismatch "
+                "after stale profile matching.\n";
+    }
   }
 
   if (PersistProfileStaleness) {
@@ -2497,19 +2593,23 @@ void SampleProfileMatcher::runOnModule() {
     MDBuilder MDB(Ctx);
 
     SmallVector<std::pair<StringRef, uint64_t>> ProfStatsVec;
+    ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
+    ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
+    ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
+                              MismatchedCallsiteSamples);
+    ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
     if (FunctionSamples::ProfileIsProbeBased) {
-      ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
       ProfStatsVec.emplace_back("TotalProfiledFunc", TotalProfiledFunc);
+      ProfStatsVec.emplace_back("NumMismatchedFuncHash", NumMismatchedFuncHash);
       ProfStatsVec.emplace_back("MismatchedFuncHashSamples",
                                 MismatchedFuncHashSamples);
-      ProfStatsVec.emplace_back("TotalFuncHashSamples", TotalFuncHashSamples);
     }
-
-    ProfStatsVec.emplace_back("NumMismatchedCallsites", NumMismatchedCallsites);
-    ProfStatsVec.emplace_back("TotalProfiledCallsites", TotalProfiledCallsites);
-    ProfStatsVec.emplace_back("MismatchedCallsiteSamples",
-                              MismatchedCallsiteSamples);
-    ProfStatsVec.emplace_back("TotalCallsiteSamples", TotalCallsiteSamples);
+    if (SalvageStaleProfile) {
+      ProfStatsVec.emplace_back("PostMatchNumMismatchedCallsites",
+                                PostMatchNumMismatchedCallsites);
+      ProfStatsVec.emplace_back("PostMatchMismatchedCallsiteSamples",
+                                PostMatchMismatchedCallsiteSamples);
+    }
 
     auto *MD = MDB.createLLVMStats(ProfStatsVec);
     auto *NMD = M.getOrInsertNamedMetadata("llvm.stats");
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
index 818a048b8cabb84..f2a00e789b8b669 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/profile-mismatch.prof
@@ -2,14 +2,15 @@ main:30:0
  0: 0
  1.1: 0
  3: 10 matched:10
- 4: 10
- 5: 10 bar_mismatch:10
+ 7: 10
  8: 0
- 7: foo:15
+ 4: foo:15
   1: 5
   2: 5
   3: inlinee_mismatch:5
    1: 5
+ 5: bar_mismatch:10
+  1: 10
 bar:10:10
  1: 10
 matched:10:10
diff --git a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
index d86175c02dbb423..e7c5dece1235b57 100644
--- a/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/profile-mismatch.ll
@@ -6,9 +6,9 @@
 ; RUN: llvm-objdump --section-headers %t.obj | FileCheck %s --check-prefix=CHECK-OBJ
 ; RUN: llc < %t.ll -filetype=asm -o - | FileCheck %s --check-prefix=CHECK-ASM
 
-; CHECK: (2/3) of callsites' profile are invalid and (25/35) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/4) of callsites' profile are invalid and (15/50) of samples are discarded due to callsite location mismatch.
 
-; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 25, !"TotalCallsiteSamples", i64 35}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 15, !"TotalFuncHashSamples", i64 50}
 
 ; CHECK-OBJ: .llvm_stats
 
@@ -20,15 +20,15 @@
 ; CHECK-ASM: .byte 22
 ; CHECK-ASM: .ascii  "TotalProfiledCallsites"
 ; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "Mw=="
+; CHECK-ASM: .ascii  "NA=="
 ; CHECK-ASM: .byte 25
 ; CHECK-ASM: .ascii  "MismatchedCallsiteSamples"
 ; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MjU="
+; CHECK-ASM: .ascii  "MTU="
 ; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii  "TotalCallsiteSamples"
+; CHECK-ASM: .ascii  "TotalFuncHashSamples"
 ; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MzU="
+; CHECK-ASM: .ascii  "NTA="
 
 target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-unknown-linux-gnu"
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
index 29c3a142cc68f8f..7f848da74a53cee 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch-thinlto.ll
@@ -4,7 +4,7 @@
 ; RUN: FileCheck %s --input-file %t.ll -check-prefix=CHECK-MD
 
 ; CHECK: (1/1) of functions' profile are invalid and  (6822/6822) of samples are discarded due to function hash mismatch.
-; CHECK: (4/4) of callsites' profile are invalid and (5026/5026) of samples are discarded due to callsite location mismatch.
+; CHECK: (4/4) of callsites' profile are invalid and (5026/6822) of samples are discarded due to callsite location mismatch.
+; CHECK: (0/4) of callsites' profile are invalid and (0/6822) of samples are discarded due to callsite location mismatch after stale profile matching.
 
-
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"TotalFuncHashSamples", i64 6822, !"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalCallsiteSamples", i64 5026}
+; CHECK-MD: !{!"NumMismatchedCallsites", i64 4, !"TotalProfiledCallsites", i64 4, !"MismatchedCallsiteSamples", i64 5026, !"TotalFuncHashSamples", i64 6822, !"TotalProfiledFunc", i64 1, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 6822, !"PostMatchNumMismatchedCallsites", i64 0, !"PostMatchMismatchedCallsiteSamples", i64 0}
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
index 4b6edf821376c0b..5c5bb1f0fae647f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-profile-mismatch.ll
@@ -10,45 +10,51 @@
 
 
 ; CHECK: (1/3) of functions' profile are invalid and (10/50) of samples are discarded due to function hash mismatch.
-; CHECK: (2/3) of callsites' profile are invalid and (20/30) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch.
+; CHECK: (2/3) of callsites' profile are invalid and (20/50) of samples are discarded due to callsite location mismatch after stale profile matching.
 
-; CHECK-MD: ![[#]] = !{!"NumMismatchedFuncHash", i64 1, !"TotalProfiledFunc", i64 3, !"MismatchedFuncHashSamples", i64 10, !"TotalFuncHashSamples", i64 50, !"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalCallsiteSamples", i64 30}
+; CHECK-MD: ![[#]] = !{!"NumMismatchedCallsites", i64 2, !"TotalProfiledCallsites", i64 3, !"MismatchedCallsiteSamples", i64 20, !"TotalFuncHashSamples", i64 50, !"TotalProfiledFunc", i64 3, !"NumMismatchedFuncHash", i64 1, !"MismatchedFuncHashSamples", i64 10, !"PostMatchNumMismatchedCallsites", i64 2, !"PostMatchMismatchedCallsiteSamples", i64 20}
 
 ; CHECK-OBJ: .llvm_stats
 
-; CHECK-ASM: .section  .llvm_stats,"",@progbits
-; CHECK-ASM: .byte 21
-; CHECK-ASM: .ascii  "NumMismatchedFuncHash"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MQ=="
-; CHECK-ASM: .byte 17
-; CHECK-ASM: .ascii  "TotalProfiledFunc"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii  "MismatchedFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MTA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii  "TotalFuncHashSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "NTA="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii  "NumMismatchedCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "Mg=="
-; CHECK-ASM: .byte 22
-; CHECK-ASM: .ascii  "TotalProfiledCallsites"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "Mw=="
-; CHECK-ASM: .byte 25
-; CHECK-ASM: .ascii  "MismatchedCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MjA="
-; CHECK-ASM: .byte 20
-; CHECK-ASM: .ascii  "TotalCallsiteSamples"
-; CHECK-ASM: .byte 4
-; CHECK-ASM: .ascii  "MzA="
+
+; CHECK-ASM: .section	.llvm_stats,"",@progbits
+; CHECK-ASM: .byte	22
+; CHECK-ASM: .ascii	"NumMismatchedCallsites"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"Mg=="
+; CHECK-ASM: .byte	22
+; CHECK-ASM: .ascii	"TotalProfiledCallsites"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"Mw=="
+; CHECK-ASM: .byte	25
+; CHECK-ASM: .ascii	"MismatchedCallsiteSamples"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"MjA="
+; CHECK-ASM: .byte	20
+; CHECK-ASM: .ascii	"TotalFuncHashSamples"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"NTA="
+; CHECK-ASM: .byte	17
+; CHECK-ASM: .ascii	"TotalProfiledFunc"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"Mw=="
+; CHECK-ASM: .byte	21
+; CHECK-ASM: .ascii	"NumMismatchedFuncHash"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"MQ=="
+; CHECK-ASM: .byte	25
+; CHECK-ASM: .ascii	"MismatchedFuncHashSamples"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"MTA="
+; CHECK-ASM: .byte	31
+; CHECK-ASM: .ascii	"PostMatchNumMismatchedCallsites"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"Mg=="
+; CHECK-ASM: .byte	34
+; CHECK-ASM: .ascii	"PostMatchMismatchedCallsiteSamples"
+; CHECK-ASM: .byte	4
+; CHECK-ASM: .ascii	"MjA="
 
 ; CHECK-NESTED: (1/2) of functions' profile are invalid and (211/311) of samples are discarded due to function hash mismatch.
 



More information about the flang-commits mailing list