[llvm] [CSSPGO] Compute and report profile matching recovered callsites and samples (PR #79090)

via llvm-commits llvm-commits at lists.llvm.org
Wed Jan 31 22:52:00 PST 2024


================
@@ -2443,53 +2372,222 @@ void SampleProfileMatcher::runOnFunction(const Function &F) {
   std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
   findProfileAnchors(*FSFlattened, ProfileAnchors);
 
-  // Detect profile mismatch for profile staleness metrics report.
-  // Skip reporting the metrics for imported functions.
-  if (!GlobalValue::isAvailableExternallyLinkage(F.getLinkage()) &&
-      (ReportProfileStaleness || PersistProfileStaleness)) {
-    // Use top-level nested FS for counting profile mismatch metrics since
-    // currently once a callsite is mismatched, all its children profiles are
-    // dropped.
-    if (const auto *FS = Reader.getSamplesFor(F))
-      countProfileMismatches(F, *FS, IRAnchors, ProfileAnchors);
-  }
+  // Compute the callsite match states for profile staleness report.
+  if (ReportProfileStaleness || PersistProfileStaleness)
+    computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors, LocToLocMap());
 
   // Run profile matching for checksum mismatched profile, currently only
   // support for pseudo-probe.
   if (SalvageStaleProfile && FunctionSamples::ProfileIsProbeBased &&
       !ProbeManager->profileIsValid(F, *FSFlattened)) {
     // The matching result will be saved to IRToProfileLocationMap, create a new
     // map for each function.
+    auto &IRToProfileLocationMap = getIRToProfileLocationMap(F);
     runStaleProfileMatching(F, IRAnchors, ProfileAnchors,
-                            getIRToProfileLocationMap(F));
+                            IRToProfileLocationMap);
+    // Find and update callsite match states after matching.
+    if ((ReportProfileStaleness || PersistProfileStaleness) &&
+        !IRToProfileLocationMap.empty())
+      computeCallsiteMatchStates(F, IRAnchors, ProfileAnchors,
+                                 IRToProfileLocationMap);
   }
 }
 
-void SampleProfileMatcher::runOnModule() {
-  ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
-                                   FunctionSamples::ProfileIsCS);
-  for (auto &F : M) {
-    if (F.isDeclaration() || !F.hasFnAttribute("use-sample-profile"))
+void SampleProfileMatcher::computeCallsiteMatchStates(
+    const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+    const std::map<LineLocation, std::unordered_set<FunctionId>>
+        &ProfileAnchors,
+    const LocToLocMap &IRToProfileLocationMap) {
+  // Use the matching result to determine if it's in post-match phase.
+  bool IsPostMatch = !IRToProfileLocationMap.empty();
+  auto &MismatchedCallsites =
+      FuncCallsiteMatchStates[FunctionSamples::getCanonicalFnName(F.getName())];
+
+  auto MapIRLocToProfileLoc = [&](const LineLocation &IRLoc) {
+    const auto &ProfileLoc = IRToProfileLocationMap.find(IRLoc);
+    if (ProfileLoc != IRToProfileLocationMap.end())
+      return ProfileLoc->second;
+    else
+      return IRLoc;
+  };
+
+  std::set<LineLocation> MatchedCallsites;
+  for (const auto &I : IRAnchors) {
+    // In post-match, use the matching result to remap the current IR callsite.
+    const auto &Loc = MapIRLocToProfileLoc(I.first);
+    const auto &IRCalleeName = I.second;
+    const auto &It = ProfileAnchors.find(Loc);
+    if (It == ProfileAnchors.end())
       continue;
-    runOnFunction(F);
+    const auto &Callees = It->second;
+
+    // Since an indirect call does not have a CalleeName, check conservatively
+    // whether the location is a callsite location in the profile. This reduces
+    // the number of false positives, since otherwise all the indirect call
+    // samples would be reported as mismatching.
+    if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
+      MatchedCallsites.insert(Loc);
+    // TODO : Ideally, we should ensure it's a direct callsite location(Callees
+    // size is 1). However, there may be a bug for profile merge(like ODR
+    // violation) that causes the callees size to be more than 1. After we fix
+    // the bug, we can remove this check.
+    else if (Callees.count(getRepInFormat(IRCalleeName)))
+      MatchedCallsites.insert(Loc);
+  }
+
+  // Check if there are any callsites in the profile that do not match any IR
+  // callsite; those callsite samples will be discarded.
+  for (const auto &I : ProfileAnchors) {
+    const auto &Loc = I.first;
+    [[maybe_unused]] const auto &Callees = I.second;
+    assert(!Callees.empty() && "Callees should not be empty");
+    if (IsPostMatch) {
+      if (MatchedCallsites.count(Loc)) {
+        auto It = MismatchedCallsites.find(Loc);
+        if (It != MismatchedCallsites.end() &&
+            It->second == MatchState::Mismatched)
+          MismatchedCallsites.emplace(Loc, MatchState::Recovered);
+      } else
+        MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
+    } else {
+      if (MatchedCallsites.count(Loc))
+        MismatchedCallsites.emplace(Loc, MatchState::Matched);
+      else
+        MismatchedCallsites.emplace(Loc, MatchState::Mismatched);
+    }
+  }
+}
+
+void SampleProfileMatcher::countMismatchedFuncSamples(
+    const FunctionSamples &FS) {
+  const auto *FuncDesc = ProbeManager->getDesc(FS.getGUID());
+  // Skip the function that is external or renamed.
+  if (!FuncDesc)
+    return;
+
+  if (ProbeManager->profileIsHashMismatched(*FuncDesc, FS)) {
+    MismatchedFunctionSamples += FS.getTotalSamples();
+    return;
+  }
+  for (const auto &I : FS.getCallsiteSamples())
+    for (const auto &CS : I.second)
+      countMismatchedFuncSamples(CS.second);
+}
+
+void SampleProfileMatcher::countMismatchedCallsiteSamples(
+    const FunctionSamples &FS) {
+  auto It = FuncCallsiteMatchStates.find(FS.getFuncName());
+  // Skip it if no mismatched callsite or this is an external function.
+  if (It == FuncCallsiteMatchStates.end() || It->second.empty())
+    return;
+  const auto &MismatchCallsites = It->second;
+
+  auto IsCallsiteMismatched = [&](const LineLocation &Loc) {
+    auto It = MismatchCallsites.find(Loc);
+    if (It == MismatchCallsites.end())
+      return false;
+    return It->second == MatchState::Mismatched;
+  };
+
+  auto CountSamples = [&](const LineLocation &Loc, uint64_t Samples) {
+    auto It = MismatchCallsites.find(Loc);
+    if (It == MismatchCallsites.end())
+      return;
+    if (It->second == MatchState::Mismatched)
+      MismatchedCallsiteSamples += Samples;
+    else if (It->second == MatchState::Recovered)
+      RecoveredCallsiteSamples += Samples;
+  };
+
+  for (const auto &I : FS.getBodySamples())
+    CountSamples(I.first, I.second.getSamples());
+
+  for (const auto &I : FS.getCallsiteSamples()) {
+    uint64_t Samples = 0;
+    for (const auto &CS : I.second)
+      Samples += CS.second.getTotalSamples();
+
+    CountSamples(I.first, Samples);
+
+    if (IsCallsiteMismatched(I.first))
+      continue;
+
+    // Count mismatched samples for matched inlines.
+    for (const auto &CS : I.second)
+      countMismatchedCallsiteSamples(CS.second);
+  }
----------------
WenleiHe wrote:

Suggest an alternative structure to make the flow more intuitive:

```
if (IsCallsiteMismatched(I.first)) {
    // comment 
    for (const auto &CS : I.second)
      Samples += CS.second.getTotalSamples();
    AttributeMismatchedSamples(I.first, Samples);
} else {
   // When the current level of inlined call site matches the profiled call site, we need to go deeper along the inline tree to count mismatches from lower level inlinees.
   for (const auto &CS : I.second)
      countMismatchedCallsiteSamples(CS.second);
}
```

https://github.com/llvm/llvm-project/pull/79090


More information about the llvm-commits mailing list