[llvm] [SampleFDO] Improve stale profile matching by diff algorithm (PR #87375)

via llvm-commits llvm-commits at lists.llvm.org
Fri Apr 26 20:50:49 PDT 2024


================
@@ -149,77 +287,42 @@ void SampleProfileMatcher::runStaleProfileMatching(
   assert(IRToProfileLocationMap.empty() &&
          "Run stale profile matching only once per function");
 
-  std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
+  std::vector<Anchor> ProfileCallsiteAnchors;
   for (const auto &I : ProfileAnchors) {
     const auto &Loc = I.first;
     const auto &Callees = I.second;
     // Filter out possible indirect calls, use direct callee name as anchor.
     if (Callees.size() == 1) {
-      FunctionId CalleeName = *Callees.begin();
-      const auto &Candidates = CalleeToCallsitesMap.try_emplace(
-          CalleeName, std::set<LineLocation>());
-      Candidates.first->second.insert(Loc);
+      auto CalleeName = *Callees.begin();
+      ProfileCallsiteAnchors.emplace_back(Loc, CalleeName);
+    } else if (Callees.size() > 1) {
+      ProfileCallsiteAnchors.emplace_back(Loc,
+                                          FunctionId(UnknownIndirectCallee));
     }
   }
 
-  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
-    // Skip the unchanged location mapping to save memory.
-    if (From != To)
-      IRToProfileLocationMap.insert({From, To});
-  };
-
-  // Use function's beginning location as the initial anchor.
-  int32_t LocationDelta = 0;
-  SmallVector<LineLocation> LastMatchedNonAnchors;
+  std::vector<Anchor> IRCallsiteAnchors;
+  for (const auto &I : IRAnchors) {
+    const auto &Loc = I.first;
+    const auto &CalleeName = I.second;
+    if (CalleeName.empty())
+      continue;
+    IRCallsiteAnchors.emplace_back(Loc, FunctionId(CalleeName));
+  }
 
-  for (const auto &IR : IRAnchors) {
-    const auto &Loc = IR.first;
-    auto CalleeName = IR.second;
-    bool IsMatchedAnchor = false;
-    // Match the anchor location in lexical order.
-    if (!CalleeName.empty()) {
-      auto CandidateAnchors =
-          CalleeToCallsitesMap.find(getRepInFormat(CalleeName));
-      if (CandidateAnchors != CalleeToCallsitesMap.end() &&
-          !CandidateAnchors->second.empty()) {
-        auto CI = CandidateAnchors->second.begin();
-        const auto Candidate = *CI;
-        CandidateAnchors->second.erase(CI);
-        InsertMatching(Loc, Candidate);
-        LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
-                          << " is matched from " << Loc << " to " << Candidate
-                          << "\n");
-        LocationDelta = Candidate.LineOffset - Loc.LineOffset;
-
-        // Match backwards for non-anchor locations.
-        // The locations in LastMatchedNonAnchors have been matched forwards
-        // based on the previous anchor, spilt it evenly and overwrite the
-        // second half based on the current anchor.
-        for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
-             I < LastMatchedNonAnchors.size(); I++) {
-          const auto &L = LastMatchedNonAnchors[I];
-          uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
-          LineLocation Candidate(CandidateLineOffset, L.Discriminator);
-          InsertMatching(L, Candidate);
-          LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
-                            << " to " << Candidate << "\n");
-        }
+  if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
+    return;
 
-        IsMatchedAnchor = true;
-        LastMatchedNonAnchors.clear();
-      }
-    }
+  // Use the diff algorithm to find the LCS/SES, the resulting equal locations
+  // from IR to Profile are used as anchor to match other locations. Note that
+  // here use IR anchor as base(A) to align with the order of
+  // IRToProfileLocationMap.
+  MyersDiff Diff;
+  auto DiffRes =
----------------
WenleiHe wrote:

nit: `LocToLocMap MatchedAnchors = longestCommonSequence(...)`

https://github.com/llvm/llvm-project/pull/87375


More information about the llvm-commits mailing list