[llvm] [SampleFDO] Improve stale profile matching by diff algorithm (PR #87375)

via llvm-commits llvm-commits at lists.llvm.org
Wed May 8 22:51:00 PDT 2024


================
@@ -116,52 +118,107 @@ void SampleProfileMatcher::findProfileAnchors(
     const auto &CalleeMap = I.second;
     for (const auto &I : CalleeMap) {
       auto Ret =
-          ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
+          ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
       Ret.first->second.insert(I.first);
     }
   }
-}
 
-// Call target name anchor based profile fuzzy matching.
-// Input:
-// For IR locations, the anchor is the callee name of direct callsite; For
-// profile locations, it's the call target name for BodySamples or inlinee's
-// profile name for CallsiteSamples.
-// Matching heuristic:
-// First match all the anchors in lexical order, then split the non-anchor
-// locations between the two anchors evenly, first half are matched based on the
-// start anchor, second half are matched based on the end anchor.
-// For example, given:
-// IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
-// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
-// The matching gives:
-//   [1,    2(foo), 3,  5,  6(bar), 7]
-//    |     |       |   |     |     |
-//   [1, 2, 3(foo), 4,  7,  8(bar), 9]
-// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
-void SampleProfileMatcher::runStaleProfileMatching(
-    const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors,
-    LocToLocMap &IRToProfileLocationMap) {
-  LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
-                    << "\n");
-  assert(IRToProfileLocationMap.empty() &&
-         "Run stale profile matching only once per function");
-
-  std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
-  for (const auto &I : ProfileAnchors) {
+  for (const auto &I : ProfileCallsites) {
     const auto &Loc = I.first;
     const auto &Callees = I.second;
-    // Filter out possible indirect calls, use direct callee name as anchor.
     if (Callees.size() == 1) {
-      FunctionId CalleeName = *Callees.begin();
-      const auto &Candidates = CalleeToCallsitesMap.try_emplace(
-          CalleeName, std::set<LineLocation>());
-      Candidates.first->second.insert(Loc);
+      auto CalleeName = *Callees.begin();
+      ProfileAnchors.emplace(Loc, CalleeName);
+    } else if (Callees.size() > 1) {
+      // use a dummy name(UnknownIndirectCallee) for unknown indrect callee
+      // name.
+      ProfileAnchors.emplace(Loc, FunctionId(UnknownIndirectCallee));
+    }
+  }
+}
+
+LocToLocMap SampleProfileMatcher::longestCommonSequence(
+    const std::vector<Anchor> &AnchorVec1,
+    const std::vector<Anchor> &AnchorVec2) const {
+  int32_t Size1 = AnchorVec1.size(), Size2 = AnchorVec2.size(),
+          MaxDepth = Size1 + Size2;
+  auto Index = [&](int32_t I) { return I + MaxDepth; };
+
+  LocToLocMap EqualLocations;
+  if (MaxDepth == 0)
+    return EqualLocations;
+
+  // Backtrack the SES result.
+  auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
+                       const std::vector<Anchor> &AnchorVec1,
+                       const std::vector<Anchor> &AnchorVec2,
+                       LocToLocMap &EqualLocations) {
+    int32_t X = Size1, Y = Size2;
+    for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
+      const auto &P = Trace[Depth];
+      int32_t K = X - Y;
+      int32_t PrevK = K;
+      if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
+        PrevK = K + 1;
+      else
+        PrevK = K - 1;
+
+      int32_t PrevX = P[Index(PrevK)];
+      int32_t PrevY = PrevX - PrevK;
+      while (X > PrevX && Y > PrevY) {
+        X--;
+        Y--;
+        EqualLocations.insert({AnchorVec1[X].first, AnchorVec2[Y].first});
+      }
+
+      if (Depth == 0)
+        break;
+
+      if (Y == PrevY)
+        X--;
+      else if (X == PrevX)
+        Y--;
+      X = PrevX;
+      Y = PrevY;
+    }
+  };
+
+  // The greedy LCS/SES algorithm.
+
+  // An array contains the endpoints of the furthest reaching D-paths.
+  std::vector<int32_t> V(2 * MaxDepth + 1, -1);
+  V[Index(1)] = 0;
+  // Trace is used to backtrack the SES result.
+  std::vector<std::vector<int32_t>> Trace;
+  for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
+    Trace.push_back(V);
+    for (int32_t K = -Depth; K <= Depth; K += 2) {
+      int32_t X = 0, Y = 0;
+      if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
+        X = V[Index(K + 1)];
+      else
+        X = V[Index(K - 1)] + 1;
+      Y = X - K;
+      while (X < Size1 && Y < Size2 &&
+             AnchorVec1[X].second == AnchorVec2[Y].second)
+        X++, Y++;
+
+      V[Index(K)] = X;
+
+      if (X >= Size1 && Y >= Size2) {
+        // Length of an SES is D.
+        Backtrack(Trace, AnchorVec1, AnchorVec2, EqualLocations);
+        return EqualLocations;
+      }
     }
   }
+  // Length of an SES is greater than MaxDepth.
+  return EqualLocations;
+}
 
+void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
----------------
WenleiHe wrote:

This might have fallen through the cracks.

> For "WriteResults", here it writes the matching results to IRToProfileLocationMap.

I mean, of course the result will be persisted somewhere; is that significant enough to include in the name?

Similarly, we don't say "readAnchorsAndMatchNonAnchorAndWriteResults" just because it reads anchors, right? :)

https://github.com/llvm/llvm-project/pull/87375


More information about the llvm-commits mailing list