[llvm] [SampleFDO] Improve stale profile matching by diff algorithm (PR #87375)

Lei Wang via llvm-commits llvm-commits at lists.llvm.org
Fri May 10 19:03:48 PDT 2024


https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/87375

>From 51c8adc59dda52f30430e7283fc29d24f01c02fd Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 1 Apr 2024 10:04:47 -0700
Subject: [PATCH 1/8] [SampleFDO] Use Myers diff for stale profile matching

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  55 ++++-
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 223 +++++++++++++-----
 llvm/unittests/Transforms/IPO/CMakeLists.txt  |   2 +
 .../IPO/SampleProfileMatcherTests.cpp         | 134 +++++++++++
 4 files changed, 349 insertions(+), 65 deletions(-)
 create mode 100644 llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 7ae6194da7c9c..44335274239b6 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -19,6 +19,53 @@
 
 namespace llvm {
 
+// Callsite location based matching anchor.
+struct Anchor {
+  LineLocation Loc;
+  FunctionId FuncId;
+
+  Anchor(const LineLocation &Loc, const FunctionId &FuncId)
+      : Loc(Loc), FuncId(FuncId) {}
+  bool operator==(const Anchor &Other) const {
+    return this->FuncId == Other.FuncId;
+  }
+};
+
+// This class implements the Myers diff algorithm used for stale profile
+// matching. The algorithm provides a simple and efficient way to find the
+// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
+// sequences. For more details, refer to the paper 'An O(ND) Difference
+// Algorithm and Its Variations' by Eugene W. Myers.
+// In the scenario of profile fuzzy matching, the two sequences are the IR
+// callsite anchors and profile callsite anchors. The subsequence equivalent
+// parts from the resulting SES are used to remap the IR locations to the
+// profile locations.
+class MyersDiff {
+public:
+  struct DiffResult {
+    LocToLocMap EqualLocations;
+    // New IR locations that are inserted in the new version.
+    std::vector<LineLocation> Insertions;
+    // Old Profile locations that are deleted in the new version.
+    std::vector<LineLocation> Deletions;
+    void addEqualLocations(const LineLocation &IRLoc,
+                           const LineLocation &ProfLoc) {
+      EqualLocations.insert({IRLoc, ProfLoc});
+    }
+    void addInsertion(const LineLocation &IRLoc) {
+      Insertions.push_back(IRLoc);
+    }
+    void addDeletion(const LineLocation &ProfLoc) {
+      Deletions.push_back(ProfLoc);
+    }
+  };
+
+  // The basic greedy version of Myers's algorithm. Refer to page 6 of the
+  // original paper.
+  DiffResult shortestEdit(const std::vector<Anchor> &A,
+                          const std::vector<Anchor> &B) const;
+};
+
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
   Module &M;
@@ -27,8 +74,8 @@ class SampleProfileMatcher {
   const ThinOrFullLTOPhase LTOPhase;
   SampleProfileMap FlattenedProfiles;
   // For each function, the matcher generates a map, of which each entry is a
-  // mapping from the source location of current build to the source location in
-  // the profile.
+  // mapping from the source location of current build to the source location
+  // in the profile.
   StringMap<LocToLocMap> FuncMappings;
 
   // Match state for an anchor/callsite.
@@ -143,6 +190,10 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
+  void matchNonAnchorAndWriteResults(
+      const LocToLocMap &AnchorMatchings,
+      const std::map<LineLocation, StringRef> &IRAnchors,
+      LocToLocMap &IRToProfileLocationMap);
   void runStaleProfileMatching(
       const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
       const std::map<LineLocation, std::unordered_set<FunctionId>>
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 1ca89e0091daf..139a1636a7cbd 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -122,15 +122,149 @@ void SampleProfileMatcher::findProfileAnchors(
   }
 }
 
+MyersDiff::DiffResult
+MyersDiff::shortestEdit(const std::vector<Anchor> &A,
+                        const std::vector<Anchor> &B) const {
+  int32_t N = A.size(), M = B.size(), Max = N + M;
+  auto Index = [&](int32_t I) { return I + Max; };
+
+  DiffResult Diff;
+  if (Max == 0)
+    return Diff;
+
+  // Backtrack the SES result.
+  auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
+                       const std::vector<Anchor> &A,
+                       const std::vector<Anchor> &B) {
+    int32_t X = N, Y = M;
+    for (int32_t D = Trace.size() - 1; X > 0 || Y > 0; D--) {
+      const auto &P = Trace[D];
+      int32_t K = X - Y;
+      int32_t PrevK = K;
+      if (K == -D || (K != D && P[Index(K - 1)] < P[Index(K + 1)]))
+        PrevK = K + 1;
+      else
+        PrevK = K - 1;
+
+      int32_t PrevX = P[Index(PrevK)];
+      int32_t PrevY = PrevX - PrevK;
+      while (X > PrevX && Y > PrevY) {
+        X--;
+        Y--;
+        Diff.addEqualLocations(A[X].Loc, B[Y].Loc);
+      }
+
+      if (D == 0)
+        break;
+
+      if (Y == PrevY) {
+        X--;
+        Diff.addInsertion(A[X].Loc);
+      } else if (X == PrevX) {
+        Y--;
+        Diff.addDeletion(B[Y].Loc);
+      }
+      X = PrevX;
+      Y = PrevY;
+    }
+  };
+
+  // The greedy LCS/SES algorithm.
+  std::vector<int32_t> V(2 * Max + 1, -1);
+  V[Index(1)] = 0;
+  std::vector<std::vector<int32_t>> Trace;
+  for (int32_t D = 0; D <= Max; D++) {
+    Trace.push_back(V);
+    for (int32_t K = -D; K <= D; K += 2) {
+      int32_t X = 0, Y = 0;
+      if (K == -D || (K != D && V[Index(K - 1)] < V[Index(K + 1)]))
+        X = V[Index(K + 1)];
+      else
+        X = V[Index(K - 1)] + 1;
+      Y = X - K;
+      while (X < N && Y < M && A[X] == B[Y])
+        X++, Y++;
+
+      V[Index(K)] = X;
+
+      if (X >= N && Y >= M) {
+        // Length of an SES is D.
+        Backtrack(Trace, A, B);
+        return Diff;
+      }
+    }
+  }
+  // Length of an SES is greater than Max.
+  return Diff;
+}
+
+void SampleProfileMatcher::matchNonAnchorAndWriteResults(
+    const LocToLocMap &AnchorMatchings,
+    const std::map<LineLocation, StringRef> &IRAnchors,
+    LocToLocMap &IRToProfileLocationMap) {
+  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
+    // Skip the unchanged location mapping to save memory.
+    if (From != To)
+      IRToProfileLocationMap.insert({From, To});
+  };
+
+  // Use function's beginning location as the initial anchor.
+  int32_t LocationDelta = 0;
+  SmallVector<LineLocation> LastMatchedNonAnchors;
+  for (const auto &IR : IRAnchors) {
+    const auto &Loc = IR.first;
+    StringRef CalleeName = IR.second;
+    bool IsMatchedAnchor = false;
+
+    // Match the anchor location in lexical order.
+    auto R = AnchorMatchings.find(Loc);
+    if (R != AnchorMatchings.end()) {
+      const auto &Candidate = R->second;
+      InsertMatching(Loc, Candidate);
+      LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
+                        << " is matched from " << Loc << " to " << Candidate
+                        << "\n");
+      LocationDelta = Candidate.LineOffset - Loc.LineOffset;
+
+      // Match backwards for non-anchor locations.
+      // The locations in LastMatchedNonAnchors have been matched forwards
+      // based on the previous anchor, split it evenly and overwrite the
+      // second half based on the current anchor.
+      for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
+           I < LastMatchedNonAnchors.size(); I++) {
+        const auto &L = LastMatchedNonAnchors[I];
+        uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
+        LineLocation Candidate(CandidateLineOffset, L.Discriminator);
+        InsertMatching(L, Candidate);
+        LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
+                          << " to " << Candidate << "\n");
+      }
+
+      IsMatchedAnchor = true;
+      LastMatchedNonAnchors.clear();
+    }
+
+    // Match forwards for non-anchor locations.
+    if (!IsMatchedAnchor) {
+      uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
+      LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
+      InsertMatching(Loc, Candidate);
+      LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
+                        << Candidate << "\n");
+      LastMatchedNonAnchors.emplace_back(Loc);
+    }
+  }
+}
+
 // Call target name anchor based profile fuzzy matching.
 // Input:
 // For IR locations, the anchor is the callee name of direct callsite; For
 // profile locations, it's the call target name for BodySamples or inlinee's
 // profile name for CallsiteSamples.
 // Matching heuristic:
-// First match all the anchors in lexical order, then split the non-anchor
-// locations between the two anchors evenly, first half are matched based on the
-// start anchor, second half are matched based on the end anchor.
+// First match all the anchors using the diff algorithm, then split the
+// non-anchor locations between the two anchors evenly, first half are matched
+// based on the start anchor, second half are matched based on the end anchor.
 // For example, given:
 // IR locations:      [1, 2(foo), 3, 5, 6(bar), 7]
 // Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
@@ -149,77 +283,40 @@ void SampleProfileMatcher::runStaleProfileMatching(
   assert(IRToProfileLocationMap.empty() &&
          "Run stale profile matching only once per function");
 
-  std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
+  std::vector<Anchor> ProfileCallsiteAnchors;
   for (const auto &I : ProfileAnchors) {
     const auto &Loc = I.first;
     const auto &Callees = I.second;
     // Filter out possible indirect calls, use direct callee name as anchor.
     if (Callees.size() == 1) {
-      FunctionId CalleeName = *Callees.begin();
-      const auto &Candidates = CalleeToCallsitesMap.try_emplace(
-          CalleeName, std::set<LineLocation>());
-      Candidates.first->second.insert(Loc);
+      auto CalleeName = *Callees.begin();
+      ProfileCallsiteAnchors.emplace_back(Loc, CalleeName);
+    } else if (Callees.size() > 1) {
+      ProfileCallsiteAnchors.emplace_back(Loc,
+                                          FunctionId(UnknownIndirectCallee));
     }
   }
 
-  auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
-    // Skip the unchanged location mapping to save memory.
-    if (From != To)
-      IRToProfileLocationMap.insert({From, To});
-  };
-
-  // Use function's beginning location as the initial anchor.
-  int32_t LocationDelta = 0;
-  SmallVector<LineLocation> LastMatchedNonAnchors;
+  std::vector<Anchor> IRCallsiteAnchors;
+  for (const auto &I : IRAnchors) {
+    const auto &Loc = I.first;
+    const auto &CalleeName = I.second;
+    if (CalleeName.empty())
+      continue;
+    IRCallsiteAnchors.emplace_back(Loc, FunctionId(CalleeName));
+  }
 
-  for (const auto &IR : IRAnchors) {
-    const auto &Loc = IR.first;
-    auto CalleeName = IR.second;
-    bool IsMatchedAnchor = false;
-    // Match the anchor location in lexical order.
-    if (!CalleeName.empty()) {
-      auto CandidateAnchors =
-          CalleeToCallsitesMap.find(getRepInFormat(CalleeName));
-      if (CandidateAnchors != CalleeToCallsitesMap.end() &&
-          !CandidateAnchors->second.empty()) {
-        auto CI = CandidateAnchors->second.begin();
-        const auto Candidate = *CI;
-        CandidateAnchors->second.erase(CI);
-        InsertMatching(Loc, Candidate);
-        LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
-                          << " is matched from " << Loc << " to " << Candidate
-                          << "\n");
-        LocationDelta = Candidate.LineOffset - Loc.LineOffset;
-
-        // Match backwards for non-anchor locations.
-        // The locations in LastMatchedNonAnchors have been matched forwards
-        // based on the previous anchor, spilt it evenly and overwrite the
-        // second half based on the current anchor.
-        for (size_t I = (LastMatchedNonAnchors.size() + 1) / 2;
-             I < LastMatchedNonAnchors.size(); I++) {
-          const auto &L = LastMatchedNonAnchors[I];
-          uint32_t CandidateLineOffset = L.LineOffset + LocationDelta;
-          LineLocation Candidate(CandidateLineOffset, L.Discriminator);
-          InsertMatching(L, Candidate);
-          LLVM_DEBUG(dbgs() << "Location is rematched backwards from " << L
-                            << " to " << Candidate << "\n");
-        }
+  if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
+    return;
 
-        IsMatchedAnchor = true;
-        LastMatchedNonAnchors.clear();
-      }
-    }
+  // Use the diff algorithm to find the SES, the resulting equal locations from
+  // IR to Profile are used as anchor to match other locations. Note that here
+  // use IR anchor as base(A) to align with the order of IRToProfileLocationMap.
+  MyersDiff Diff;
+  auto DiffRes = Diff.shortestEdit(IRCallsiteAnchors, ProfileCallsiteAnchors);
 
-    // Match forwards for non-anchor locations.
-    if (!IsMatchedAnchor) {
-      uint32_t CandidateLineOffset = Loc.LineOffset + LocationDelta;
-      LineLocation Candidate(CandidateLineOffset, Loc.Discriminator);
-      InsertMatching(Loc, Candidate);
-      LLVM_DEBUG(dbgs() << "Location is matched from " << Loc << " to "
-                        << Candidate << "\n");
-      LastMatchedNonAnchors.emplace_back(Loc);
-    }
-  }
+  matchNonAnchorAndWriteResults(DiffRes.EqualLocations, IRAnchors,
+                                IRToProfileLocationMap);
 }
 
 void SampleProfileMatcher::runOnFunction(Function &F) {
diff --git a/llvm/unittests/Transforms/IPO/CMakeLists.txt b/llvm/unittests/Transforms/IPO/CMakeLists.txt
index 4e4372179b46c..80d0eadf0cce0 100644
--- a/llvm/unittests/Transforms/IPO/CMakeLists.txt
+++ b/llvm/unittests/Transforms/IPO/CMakeLists.txt
@@ -3,6 +3,7 @@ set(LLVM_LINK_COMPONENTS
   AsmParser
   Core
   IPO
+  ProfileData
   Support
   TargetParser
   TransformUtils
@@ -13,6 +14,7 @@ add_llvm_unittest(IPOTests
   WholeProgramDevirt.cpp
   AttributorTest.cpp
   FunctionSpecializationTest.cpp
+  SampleProfileMatcherTests.cpp
   )
 
 set_property(TARGET IPOTests PROPERTY FOLDER "Tests/UnitTests/TransformsTests")
diff --git a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
new file mode 100644
index 0000000000000..37b92c80068dd
--- /dev/null
+++ b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
@@ -0,0 +1,134 @@
+//===- SampleProfileMatcherTests.cpp - SampleProfileMatcher Unit Tests -----==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
+#include "gtest/gtest.h"
+
+using namespace llvm;
+
+MyersDiff Diff;
+
+std::vector<Anchor>
+createAnchorsFromStrings(const std::vector<std::string> &SV) {
+  std::vector<Anchor> Anchors;
+  for (uint64_t I = 0; I < SV.size(); I++) {
+    Anchors.push_back(Anchor(LineLocation(I, 0), FunctionId(SV[I])));
+  }
+  return Anchors;
+}
+
+LocToLocMap
+createEqualLocations(const std::vector<std::pair<uint32_t, uint32_t>> &V) {
+  LocToLocMap LocMap;
+  for (auto P : V) {
+    LocMap.emplace(LineLocation(P.first, 0), LineLocation(P.second, 0));
+  }
+  return LocMap;
+}
+
+std::vector<LineLocation> createLocations(const std::vector<uint32_t> &V) {
+  std::vector<LineLocation> Locations;
+  for (auto I : V) {
+    Locations.emplace_back(LineLocation(I, 0));
+  }
+  return Locations;
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest1) {
+
+  std::vector<Anchor> AnchorsA;
+  std::vector<Anchor> AnchorsB;
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_TRUE(R.EqualLocations.empty());
+  EXPECT_TRUE(R.Deletions.empty());
+  EXPECT_TRUE(R.Insertions.empty());
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest2) {
+  std::vector<std::string> A({"a", "b", "c"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB;
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_TRUE(R.EqualLocations.empty());
+  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({2, 1, 0})));
+  EXPECT_TRUE(R.Deletions.empty());
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest3) {
+
+  std::vector<Anchor> AnchorsA;
+  std::vector<std::string> B({"a", "b", "c"});
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_TRUE(R.EqualLocations.empty());
+  EXPECT_TRUE(R.Insertions.empty());
+  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2, 1, 0})));
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest4) {
+  std::vector<std::string> A({"a", "b", "c"});
+  std::vector<std::string> B({"a", "b", "c"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  LocToLocMap ExpectEqualLocations =
+      createEqualLocations({{0, 0}, {1, 1}, {2, 2}});
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+  EXPECT_TRUE(R.Insertions.empty());
+  EXPECT_TRUE(R.Deletions.empty());
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest5) {
+  std::vector<std::string> A({"a", "b", "c"});
+  std::vector<std::string> B({"b", "c", "d"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  LocToLocMap ExpectEqualLocations = createEqualLocations({{1, 0}, {2, 1}});
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({0})));
+  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2})));
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest6) {
+  std::vector<std::string> A({"a", "b", "d"});
+  std::vector<std::string> B({"a", "c", "d"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  LocToLocMap ExpectEqualLocations = createEqualLocations({{0, 0}, {2, 2}});
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({1})));
+  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({1})));
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest7) {
+  std::vector<std::string> A({"a", "b", "c", "a", "b", "b", "a"});
+  std::vector<std::string> B({"c", "b", "a", "b", "a", "c"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  LocToLocMap ExpectEqualLocations =
+      createEqualLocations({{2, 0}, {3, 2}, {4, 3}, {6, 4}});
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({5, 1, 0})));
+  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({5, 1})));
+}
+
+TEST(SampleProfileMatcherTests, MyersDiffTest8) {
+  std::vector<std::string> A({"a", "c", "b", "c", "b", "d", "e"});
+  std::vector<std::string> B({"a", "b", "c", "a", "a", "b", "c", "c", "d"});
+  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
+  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
+  LocToLocMap ExpectEqualLocations =
+      createEqualLocations({{0, 0}, {2, 1}, {3, 2}, {4, 5}, {5, 8}});
+  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({6, 1})));
+  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({7, 6, 4, 3})));
+}

>From 858b04060a0e606439f49a9e0f95d2ebd50e12f6 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 25 Apr 2024 18:10:15 -0700
Subject: [PATCH 2/8] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  8 +++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 20 ++++++++----
 .../IPO/SampleProfileMatcherTests.cpp         | 32 ++++++++++++++-----
 3 files changed, 43 insertions(+), 17 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 44335274239b6..5b56638c13344 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -44,26 +44,30 @@ class MyersDiff {
 public:
   struct DiffResult {
     LocToLocMap EqualLocations;
+#ifndef NDEBUG
     // New IR locations that are inserted in the new version.
     std::vector<LineLocation> Insertions;
     // Old Profile locations that are deleted in the new version.
     std::vector<LineLocation> Deletions;
+#endif
     void addEqualLocations(const LineLocation &IRLoc,
                            const LineLocation &ProfLoc) {
       EqualLocations.insert({IRLoc, ProfLoc});
     }
+#ifndef NDEBUG
     void addInsertion(const LineLocation &IRLoc) {
       Insertions.push_back(IRLoc);
     }
     void addDeletion(const LineLocation &ProfLoc) {
       Deletions.push_back(ProfLoc);
     }
+#endif
   };
 
   // The basic greedy version of Myers's algorithm. Refer to page 6 of the
   // original paper.
-  DiffResult shortestEdit(const std::vector<Anchor> &A,
-                          const std::vector<Anchor> &B) const;
+  DiffResult longestCommonSequence(const std::vector<Anchor> &A,
+                                   const std::vector<Anchor> &B) const;
 };
 
 // Sample profile matching - fuzzy match.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 139a1636a7cbd..de81450f0da25 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -123,8 +123,8 @@ void SampleProfileMatcher::findProfileAnchors(
 }
 
 MyersDiff::DiffResult
-MyersDiff::shortestEdit(const std::vector<Anchor> &A,
-                        const std::vector<Anchor> &B) const {
+MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
+                                 const std::vector<Anchor> &B) const {
   int32_t N = A.size(), M = B.size(), Max = N + M;
   auto Index = [&](int32_t I) { return I + Max; };
 
@@ -159,10 +159,14 @@ MyersDiff::shortestEdit(const std::vector<Anchor> &A,
 
       if (Y == PrevY) {
         X--;
+#ifndef NDEBUG
         Diff.addInsertion(A[X].Loc);
+#endif
       } else if (X == PrevX) {
         Y--;
+#ifndef NDEBUG
         Diff.addDeletion(B[Y].Loc);
+#endif
       }
       X = PrevX;
       Y = PrevY;
@@ -213,7 +217,7 @@ void SampleProfileMatcher::matchNonAnchorAndWriteResults(
   SmallVector<LineLocation> LastMatchedNonAnchors;
   for (const auto &IR : IRAnchors) {
     const auto &Loc = IR.first;
-    StringRef CalleeName = IR.second;
+    [[maybe_unused]] StringRef CalleeName = IR.second;
     bool IsMatchedAnchor = false;
 
     // Match the anchor location in lexical order.
@@ -309,11 +313,13 @@ void SampleProfileMatcher::runStaleProfileMatching(
   if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
     return;
 
-  // Use the diff algorithm to find the SES, the resulting equal locations from
-  // IR to Profile are used as anchor to match other locations. Note that here
-  // use IR anchor as base(A) to align with the order of IRToProfileLocationMap.
+  // Use the diff algorithm to find the LCS/SES; the resulting equal locations
+  // from IR to Profile are used as anchors to match other locations. Note that
+  // the IR anchors are used as the base (A) to align with the order of
+  // IRToProfileLocationMap.
   MyersDiff Diff;
-  auto DiffRes = Diff.shortestEdit(IRCallsiteAnchors, ProfileCallsiteAnchors);
+  auto DiffRes =
+      Diff.longestCommonSequence(IRCallsiteAnchors, ProfileCallsiteAnchors);
 
   matchNonAnchorAndWriteResults(DiffRes.EqualLocations, IRAnchors,
                                 IRToProfileLocationMap);
diff --git a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
index 37b92c80068dd..25d5c053d3ccb 100644
--- a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
+++ b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
@@ -43,20 +43,24 @@ TEST(SampleProfileMatcherTests, MyersDiffTest1) {
 
   std::vector<Anchor> AnchorsA;
   std::vector<Anchor> AnchorsB;
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
+#ifndef NDEBUG
   EXPECT_TRUE(R.Deletions.empty());
   EXPECT_TRUE(R.Insertions.empty());
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest2) {
   std::vector<std::string> A({"a", "b", "c"});
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB;
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
+#ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({2, 1, 0})));
   EXPECT_TRUE(R.Deletions.empty());
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest3) {
@@ -64,10 +68,12 @@ TEST(SampleProfileMatcherTests, MyersDiffTest3) {
   std::vector<Anchor> AnchorsA;
   std::vector<std::string> B({"a", "b", "c"});
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
+#ifndef NDEBUG
   EXPECT_TRUE(R.Insertions.empty());
   EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2, 1, 0})));
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest4) {
@@ -77,10 +83,12 @@ TEST(SampleProfileMatcherTests, MyersDiffTest4) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{0, 0}, {1, 1}, {2, 2}});
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+#ifndef NDEBUG
   EXPECT_TRUE(R.Insertions.empty());
   EXPECT_TRUE(R.Deletions.empty());
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest5) {
@@ -89,10 +97,12 @@ TEST(SampleProfileMatcherTests, MyersDiffTest5) {
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations = createEqualLocations({{1, 0}, {2, 1}});
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+#ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({0})));
   EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2})));
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest6) {
@@ -101,10 +111,12 @@ TEST(SampleProfileMatcherTests, MyersDiffTest6) {
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations = createEqualLocations({{0, 0}, {2, 2}});
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+#ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({1})));
   EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({1})));
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest7) {
@@ -114,10 +126,12 @@ TEST(SampleProfileMatcherTests, MyersDiffTest7) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{2, 0}, {3, 2}, {4, 3}, {6, 4}});
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+#ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({5, 1, 0})));
   EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({5, 1})));
+#endif
 }
 
 TEST(SampleProfileMatcherTests, MyersDiffTest8) {
@@ -127,8 +141,10 @@ TEST(SampleProfileMatcherTests, MyersDiffTest8) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{0, 0}, {2, 1}, {3, 2}, {4, 5}, {5, 8}});
-  auto R = Diff.shortestEdit(AnchorsA, AnchorsB);
+  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
+#ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({6, 1})));
   EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({7, 6, 4, 3})));
+#endif
 }

>From 390efcc74a4bb8540fe57f21564529d13bd69c68 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 29 Apr 2024 16:10:35 -0700
Subject: [PATCH 3/8] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     | 14 +++---
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 47 ++++++++++++-------
 .../IPO/SampleProfileMatcherTests.cpp         | 16 +++----
 3 files changed, 46 insertions(+), 31 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 5b56638c13344..6de49657885c4 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -39,7 +39,8 @@ struct Anchor {
 // In the scenario of profile fuzzy matching, the two sequences are the IR
 // callsite anchors and profile callsite anchors. The subsequence equivalent
 // parts from the resulting SES are used to remap the IR locations to the
-// profile locations.
+// profile locations. As the number of function callsites is usually not big,
+// we currently just implement the basic greedy version (page 6 of the paper).
 class MyersDiff {
 public:
   struct DiffResult {
@@ -64,10 +65,8 @@ class MyersDiff {
 #endif
   };
 
-  // The basic greedy version of Myers's algorithm. Refer to page 6 of the
-  // original paper.
-  DiffResult longestCommonSequence(const std::vector<Anchor> &A,
-                                   const std::vector<Anchor> &B) const;
+  DiffResult shortestEditScript(const std::vector<Anchor> &A,
+                                const std::vector<Anchor> &B) const;
 };
 
 // Sample profile matching - fuzzy match.
@@ -194,7 +193,10 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
-  void matchNonAnchorAndWriteResults(
+  LocToLocMap longestCommonSequence(
+      const std::vector<Anchor> &IRCallsiteAnchors,
+      const std::vector<Anchor> &ProfileCallsiteAnchors) const;
+  void matchNonCallsiteLocsAndWriteResults(
       const LocToLocMap &AnchorMatchings,
       const std::map<LineLocation, StringRef> &IRAnchors,
       LocToLocMap &IRToProfileLocationMap);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index de81450f0da25..d9284717196f4 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -123,8 +123,8 @@ void SampleProfileMatcher::findProfileAnchors(
 }
 
 MyersDiff::DiffResult
-MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
-                                 const std::vector<Anchor> &B) const {
+MyersDiff::shortestEditScript(const std::vector<Anchor> &A,
+                              const std::vector<Anchor> &B) const {
   int32_t N = A.size(), M = B.size(), Max = N + M;
   auto Index = [&](int32_t I) { return I + Max; };
 
@@ -135,7 +135,7 @@ MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
   // Backtrack the SES result.
   auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
                        const std::vector<Anchor> &A,
-                       const std::vector<Anchor> &B) {
+                       const std::vector<Anchor> &B, DiffResult &Diff) {
     int32_t X = N, Y = M;
     for (int32_t D = Trace.size() - 1; X > 0 || Y > 0; D--) {
       const auto &P = Trace[D];
@@ -174,8 +174,11 @@ MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
   };
 
   // The greedy LCS/SES algorithm.
+
+  // An array contains the endpoints of the furthest reaching D-paths.
   std::vector<int32_t> V(2 * Max + 1, -1);
   V[Index(1)] = 0;
+  // Trace is used to backtrack the SES result.
   std::vector<std::vector<int32_t>> Trace;
   for (int32_t D = 0; D <= Max; D++) {
     Trace.push_back(V);
@@ -193,7 +196,7 @@ MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
 
       if (X >= N && Y >= M) {
         // Length of an SES is D.
-        Backtrack(Trace, A, B);
+        Backtrack(Trace, A, B, Diff);
         return Diff;
       }
     }
@@ -202,8 +205,18 @@ MyersDiff::longestCommonSequence(const std::vector<Anchor> &A,
   return Diff;
 }
 
-void SampleProfileMatcher::matchNonAnchorAndWriteResults(
-    const LocToLocMap &AnchorMatchings,
+LocToLocMap SampleProfileMatcher::longestCommonSequence(
+    const std::vector<Anchor> &IRCallsiteAnchors,
+    const std::vector<Anchor> &ProfileCallsiteAnchors) const {
+  // Use the diff algorithm to find the LCS/SES, the resulting equal locations
+  // from IR to Profile are used as anchor to match other locations.
+  auto SES =
+      MyersDiff().shortestEditScript(IRCallsiteAnchors, ProfileCallsiteAnchors);
+  return SES.EqualLocations;
+}
+
+void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
+    const LocToLocMap &MatchedAnchors,
     const std::map<LineLocation, StringRef> &IRAnchors,
     LocToLocMap &IRToProfileLocationMap) {
   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
@@ -221,8 +234,8 @@ void SampleProfileMatcher::matchNonAnchorAndWriteResults(
     bool IsMatchedAnchor = false;
 
     // Match the anchor location in lexical order.
-    auto R = AnchorMatchings.find(Loc);
-    if (R != AnchorMatchings.end()) {
+    auto R = MatchedAnchors.find(Loc);
+    if (R != MatchedAnchors.end()) {
       const auto &Candidate = R->second;
       InsertMatching(Loc, Candidate);
       LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
@@ -313,16 +326,16 @@ void SampleProfileMatcher::runStaleProfileMatching(
   if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
     return;
 
-  // Use the diff algorithm to find the LCS/SES, the resulting equal locations
-  // from IR to Profile are used as anchor to match other locations. Note that
-  // here use IR anchor as base(A) to align with the order of
-  // IRToProfileLocationMap.
-  MyersDiff Diff;
-  auto DiffRes =
-      Diff.longestCommonSequence(IRCallsiteAnchors, ProfileCallsiteAnchors);
+  // Match the callsite anchors by finding the longest common subsequence
+  // between IR and profile. Note that we need to use IR anchor as base(A side)
+  // to align with the order of IRToProfileLocationMap.
+  LocToLocMap MatchedAnchors =
+      longestCommonSequence(IRCallsiteAnchors, ProfileCallsiteAnchors);
 
-  matchNonAnchorAndWriteResults(DiffRes.EqualLocations, IRAnchors,
-                                IRToProfileLocationMap);
+  // Match the non-callsite locations and write the result to
+  // IRToProfileLocationMap.
+  matchNonCallsiteLocsAndWriteResults(MatchedAnchors, IRAnchors,
+                                      IRToProfileLocationMap);
 }
 
 void SampleProfileMatcher::runOnFunction(Function &F) {
diff --git a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
index 25d5c053d3ccb..9bdd45f4ae155 100644
--- a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
+++ b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
@@ -43,7 +43,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest1) {
 
   std::vector<Anchor> AnchorsA;
   std::vector<Anchor> AnchorsB;
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
 #ifndef NDEBUG
   EXPECT_TRUE(R.Deletions.empty());
@@ -55,7 +55,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest2) {
   std::vector<std::string> A({"a", "b", "c"});
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB;
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
 #ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({2, 1, 0})));
@@ -68,7 +68,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest3) {
   std::vector<Anchor> AnchorsA;
   std::vector<std::string> B({"a", "b", "c"});
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_TRUE(R.EqualLocations.empty());
 #ifndef NDEBUG
   EXPECT_TRUE(R.Insertions.empty());
@@ -83,7 +83,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest4) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{0, 0}, {1, 1}, {2, 2}});
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
 #ifndef NDEBUG
   EXPECT_TRUE(R.Insertions.empty());
@@ -97,7 +97,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest5) {
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations = createEqualLocations({{1, 0}, {2, 1}});
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
 #ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({0})));
@@ -111,7 +111,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest6) {
   std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations = createEqualLocations({{0, 0}, {2, 2}});
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
 #ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({1})));
@@ -126,7 +126,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest7) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{2, 0}, {3, 2}, {4, 3}, {6, 4}});
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
 #ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({5, 1, 0})));
@@ -141,7 +141,7 @@ TEST(SampleProfileMatcherTests, MyersDiffTest8) {
   std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
   LocToLocMap ExpectEqualLocations =
       createEqualLocations({{0, 0}, {2, 1}, {3, 2}, {4, 5}, {5, 8}});
-  auto R = Diff.longestCommonSequence(AnchorsA, AnchorsB);
+  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
   EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
 #ifndef NDEBUG
   EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({6, 1})));

>From 40b03c978750995120204c417848c04b2eedfff4 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Sun, 5 May 2024 20:00:06 -0700
Subject: [PATCH 4/8] addressing feedback

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  33 ++--
 .../Transforms/IPO/SampleProfileMatcher.cpp   | 146 +++++++++---------
 2 files changed, 83 insertions(+), 96 deletions(-)

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 6de49657885c4..a95ad4f8053d9 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -26,6 +26,7 @@ struct Anchor {
 
   Anchor(const LineLocation &Loc, const FunctionId &FuncId)
       : Loc(Loc), FuncId(FuncId) {}
+  Anchor(const LineLocation &Loc, StringRef &FName) : Loc(Loc), FuncId(FName) {}
   bool operator==(const Anchor &Other) const {
     return this->FuncId == Other.FuncId;
   }
@@ -69,6 +70,8 @@ class MyersDiff {
                                 const std::vector<Anchor> &B) const;
 };
 
+using AnchorMap = std::map<LineLocation, Anchor>;
+
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
   Module &M;
@@ -145,18 +148,13 @@ class SampleProfileMatcher {
     return nullptr;
   }
   void runOnFunction(Function &F);
-  void findIRAnchors(const Function &F,
-                     std::map<LineLocation, StringRef> &IRAnchors);
-  void findProfileAnchors(
-      const FunctionSamples &FS,
-      std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors);
+  void findIRAnchors(const Function &F, AnchorMap &IRAnchors);
+  void findProfileAnchors(const FunctionSamples &FS, AnchorMap &ProfileAnchors);
   // Record the callsite match states for profile staleness report, the result
   // is saved in FuncCallsiteMatchStates.
-  void recordCallsiteMatchStates(
-      const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-      const std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors,
-      const LocToLocMap *IRToProfileLocationMap);
+  void recordCallsiteMatchStates(const Function &F, const AnchorMap &IRAnchors,
+                                 const AnchorMap &ProfileAnchors,
+                                 const LocToLocMap *IRToProfileLocationMap);
 
   bool isMismatchState(const enum MatchState &State) {
     return State == MatchState::InitialMismatch ||
@@ -196,15 +194,12 @@ class SampleProfileMatcher {
   LocToLocMap longestCommonSequence(
       const std::vector<Anchor> &IRCallsiteAnchors,
       const std::vector<Anchor> &ProfileCallsiteAnchors) const;
-  void matchNonCallsiteLocsAndWriteResults(
-      const LocToLocMap &AnchorMatchings,
-      const std::map<LineLocation, StringRef> &IRAnchors,
-      LocToLocMap &IRToProfileLocationMap);
-  void runStaleProfileMatching(
-      const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-      const std::map<LineLocation, std::unordered_set<FunctionId>>
-          &ProfileAnchors,
-      LocToLocMap &IRToProfileLocationMap);
+  void matchNonCallsiteLocsAndWriteResults(const LocToLocMap &AnchorMatchings,
+                                           const AnchorMap &IRAnchors,
+                                           LocToLocMap &IRToProfileLocationMap);
+  void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
+                               const AnchorMap &ProfileAnchors,
+                               LocToLocMap &IRToProfileLocationMap);
   void reportOrPersistProfileStats();
 };
 } // end namespace llvm
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index d9284717196f4..251efe32cfbe9 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -24,8 +24,8 @@ extern cl::opt<bool> SalvageStaleProfile;
 extern cl::opt<bool> PersistProfileStaleness;
 extern cl::opt<bool> ReportProfileStaleness;
 
-void SampleProfileMatcher::findIRAnchors(
-    const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
+void SampleProfileMatcher::findIRAnchors(const Function &F,
+                                         AnchorMap &IRAnchors) {
   // For inlined code, recover the original callsite and callee by finding the
   // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
   // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
@@ -39,7 +39,7 @@ void SampleProfileMatcher::findIRAnchors(
 
     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
-    return std::make_pair(Callsite, CalleeName);
+    return std::make_pair(Callsite, Anchor(Callsite, CalleeName));
   };
 
   auto GetCanonicalCalleeName = [](const CallBase *CB) {
@@ -69,7 +69,8 @@ void SampleProfileMatcher::findIRAnchors(
               if (!isa<IntrinsicInst>(&I))
                 CalleeName = GetCanonicalCalleeName(CB);
             }
-            IRAnchors.emplace(LineLocation(Probe->Id, 0), CalleeName);
+            LineLocation Loc = LineLocation(Probe->Id, 0);
+            IRAnchors.emplace(Loc, Anchor(Loc, CalleeName));
           }
         }
       } else {
@@ -84,27 +85,28 @@ void SampleProfileMatcher::findIRAnchors(
         } else {
           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
-          IRAnchors.emplace(Callsite, CalleeName);
+          IRAnchors.emplace(Callsite, Anchor(Callsite, CalleeName));
         }
       }
     }
   }
 }
 
-void SampleProfileMatcher::findProfileAnchors(
-    const FunctionSamples &FS,
-    std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
+void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
+                                              AnchorMap &ProfileAnchors) {
   auto isInvalidLineOffset = [](uint32_t LineOffset) {
     return LineOffset & 0x8000;
   };
 
+  std::map<LineLocation, std::unordered_set<FunctionId>> ProfileCallsites;
+
   for (const auto &I : FS.getBodySamples()) {
     const LineLocation &Loc = I.first;
     if (isInvalidLineOffset(Loc.LineOffset))
       continue;
     for (const auto &I : I.second.getCallTargets()) {
       auto Ret =
-          ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
+          ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
       Ret.first->second.insert(I.first);
     }
   }
@@ -116,32 +118,48 @@ void SampleProfileMatcher::findProfileAnchors(
     const auto &CalleeMap = I.second;
     for (const auto &I : CalleeMap) {
       auto Ret =
-          ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
+          ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
       Ret.first->second.insert(I.first);
     }
   }
+
+  for (const auto &I : ProfileCallsites) {
+    const auto &Loc = I.first;
+    const auto &Callees = I.second;
+    if (Callees.size() == 1) {
+      auto CalleeName = *Callees.begin();
+      ProfileAnchors.emplace(Loc, Anchor(Loc, CalleeName));
+    } else if (Callees.size() > 1) {
+      // use a dummy name(UnknownIndirectCallee) for unknown indrect callee
+      // name.
+      ProfileAnchors.emplace(Loc,
+                             Anchor(Loc, FunctionId(UnknownIndirectCallee)));
+    }
+  }
 }
 
 MyersDiff::DiffResult
-MyersDiff::shortestEditScript(const std::vector<Anchor> &A,
-                              const std::vector<Anchor> &B) const {
-  int32_t N = A.size(), M = B.size(), Max = N + M;
-  auto Index = [&](int32_t I) { return I + Max; };
+MyersDiff::shortestEditScript(const std::vector<Anchor> &AnchorVec1,
+                              const std::vector<Anchor> &AnchorVec2) const {
+  int32_t Size1 = AnchorVec1.size(), Size2 = AnchorVec2.size(),
+          MaxDepth = Size1 + Size2;
+  auto Index = [&](int32_t I) { return I + MaxDepth; };
 
   DiffResult Diff;
-  if (Max == 0)
+  if (MaxDepth == 0)
     return Diff;
 
   // Backtrack the SES result.
   auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
-                       const std::vector<Anchor> &A,
-                       const std::vector<Anchor> &B, DiffResult &Diff) {
-    int32_t X = N, Y = M;
-    for (int32_t D = Trace.size() - 1; X > 0 || Y > 0; D--) {
-      const auto &P = Trace[D];
+                       const std::vector<Anchor> &AnchorVec1,
+                       const std::vector<Anchor> &AnchorVec2,
+                       DiffResult &Diff) {
+    int32_t X = Size1, Y = Size2;
+    for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
+      const auto &P = Trace[Depth];
       int32_t K = X - Y;
       int32_t PrevK = K;
-      if (K == -D || (K != D && P[Index(K - 1)] < P[Index(K + 1)]))
+      if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
         PrevK = K + 1;
       else
         PrevK = K - 1;
@@ -151,21 +169,21 @@ MyersDiff::shortestEditScript(const std::vector<Anchor> &A,
       while (X > PrevX && Y > PrevY) {
         X--;
         Y--;
-        Diff.addEqualLocations(A[X].Loc, B[Y].Loc);
+        Diff.addEqualLocations(AnchorVec1[X].Loc, AnchorVec2[Y].Loc);
       }
 
-      if (D == 0)
+      if (Depth == 0)
         break;
 
       if (Y == PrevY) {
         X--;
 #ifndef NDEBUG
-        Diff.addInsertion(A[X].Loc);
+        Diff.addInsertion(AnchorVec1[X].Loc);
 #endif
       } else if (X == PrevX) {
         Y--;
 #ifndef NDEBUG
-        Diff.addDeletion(B[Y].Loc);
+        Diff.addDeletion(AnchorVec2[Y].Loc);
 #endif
       }
       X = PrevX;
@@ -176,32 +194,32 @@ MyersDiff::shortestEditScript(const std::vector<Anchor> &A,
   // The greedy LCS/SES algorithm.
 
   // An array contains the endpoints of the furthest reaching D-paths.
-  std::vector<int32_t> V(2 * Max + 1, -1);
+  std::vector<int32_t> V(2 * MaxDepth + 1, -1);
   V[Index(1)] = 0;
   // Trace is used to backtrack the SES result.
   std::vector<std::vector<int32_t>> Trace;
-  for (int32_t D = 0; D <= Max; D++) {
+  for (int32_t Depth = 0; Depth <= MaxDepth; Depth++) {
     Trace.push_back(V);
-    for (int32_t K = -D; K <= D; K += 2) {
+    for (int32_t K = -Depth; K <= Depth; K += 2) {
       int32_t X = 0, Y = 0;
-      if (K == -D || (K != D && V[Index(K - 1)] < V[Index(K + 1)]))
+      if (K == -Depth || (K != Depth && V[Index(K - 1)] < V[Index(K + 1)]))
         X = V[Index(K + 1)];
       else
         X = V[Index(K - 1)] + 1;
       Y = X - K;
-      while (X < N && Y < M && A[X] == B[Y])
+      while (X < Size1 && Y < Size2 && AnchorVec1[X] == AnchorVec2[Y])
         X++, Y++;
 
       V[Index(K)] = X;
 
-      if (X >= N && Y >= M) {
+      if (X >= Size1 && Y >= Size2) {
         // Length of an SES is D.
-        Backtrack(Trace, A, B, Diff);
+        Backtrack(Trace, AnchorVec1, AnchorVec2, Diff);
         return Diff;
       }
     }
   }
-  // Length of an SES is greater than Max.
+  // Length of an SES is greater than MaxDepth.
   return Diff;
 }
 
@@ -216,8 +234,7 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
 }
 
 void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
-    const LocToLocMap &MatchedAnchors,
-    const std::map<LineLocation, StringRef> &IRAnchors,
+    const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
     LocToLocMap &IRToProfileLocationMap) {
   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
     // Skip the unchanged location mapping to save memory.
@@ -230,7 +247,7 @@ void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
   SmallVector<LineLocation> LastMatchedNonAnchors;
   for (const auto &IR : IRAnchors) {
     const auto &Loc = IR.first;
-    [[maybe_unused]] StringRef CalleeName = IR.second;
+    [[maybe_unused]] StringRef CalleeName = IR.second.FuncId.stringRef();
     bool IsMatchedAnchor = false;
 
     // Match the anchor location in lexical order.
@@ -291,36 +308,23 @@ void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
 //   [1, 2, 3(foo), 4,  7,  8(bar), 9]
 // The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
 void SampleProfileMatcher::runStaleProfileMatching(
-    const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors,
-    LocToLocMap &IRToProfileLocationMap) {
+    const Function &F, const AnchorMap &IRAnchors,
+    const AnchorMap &ProfileAnchors, LocToLocMap &IRToProfileLocationMap) {
   LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
                     << "\n");
   assert(IRToProfileLocationMap.empty() &&
          "Run stale profile matching only once per function");
 
   std::vector<Anchor> ProfileCallsiteAnchors;
-  for (const auto &I : ProfileAnchors) {
-    const auto &Loc = I.first;
-    const auto &Callees = I.second;
-    // Filter out possible indirect calls, use direct callee name as anchor.
-    if (Callees.size() == 1) {
-      auto CalleeName = *Callees.begin();
-      ProfileCallsiteAnchors.emplace_back(Loc, CalleeName);
-    } else if (Callees.size() > 1) {
-      ProfileCallsiteAnchors.emplace_back(Loc,
-                                          FunctionId(UnknownIndirectCallee));
-    }
-  }
+  for (const auto &I : ProfileAnchors)
+    ProfileCallsiteAnchors.emplace_back(I.second);
 
   std::vector<Anchor> IRCallsiteAnchors;
+  // Filter the non-callsite from IRAnchors.
   for (const auto &I : IRAnchors) {
-    const auto &Loc = I.first;
-    const auto &CalleeName = I.second;
-    if (CalleeName.empty())
+    if (I.second.FuncId.stringRef().empty())
       continue;
-    IRCallsiteAnchors.emplace_back(Loc, FunctionId(CalleeName));
+    IRCallsiteAnchors.emplace_back(I.second);
   }
 
   if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
@@ -352,11 +356,11 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
   // Anchors for IR. It's a map from IR location to callee name, callee name is
   // empty for non-call instruction and use a dummy name(UnknownIndirectCallee)
   // for unknown indrect callee name.
-  std::map<LineLocation, StringRef> IRAnchors;
+  AnchorMap IRAnchors;
   findIRAnchors(F, IRAnchors);
   // Anchors for profile. It's a map from callsite location to a set of callee
   // name.
-  std::map<LineLocation, std::unordered_set<FunctionId>> ProfileAnchors;
+  AnchorMap ProfileAnchors;
   findProfileAnchors(*FSFlattened, ProfileAnchors);
 
   // Compute the callsite match states for profile staleness report.
@@ -388,9 +392,8 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
 }
 
 void SampleProfileMatcher::recordCallsiteMatchStates(
-    const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
-    const std::map<LineLocation, std::unordered_set<FunctionId>>
-        &ProfileAnchors,
+    const Function &F, const AnchorMap &IRAnchors,
+    const AnchorMap &ProfileAnchors,
     const LocToLocMap *IRToProfileLocationMap) {
   bool IsPostMatch = IRToProfileLocationMap != nullptr;
   auto &CallsiteMatchStates =
@@ -411,23 +414,12 @@ void SampleProfileMatcher::recordCallsiteMatchStates(
     // After fuzzy profile matching, use the matching result to remap the
     // current IR callsite.
     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
-    const auto &IRCalleeName = I.second;
+    const auto &IRCalleeId = I.second.FuncId;
     const auto &It = ProfileAnchors.find(ProfileLoc);
     if (It == ProfileAnchors.end())
       continue;
-    const auto &Callees = It->second;
-
-    bool IsCallsiteMatched = false;
-    // Since indirect call does not have CalleeName, check conservatively if
-    // callsite in the profile is a callsite location. This is to reduce num of
-    // false positive since otherwise all the indirect call samples will be
-    // reported as mismatching.
-    if (IRCalleeName == SampleProfileMatcher::UnknownIndirectCallee)
-      IsCallsiteMatched = true;
-    else if (Callees.size() == 1 && Callees.count(getRepInFormat(IRCalleeName)))
-      IsCallsiteMatched = true;
-
-    if (IsCallsiteMatched) {
+    const auto &ProfCalleeId = It->second.FuncId;
+    if (IRCalleeId == ProfCalleeId) {
       auto It = CallsiteMatchStates.find(ProfileLoc);
       if (It == CallsiteMatchStates.end())
         CallsiteMatchStates.emplace(ProfileLoc, MatchState::InitialMatch);
@@ -444,8 +436,8 @@ void SampleProfileMatcher::recordCallsiteMatchStates(
   // IR callsites.
   for (const auto &I : ProfileAnchors) {
     const auto &Loc = I.first;
-    [[maybe_unused]] const auto &Callees = I.second;
-    assert(!Callees.empty() && "Callees should not be empty");
+    [[maybe_unused]] StringRef CalleeName = I.second.FuncId.stringRef();
+    assert(!CalleeName.empty() && "Callees should not be empty");
     auto It = CallsiteMatchStates.find(Loc);
     if (It == CallsiteMatchStates.end())
       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);

>From 10a9838d4e90732b5a45fea34eb6d802fa495456 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 8 May 2024 18:42:08 -0700
Subject: [PATCH 5/8] addressing comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  64 ++------
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  77 ++++-----
 llvm/unittests/Transforms/IPO/CMakeLists.txt  |   2 -
 .../IPO/SampleProfileMatcherTests.cpp         | 150 ------------------
 4 files changed, 42 insertions(+), 251 deletions(-)
 delete mode 100644 llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a95ad4f8053d9..c7f3b18fffd28 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -19,58 +19,8 @@
 
 namespace llvm {
 
-// Callsite location based matching anchor.
-struct Anchor {
-  LineLocation Loc;
-  FunctionId FuncId;
-
-  Anchor(const LineLocation &Loc, const FunctionId &FuncId)
-      : Loc(Loc), FuncId(FuncId) {}
-  Anchor(const LineLocation &Loc, StringRef &FName) : Loc(Loc), FuncId(FName) {}
-  bool operator==(const Anchor &Other) const {
-    return this->FuncId == Other.FuncId;
-  }
-};
-
-// This class implements the Myers diff algorithm used for stale profile
-// matching. The algorithm provides a simple and efficient way to find the
-// Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
-// sequences. For more details, refer to the paper 'An O(ND) Difference
-// Algorithm and Its Variations' by Eugene W. Myers.
-// In the scenario of profile fuzzy matching, the two sequences are the IR
-// callsite anchors and profile callsite anchors. The subsequence equivalent
-// parts from the resulting SES are used to remap the IR locations to the
-// profile locations. As the number of function callsite is usually not big, we
-// currently just implements the basic greedy version(page 6 of the paper).
-class MyersDiff {
-public:
-  struct DiffResult {
-    LocToLocMap EqualLocations;
-#ifndef NDEBUG
-    // New IR locations that are inserted in the new version.
-    std::vector<LineLocation> Insertions;
-    // Old Profile locations that are deleted in the new version.
-    std::vector<LineLocation> Deletions;
-#endif
-    void addEqualLocations(const LineLocation &IRLoc,
-                           const LineLocation &ProfLoc) {
-      EqualLocations.insert({IRLoc, ProfLoc});
-    }
-#ifndef NDEBUG
-    void addInsertion(const LineLocation &IRLoc) {
-      Insertions.push_back(IRLoc);
-    }
-    void addDeletion(const LineLocation &ProfLoc) {
-      Deletions.push_back(ProfLoc);
-    }
-#endif
-  };
-
-  DiffResult shortestEditScript(const std::vector<Anchor> &A,
-                                const std::vector<Anchor> &B) const;
-};
-
-using AnchorMap = std::map<LineLocation, Anchor>;
+using Anchor = std::pair<LineLocation, FunctionId>;
+using AnchorMap = std::map<LineLocation, FunctionId>;
 
 // Sample profile matching - fuzzy match.
 class SampleProfileMatcher {
@@ -191,6 +141,16 @@ class SampleProfileMatcher {
   }
   void distributeIRToProfileLocationMap();
   void distributeIRToProfileLocationMap(FunctionSamples &FS);
+  // This function implements the Myers diff algorithm used for stale profile
+  // matching. The algorithm provides a simple and efficient way to find the
+  // Longest Common Subsequence(LCS) or the Shortest Edit Script(SES) of two
+  // sequences. For more details, refer to the paper 'An O(ND) Difference
+  // Algorithm and Its Variations' by Eugene W. Myers.
+  // In the scenario of profile fuzzy matching, the two sequences are the IR
+  // callsite anchors and profile callsite anchors. The subsequence equivalent
+  // parts from the resulting SES are used to remap the IR locations to the
+  // profile locations. As the number of function callsites is usually not big,
+  // we currently just implement the basic greedy version (page 6 of the paper).
   LocToLocMap longestCommonSequence(
       const std::vector<Anchor> &IRCallsiteAnchors,
       const std::vector<Anchor> &ProfileCallsiteAnchors) const;
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 251efe32cfbe9..195b7ab31810c 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -39,7 +39,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F,
 
     LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
     StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
-    return std::make_pair(Callsite, Anchor(Callsite, CalleeName));
+    return std::make_pair(Callsite, FunctionId(CalleeName));
   };
 
   auto GetCanonicalCalleeName = [](const CallBase *CB) {
@@ -70,7 +70,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F,
                 CalleeName = GetCanonicalCalleeName(CB);
             }
             LineLocation Loc = LineLocation(Probe->Id, 0);
-            IRAnchors.emplace(Loc, Anchor(Loc, CalleeName));
+            IRAnchors.emplace(Loc, FunctionId(CalleeName));
           }
         }
       } else {
@@ -85,7 +85,7 @@ void SampleProfileMatcher::findIRAnchors(const Function &F,
         } else {
           LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
           StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
-          IRAnchors.emplace(Callsite, Anchor(Callsite, CalleeName));
+          IRAnchors.emplace(Callsite, FunctionId(CalleeName));
         }
       }
     }
@@ -128,32 +128,31 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
     const auto &Callees = I.second;
     if (Callees.size() == 1) {
       auto CalleeName = *Callees.begin();
-      ProfileAnchors.emplace(Loc, Anchor(Loc, CalleeName));
+      ProfileAnchors.emplace(Loc, CalleeName);
     } else if (Callees.size() > 1) {
       // use a dummy name(UnknownIndirectCallee) for unknown indrect callee
       // name.
-      ProfileAnchors.emplace(Loc,
-                             Anchor(Loc, FunctionId(UnknownIndirectCallee)));
+      ProfileAnchors.emplace(Loc, FunctionId(UnknownIndirectCallee));
     }
   }
 }
 
-MyersDiff::DiffResult
-MyersDiff::shortestEditScript(const std::vector<Anchor> &AnchorVec1,
-                              const std::vector<Anchor> &AnchorVec2) const {
+LocToLocMap SampleProfileMatcher::longestCommonSequence(
+    const std::vector<Anchor> &AnchorVec1,
+    const std::vector<Anchor> &AnchorVec2) const {
   int32_t Size1 = AnchorVec1.size(), Size2 = AnchorVec2.size(),
           MaxDepth = Size1 + Size2;
   auto Index = [&](int32_t I) { return I + MaxDepth; };
 
-  DiffResult Diff;
+  LocToLocMap EqualLocations;
   if (MaxDepth == 0)
-    return Diff;
+    return EqualLocations;
 
   // Backtrack the SES result.
   auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
                        const std::vector<Anchor> &AnchorVec1,
                        const std::vector<Anchor> &AnchorVec2,
-                       DiffResult &Diff) {
+                       LocToLocMap &EqualLocations) {
     int32_t X = Size1, Y = Size2;
     for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
       const auto &P = Trace[Depth];
@@ -169,23 +168,16 @@ MyersDiff::shortestEditScript(const std::vector<Anchor> &AnchorVec1,
       while (X > PrevX && Y > PrevY) {
         X--;
         Y--;
-        Diff.addEqualLocations(AnchorVec1[X].Loc, AnchorVec2[Y].Loc);
+        EqualLocations.insert({AnchorVec1[X].first, AnchorVec2[Y].first});
       }
 
       if (Depth == 0)
         break;
 
-      if (Y == PrevY) {
+      if (Y == PrevY)
         X--;
-#ifndef NDEBUG
-        Diff.addInsertion(AnchorVec1[X].Loc);
-#endif
-      } else if (X == PrevX) {
+      else if (X == PrevX)
         Y--;
-#ifndef NDEBUG
-        Diff.addDeletion(AnchorVec2[Y].Loc);
-#endif
-      }
       X = PrevX;
       Y = PrevY;
     }
@@ -207,30 +199,21 @@ MyersDiff::shortestEditScript(const std::vector<Anchor> &AnchorVec1,
       else
         X = V[Index(K - 1)] + 1;
       Y = X - K;
-      while (X < Size1 && Y < Size2 && AnchorVec1[X] == AnchorVec2[Y])
+      while (X < Size1 && Y < Size2 &&
+             AnchorVec1[X].second == AnchorVec2[Y].second)
         X++, Y++;
 
       V[Index(K)] = X;
 
       if (X >= Size1 && Y >= Size2) {
         // Length of an SES is D.
-        Backtrack(Trace, AnchorVec1, AnchorVec2, Diff);
-        return Diff;
+        Backtrack(Trace, AnchorVec1, AnchorVec2, EqualLocations);
+        return EqualLocations;
       }
     }
   }
   // Length of an SES is greater than MaxDepth.
-  return Diff;
-}
-
-LocToLocMap SampleProfileMatcher::longestCommonSequence(
-    const std::vector<Anchor> &IRCallsiteAnchors,
-    const std::vector<Anchor> &ProfileCallsiteAnchors) const {
-  // Use the diff algorithm to find the LCS/SES, the resulting equal locations
-  // from IR to Profile are used as anchor to match other locations.
-  auto SES =
-      MyersDiff().shortestEditScript(IRCallsiteAnchors, ProfileCallsiteAnchors);
-  return SES.EqualLocations;
+  return EqualLocations;
 }
 
 void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
@@ -247,7 +230,7 @@ void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
   SmallVector<LineLocation> LastMatchedNonAnchors;
   for (const auto &IR : IRAnchors) {
     const auto &Loc = IR.first;
-    [[maybe_unused]] StringRef CalleeName = IR.second.FuncId.stringRef();
+    [[maybe_unused]] StringRef CalleeName = IR.second.stringRef();
     bool IsMatchedAnchor = false;
 
     // Match the anchor location in lexical order.
@@ -315,26 +298,26 @@ void SampleProfileMatcher::runStaleProfileMatching(
   assert(IRToProfileLocationMap.empty() &&
          "Run stale profile matching only once per function");
 
-  std::vector<Anchor> ProfileCallsiteAnchors;
+  std::vector<Anchor> FilteredProfileAnchorList;
   for (const auto &I : ProfileAnchors)
-    ProfileCallsiteAnchors.emplace_back(I.second);
+    FilteredProfileAnchorList.emplace_back(I);
 
-  std::vector<Anchor> IRCallsiteAnchors;
+  std::vector<Anchor> FilteredIRAnchorsList;
   // Filter the non-callsite from IRAnchors.
   for (const auto &I : IRAnchors) {
-    if (I.second.FuncId.stringRef().empty())
+    if (I.second.stringRef().empty())
       continue;
-    IRCallsiteAnchors.emplace_back(I.second);
+    FilteredIRAnchorsList.emplace_back(I);
   }
 
-  if (IRCallsiteAnchors.empty() || ProfileCallsiteAnchors.empty())
+  if (FilteredIRAnchorsList.empty() || FilteredProfileAnchorList.empty())
     return;
 
   // Match the callsite anchors by finding the longest common subsequence
   // between IR and profile. Note that we need to use IR anchor as base(A side)
   // to align with the order of IRToProfileLocationMap.
   LocToLocMap MatchedAnchors =
-      longestCommonSequence(IRCallsiteAnchors, ProfileCallsiteAnchors);
+      longestCommonSequence(FilteredIRAnchorsList, FilteredProfileAnchorList);
 
   // Match the non-callsite locations and write the result to
   // IRToProfileLocationMap.
@@ -414,11 +397,11 @@ void SampleProfileMatcher::recordCallsiteMatchStates(
     // After fuzzy profile matching, use the matching result to remap the
     // current IR callsite.
     const auto &ProfileLoc = MapIRLocToProfileLoc(I.first);
-    const auto &IRCalleeId = I.second.FuncId;
+    const auto &IRCalleeId = I.second;
     const auto &It = ProfileAnchors.find(ProfileLoc);
     if (It == ProfileAnchors.end())
       continue;
-    const auto &ProfCalleeId = It->second.FuncId;
+    const auto &ProfCalleeId = It->second;
     if (IRCalleeId == ProfCalleeId) {
       auto It = CallsiteMatchStates.find(ProfileLoc);
       if (It == CallsiteMatchStates.end())
@@ -436,7 +419,7 @@ void SampleProfileMatcher::recordCallsiteMatchStates(
   // IR callsites.
   for (const auto &I : ProfileAnchors) {
     const auto &Loc = I.first;
-    [[maybe_unused]] StringRef CalleeName = I.second.FuncId.stringRef();
+    [[maybe_unused]] StringRef CalleeName = I.second.stringRef();
     assert(!CalleeName.empty() && "Callees should not be empty");
     auto It = CallsiteMatchStates.find(Loc);
     if (It == CallsiteMatchStates.end())
diff --git a/llvm/unittests/Transforms/IPO/CMakeLists.txt b/llvm/unittests/Transforms/IPO/CMakeLists.txt
index 80d0eadf0cce0..4e4372179b46c 100644
--- a/llvm/unittests/Transforms/IPO/CMakeLists.txt
+++ b/llvm/unittests/Transforms/IPO/CMakeLists.txt
@@ -3,7 +3,6 @@ set(LLVM_LINK_COMPONENTS
   AsmParser
   Core
   IPO
-  ProfileData
   Support
   TargetParser
   TransformUtils
@@ -14,7 +13,6 @@ add_llvm_unittest(IPOTests
   WholeProgramDevirt.cpp
   AttributorTest.cpp
   FunctionSpecializationTest.cpp
-  SampleProfileMatcherTests.cpp
   )
 
 set_property(TARGET IPOTests PROPERTY FOLDER "Tests/UnitTests/TransformsTests")
diff --git a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp b/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
deleted file mode 100644
index 9bdd45f4ae155..0000000000000
--- a/llvm/unittests/Transforms/IPO/SampleProfileMatcherTests.cpp
+++ /dev/null
@@ -1,150 +0,0 @@
-//===- SampleProfileMatcherTests.cpp - SampleProfileMatcher Unit Tests -----==//
-//
-// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
-// See https://llvm.org/LICENSE.txt for license information.
-// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
-//
-//===----------------------------------------------------------------------===//
-
-#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
-#include "gtest/gtest.h"
-
-using namespace llvm;
-
-MyersDiff Diff;
-
-std::vector<Anchor>
-createAnchorsFromStrings(const std::vector<std::string> &SV) {
-  std::vector<Anchor> Anchors;
-  for (uint64_t I = 0; I < SV.size(); I++) {
-    Anchors.push_back(Anchor(LineLocation(I, 0), FunctionId(SV[I])));
-  }
-  return Anchors;
-}
-
-LocToLocMap
-createEqualLocations(const std::vector<std::pair<uint32_t, uint32_t>> &V) {
-  LocToLocMap LocMap;
-  for (auto P : V) {
-    LocMap.emplace(LineLocation(P.first, 0), LineLocation(P.second, 0));
-  }
-  return LocMap;
-}
-
-std::vector<LineLocation> createLocations(const std::vector<uint32_t> &V) {
-  std::vector<LineLocation> Locations;
-  for (auto I : V) {
-    Locations.emplace_back(LineLocation(I, 0));
-  }
-  return Locations;
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest1) {
-
-  std::vector<Anchor> AnchorsA;
-  std::vector<Anchor> AnchorsB;
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_TRUE(R.EqualLocations.empty());
-#ifndef NDEBUG
-  EXPECT_TRUE(R.Deletions.empty());
-  EXPECT_TRUE(R.Insertions.empty());
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest2) {
-  std::vector<std::string> A({"a", "b", "c"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB;
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_TRUE(R.EqualLocations.empty());
-#ifndef NDEBUG
-  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({2, 1, 0})));
-  EXPECT_TRUE(R.Deletions.empty());
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest3) {
-
-  std::vector<Anchor> AnchorsA;
-  std::vector<std::string> B({"a", "b", "c"});
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_TRUE(R.EqualLocations.empty());
-#ifndef NDEBUG
-  EXPECT_TRUE(R.Insertions.empty());
-  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2, 1, 0})));
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest4) {
-  std::vector<std::string> A({"a", "b", "c"});
-  std::vector<std::string> B({"a", "b", "c"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  LocToLocMap ExpectEqualLocations =
-      createEqualLocations({{0, 0}, {1, 1}, {2, 2}});
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
-#ifndef NDEBUG
-  EXPECT_TRUE(R.Insertions.empty());
-  EXPECT_TRUE(R.Deletions.empty());
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest5) {
-  std::vector<std::string> A({"a", "b", "c"});
-  std::vector<std::string> B({"b", "c", "d"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  LocToLocMap ExpectEqualLocations = createEqualLocations({{1, 0}, {2, 1}});
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
-#ifndef NDEBUG
-  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({0})));
-  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({2})));
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest6) {
-  std::vector<std::string> A({"a", "b", "d"});
-  std::vector<std::string> B({"a", "c", "d"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  LocToLocMap ExpectEqualLocations = createEqualLocations({{0, 0}, {2, 2}});
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
-#ifndef NDEBUG
-  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({1})));
-  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({1})));
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest7) {
-  std::vector<std::string> A({"a", "b", "c", "a", "b", "b", "a"});
-  std::vector<std::string> B({"c", "b", "a", "b", "a", "c"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  LocToLocMap ExpectEqualLocations =
-      createEqualLocations({{2, 0}, {3, 2}, {4, 3}, {6, 4}});
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
-#ifndef NDEBUG
-  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({5, 1, 0})));
-  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({5, 1})));
-#endif
-}
-
-TEST(SampleProfileMatcherTests, MyersDiffTest8) {
-  std::vector<std::string> A({"a", "c", "b", "c", "b", "d", "e"});
-  std::vector<std::string> B({"a", "b", "c", "a", "a", "b", "c", "c", "d"});
-  std::vector<Anchor> AnchorsA = createAnchorsFromStrings(A);
-  std::vector<Anchor> AnchorsB = createAnchorsFromStrings(B);
-  LocToLocMap ExpectEqualLocations =
-      createEqualLocations({{0, 0}, {2, 1}, {3, 2}, {4, 5}, {5, 8}});
-  auto R = Diff.shortestEditScript(AnchorsA, AnchorsB);
-  EXPECT_EQ(R.EqualLocations, ExpectEqualLocations);
-#ifndef NDEBUG
-  EXPECT_EQ(R.Insertions, createLocations(std::vector<uint32_t>({6, 1})));
-  EXPECT_EQ(R.Deletions, createLocations(std::vector<uint32_t>({7, 6, 4, 3})));
-#endif
-}

>From 31dd272164ff66ae4713abbfbbc119b49e1350ea Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Wed, 8 May 2024 19:10:43 -0700
Subject: [PATCH 6/8] update test

---
 .../Inputs/pseudo-probe-stale-profile-matching.prof            | 1 -
 .../SampleProfile/pseudo-probe-stale-profile-matching.ll       | 3 +--
 2 files changed, 1 insertion(+), 3 deletions(-)

diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
index 4d6241bb8568d..69d27b9079787 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
@@ -4,7 +4,6 @@ main:1497:0
  3: 112 bar:60 dummy_calltarget:50
  4: 116
  5: 0
- 7: 124 bar:124
  9: 126 bar:126
  6: foo:452
   1: 112
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
index 0d471e43d2a72..9ef3ca1dc481f 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
@@ -71,7 +71,7 @@
 ; CHECK: Location is rematched backwards from 9 to 2
 ; CHECK: Location is rematched backwards from 10 to 3
 ; CHECK: Location is rematched backwards from 11 to 4
-; CHECK: Callsite with callee:bar is matched from 14 to 7
+; CHECK: Location is matched from 14 to 7
 ; CHECK: Callsite with callee:foo is matched from 15 to 8
 ; CHECK: Callsite with callee:bar is matched from 16 to 9
 
@@ -86,7 +86,6 @@
 ; CHECK:    3:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
 ; CHECK:    6:  %call1.i5 = call i32 @bar(i32 noundef %add.i4), !dbg ![[#]] - weight: 13 - factor: 1.00)
 ; CHECK:    4:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
-; CHECK:    14:  %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00)
 ; CHECK:    8:  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
 ; CHECK:    1:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00)
 ; CHECK:    2:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00)

>From d6978c4dde74b7c2b541ce5ce4c8bf106c15e8e4 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 9 May 2024 13:50:55 -0700
Subject: [PATCH 7/8] added a LCS lit test and addressing other comments

---
 .../Transforms/IPO/SampleProfileMatcher.h     |  14 +-
 .../Transforms/IPO/SampleProfileMatcher.cpp   |  62 ++---
 ...eudo-probe-stale-profile-matching-LCS.prof |  26 +++
 .../pseudo-probe-stale-profile-matching.prof  |   1 +
 ...pseudo-probe-stale-profile-matching-LCS.ll | 219 ++++++++++++++++++
 .../pseudo-probe-stale-profile-matching.ll    |   3 +-
 6 files changed, 279 insertions(+), 46 deletions(-)
 create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-LCS.prof
 create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll

diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index c7f3b18fffd28..b6feca5d47035 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -19,7 +19,7 @@
 
 namespace llvm {
 
-using Anchor = std::pair<LineLocation, FunctionId>;
+using AnchorList = std::vector<std::pair<LineLocation, FunctionId>>;
 using AnchorMap = std::map<LineLocation, FunctionId>;
 
 // Sample profile matching - fuzzy match.
@@ -151,12 +151,12 @@ class SampleProfileMatcher {
   // parts from the resulting SES are used to remap the IR locations to the
   // profile locations. As the number of function callsite is usually not big,
   // we currently just implements the basic greedy version(page 6 of the paper).
-  LocToLocMap longestCommonSequence(
-      const std::vector<Anchor> &IRCallsiteAnchors,
-      const std::vector<Anchor> &ProfileCallsiteAnchors) const;
-  void matchNonCallsiteLocsAndWriteResults(const LocToLocMap &AnchorMatchings,
-                                           const AnchorMap &IRAnchors,
-                                           LocToLocMap &IRToProfileLocationMap);
+  LocToLocMap
+  longestCommonSequence(const AnchorList &IRCallsiteAnchors,
+                        const AnchorList &ProfileCallsiteAnchors) const;
+  void matchNonCallsiteLocs(const LocToLocMap &AnchorMatchings,
+                            const AnchorMap &IRAnchors,
+                            LocToLocMap &IRToProfileLocationMap);
   void runStaleProfileMatching(const Function &F, const AnchorMap &IRAnchors,
                                const AnchorMap &ProfileAnchors,
                                LocToLocMap &IRToProfileLocationMap);
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 195b7ab31810c..71c0e43c1c5f8 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -98,49 +98,36 @@ void SampleProfileMatcher::findProfileAnchors(const FunctionSamples &FS,
     return LineOffset & 0x8000;
   };
 
-  std::map<LineLocation, std::unordered_set<FunctionId>> ProfileCallsites;
+  auto InsertAnchor = [](const LineLocation &Loc, const FunctionId &CalleeName,
+                         AnchorMap &ProfileAnchors) {
+    auto Ret = ProfileAnchors.try_emplace(Loc, CalleeName);
+    if (!Ret.second) {
+      // For multiple callees, which indicates it's an indirect call, we use a
+      // dummy name(UnknownIndirectCallee) as the indirect callee name.
+      Ret.first->second = FunctionId(UnknownIndirectCallee);
+    }
+  };
 
   for (const auto &I : FS.getBodySamples()) {
     const LineLocation &Loc = I.first;
     if (isInvalidLineOffset(Loc.LineOffset))
       continue;
-    for (const auto &I : I.second.getCallTargets()) {
-      auto Ret =
-          ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
-      Ret.first->second.insert(I.first);
-    }
+    for (const auto &C : I.second.getCallTargets())
+      InsertAnchor(Loc, C.first, ProfileAnchors);
   }
 
   for (const auto &I : FS.getCallsiteSamples()) {
     const LineLocation &Loc = I.first;
     if (isInvalidLineOffset(Loc.LineOffset))
       continue;
-    const auto &CalleeMap = I.second;
-    for (const auto &I : CalleeMap) {
-      auto Ret =
-          ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
-      Ret.first->second.insert(I.first);
-    }
-  }
-
-  for (const auto &I : ProfileCallsites) {
-    const auto &Loc = I.first;
-    const auto &Callees = I.second;
-    if (Callees.size() == 1) {
-      auto CalleeName = *Callees.begin();
-      ProfileAnchors.emplace(Loc, CalleeName);
-    } else if (Callees.size() > 1) {
-      // use a dummy name(UnknownIndirectCallee) for unknown indrect callee
-      // name.
-      ProfileAnchors.emplace(Loc, FunctionId(UnknownIndirectCallee));
-    }
+    for (const auto &C : I.second)
+      InsertAnchor(Loc, C.first, ProfileAnchors);
   }
 }
 
 LocToLocMap SampleProfileMatcher::longestCommonSequence(
-    const std::vector<Anchor> &AnchorVec1,
-    const std::vector<Anchor> &AnchorVec2) const {
-  int32_t Size1 = AnchorVec1.size(), Size2 = AnchorVec2.size(),
+    const AnchorList &AnchorList1, const AnchorList &AnchorList2) const {
+  int32_t Size1 = AnchorList1.size(), Size2 = AnchorList2.size(),
           MaxDepth = Size1 + Size2;
   auto Index = [&](int32_t I) { return I + MaxDepth; };
 
@@ -150,8 +137,8 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
 
   // Backtrack the SES result.
   auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
-                       const std::vector<Anchor> &AnchorVec1,
-                       const std::vector<Anchor> &AnchorVec2,
+                       const AnchorList &AnchorList1,
+                       const AnchorList &AnchorList2,
                        LocToLocMap &EqualLocations) {
     int32_t X = Size1, Y = Size2;
     for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
@@ -168,7 +155,7 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
       while (X > PrevX && Y > PrevY) {
         X--;
         Y--;
-        EqualLocations.insert({AnchorVec1[X].first, AnchorVec2[Y].first});
+        EqualLocations.insert({AnchorList1[X].first, AnchorList2[Y].first});
       }
 
       if (Depth == 0)
@@ -200,14 +187,14 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
         X = V[Index(K - 1)] + 1;
       Y = X - K;
       while (X < Size1 && Y < Size2 &&
-             AnchorVec1[X].second == AnchorVec2[Y].second)
+             AnchorList1[X].second == AnchorList2[Y].second)
         X++, Y++;
 
       V[Index(K)] = X;
 
       if (X >= Size1 && Y >= Size2) {
         // Length of an SES is D.
-        Backtrack(Trace, AnchorVec1, AnchorVec2, EqualLocations);
+        Backtrack(Trace, AnchorList1, AnchorList2, EqualLocations);
         return EqualLocations;
       }
     }
@@ -216,7 +203,7 @@ LocToLocMap SampleProfileMatcher::longestCommonSequence(
   return EqualLocations;
 }
 
-void SampleProfileMatcher::matchNonCallsiteLocsAndWriteResults(
+void SampleProfileMatcher::matchNonCallsiteLocs(
     const LocToLocMap &MatchedAnchors, const AnchorMap &IRAnchors,
     LocToLocMap &IRToProfileLocationMap) {
   auto InsertMatching = [&](const LineLocation &From, const LineLocation &To) {
@@ -298,11 +285,11 @@ void SampleProfileMatcher::runStaleProfileMatching(
   assert(IRToProfileLocationMap.empty() &&
          "Run stale profile matching only once per function");
 
-  std::vector<Anchor> FilteredProfileAnchorList;
+  AnchorList FilteredProfileAnchorList;
   for (const auto &I : ProfileAnchors)
     FilteredProfileAnchorList.emplace_back(I);
 
-  std::vector<Anchor> FilteredIRAnchorsList;
+  AnchorList FilteredIRAnchorsList;
   // Filter the non-callsite from IRAnchors.
   for (const auto &I : IRAnchors) {
     if (I.second.stringRef().empty())
@@ -321,8 +308,7 @@ void SampleProfileMatcher::runStaleProfileMatching(
 
   // Match the non-callsite locations and write the result to
   // IRToProfileLocationMap.
-  matchNonCallsiteLocsAndWriteResults(MatchedAnchors, IRAnchors,
-                                      IRToProfileLocationMap);
+  matchNonCallsiteLocs(MatchedAnchors, IRAnchors, IRToProfileLocationMap);
 }
 
 void SampleProfileMatcher::runOnFunction(Function &F) {
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-LCS.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-LCS.prof
new file mode 100644
index 0000000000000..e56c7c01865d1
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching-LCS.prof
@@ -0,0 +1,26 @@
+test_direct_call:606:83
+ 1: 83
+ 2: 83 C:83
+ 3: 90 B:90
+ 4: 83 A:83
+ 5: 92 B:92
+ 6: 83 A:83
+ 7: 97 C:97
+ !CFGChecksum: 123456
+test_indirect_call:589:86
+ 1: 86
+ 2: 86 C:86
+ 3: 83 A:43 B:40
+ 4: 84 B:84
+ 6: 82 B:62 A:20
+ 7: 91 C:91
+ !CFGChecksum: 123456
+main:403:0
+ 1: 0
+ 2: 80
+ 3: 80
+ 4: 86 test_indirect_call:86
+ 5: 83 test_direct_call:83
+ 6: 83
+ 7: 0
+ !CFGChecksum: 563036051115663
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
index 69d27b9079787..4d6241bb8568d 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-matching.prof
@@ -4,6 +4,7 @@ main:1497:0
  3: 112 bar:60 dummy_calltarget:50
  4: 116
  5: 0
+ 7: 124 bar:124
  9: 126 bar:126
  6: foo:452
   1: 112
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll
new file mode 100644
index 0000000000000..ecf8484d98e59
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-LCS.ll
@@ -0,0 +1,219 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-matching-LCS.prof --salvage-stale-profile -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl 2>&1 | FileCheck %s
+
+; CHECK: Run stale profile matching for test_direct_call
+; CHECK: Location is matched from 1 to 1
+; CHECK: Location is matched from 2 to 2
+; CHECK: Location is matched from 3 to 3
+; CHECK: Callsite with callee:C is matched from 4 to 2
+; CHECK: Location is rematched backwards from 3 to 1
+; CHECK: Callsite with callee:A is matched from 5 to 4
+; CHECK: Callsite with callee:B is matched from 6 to 5
+; CHECK: Location is matched from 7 to 6
+; CHECK: Callsite with callee:A is matched from 8 to 6
+
+; CHECK: Run stale profile matching for test_indirect_call
+; CHECK: Location is matched from 1 to 1
+; CHECK: Location is matched from 2 to 2
+; CHECK: Location is matched from 3 to 3
+; CHECK: Location is matched from 4 to 4
+; CHECK: Callsite with callee:C is matched from 5 to 2
+; CHECK: Location is rematched backwards from 3 to 0
+; CHECK: Location is rematched backwards from 4 to 1
+; CHECK: Callsite with callee:unknown.indirect.callee is matched from 6 to 3
+; CHECK: Callsite with callee:B is matched from 7 to 4
+; CHECK: Location is matched from 8 to 5
+; CHECK: Callsite with callee:unknown.indirect.callee is matched from 9 to 6
+; CHECK: Callsite with callee:C is matched from 10 to 7
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+@c = external global i32, align 4
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @test_direct_call(i32 noundef %x) #0 !dbg !12 {
+entry:
+    #dbg_value(i32 %x, !17, !DIExpression(), !18)
+  call void @llvm.pseudoprobe(i64 -4364451034228175269, i64 1, i32 0, i64 -1), !dbg !19
+  %call = call i32 @A(i32 noundef %x), !dbg !20
+  %add = add nsw i32 %x, %call, !dbg !22
+    #dbg_value(i32 %add, !17, !DIExpression(), !18)
+  %call1 = call i32 @B(i32 noundef %add), !dbg !23
+  %add2 = add nsw i32 %add, %call1, !dbg !25
+    #dbg_value(i32 %add2, !17, !DIExpression(), !18)
+  %call3 = call i32 @C(i32 noundef %add2), !dbg !26
+  %add4 = add nsw i32 %add2, %call3, !dbg !28
+    #dbg_value(i32 %add4, !17, !DIExpression(), !18)
+  %call5 = call i32 @A(i32 noundef %add4), !dbg !29
+  %add6 = add nsw i32 %add4, %call5, !dbg !31
+    #dbg_value(i32 %add6, !17, !DIExpression(), !18)
+  %call7 = call i32 @B(i32 noundef %add6), !dbg !32
+  %add8 = add nsw i32 %add6, %call7, !dbg !34
+    #dbg_value(i32 %add8, !17, !DIExpression(), !18)
+  %call9 = call i32 @B(i32 noundef %add8), !dbg !35
+  %add10 = add nsw i32 %add8, %call9, !dbg !37
+    #dbg_value(i32 %add10, !17, !DIExpression(), !18)
+  %call11 = call i32 @A(i32 noundef %add10), !dbg !38
+  %add12 = add nsw i32 %add10, %call11, !dbg !40
+    #dbg_value(i32 %add12, !17, !DIExpression(), !18)
+  ret i32 %add12, !dbg !41
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind speculatable willreturn memory(none)
+declare void @llvm.dbg.declare(metadata, metadata, metadata) #1
+
+declare !dbg !42 i32 @A(i32 noundef) #2
+
+declare !dbg !43 i32 @B(i32 noundef) #2
+
+declare !dbg !44 i32 @C(i32 noundef) #2
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @test_indirect_call(i32 noundef %x) #0 !dbg !45 {
+entry:
+    #dbg_value(i32 %x, !47, !DIExpression(), !50)
+  call void @llvm.pseudoprobe(i64 -8563147518712133441, i64 1, i32 0, i64 -1), !dbg !51
+  %0 = load i32, ptr @c, align 4, !dbg !51, !tbaa !53
+  %tobool = icmp ne i32 %0, 0, !dbg !51
+  br i1 %tobool, label %if.then, label %if.else, !dbg !57
+
+if.then:                                          ; preds = %entry
+  call void @llvm.pseudoprobe(i64 -8563147518712133441, i64 2, i32 0, i64 -1), !dbg !58
+    #dbg_value(ptr @A, !48, !DIExpression(), !50)
+  br label %if.end, !dbg !59
+
+if.else:                                          ; preds = %entry
+  call void @llvm.pseudoprobe(i64 -8563147518712133441, i64 3, i32 0, i64 -1), !dbg !60
+    #dbg_value(ptr @B, !48, !DIExpression(), !50)
+  br label %if.end
+
+if.end:                                           ; preds = %if.else, %if.then
+  %fp.0 = phi ptr [ @A, %if.then ], [ @B, %if.else ], !dbg !61
+    #dbg_value(ptr %fp.0, !48, !DIExpression(), !50)
+  call void @llvm.pseudoprobe(i64 -8563147518712133441, i64 4, i32 0, i64 -1), !dbg !62
+  %call = call i32 @C(i32 noundef %x), !dbg !63
+  %add = add nsw i32 %x, %call, !dbg !65
+    #dbg_value(i32 %add, !47, !DIExpression(), !50)
+  %call1 = call i32 %fp.0(i32 noundef %add), !dbg !66
+  %add2 = add nsw i32 %add, %call1, !dbg !68
+    #dbg_value(i32 %add2, !47, !DIExpression(), !50)
+  %call3 = call i32 @B(i32 noundef %add2), !dbg !69
+  %add4 = add nsw i32 %add2, %call3, !dbg !71
+    #dbg_value(i32 %add4, !47, !DIExpression(), !50)
+  %call5 = call i32 @C(i32 noundef %add4), !dbg !72
+  %add6 = add nsw i32 %add4, %call5, !dbg !74
+    #dbg_value(i32 %add6, !47, !DIExpression(), !50)
+  %call7 = call i32 %fp.0(i32 noundef %add6), !dbg !75
+  %add8 = add nsw i32 %add6, %call7, !dbg !77
+    #dbg_value(i32 %add8, !47, !DIExpression(), !50)
+  %call9 = call i32 @C(i32 noundef %add8), !dbg !78
+  %add10 = add nsw i32 %add8, %call9, !dbg !80
+    #dbg_value(i32 %add10, !47, !DIExpression(), !50)
+  ret i32 %add10, !dbg !81
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #3
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #4
+
+attributes #0 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind speculatable willreturn }
+attributes #2 = { "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn }
+attributes #4 = { mustprogress nocallback nofree nosync nounwind willreturn }
+
+!llvm.dbg.cu = !{!0}
+!llvm.module.flags = !{!2, !3, !4, !5, !6, !7, !8}
+!llvm.ident = !{!9}
+!llvm.pseudo_probe_desc = !{!10, !11}
+
+!0 = distinct !DICompileUnit(language: DW_LANG_C11, file: !1, producer: "clang version 19.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, splitDebugInlining: false, debugInfoForProfiling: true, nameTableKind: None)
+!1 = !DIFile(filename: "test.c", directory: "/home/", checksumkind: CSK_MD5, checksum: "be98aa946f37f0ad8d307c9121efe101")
+!2 = !{i32 7, !"Dwarf Version", i32 5}
+!3 = !{i32 2, !"Debug Info Version", i32 3}
+!4 = !{i32 1, !"wchar_size", i32 4}
+!5 = !{i32 8, !"PIC Level", i32 2}
+!6 = !{i32 7, !"PIE Level", i32 2}
+!7 = !{i32 7, !"uwtable", i32 2}
+!8 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!9 = !{!"clang version 19.0.0"}
+!10 = !{i64 -4364451034228175269, i64 1970329131941887, !"test_direct_call"}
+!11 = !{i64 -8563147518712133441, i64 1688922477484692, !"test_indirect_call"}
+!12 = distinct !DISubprogram(name: "test_direct_call", scope: !1, file: !1, line: 10, type: !13, scopeLine: 10, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !16)
+!13 = !DISubroutineType(types: !14)
+!14 = !{!15, !15}
+!15 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!16 = !{!17}
+!17 = !DILocalVariable(name: "x", arg: 1, scope: !12, file: !1, line: 10, type: !15)
+!18 = !DILocation(line: 0, scope: !12)
+!19 = !DILocation(line: 11, column: 10, scope: !12)
+!20 = !DILocation(line: 11, column: 8, scope: !21)
+!21 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646551)
+!22 = !DILocation(line: 11, column: 5, scope: !12)
+!23 = !DILocation(line: 12, column: 8, scope: !24)
+!24 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646559)
+!25 = !DILocation(line: 12, column: 5, scope: !12)
+!26 = !DILocation(line: 13, column: 8, scope: !27)
+!27 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646567)
+!28 = !DILocation(line: 13, column: 5, scope: !12)
+!29 = !DILocation(line: 14, column: 8, scope: !30)
+!30 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646575)
+!31 = !DILocation(line: 14, column: 5, scope: !12)
+!32 = !DILocation(line: 15, column: 8, scope: !33)
+!33 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646583)
+!34 = !DILocation(line: 15, column: 5, scope: !12)
+!35 = !DILocation(line: 16, column: 8, scope: !36)
+!36 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646591)
+!37 = !DILocation(line: 16, column: 5, scope: !12)
+!38 = !DILocation(line: 17, column: 8, scope: !39)
+!39 = !DILexicalBlockFile(scope: !12, file: !1, discriminator: 186646599)
+!40 = !DILocation(line: 17, column: 5, scope: !12)
+!41 = !DILocation(line: 18, column: 3, scope: !12)
+!42 = !DISubprogram(name: "A", scope: !1, file: !1, line: 2, type: !13, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!43 = !DISubprogram(name: "B", scope: !1, file: !1, line: 3, type: !13, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!44 = !DISubprogram(name: "C", scope: !1, file: !1, line: 4, type: !13, flags: DIFlagPrototyped, spFlags: DISPFlagOptimized)
+!45 = distinct !DISubprogram(name: "test_indirect_call", scope: !1, file: !1, line: 21, type: !13, scopeLine: 21, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !0, retainedNodes: !46)
+!46 = !{!47, !48}
+!47 = !DILocalVariable(name: "x", arg: 1, scope: !45, file: !1, line: 21, type: !15)
+!48 = !DILocalVariable(name: "fp", scope: !45, file: !1, line: 22, type: !49)
+!49 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !13, size: 64)
+!50 = !DILocation(line: 0, scope: !45)
+!51 = !DILocation(line: 23, column: 6, scope: !52)
+!52 = distinct !DILexicalBlock(scope: !45, file: !1, line: 23, column: 6)
+!53 = !{!54, !54, i64 0}
+!54 = !{!"int", !55, i64 0}
+!55 = !{!"omnipotent char", !56, i64 0}
+!56 = !{!"Simple C/C++ TBAA"}
+!57 = !DILocation(line: 23, column: 6, scope: !45)
+!58 = !DILocation(line: 24, column: 8, scope: !52)
+!59 = !DILocation(line: 24, column: 5, scope: !52)
+!60 = !DILocation(line: 26, column: 8, scope: !52)
+!61 = !DILocation(line: 0, scope: !52)
+!62 = !DILocation(line: 27, column: 10, scope: !45)
+!63 = !DILocation(line: 27, column: 8, scope: !64)
+!64 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 186646575)
+!65 = !DILocation(line: 27, column: 5, scope: !45)
+!66 = !DILocation(line: 28, column: 8, scope: !67)
+!67 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 119537719)
+!68 = !DILocation(line: 28, column: 5, scope: !45)
+!69 = !DILocation(line: 29, column: 8, scope: !70)
+!70 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 186646591)
+!71 = !DILocation(line: 29, column: 5, scope: !45)
+!72 = !DILocation(line: 30, column: 8, scope: !73)
+!73 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 186646599)
+!74 = !DILocation(line: 30, column: 5, scope: !45)
+!75 = !DILocation(line: 31, column: 8, scope: !76)
+!76 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 119537743)
+!77 = !DILocation(line: 31, column: 5, scope: !45)
+!78 = !DILocation(line: 32, column: 8, scope: !79)
+!79 = !DILexicalBlockFile(scope: !45, file: !1, discriminator: 186646615)
+!80 = !DILocation(line: 32, column: 5, scope: !45)
+!81 = !DILocation(line: 33, column: 3, scope: !45)
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
index 9ef3ca1dc481f..20be0c2fec7f2 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll
@@ -71,7 +71,7 @@
 ; CHECK: Location is rematched backwards from 9 to 2
 ; CHECK: Location is rematched backwards from 10 to 3
 ; CHECK: Location is rematched backwards from 11 to 4
-; CHECK: Location is matched from 14 to 7
+; CHECK: Callsite with callee:bar is matched from 14 to 7
 ; CHECK: Callsite with callee:foo is matched from 15 to 8
 ; CHECK: Callsite with callee:bar is matched from 16 to 9
 
@@ -86,6 +86,7 @@
 ; CHECK:    3:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 3, i32 0, i64 -1), !dbg ![[#]] - weight: 13 - factor: 1.00)
 ; CHECK:    6:  %call1.i5 = call i32 @bar(i32 noundef %add.i4), !dbg ![[#]] - weight: 13 - factor: 1.00)
 ; CHECK:    4:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 4, i32 0, i64 -1), !dbg ![[#]] - weight: 112 - factor: 1.00)
+; CHECK:    14: %call2 = call i32 @bar(i32 noundef %3), !dbg ![[#]] - weight: 124 - factor: 1.00)
 ; CHECK:    8:  call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 8, i32 0, i64 -1), !dbg ![[#]] - weight: 0 - factor: 1.00)
 ; CHECK:    1:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 1, i32 0, i64 -1), !dbg ![[#]] - weight: 117 - factor: 1.00)
 ; CHECK:    2:  call void @llvm.pseudoprobe(i64 6699318081062747564, i64 2, i32 0, i64 -1), !dbg ![[#]] - weight: 104 - factor: 1.00)

>From 5db6df88177172fc1300da9d3d5ef56ddcf3796b Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Thu, 9 May 2024 23:22:21 -0700
Subject: [PATCH 8/8] fold the calleename and remove the maybe_unused

---
 llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 71c0e43c1c5f8..bb4a2ed2a1e6e 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -217,15 +217,13 @@ void SampleProfileMatcher::matchNonCallsiteLocs(
   SmallVector<LineLocation> LastMatchedNonAnchors;
   for (const auto &IR : IRAnchors) {
     const auto &Loc = IR.first;
-    [[maybe_unused]] StringRef CalleeName = IR.second.stringRef();
     bool IsMatchedAnchor = false;
-
     // Match the anchor location in lexical order.
     auto R = MatchedAnchors.find(Loc);
     if (R != MatchedAnchors.end()) {
       const auto &Candidate = R->second;
       InsertMatching(Loc, Candidate);
-      LLVM_DEBUG(dbgs() << "Callsite with callee:" << CalleeName
+      LLVM_DEBUG(dbgs() << "Callsite with callee:" << IR.second.stringRef()
                         << " is matched from " << Loc << " to " << Candidate
                         << "\n");
       LocationDelta = Candidate.LineOffset - Loc.LineOffset;
@@ -405,8 +403,7 @@ void SampleProfileMatcher::recordCallsiteMatchStates(
   // IR callsites.
   for (const auto &I : ProfileAnchors) {
     const auto &Loc = I.first;
-    [[maybe_unused]] StringRef CalleeName = I.second.stringRef();
-    assert(!CalleeName.empty() && "Callees should not be empty");
+    assert(!I.second.stringRef().empty() && "Callees should not be empty");
     auto It = CallsiteMatchStates.find(Loc);
     if (It == CallsiteMatchStates.end())
       CallsiteMatchStates.emplace(Loc, MatchState::InitialMismatch);



More information about the llvm-commits mailing list