[llvm] [SampleFDO] Improve stale profile matching by diff algorithm (PR #87375)
Lei Wang via llvm-commits
llvm-commits at lists.llvm.org
Thu May 9 13:57:11 PDT 2024
================
@@ -116,52 +118,107 @@ void SampleProfileMatcher::findProfileAnchors(
const auto &CalleeMap = I.second;
for (const auto &I : CalleeMap) {
auto Ret =
- ProfileAnchors.try_emplace(Loc, std::unordered_set<FunctionId>());
+ ProfileCallsites.try_emplace(Loc, std::unordered_set<FunctionId>());
Ret.first->second.insert(I.first);
}
}
-}
-// Call target name anchor based profile fuzzy matching.
-// Input:
-// For IR locations, the anchor is the callee name of direct callsite; For
-// profile locations, it's the call target name for BodySamples or inlinee's
-// profile name for CallsiteSamples.
-// Matching heuristic:
-// First match all the anchors in lexical order, then split the non-anchor
-// locations between the two anchors evenly, first half are matched based on the
-// start anchor, second half are matched based on the end anchor.
-// For example, given:
-// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
-// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
-// The matching gives:
-// [1, 2(foo), 3, 5, 6(bar), 7]
-// | | | | | |
-// [1, 2, 3(foo), 4, 7, 8(bar), 9]
-// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
-void SampleProfileMatcher::runStaleProfileMatching(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- LocToLocMap &IRToProfileLocationMap) {
- LLVM_DEBUG(dbgs() << "Run stale profile matching for " << F.getName()
- << "\n");
- assert(IRToProfileLocationMap.empty() &&
- "Run stale profile matching only once per function");
-
- std::unordered_map<FunctionId, std::set<LineLocation>> CalleeToCallsitesMap;
- for (const auto &I : ProfileAnchors) {
+ for (const auto &I : ProfileCallsites) {
const auto &Loc = I.first;
const auto &Callees = I.second;
- // Filter out possible indirect calls, use direct callee name as anchor.
if (Callees.size() == 1) {
- FunctionId CalleeName = *Callees.begin();
- const auto &Candidates = CalleeToCallsitesMap.try_emplace(
- CalleeName, std::set<LineLocation>());
- Candidates.first->second.insert(Loc);
+ auto CalleeName = *Callees.begin();
+ ProfileAnchors.emplace(Loc, CalleeName);
+ } else if (Callees.size() > 1) {
+ // use a dummy name(UnknownIndirectCallee) for unknown indrect callee
+ // name.
+ ProfileAnchors.emplace(Loc, FunctionId(UnknownIndirectCallee));
+ }
+ }
+}
+
+LocToLocMap SampleProfileMatcher::longestCommonSequence(
+ const std::vector<Anchor> &AnchorVec1,
+ const std::vector<Anchor> &AnchorVec2) const {
+ int32_t Size1 = AnchorVec1.size(), Size2 = AnchorVec2.size(),
+ MaxDepth = Size1 + Size2;
+ auto Index = [&](int32_t I) { return I + MaxDepth; };
+
+ LocToLocMap EqualLocations;
+ if (MaxDepth == 0)
+ return EqualLocations;
+
+ // Backtrack the SES result.
+ auto Backtrack = [&](const std::vector<std::vector<int32_t>> &Trace,
+ const std::vector<Anchor> &AnchorVec1,
+ const std::vector<Anchor> &AnchorVec2,
+ LocToLocMap &EqualLocations) {
+ int32_t X = Size1, Y = Size2;
+ for (int32_t Depth = Trace.size() - 1; X > 0 || Y > 0; Depth--) {
+ const auto &P = Trace[Depth];
+ int32_t K = X - Y;
+ int32_t PrevK = K;
+ if (K == -Depth || (K != Depth && P[Index(K - 1)] < P[Index(K + 1)]))
+ PrevK = K + 1;
+ else
+ PrevK = K - 1;
+
+ int32_t PrevX = P[Index(PrevK)];
+ int32_t PrevY = PrevX - PrevK;
+ while (X > PrevX && Y > PrevY) {
+ X--;
+ Y--;
+ EqualLocations.insert({AnchorVec1[X].first, AnchorVec2[Y].first});
+ }
+
+ if (Depth == 0)
+ break;
+
+ if (Y == PrevY)
+ X--;
+ else if (X == PrevX)
+ Y--;
+ X = PrevX;
+ Y = PrevY;
+ }
+ };
+
+ // The greedy LCS/SES algorithm.
+
+ // An array contains the endpoints of the furthest reaching D-paths.
+ std::vector<int32_t> V(2 * MaxDepth + 1, -1);
+ V[Index(1)] = 0;
+ // Trace is used to backtrack the SES result.
+ std::vector<std::vector<int32_t>> Trace;
----------------
wlei-llvm wrote:
The size of `Trace` is not known in advance here (it is not always `MaxDepth`): it is determined by the shortest depth at which the LCS is found.
https://github.com/llvm/llvm-project/pull/87375
More information about the llvm-commits
mailing list