[llvm] [SampleFDO][NFC] Refactoring SampleProfileMatcher (PR #86988)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Mar 28 11:47:04 PDT 2024
llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT-->
@llvm/pr-subscribers-llvm-transforms
Author: Lei Wang (wlei-llvm)
<details>
<summary>Changes</summary>
Move all the stale profile matching code into new files so that they can be shared for unit testing.
---
Patch is 62.49 KiB, truncated to 20.00 KiB below, full version: https://github.com/llvm/llvm-project/pull/86988.diff
8 Files Affected:
- (added) llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h (+154)
- (modified) llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h (+4)
- (modified) llvm/lib/Transforms/IPO/CMakeLists.txt (+1)
- (modified) llvm/lib/Transforms/IPO/SampleProfile.cpp (+4-668)
- (added) llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp (+553)
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-callee-profile-mismatch.ll (+1-1)
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching-lto.ll (+1-1)
- (modified) llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-matching.ll (+1-1)
``````````diff
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
new file mode 100644
index 00000000000000..7ae6194da7c9cc
--- /dev/null
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -0,0 +1,154 @@
+//===- Transforms/IPO/SampleProfileMatcher.h ----------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+/// \file
+/// This file provides the interface for SampleProfileMatcher.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
+#define LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
+
+#include "llvm/ADT/StringSet.h"
+#include "llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h"
+
+namespace llvm {
+
+// Sample profile matching - fuzzy match.
+class SampleProfileMatcher {
+ Module &M;
+ SampleProfileReader &Reader;
+ const PseudoProbeManager *ProbeManager;
+ const ThinOrFullLTOPhase LTOPhase;
+ SampleProfileMap FlattenedProfiles;
+ // For each function, the matcher generates a map, of which each entry is a
+ // mapping from the source location of current build to the source location in
+ // the profile.
+ StringMap<LocToLocMap> FuncMappings;
+
+ // Match state for an anchor/callsite.
+ enum class MatchState {
+ Unknown = 0,
+ // Initial match between input profile and current IR.
+ InitialMatch = 1,
+ // Initial mismatch between input profile and current IR.
+ InitialMismatch = 2,
+ // InitialMatch stays matched after fuzzy profile matching.
+ UnchangedMatch = 3,
+ // InitialMismatch stays mismatched after fuzzy profile matching.
+ UnchangedMismatch = 4,
+ // InitialMismatch is recovered after fuzzy profile matching.
+ RecoveredMismatch = 5,
+ // InitialMatch is removed and becomes mismatched after fuzzy profile
+ // matching.
+ RemovedMatch = 6,
+ };
+
+ // For each function, store every callsite and its matching state into this
+ // map, of which each entry is a pair of callsite location and MatchState.
+ // This is used for profile staleness computation and report.
+ StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
+ FuncCallsiteMatchStates;
+
+ // Profile mismatch statistics:
+ uint64_t TotalProfiledFunc = 0;
+ // Num of checksum-mismatched function.
+ uint64_t NumStaleProfileFunc = 0;
+ uint64_t TotalProfiledCallsites = 0;
+ uint64_t NumMismatchedCallsites = 0;
+ uint64_t NumRecoveredCallsites = 0;
+ // Total samples for all profiled functions.
+ uint64_t TotalFunctionSamples = 0;
+ // Total samples for all checksum-mismatched functions.
+ uint64_t MismatchedFunctionSamples = 0;
+ uint64_t MismatchedCallsiteSamples = 0;
+ uint64_t RecoveredCallsiteSamples = 0;
+
+ // A dummy name for unknown indirect callee, used to differentiate from a
+ // non-call instruction that also has an empty callee name.
+ static constexpr const char *UnknownIndirectCallee =
+ "unknown.indirect.callee";
+
+public:
+ SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
+ const PseudoProbeManager *ProbeManager,
+ ThinOrFullLTOPhase LTOPhase)
+ : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
+ void runOnModule();
+ void clearMatchingData() {
+ // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
+ // will be used for sample loader.
+ FuncCallsiteMatchStates.clear();
+ }
+
+private:
+ FunctionSamples *getFlattenedSamplesFor(const Function &F) {
+ StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
+ auto It = FlattenedProfiles.find(FunctionId(CanonFName));
+ if (It != FlattenedProfiles.end())
+ return &It->second;
+ return nullptr;
+ }
+ void runOnFunction(Function &F);
+ void findIRAnchors(const Function &F,
+ std::map<LineLocation, StringRef> &IRAnchors);
+ void findProfileAnchors(
+ const FunctionSamples &FS,
+ std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors);
+ // Record the callsite match states for profile staleness report, the result
+ // is saved in FuncCallsiteMatchStates.
+ void recordCallsiteMatchStates(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ const LocToLocMap *IRToProfileLocationMap);
+
+ bool isMismatchState(const enum MatchState &State) {
+ return State == MatchState::InitialMismatch ||
+ State == MatchState::UnchangedMismatch ||
+ State == MatchState::RemovedMatch;
+ };
+
+ bool isInitialState(const enum MatchState &State) {
+ return State == MatchState::InitialMatch ||
+ State == MatchState::InitialMismatch;
+ };
+
+ bool isFinalState(const enum MatchState &State) {
+ return State == MatchState::UnchangedMatch ||
+ State == MatchState::UnchangedMismatch ||
+ State == MatchState::RecoveredMismatch ||
+ State == MatchState::RemovedMatch;
+ };
+
+ // Count the samples of checksum mismatched function for the top-level
+ // function and all inlinees.
+ void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
+ // Count the number of mismatched or recovered callsites.
+ void countMismatchCallsites(const FunctionSamples &FS);
+ // Count the samples of mismatched or recovered callsites for top-level
+ // function and all inlinees.
+ void countMismatchedCallsiteSamples(const FunctionSamples &FS);
+ void computeAndReportProfileStaleness();
+
+ LocToLocMap &getIRToProfileLocationMap(const Function &F) {
+ auto Ret = FuncMappings.try_emplace(
+ FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
+ return Ret.first->second;
+ }
+ void distributeIRToProfileLocationMap();
+ void distributeIRToProfileLocationMap(FunctionSamples &FS);
+ void runStaleProfileMatching(
+ const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
+ const std::map<LineLocation, std::unordered_set<FunctionId>>
+ &ProfileAnchors,
+ LocToLocMap &IRToProfileLocationMap);
+ void reportOrPersistProfileStats();
+};
+} // end namespace llvm
+#endif // LLVM_TRANSFORMS_IPO_SAMPLEPROFILEMATCHER_H
diff --git a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
index 048b97c34ee2ae..d898ee58307ead 100644
--- a/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
+++ b/llvm/include/llvm/Transforms/Utils/SampleProfileLoaderBaseImpl.h
@@ -146,6 +146,10 @@ class PseudoProbeManager {
extern cl::opt<bool> SampleProfileUseProfi;
+static inline bool skipProfileForFunction(const Function &F) {
+ return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
+}
+
template <typename FT> class SampleProfileLoaderBaseImpl {
public:
SampleProfileLoaderBaseImpl(std::string Name, std::string RemapName,
diff --git a/llvm/lib/Transforms/IPO/CMakeLists.txt b/llvm/lib/Transforms/IPO/CMakeLists.txt
index 034f1587ae8df4..5fbdbc3a014f9a 100644
--- a/llvm/lib/Transforms/IPO/CMakeLists.txt
+++ b/llvm/lib/Transforms/IPO/CMakeLists.txt
@@ -35,6 +35,7 @@ add_llvm_component_library(LLVMipo
PartialInlining.cpp
SampleContextTracker.cpp
SampleProfile.cpp
+ SampleProfileMatcher.cpp
SampleProfileProbe.cpp
SCCP.cpp
StripDeadPrototypes.cpp
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 7545a92c114ef2..b5f45a252c7b46 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -71,6 +71,7 @@
#include "llvm/Transforms/IPO.h"
#include "llvm/Transforms/IPO/ProfiledCallGraph.h"
#include "llvm/Transforms/IPO/SampleContextTracker.h"
+#include "llvm/Transforms/IPO/SampleProfileMatcher.h"
#include "llvm/Transforms/IPO/SampleProfileProbe.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/CallPromotionUtils.h"
@@ -129,16 +130,16 @@ static cl::opt<std::string> SampleProfileRemappingFile(
"sample-profile-remapping-file", cl::init(""), cl::value_desc("filename"),
cl::desc("Profile remapping file loaded by -sample-profile"), cl::Hidden);
-static cl::opt<bool> SalvageStaleProfile(
+cl::opt<bool> SalvageStaleProfile(
"salvage-stale-profile", cl::Hidden, cl::init(false),
cl::desc("Salvage stale profile by fuzzy matching and use the remapped "
"location for sample profile query."));
-static cl::opt<bool> ReportProfileStaleness(
+cl::opt<bool> ReportProfileStaleness(
"report-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute and report stale profile statistical metrics."));
-static cl::opt<bool> PersistProfileStaleness(
+cl::opt<bool> PersistProfileStaleness(
"persist-profile-staleness", cl::Hidden, cl::init(false),
cl::desc("Compute stale profile statistical metrics and write it into the "
"native object file(.llvm_stats section)."));
@@ -448,138 +449,6 @@ using CandidateQueue =
PriorityQueue<InlineCandidate, std::vector<InlineCandidate>,
CandidateComparer>;
-// Sample profile matching - fuzzy match.
-class SampleProfileMatcher {
- Module &M;
- SampleProfileReader &Reader;
- const PseudoProbeManager *ProbeManager;
- const ThinOrFullLTOPhase LTOPhase;
- SampleProfileMap FlattenedProfiles;
- // For each function, the matcher generates a map, of which each entry is a
- // mapping from the source location of current build to the source location in
- // the profile.
- StringMap<LocToLocMap> FuncMappings;
-
- // Match state for an anchor/callsite.
- enum class MatchState {
- Unknown = 0,
- // Initial match between input profile and current IR.
- InitialMatch = 1,
- // Initial mismatch between input profile and current IR.
- InitialMismatch = 2,
- // InitialMatch stays matched after fuzzy profile matching.
- UnchangedMatch = 3,
- // InitialMismatch stays mismatched after fuzzy profile matching.
- UnchangedMismatch = 4,
- // InitialMismatch is recovered after fuzzy profile matching.
- RecoveredMismatch = 5,
- // InitialMatch is removed and becomes mismatched after fuzzy profile
- // matching.
- RemovedMatch = 6,
- };
-
- // For each function, store every callsite and its matching state into this
- // map, of which each entry is a pair of callsite location and MatchState.
- // This is used for profile staleness computation and report.
- StringMap<std::unordered_map<LineLocation, MatchState, LineLocationHash>>
- FuncCallsiteMatchStates;
-
- // Profile mismatch statstics:
- uint64_t TotalProfiledFunc = 0;
- // Num of checksum-mismatched function.
- uint64_t NumStaleProfileFunc = 0;
- uint64_t TotalProfiledCallsites = 0;
- uint64_t NumMismatchedCallsites = 0;
- uint64_t NumRecoveredCallsites = 0;
- // Total samples for all profiled functions.
- uint64_t TotalFunctionSamples = 0;
- // Total samples for all checksum-mismatched functions.
- uint64_t MismatchedFunctionSamples = 0;
- uint64_t MismatchedCallsiteSamples = 0;
- uint64_t RecoveredCallsiteSamples = 0;
-
- // A dummy name for unknown indirect callee, used to differentiate from a
- // non-call instruction that also has an empty callee name.
- static constexpr const char *UnknownIndirectCallee =
- "unknown.indirect.callee";
-
-public:
- SampleProfileMatcher(Module &M, SampleProfileReader &Reader,
- const PseudoProbeManager *ProbeManager,
- ThinOrFullLTOPhase LTOPhase)
- : M(M), Reader(Reader), ProbeManager(ProbeManager), LTOPhase(LTOPhase){};
- void runOnModule();
- void clearMatchingData() {
- // Do not clear FuncMappings, it stores IRLoc to ProfLoc remappings which
- // will be used for sample loader.
- FuncCallsiteMatchStates.clear();
- }
-
-private:
- FunctionSamples *getFlattenedSamplesFor(const Function &F) {
- StringRef CanonFName = FunctionSamples::getCanonicalFnName(F);
- auto It = FlattenedProfiles.find(FunctionId(CanonFName));
- if (It != FlattenedProfiles.end())
- return &It->second;
- return nullptr;
- }
- void runOnFunction(Function &F);
- void findIRAnchors(const Function &F,
- std::map<LineLocation, StringRef> &IRAnchors);
- void findProfileAnchors(
- const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors);
- // Record the callsite match states for profile staleness report, the result
- // is saved in FuncCallsiteMatchStates.
- void recordCallsiteMatchStates(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- const LocToLocMap *IRToProfileLocationMap);
-
- bool isMismatchState(const enum MatchState &State) {
- return State == MatchState::InitialMismatch ||
- State == MatchState::UnchangedMismatch ||
- State == MatchState::RemovedMatch;
- };
-
- bool isInitialState(const enum MatchState &State) {
- return State == MatchState::InitialMatch ||
- State == MatchState::InitialMismatch;
- };
-
- bool isFinalState(const enum MatchState &State) {
- return State == MatchState::UnchangedMatch ||
- State == MatchState::UnchangedMismatch ||
- State == MatchState::RecoveredMismatch ||
- State == MatchState::RemovedMatch;
- };
-
- // Count the samples of checksum mismatched function for the top-level
- // function and all inlinees.
- void countMismatchedFuncSamples(const FunctionSamples &FS, bool IsTopLevel);
- // Count the number of mismatched or recovered callsites.
- void countMismatchCallsites(const FunctionSamples &FS);
- // Count the samples of mismatched or recovered callsites for top-level
- // function and all inlinees.
- void countMismatchedCallsiteSamples(const FunctionSamples &FS);
- void computeAndReportProfileStaleness();
-
- LocToLocMap &getIRToProfileLocationMap(const Function &F) {
- auto Ret = FuncMappings.try_emplace(
- FunctionSamples::getCanonicalFnName(F.getName()), LocToLocMap());
- return Ret.first->second;
- }
- void distributeIRToProfileLocationMap();
- void distributeIRToProfileLocationMap(FunctionSamples &FS);
- void runStaleProfileMatching(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- LocToLocMap &IRToProfileLocationMap);
- void reportOrPersistProfileStats();
-};
-
/// Sample profile pass.
///
/// This pass reads profile data from the file specified by
@@ -766,10 +635,6 @@ void SampleProfileLoaderBaseImpl<Function>::computeDominanceAndLoopInfo(
}
} // namespace llvm
-static bool skipProfileForFunction(const Function &F) {
- return F.isDeclaration() || !F.hasFnAttribute("use-sample-profile");
-}
-
ErrorOr<uint64_t> SampleProfileLoader::getInstWeight(const Instruction &Inst) {
if (FunctionSamples::ProfileIsProbeBased)
return getProbeWeight(Inst);
@@ -2262,535 +2127,6 @@ bool SampleProfileLoader::rejectHighStalenessProfile(
return false;
}
-void SampleProfileMatcher::findIRAnchors(
- const Function &F, std::map<LineLocation, StringRef> &IRAnchors) {
- // For inlined code, recover the original callsite and callee by finding the
- // top-level inline frame. e.g. For frame stack "main:1 @ foo:2 @ bar:3", the
- // top-level frame is "main:1", the callsite is "1" and the callee is "foo".
- auto FindTopLevelInlinedCallsite = [](const DILocation *DIL) {
- assert((DIL && DIL->getInlinedAt()) && "No inlined callsite");
- const DILocation *PrevDIL = nullptr;
- do {
- PrevDIL = DIL;
- DIL = DIL->getInlinedAt();
- } while (DIL->getInlinedAt());
-
- LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
- StringRef CalleeName = PrevDIL->getSubprogramLinkageName();
- return std::make_pair(Callsite, CalleeName);
- };
-
- auto GetCanonicalCalleeName = [](const CallBase *CB) {
- StringRef CalleeName = UnknownIndirectCallee;
- if (Function *Callee = CB->getCalledFunction())
- CalleeName = FunctionSamples::getCanonicalFnName(Callee->getName());
- return CalleeName;
- };
-
- // Extract profile matching anchors in the IR.
- for (auto &BB : F) {
- for (auto &I : BB) {
- DILocation *DIL = I.getDebugLoc();
- if (!DIL)
- continue;
-
- if (FunctionSamples::ProfileIsProbeBased) {
- if (auto Probe = extractProbe(I)) {
- // Flatten inlined IR for the matching.
- if (DIL->getInlinedAt()) {
- IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
- } else {
- // Use empty StringRef for basic block probe.
- StringRef CalleeName;
- if (const auto *CB = dyn_cast<CallBase>(&I)) {
- // Skip the probe inst whose callee name is "llvm.pseudoprobe".
- if (!isa<IntrinsicInst>(&I))
- CalleeName = GetCanonicalCalleeName(CB);
- }
- IRAnchors.emplace(LineLocation(Probe->Id, 0), CalleeName);
- }
- }
- } else {
- // TODO: For line-number based profile(AutoFDO), currently only support
- // find callsite anchors. In future, we need to parse all the non-call
- // instructions to extract the line locations for profile matching.
- if (!isa<CallBase>(&I) || isa<IntrinsicInst>(&I))
- continue;
-
- if (DIL->getInlinedAt()) {
- IRAnchors.emplace(FindTopLevelInlinedCallsite(DIL));
- } else {
- LineLocation Callsite = FunctionSamples::getCallSiteIdentifier(DIL);
- StringRef CalleeName = GetCanonicalCalleeName(dyn_cast<CallBase>(&I));
- IRAnchors.emplace(Callsite, CalleeName);
- }
- }
- }
- }
-}
-
-void SampleProfileMatcher::findProfileAnchors(
- const FunctionSamples &FS,
- std::map<LineLocation, std::unordered_set<FunctionId>> &ProfileAnchors) {
- auto isInvalidLineOffset = [](uint32_t LineOffset) {
- return LineOffset & 0x8000;
- };
-
- for (const auto &I : FS.getBodySamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
- for (const auto &I : I.second.getCallTargets()) {
- auto Ret = ProfileAnchors.try_emplace(Loc,
- std::unordered_set<FunctionId>());
- Ret.first->second.insert(I.first);
- }
- }
-
- for (const auto &I : FS.getCallsiteSamples()) {
- const LineLocation &Loc = I.first;
- if (isInvalidLineOffset(Loc.LineOffset))
- continue;
- const auto &CalleeMap = I.second;
- for (const auto &I : CalleeMap) {
- auto Ret = ProfileAnchors.try_emplace(Loc,
- std::unordered_set<FunctionId>());
- Ret.first->second.insert(I.first);
- }
- }
-}
-
-// Call target name anchor based profile fuzzy matching.
-// Input:
-// For IR locations, the anchor is the callee name of direct callsite; For
-// profile locations, it's the call target name for BodySamples or inlinee's
-// profile name for CallsiteSamples.
-// Matching heuristic:
-// First match all the anchors in lexical order, then split the non-anchor
-// locations between the two anchors evenly, first half are matched based on the
-// start anchor, second half are matched based on the end anchor.
-// For example, given:
-// IR locations: [1, 2(foo), 3, 5, 6(bar), 7]
-// Profile locations: [1, 2, 3(foo), 4, 7, 8(bar), 9]
-// The matching gives:
-// [1, 2(foo), 3, 5, 6(bar), 7]
-// | | | | | |
-// [1, 2, 3(foo), 4, 7, 8(bar), 9]
-// The output mapping: [2->3, 3->4, 5->7, 6->8, 7->9].
-void SampleProfileMatcher::runStaleProfileMatching(
- const Function &F, const std::map<LineLocation, StringRef> &IRAnchors,
- const std::map<LineLocation, std::unordered_set<FunctionId>>
- &ProfileAnchors,
- LocT...
[truncated]
``````````
</details>
https://github.com/llvm/llvm-project/pull/86988
More information about the llvm-commits mailing list