[llvm] [SampleFDO] Read call-graph matching recovered top-level function profile (PR #101053)
Lei Wang via llvm-commits
llvm-commits at lists.llvm.org
Tue Aug 27 12:02:36 PDT 2024
https://github.com/wlei-llvm updated https://github.com/llvm/llvm-project/pull/101053
>From 070702c9be2fb437b0765532c03e98c642951906 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 29 Jul 2024 10:17:46 -0700
Subject: [PATCH 1/8] [SampleFDO] Read top-level functions recovered by
call-graph matching
---
.../llvm/ProfileData/SampleProfReader.h | 47 ++++
.../Transforms/IPO/SampleProfileMatcher.h | 1 +
llvm/lib/ProfileData/SampleProfReader.cpp | 224 +++++++++------
.../Transforms/IPO/SampleProfileMatcher.cpp | 64 ++++-
...seudo-probe-stale-profile-toplev-func.prof | 23 ++
.../pseudo-probe-stale-profile-toplev-func.ll | 258 ++++++++++++++++++
6 files changed, 521 insertions(+), 96 deletions(-)
create mode 100644 llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index f4bdc6525308d2..b124233a02d11c 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -392,6 +392,11 @@ class SampleProfileReader {
/// which doesn't support loading function profiles on demand.
virtual bool collectFuncsFromModule() { return false; }
+  /// Read the profiles of the functions named in \p FuncsToUse on demand and
+  /// store them into \p Profiles.
+  ///
+  /// This default implementation reads nothing and always returns
+  /// sampleprof_error::not_implemented; a reader that can load profiles after
+  /// the initial read overrides it.
+  virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+                                       SampleProfileMap &Profiles) {
+    return sampleprof_error::not_implemented;
+  }
+
/// Print all the profiles on stream \p OS.
void dump(raw_ostream &OS = dbgs());
@@ -413,6 +418,16 @@ class SampleProfileReader {
if (It != Profiles.end())
return &It->second;
+ if (FuncNameToProfNameMap && !FuncNameToProfNameMap->empty()) {
+ auto R = FuncNameToProfNameMap->find(FunctionId(Fname));
+ if (R != FuncNameToProfNameMap->end()) {
+ Fname = R->second.stringRef();
+ auto It = Profiles.find(FunctionId(Fname));
+ if (It != Profiles.end())
+ return &It->second;
+ }
+ }
+
if (Remapper) {
if (auto NameInProfile = Remapper->lookUpNameInProfile(Fname)) {
auto It = Profiles.find(FunctionId(*NameInProfile));
@@ -494,6 +509,11 @@ class SampleProfileReader {
void setModule(const Module *Mod) { M = Mod; }
+ /// Register the function-name-to-profile-name map that profile look-up
+ /// consults. Only the pointer \p FPMap is stored; the map is not copied.
+ // NOTE(review): presumably the caller keeps the map alive for as long as the
+ // reader uses it -- confirm against the owner of the map.
+ void setFuncNameToProfNameMap(
+ HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
+ FuncNameToProfNameMap = FPMap;
+ }
+
protected:
/// Map every function to its associated profile.
///
@@ -522,6 +542,21 @@ class SampleProfileReader {
std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
+ // A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
+ // which maps the function name to the matched profile name. This is used
+ // for sample loader to look up profile using the new name.
+ HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+ *FuncNameToProfNameMap = nullptr;
+
+ // A map from a function's context hash to the [begin, end) byte range of its
+ // entry in the function metadata section. It is filled while that section is
+ // read so the metadata of a function can be re-read on demand.
+ std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
+ FContextToMetaDataSecRange;
+
+ // The (Data, End) pointer pair captured when the SecLBRProfile section is
+ // read. On-demand reads restore the reader's cursor from it.
+ std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+
+ // Whether the function metadata section carries the SecFlagHasAttribute
+ // flag. It is remembered so on-demand metadata reads can pass it along.
+ bool ProfileHasAttribute = false;
+
/// \brief Whether samples are collected based on pseudo probes.
bool ProfileIsProbeBased = false;
@@ -621,6 +656,8 @@ class SampleProfileReaderBinary : public SampleProfileReader {
/// Read the next function profile instance.
std::error_code readFuncProfile(const uint8_t *Start);
+ std::error_code readFuncProfile(const uint8_t *Start,
+ SampleProfileMap &Profiles);
/// Read the contents of the given profile instance.
std::error_code readProfile(FunctionSamples &FProfile);
@@ -720,11 +757,15 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
std::error_code readSecHdrTableEntry(uint64_t Idx);
std::error_code readSecHdrTable();
+ std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
+ SampleProfileMap &Profiles);
std::error_code readFuncMetadata(bool ProfileHasAttribute);
std::error_code readFuncMetadata(bool ProfileHasAttribute,
FunctionSamples *FProfile);
std::error_code readFuncOffsetTable();
std::error_code readFuncProfiles();
+ std::error_code readFuncProfiles(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles);
std::error_code readNameTableSec(bool IsMD5, bool FixedLengthMD5);
std::error_code readCSNameTableSec();
std::error_code readProfileSymbolList();
@@ -776,6 +817,12 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
/// the reader has been given a module.
bool collectFuncsFromModule() override;
+ /// Read the profiles on-demand for the given functions. This is used after
+ /// stale call graph matching finds new functions whose profiles aren't read
+ /// at the beginning and we need to re-read the profiles.
+ std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) override;
+
std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
return std::move(ProfSymList);
};
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index a67f158433391c..67edea42e2fe14 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,6 +198,7 @@ class SampleProfileMatcher {
// function and all inlinees.
void countMismatchedCallsiteSamples(const FunctionSamples &FS);
void computeAndReportProfileStaleness();
+ void UpdateSampleLoaderWithRecoveredProfiles();
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4752465fc072e0..f555da866f36eb 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -653,7 +653,8 @@ SampleProfileReaderBinary::readProfile(FunctionSamples &FProfile) {
}
std::error_code
-SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start,
+ SampleProfileMap &Profiles) {
Data = Start;
auto NumHeadSamples = readNumber<uint64_t>();
if (std::error_code EC = NumHeadSamples.getError())
@@ -678,6 +679,11 @@ SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
return sampleprof_error::success;
}
+// Convenience overload: read the function profile located at \p Start and
+// store it into the reader's own Profiles map.
+std::error_code
+SampleProfileReaderBinary::readFuncProfile(const uint8_t *Start) {
+ return readFuncProfile(Start, Profiles);
+}
+
std::error_code SampleProfileReaderBinary::readImpl() {
ProfileIsFS = ProfileIsFSDisciminator;
FunctionSamples::ProfileIsFS = ProfileIsFS;
@@ -725,6 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
break;
}
case SecLBRProfile:
+ LBRProfileSecRange = std::make_pair(Data, End);
if (std::error_code EC = readFuncProfiles())
return EC;
break;
@@ -745,9 +752,9 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
ProfileIsProbeBased =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagIsProbeBased);
FunctionSamples::ProfileIsProbeBased = ProfileIsProbeBased;
- bool HasAttribute =
+ ProfileHasAttribute =
hasSecFlag(Entry, SecFuncMetadataFlags::SecFlagHasAttribute);
- if (std::error_code EC = readFuncMetadata(HasAttribute))
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute))
return EC;
break;
}
@@ -791,6 +798,19 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
return false;
}
+// Load the profiles of the functions named in \p FuncsToUse into \p Profiles
+// after the initial read, then re-read the function metadata for the entries
+// of \p Profiles. Returns the first error reported by either step, or
+// sampleprof_error::success.
+// NOTE(review): LBRProfileSecRange is only assigned when a SecLBRProfile
+// section is read; presumably this is never called before that -- confirm.
+std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
+ const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+ // Rewind the cursor to the function-profile section range that was saved
+ // while reading SecLBRProfile.
+ Data = LBRProfileSecRange.first;
+ End = LBRProfileSecRange.second;
+ if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+ return EC;
+ // NOTE(review): readFuncMetadataOnDemand assigns Data and End itself before
+ // every metadata read, so this assignment looks redundant -- confirm intent.
+ End = Data;
+
+ if (std::error_code EC =
+ readFuncMetadataOnDemand(ProfileHasAttribute, Profiles))
+ return EC;
+ return sampleprof_error::success;
+}
bool SampleProfileReaderExtBinaryBase::collectFuncsFromModule() {
if (!M)
@@ -838,6 +858,95 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncOffsetTable() {
return sampleprof_error::success;
}
+/// Read the profiles of the functions named in \p FuncsToUse and store them
+/// into \p Profiles.
+///
+/// \p Data must point at the start of the function-profile section on entry:
+/// every offset taken from FuncOffsetList / FuncOffsetTable is applied
+/// relative to that position.
+///
+/// \returns the first error reported while reading a function profile, or
+/// sampleprof_error::success when every requested read succeeded.
+std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
+    const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+  const uint8_t *Start = Data;
+
+  // Register every requested name with the remapper before the
+  // Remapper->exist() queries below.
+  if (Remapper) {
+    for (auto Name : FuncsToUse) {
+      Remapper->insert(Name);
+    }
+  }
+
+  if (ProfileIsCS) {
+    assert(useFuncOffsetList());
+    DenseSet<uint64_t> FuncGuidsToUse;
+    if (useMD5()) {
+      for (auto Name : FuncsToUse)
+        FuncGuidsToUse.insert(Function::getGUID(Name));
+    }
+
+    // For each function in current module, load all context profiles for
+    // the function as well as their callee contexts which can help profile
+    // guided importing for ThinLTO. This can be achieved by walking
+    // through an ordered context container, where contexts are laid out
+    // as if they were walked in preorder of a context trie. While
+    // traversing the trie, a link to the highest common ancestor node is
+    // kept so that all of its descendants will be loaded.
+    const SampleContext *CommonContext = nullptr;
+    for (const auto &NameOffset : FuncOffsetList) {
+      const auto &FContext = NameOffset.first;
+      FunctionId FName = FContext.getFunction();
+      StringRef FNameString;
+      if (!useMD5())
+        FNameString = FName.stringRef();
+
+      // For function in the current module, keep its farthest ancestor
+      // context. This can be used to load itself and its child and
+      // sibling contexts.
+      if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
+          (!useMD5() && (FuncsToUse.count(FNameString) ||
+                         (Remapper && Remapper->exist(FNameString))))) {
+        if (!CommonContext || !CommonContext->isPrefixOf(FContext))
+          CommonContext = &FContext;
+      }
+
+      if (CommonContext == &FContext ||
+          (CommonContext && CommonContext->isPrefixOf(FContext))) {
+        // Load profile for the current context which originated from
+        // the common ancestor. The profile must go into the caller's
+        // \p Profiles map, exactly like the non-CS branches below; the
+        // one-argument overload would write into the reader's own map.
+        const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+        if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+          return EC;
+      }
+    }
+  } else if (useMD5()) {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto GUID = MD5Hash(Name);
+      auto iter = FuncOffsetTable.find(GUID);
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else if (Remapper) {
+    assert(useFuncOffsetList());
+    // Iterate by reference; each entry holds a SampleContext and an offset.
+    for (const auto &NameOffset : FuncOffsetList) {
+      SampleContext FContext(NameOffset.first);
+      auto FuncName = FContext.getFunction();
+      StringRef FuncNameStr = FuncName.stringRef();
+      if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
+        continue;
+      const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  } else {
+    assert(!useFuncOffsetList());
+    for (auto Name : FuncsToUse) {
+      auto iter = FuncOffsetTable.find(MD5Hash(Name));
+      if (iter == FuncOffsetTable.end())
+        continue;
+      const uint8_t *FuncProfileAddr = Start + iter->second;
+      if (std::error_code EC = readFuncProfile(FuncProfileAddr, Profiles))
+        return EC;
+    }
+  }
+
+  // Every path that did not bail out with an error must report success;
+  // flowing off the end of a value-returning function is undefined behavior.
+  return sampleprof_error::success;
+}
+
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// Collect functions used by current module if the Reader has been
// given a module.
@@ -849,7 +958,6 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
// When LoadFuncsToBeUsed is false, we are using LLVM tool, need to read all
// profiles.
- const uint8_t *Start = Data;
if (!LoadFuncsToBeUsed) {
while (Data < End) {
if (std::error_code EC = readFuncProfile(Data))
@@ -858,88 +966,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
assert(Data == End && "More data is read than expected");
} else {
// Load function profiles on demand.
- if (Remapper) {
- for (auto Name : FuncsToUse) {
- Remapper->insert(Name);
- }
- }
-
- if (ProfileIsCS) {
- assert(useFuncOffsetList());
- DenseSet<uint64_t> FuncGuidsToUse;
- if (useMD5()) {
- for (auto Name : FuncsToUse)
- FuncGuidsToUse.insert(Function::getGUID(Name));
- }
-
- // For each function in current module, load all context profiles for
- // the function as well as their callee contexts which can help profile
- // guided importing for ThinLTO. This can be achieved by walking
- // through an ordered context container, where contexts are laid out
- // as if they were walked in preorder of a context trie. While
- // traversing the trie, a link to the highest common ancestor node is
- // kept so that all of its decendants will be loaded.
- const SampleContext *CommonContext = nullptr;
- for (const auto &NameOffset : FuncOffsetList) {
- const auto &FContext = NameOffset.first;
- FunctionId FName = FContext.getFunction();
- StringRef FNameString;
- if (!useMD5())
- FNameString = FName.stringRef();
-
- // For function in the current module, keep its farthest ancestor
- // context. This can be used to load itself and its child and
- // sibling contexts.
- if ((useMD5() && FuncGuidsToUse.count(FName.getHashCode())) ||
- (!useMD5() && (FuncsToUse.count(FNameString) ||
- (Remapper && Remapper->exist(FNameString))))) {
- if (!CommonContext || !CommonContext->isPrefixOf(FContext))
- CommonContext = &FContext;
- }
-
- if (CommonContext == &FContext ||
- (CommonContext && CommonContext->isPrefixOf(FContext))) {
- // Load profile for the current context which originated from
- // the common ancestor.
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- }
- } else if (useMD5()) {
- assert(!useFuncOffsetList());
- for (auto Name : FuncsToUse) {
- auto GUID = MD5Hash(Name);
- auto iter = FuncOffsetTable.find(GUID);
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else if (Remapper) {
- assert(useFuncOffsetList());
- for (auto NameOffset : FuncOffsetList) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getFunction();
- StringRef FuncNameStr = FuncName.stringRef();
- if (!FuncsToUse.count(FuncNameStr) && !Remapper->exist(FuncNameStr))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else {
- assert(!useFuncOffsetList());
- for (auto Name : FuncsToUse) {
- auto iter = FuncOffsetTable.find(MD5Hash(Name));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- }
+ if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
+ return EC;
Data = End;
}
assert((CSProfileCount == 0 || CSProfileCount == Profiles.size()) &&
@@ -1245,6 +1273,27 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
return sampleprof_error::success;
}
+/// Re-read the function metadata of every profile in \p Profiles whose
+/// context hash has a byte range recorded in FContextToMetaDataSecRange.
+/// Profiles without a recorded range are left untouched.
+///
+/// \returns the first error reported by readFuncMetadata, or
+/// sampleprof_error::success.
+std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
+    bool ProfileHasAttribute, SampleProfileMap &Profiles) {
+  // With no recorded ranges there is nothing the loop could match.
+  if (!FContextToMetaDataSecRange.empty()) {
+    for (auto &Entry : Profiles) {
+      FunctionSamples &Samples = Entry.second;
+      auto RangeIt =
+          FContextToMetaDataSecRange.find(Samples.getContext().getHashCode());
+      if (RangeIt != FContextToMetaDataSecRange.end()) {
+        // Point the reader's cursor at this function's metadata entry.
+        Data = RangeIt->second.first;
+        End = RangeIt->second.second;
+        if (std::error_code EC =
+                readFuncMetadata(ProfileHasAttribute, &Samples))
+          return EC;
+        assert(Data == End && "More data is read than expected");
+      }
+    }
+  }
+  return sampleprof_error::success;
+}
+
std::error_code
SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
while (Data < End) {
@@ -1257,8 +1306,11 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
if (It != Profiles.end())
FProfile = &It->second;
+ const uint8_t *Start = Data;
if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
return EC;
+
+ FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
}
assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 312672e56b0170..b9adc6a0631b80 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,6 +782,26 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
float Similarity = 0.0;
const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+ // Check whether the function is a top-level function. With the extended
+ // binary profile format, the sample reader initially loads only the profiles
+ // that are used, so the profile of an unused top-level function is not found
+ // here even when it would match. Read such a profile on demand and save it
+ // into FlattenedProfiles for future look-ups.
+ if (!FSFlattened) {
+ DenseSet<StringRef> TopLevelFunc;
+ TopLevelFunc.insert(ProfFunc.stringRef());
+ SampleProfileMap TopLevelProfile;
+ Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+ assert(TopLevelProfile.size() <= 1 &&
+ "More than one profile is found for top-level function");
+ if (!TopLevelProfile.empty()) {
+ LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
+ << " for call-graph matching\n");
+ auto &FS = TopLevelProfile.begin()->second;
+ FSFlattened =
+ &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
+ }
+ }
if (!FSFlattened)
return false;
// The check for similarity or checksum may not be reliable if the function is
@@ -863,6 +883,39 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
return Matched;
}
+/// Publish the call-graph matching results: record each matched IR function
+/// under its profile name in FuncNameToProfNameMap and SymbolMap, re-read the
+/// matched profiles through the reader, add them to the reader's profile map,
+/// and hand FuncNameToProfNameMap to the reader.
+void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
+  // Profile names that an IR function has been matched to.
+  DenseSet<StringRef> MatchedProfileNames;
+  for (auto &Match : FuncToProfileNameMap) {
+    auto IRFunc = Match.first;
+    const auto &ProfName = Match.second;
+    assert(IRFunc && "New function is null");
+
+    FunctionId IRFuncName(IRFunc->getName());
+    MatchedProfileNames.insert(ProfName.stringRef());
+    FuncNameToProfNameMap->emplace(IRFuncName, ProfName);
+
+    // Drop the entry keyed by the IR name before adding the one keyed by the
+    // profile name, so the function is not processed twice.
+    SymbolMap->erase(IRFuncName);
+    SymbolMap->emplace(ProfName, IRFunc);
+  }
+
+  // In extended binary format the reader only loads the top-level profiles of
+  // the functions in the new build, not those of the recovered functions from
+  // the old build, so read the latter explicitly.
+  // NOTE(review): the error code returned by readOnDemand is discarded here --
+  // confirm that is intended.
+  SampleProfileMap RereadProfiles;
+  Reader.readOnDemand(MatchedProfileNames, RereadProfiles);
+
+  auto &LoadedProfiles = Reader.getProfiles();
+  for (auto &Reread : RereadProfiles) {
+    auto &Samples = Reread.second;
+    LLVM_DEBUG(dbgs() << "Top-level function " << Samples.getFunction()
+                      << " is recovered and re-read by the sample reader.\n");
+    auto &Ctx = Samples.getContext();
+    assert(LoadedProfiles.find(Ctx) == LoadedProfiles.end() &&
+           "Top level profile is found for the unused profile");
+    LoadedProfiles.create(Ctx) = std::move(Samples);
+  }
+
+  Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
+}
+
void SampleProfileMatcher::runOnModule() {
ProfileConverter::flattenProfile(Reader.getProfiles(), FlattenedProfiles,
FunctionSamples::ProfileIsCS);
@@ -880,17 +933,8 @@ void SampleProfileMatcher::runOnModule() {
runOnFunction(*F);
}
- // Update the data in SampleLoader.
if (SalvageUnusedProfile)
- for (auto &I : FuncToProfileNameMap) {
- assert(I.first && "New function is null");
- FunctionId FuncName(I.first->getName());
- FuncNameToProfNameMap->emplace(FuncName, I.second);
- // We need to remove the old entry to avoid duplicating the function
- // processing.
- SymbolMap->erase(FuncName);
- SymbolMap->emplace(I.second, I.first);
- }
+ UpdateSampleLoaderWithRecoveredProfiles();
if (SalvageStaleProfile)
distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
new file mode 100644
index 00000000000000..a1bba5fc88de0e
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
@@ -0,0 +1,23 @@
+foo:2724522:51
+ 1: 51
+ 2: 452674
+ 3: 47
+ 4: 497875
+ 6: 415959
+ 10: 452623
+ 11: 452687 bar:452687
+ 12: 452623
+ 13: 47
+ !CFGChecksum: 281718392333557
+bar:452687:452687
+ 1: 452687
+ !CFGChecksum: 4294967295
+main:204:0
+ 1: 0
+ 2: 51
+ 3: 0
+ 4: 51
+ 5: 51 foo:51
+ 6: 51
+ 7: 0
+ !CFGChecksum: 281582264815352
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
new file mode 100644
index 00000000000000..f1f2506e08d2a5
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -0,0 +1,258 @@
+; REQUIRES: x86_64-linux
+; REQUIRES: asserts
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+
+; CHECK-TEXT: Run stale profile matching for main
+; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching
+; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-TEXT: Function:foo_rename matches profile:foo
+; CHECK-TEXT: Run stale profile matching for foo_rename
+; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-TEXT: Processing Function main
+; CHECK-TEXT: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-TEXT: Processing Function foo_rename
+; CHECK-TEXT: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+; CHECK-EXTBIN: Run stale profile matching for main
+; CHECK-EXTBIN: Read top-level function foo for call-graph matching
+; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match.
+; CHECK-EXTBIN: Function:foo_rename matches profile:foo
+; CHECK-EXTBIN: Run stale profile matching for foo_rename
+; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader.
+; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
+
+; CHECK-EXTBIN: Processing Function main
+; CHECK-EXTBIN: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51
+; CHECK-EXTBIN: Processing Function foo_rename
+; CHECK-EXTBIN: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+
+
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
+entry:
+ #dbg_value(i32 %x, !22, !DIExpression(), !23)
+ call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
+ %add = add nsw i32 %x, 1, !dbg !25
+ ret i32 %add, !dbg !26
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @foo_rename() #0 !dbg !27 {
+entry:
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33
+ #dbg_value(i32 0, !31, !DIExpression(), !34)
+ br label %for.cond, !dbg !35
+
+for.cond: ; preds = %if.end7, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36
+ #dbg_value(i32 %i.0, !31, !DIExpression(), !34)
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37
+ %cmp = icmp slt i32 %i.0, 10000, !dbg !39
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40
+
+for.cond.cleanup: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42
+ ret void, !dbg !42
+
+for.body: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43
+ %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46
+ %rem = srem i32 %0, 3, !dbg !50
+ %cmp1 = icmp eq i32 %rem, 1, !dbg !51
+ br i1 %cmp1, label %if.then, label %if.else, !dbg !52
+
+if.then: ; preds = %for.body
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53
+ %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46
+ %add = add nsw i32 %1, 100, !dbg !53
+ store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46
+ br label %if.end7, !dbg !54
+
+if.else: ; preds = %for.body
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55
+ %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46
+ %rem2 = srem i32 %2, 2, !dbg !57
+ %cmp3 = icmp eq i32 %rem2, 1, !dbg !58
+ br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59
+
+if.then4: ; preds = %if.else
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60
+ %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46
+ %add5 = add nsw i32 %3, 10, !dbg !60
+ store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46
+ br label %if.end7, !dbg !61
+
+if.else6: ; preds = %if.else
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62
+ %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46
+ %inc = add nsw i32 %4, 1, !dbg !62
+ store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46
+ br label %if.end7
+
+if.end7: ; preds = %if.then4, %if.else6, %if.then
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63
+ %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46
+ %call = call i32 @bar(i32 noundef %5), !dbg !64
+ %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46
+ %add8 = add nsw i32 %6, %call, !dbg !66
+ store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67
+ %inc9 = add nsw i32 %i.0, 1, !dbg !67
+ #dbg_value(i32 %inc9, !31, !DIExpression(), !34)
+ br label %for.cond, !dbg !68, !llvm.loop !69
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #2 !dbg !72 {
+entry:
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78
+ #dbg_value(i32 0, !76, !DIExpression(), !79)
+ br label %for.cond, !dbg !80
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81
+ #dbg_value(i32 %i.0, !76, !DIExpression(), !79)
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82
+ %cmp = icmp slt i32 %i.0, 100000, !dbg !84
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85
+
+for.cond.cleanup: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87
+ ret i32 0, !dbg !87
+
+for.body: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88
+ call void @foo_rename(), !dbg !90
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92
+ %inc = add nsw i32 %i.0, 1, !dbg !92
+ #dbg_value(i32 %inc, !76, !DIExpression(), !79)
+ br label %for.cond, !dbg !93, !llvm.loop !94
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 20.0.0"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
+!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!6, !6}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
+!23 = !DILocation(line: 0, scope: !18)
+!24 = !DILocation(line: 4, column: 10, scope: !18)
+!25 = !DILocation(line: 4, column: 12, scope: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !{!31}
+!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6)
+!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3)
+!33 = !DILocation(line: 8, column: 12, scope: !32)
+!34 = !DILocation(line: 0, scope: !32)
+!35 = !DILocation(line: 8, column: 8, scope: !32)
+!36 = !DILocation(line: 8, scope: !32)
+!37 = !DILocation(line: 8, column: 19, scope: !38)
+!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3)
+!39 = !DILocation(line: 8, column: 21, scope: !38)
+!40 = !DILocation(line: 8, column: 3, scope: !32)
+!41 = !DILocation(line: 0, scope: !27)
+!42 = !DILocation(line: 17, column: 1, scope: !27)
+!43 = !DILocation(line: 9, column: 10, scope: !44)
+!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10)
+!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39)
+!46 = !{!47, !47, i64 0}
+!47 = !{!"int", !48, i64 0}
+!48 = !{!"omnipotent char", !49, i64 0}
+!49 = !{!"Simple C/C++ TBAA"}
+!50 = !DILocation(line: 9, column: 12, scope: !44)
+!51 = !DILocation(line: 9, column: 16, scope: !44)
+!52 = !DILocation(line: 9, column: 10, scope: !45)
+!53 = !DILocation(line: 10, column: 10, scope: !44)
+!54 = !DILocation(line: 10, column: 8, scope: !44)
+!55 = !DILocation(line: 11, column: 16, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16)
+!57 = !DILocation(line: 11, column: 18, scope: !56)
+!58 = !DILocation(line: 11, column: 22, scope: !56)
+!59 = !DILocation(line: 11, column: 16, scope: !44)
+!60 = !DILocation(line: 12, column: 10, scope: !56)
+!61 = !DILocation(line: 12, column: 8, scope: !56)
+!62 = !DILocation(line: 14, column: 9, scope: !56)
+!63 = !DILocation(line: 15, column: 15, scope: !45)
+!64 = !DILocation(line: 15, column: 11, scope: !65)
+!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079)
+!66 = !DILocation(line: 15, column: 8, scope: !45)
+!67 = !DILocation(line: 8, column: 35, scope: !38)
+!68 = !DILocation(line: 8, column: 3, scope: !38)
+!69 = distinct !{!69, !40, !70, !71}
+!70 = !DILocation(line: 16, column: 3, scope: !32)
+!71 = !{!"llvm.loop.mustprogress"}
+!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75)
+!73 = !DISubroutineType(types: !74)
+!74 = !{!6}
+!75 = !{!76}
+!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6)
+!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3)
+!78 = !DILocation(line: 20, column: 12, scope: !77)
+!79 = !DILocation(line: 0, scope: !77)
+!80 = !DILocation(line: 20, column: 8, scope: !77)
+!81 = !DILocation(line: 20, scope: !77)
+!82 = !DILocation(line: 20, column: 19, scope: !83)
+!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3)
+!84 = !DILocation(line: 20, column: 21, scope: !83)
+!85 = !DILocation(line: 20, column: 3, scope: !77)
+!86 = !DILocation(line: 0, scope: !72)
+!87 = !DILocation(line: 23, column: 1, scope: !72)
+!88 = !DILocation(line: 21, column: 7, scope: !89)
+!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40)
+!90 = !DILocation(line: 21, column: 7, scope: !91)
+!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031)
+!92 = !DILocation(line: 20, column: 36, scope: !83)
+!93 = !DILocation(line: 20, column: 3, scope: !83)
+!94 = distinct !{!94, !85, !95, !71}
+!95 = !DILocation(line: 22, column: 3, scope: !77)
>From 9a420e317601ab13d823c7b4be8fe93a5aaeee42 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 13 Aug 2024 00:17:42 -0700
Subject: [PATCH 2/8] addressing comments
---
.../llvm/ProfileData/SampleProfReader.h | 27 +-
.../Transforms/IPO/SampleProfileMatcher.h | 2 +-
llvm/lib/ProfileData/SampleProfReader.cpp | 24 +-
.../Transforms/IPO/SampleProfileMatcher.cpp | 43 ++--
...seudo-probe-stale-profile-toplev-func.prof | 2 +-
...eudo-probe-stale-profile-toplev-func-cp.ll | 147 +++++++++++
.../pseudo-probe-stale-profile-toplev-func.ll | 233 ++++++------------
7 files changed, 265 insertions(+), 213 deletions(-)
create mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index b124233a02d11c..00e4e7096ab7b4 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,6 +380,13 @@ class SampleProfileReader {
return sampleprof_error::success;
}
+ /// Read sample profiles for the given functions. Currently it's only used
+ /// for extended binary format to load the profiles on-demand.
+ virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) {
+ return sampleprof_error::not_implemented;
+ };
+
/// The implementaion to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
@@ -392,11 +399,6 @@ class SampleProfileReader {
/// which doesn't support loading function profiles on demand.
virtual bool collectFuncsFromModule() { return false; }
- virtual std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
- SampleProfileMap &Profiles) {
- return sampleprof_error::not_implemented;
- };
-
/// Print all the profiles on stream \p OS.
void dump(raw_ostream &OS = dbgs());
@@ -551,7 +553,7 @@ class SampleProfileReader {
// A map from a function's context hash to its meta data section range, used
// for on-demand read function profile metadata.
std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
- FContextToMetaDataSecRange;
+ FuncMetadataIndex;
std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
@@ -757,8 +759,8 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
std::error_code readSecHdrTableEntry(uint64_t Idx);
std::error_code readSecHdrTable();
- std::error_code readFuncMetadataOnDemand(bool ProfileHasAttribute,
- SampleProfileMap &Profiles);
+ std::error_code readFuncMetadata(bool ProfileHasAttribute,
+ SampleProfileMap &Profiles);
std::error_code readFuncMetadata(bool ProfileHasAttribute);
std::error_code readFuncMetadata(bool ProfileHasAttribute,
FunctionSamples *FProfile);
@@ -818,10 +820,11 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
bool collectFuncsFromModule() override;
/// Read the profiles on-demand for the given functions. This is used after
- /// stale call graph matching finds new functions whose profiles aren't read
- /// at the beginning and we need to re-read the profiles.
- std::error_code readOnDemand(const DenseSet<StringRef> &FuncsToUse,
- SampleProfileMap &Profiles) override;
+ /// stale call graph matching finds new functions whose profiles aren't loaded
+ /// at the beginning and we need to load the profiles explicitly for
+ /// potential matching.
+ std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) override;
std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
return std::move(ProfSymList);
diff --git a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
index 67edea42e2fe14..076d91adfd1dea 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleProfileMatcher.h
@@ -198,7 +198,7 @@ class SampleProfileMatcher {
// function and all inlinees.
void countMismatchedCallsiteSamples(const FunctionSamples &FS);
void computeAndReportProfileStaleness();
- void UpdateSampleLoaderWithRecoveredProfiles();
+ void UpdateWithSalvagedProfiles();
LocToLocMap &getIRToProfileLocationMap(const Function &F) {
auto Ret = FuncMappings.try_emplace(
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index f555da866f36eb..4c0a45bfb47cf8 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -798,16 +798,16 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
return false;
}
-std::error_code SampleProfileReaderExtBinaryBase::readOnDemand(
- const DenseSet<StringRef> &FuncsToUse, SampleProfileMap &Profiles) {
+std::error_code
+SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) {
Data = LBRProfileSecRange.first;
End = LBRProfileSecRange.second;
if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
return EC;
End = Data;
- if (std::error_code EC =
- readFuncMetadataOnDemand(ProfileHasAttribute, Profiles))
+ if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, Profiles))
return EC;
return sampleprof_error::success;
}
@@ -945,6 +945,8 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles(
return EC;
}
}
+
+ return sampleprof_error::success;
}
std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
@@ -1273,16 +1275,16 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
return sampleprof_error::success;
}
-std::error_code SampleProfileReaderExtBinaryBase::readFuncMetadataOnDemand(
- bool ProfileHasAttribute, SampleProfileMap &Profiles) {
- if (FContextToMetaDataSecRange.empty())
+std::error_code
+SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute,
+ SampleProfileMap &Profiles) {
+ if (FuncMetadataIndex.empty())
return sampleprof_error::success;
for (auto &I : Profiles) {
FunctionSamples *FProfile = &I.second;
- auto R =
- FContextToMetaDataSecRange.find(FProfile->getContext().getHashCode());
- if (R == FContextToMetaDataSecRange.end())
+ auto R = FuncMetadataIndex.find(FProfile->getContext().getHashCode());
+ if (R == FuncMetadataIndex.end())
continue;
Data = R->second.first;
@@ -1310,7 +1312,7 @@ SampleProfileReaderExtBinaryBase::readFuncMetadata(bool ProfileHasAttribute) {
if (std::error_code EC = readFuncMetadata(ProfileHasAttribute, FProfile))
return EC;
- FContextToMetaDataSecRange[FContext.getHashCode()] = {Start, Data};
+ FuncMetadataIndex[FContext.getHashCode()] = {Start, Data};
}
assert(Data == End && "More data is read than expected");
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index b9adc6a0631b80..574a157c636835 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -782,16 +782,15 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
float Similarity = 0.0;
const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
- // Check if the function is top-level function. For extended profile format,
- // if a function profile is unused and it's top-level, even if the profile is
- // matched, it's not found in the profile. This is because sample reader only
- // read the used profile at the beginning, we need to read the profile
- // on-demand. Also save it into the FlattenedProfiles for future look-up.
+ // With extbinary profile format, initial profile loading only reads profile
+ // based on current function names in the module.
+ // However, if a function is renamed, sample loader fails to load its original
+ // profile(which has a different name), we will miss this case. To address
+ // this, we load the top-level profile candidate explicitly for the matching.
if (!FSFlattened) {
- DenseSet<StringRef> TopLevelFunc;
- TopLevelFunc.insert(ProfFunc.stringRef());
+ DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
SampleProfileMap TopLevelProfile;
- Reader.readOnDemand(TopLevelFunc, TopLevelProfile);
+ Reader.read(TopLevelFunc, TopLevelProfile);
assert(TopLevelProfile.size() <= 1 &&
"More than one profile is found for top-level function");
if (!TopLevelProfile.empty()) {
@@ -883,13 +882,13 @@ bool SampleProfileMatcher::functionMatchesProfile(Function &IRFunc,
return Matched;
}
-void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
- DenseSet<StringRef> RecoveredFuncs;
+void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
+ DenseSet<StringRef> ProfileSalvagedFuncs;
// Update FuncNameToProfNameMap and SymbolMap.
for (auto &I : FuncToProfileNameMap) {
assert(I.first && "New function is null");
FunctionId FuncName(I.first->getName());
- RecoveredFuncs.insert(I.second.stringRef());
+ ProfileSalvagedFuncs.insert(I.second.stringRef());
FuncNameToProfNameMap->emplace(FuncName, I.second);
// We need to remove the old entry to avoid duplicating the function
@@ -898,21 +897,11 @@ void SampleProfileMatcher::UpdateSampleLoaderWithRecoveredProfiles() {
SymbolMap->emplace(I.second, I.first);
}
- // Read the top-level profiles for the recovered function profiles. This is
- // because in extended binary format it only loads the top-level profile for
- // the functions in the new build but not the recovered functions which is
- // from the old build.
- SampleProfileMap TopLevelRecoveredProfiles;
- Reader.readOnDemand(RecoveredFuncs, TopLevelRecoveredProfiles);
- auto &Profiles = Reader.getProfiles();
- for (auto &I : TopLevelRecoveredProfiles) {
- LLVM_DEBUG(dbgs() << "Top-level function " << I.second.getFunction()
- << " is recovered and re-read by the sample reader.\n");
- auto &Ctx = I.second.getContext();
- assert(Profiles.find(Ctx) == Profiles.end() &&
- "Top level profile is found for the unused profile");
- Profiles.create(Ctx) = std::move(I.second);
- }
+ // With extbinary profile format, initial profile loading only reads profile
+ // based on current function names in the module, so we need to load top-level
+ // profiles for functions with a different profile name explicitly after
+ // function-profile name map is established with stale profile matching.
+ Reader.read(ProfileSalvagedFuncs, Reader.getProfiles());
Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
}
@@ -934,7 +923,7 @@ void SampleProfileMatcher::runOnModule() {
}
if (SalvageUnusedProfile)
- UpdateSampleLoaderWithRecoveredProfiles();
+ UpdateWithSalvagedProfiles();
if (SalvageStaleProfile)
distributeIRToProfileLocationMap();
diff --git a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
index a1bba5fc88de0e..86c8cb3285afe2 100644
--- a/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
+++ b/llvm/test/Transforms/SampleProfile/Inputs/pseudo-probe-stale-profile-toplev-func.prof
@@ -8,7 +8,7 @@ foo:2724522:51
11: 452687 bar:452687
12: 452623
13: 47
- !CFGChecksum: 281718392333557
+ !CFGChecksum: 281479271677951
bar:452687:452687
1: 452687
!CFGChecksum: 4294967295
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
new file mode 100644
index 00000000000000..750bf03fa2d939
--- /dev/null
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
@@ -0,0 +1,147 @@
+; *** IR Dump Before SampleProfileLoaderPass on [module] ***
+; ModuleID = 'test_rename.c'
+source_filename = "test_rename.c"
+target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
+target triple = "x86_64-unknown-linux-gnu"
+
+ at x = dso_local global i32 0, align 4, !dbg !0
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
+entry:
+ #dbg_value(i32 %x, !22, !DIExpression(), !23)
+ call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
+ %add = add nsw i32 %x, 1, !dbg !25
+ ret i32 %add, !dbg !26
+}
+
+; Function Attrs: noinline nounwind uwtable
+define dso_local void @foo_rename() #0 !dbg !27 {
+entry:
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
+ %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
+ %call = call i32 @bar(i32 noundef %0), !dbg !35
+ %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
+ %add = add nsw i32 %1, %call, !dbg !37
+ store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
+ ret void, !dbg !38
+}
+
+; Function Attrs: nounwind uwtable
+define dso_local i32 @main() #1 !dbg !39 {
+entry:
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
+ #dbg_value(i32 0, !43, !DIExpression(), !46)
+ br label %for.cond, !dbg !47
+
+for.cond: ; preds = %for.body, %entry
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
+ #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
+ %cmp = icmp slt i32 %i.0, 100000, !dbg !51
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
+
+for.cond.cleanup: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
+ ret i32 0, !dbg !54
+
+for.body: ; preds = %for.cond
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
+ call void @foo_rename(), !dbg !57
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
+ %inc = add nsw i32 %i.0, 1, !dbg !59
+ #dbg_value(i32 %inc, !43, !DIExpression(), !46)
+ br label %for.cond, !dbg !60, !llvm.loop !61
+}
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
+declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
+
+attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
+attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
+
+!llvm.dbg.cu = !{!2}
+!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
+!llvm.ident = !{!14}
+!llvm.pseudo_probe_desc = !{!15, !16, !17}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
+!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
+!4 = !{!0}
+!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
+!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!7 = !{i32 7, !"Dwarf Version", i32 5}
+!8 = !{i32 2, !"Debug Info Version", i32 3}
+!9 = !{i32 1, !"wchar_size", i32 4}
+!10 = !{i32 8, !"PIC Level", i32 2}
+!11 = !{i32 7, !"PIE Level", i32 2}
+!12 = !{i32 7, !"uwtable", i32 2}
+!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
+!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"}
+!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
+!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
+!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
+!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
+!19 = !DISubroutineType(types: !20)
+!20 = !{!6, !6}
+!21 = !{!22}
+!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
+!23 = !DILocation(line: 0, scope: !18)
+!24 = !DILocation(line: 4, column: 10, scope: !18)
+!25 = !DILocation(line: 4, column: 12, scope: !18)
+!26 = !DILocation(line: 4, column: 3, scope: !18)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
+!28 = !DISubroutineType(types: !29)
+!29 = !{null}
+!30 = !DILocation(line: 8, column: 15, scope: !27)
+!31 = !{!32, !32, i64 0}
+!32 = !{!"int", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !DILocation(line: 8, column: 11, scope: !36)
+!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
+!37 = !DILocation(line: 8, column: 8, scope: !27)
+!38 = !DILocation(line: 9, column: 1, scope: !27)
+!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
+!40 = !DISubroutineType(types: !41)
+!41 = !{!6}
+!42 = !{!43}
+!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
+!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
+!45 = !DILocation(line: 12, column: 12, scope: !44)
+!46 = !DILocation(line: 0, scope: !44)
+!47 = !DILocation(line: 12, column: 8, scope: !44)
+!48 = !DILocation(line: 12, scope: !44)
+!49 = !DILocation(line: 12, column: 19, scope: !50)
+!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
+!51 = !DILocation(line: 12, column: 21, scope: !50)
+!52 = !DILocation(line: 12, column: 3, scope: !44)
+!53 = !DILocation(line: 0, scope: !39)
+!54 = !DILocation(line: 15, column: 1, scope: !39)
+!55 = !DILocation(line: 13, column: 7, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
+!57 = !DILocation(line: 13, column: 7, scope: !58)
+!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
+!59 = !DILocation(line: 12, column: 36, scope: !50)
+!60 = !DILocation(line: 12, column: 3, scope: !50)
+!61 = distinct !{!61, !52, !62, !63}
+!62 = !DILocation(line: 14, column: 3, scope: !44)
+!63 = !{!"llvm.loop.mustprogress"}
+Function foo_rename is not in profile or profile symbol list.
+Run stale profile matching for main
+Run stale profile matching for bar
+(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch.
+(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching.
+(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch.
+(0/1) of callsites and (0/51) of samples are recovered by stale profile matching.
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index f1f2506e08d2a5..356b16ca6ad059 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -9,13 +9,12 @@
; CHECK-TEXT: The checksums for foo_rename(IR) and foo(Profile) match.
; CHECK-TEXT: Function:foo_rename matches profile:foo
; CHECK-TEXT: Run stale profile matching for foo_rename
-; CHECK-TEXT-NOT: Top-level function foo is recovered and re-read by the sample reader.
; CHECK-TEXT: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
; CHECK-TEXT: Processing Function main
; CHECK-TEXT: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51
; CHECK-TEXT: Processing Function foo_rename
-; CHECK-TEXT: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+; CHECK-TEXT: 2: %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674
; CHECK-EXTBIN: Run stale profile matching for main
@@ -23,13 +22,12 @@
; CHECK-EXTBIN: The checksums for foo_rename(IR) and foo(Profile) match.
; CHECK-EXTBIN: Function:foo_rename matches profile:foo
; CHECK-EXTBIN: Run stale profile matching for foo_rename
-; CHECK-EXTBIN: Top-level function foo is recovered and re-read by the sample reader.
; CHECK-EXTBIN: (1/3) of functions' profile are matched and (2724522/3177413) of samples are reused by call graph matching.
; CHECK-EXTBIN: Processing Function main
; CHECK-EXTBIN: 5: call void @foo_rename(), !dbg ![[#]] - weight: 51
; CHECK-EXTBIN: Processing Function foo_rename
-; CHECK-EXTBIN: 11: %call = call i32 @bar(i32 noundef %5), !dbg ![[#]] - weight: 452687
+; CHECK-EXTBIN: 2: %call = call i32 @bar(i32 noundef %0), !dbg ![[#]] - weight: 452674
target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
@@ -49,110 +47,55 @@ entry:
; Function Attrs: noinline nounwind uwtable
define dso_local void @foo_rename() #0 !dbg !27 {
entry:
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !33
- #dbg_value(i32 0, !31, !DIExpression(), !34)
- br label %for.cond, !dbg !35
-
-for.cond: ; preds = %if.end7, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc9, %if.end7 ], !dbg !36
- #dbg_value(i32 %i.0, !31, !DIExpression(), !34)
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 2, i32 0, i64 -1), !dbg !37
- %cmp = icmp slt i32 %i.0, 10000, !dbg !39
- br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !40
-
-for.cond.cleanup: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 3, i32 0, i64 -1), !dbg !41
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 13, i32 0, i64 -1), !dbg !42
- ret void, !dbg !42
-
-for.body: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 4, i32 0, i64 -1), !dbg !43
- %0 = load volatile i32, ptr @x, align 4, !dbg !43, !tbaa !46
- %rem = srem i32 %0, 3, !dbg !50
- %cmp1 = icmp eq i32 %rem, 1, !dbg !51
- br i1 %cmp1, label %if.then, label %if.else, !dbg !52
-
-if.then: ; preds = %for.body
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 5, i32 0, i64 -1), !dbg !53
- %1 = load volatile i32, ptr @x, align 4, !dbg !53, !tbaa !46
- %add = add nsw i32 %1, 100, !dbg !53
- store volatile i32 %add, ptr @x, align 4, !dbg !53, !tbaa !46
- br label %if.end7, !dbg !54
-
-if.else: ; preds = %for.body
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 6, i32 0, i64 -1), !dbg !55
- %2 = load volatile i32, ptr @x, align 4, !dbg !55, !tbaa !46
- %rem2 = srem i32 %2, 2, !dbg !57
- %cmp3 = icmp eq i32 %rem2, 1, !dbg !58
- br i1 %cmp3, label %if.then4, label %if.else6, !dbg !59
-
-if.then4: ; preds = %if.else
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 7, i32 0, i64 -1), !dbg !60
- %3 = load volatile i32, ptr @x, align 4, !dbg !60, !tbaa !46
- %add5 = add nsw i32 %3, 10, !dbg !60
- store volatile i32 %add5, ptr @x, align 4, !dbg !60, !tbaa !46
- br label %if.end7, !dbg !61
-
-if.else6: ; preds = %if.else
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 8, i32 0, i64 -1), !dbg !62
- %4 = load volatile i32, ptr @x, align 4, !dbg !62, !tbaa !46
- %inc = add nsw i32 %4, 1, !dbg !62
- store volatile i32 %inc, ptr @x, align 4, !dbg !62, !tbaa !46
- br label %if.end7
-
-if.end7: ; preds = %if.then4, %if.else6, %if.then
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 10, i32 0, i64 -1), !dbg !63
- %5 = load volatile i32, ptr @x, align 4, !dbg !63, !tbaa !46
- %call = call i32 @bar(i32 noundef %5), !dbg !64
- %6 = load volatile i32, ptr @x, align 4, !dbg !66, !tbaa !46
- %add8 = add nsw i32 %6, %call, !dbg !66
- store volatile i32 %add8, ptr @x, align 4, !dbg !66, !tbaa !46
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 12, i32 0, i64 -1), !dbg !67
- %inc9 = add nsw i32 %i.0, 1, !dbg !67
- #dbg_value(i32 %inc9, !31, !DIExpression(), !34)
- br label %for.cond, !dbg !68, !llvm.loop !69
+ call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
+ %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
+ %call = call i32 @bar(i32 noundef %0), !dbg !35
+ %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
+ %add = add nsw i32 %1, %call, !dbg !37
+ store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
+ ret void, !dbg !38
}
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #1
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #1
-
; Function Attrs: nounwind uwtable
-define dso_local i32 @main() #2 !dbg !72 {
+define dso_local i32 @main() #1 !dbg !39 {
entry:
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !78
- #dbg_value(i32 0, !76, !DIExpression(), !79)
- br label %for.cond, !dbg !80
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
+ #dbg_value(i32 0, !43, !DIExpression(), !46)
+ br label %for.cond, !dbg !47
for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !81
- #dbg_value(i32 %i.0, !76, !DIExpression(), !79)
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !82
- %cmp = icmp slt i32 %i.0, 100000, !dbg !84
- br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !85
+ %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
+ #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
+ %cmp = icmp slt i32 %i.0, 100000, !dbg !51
+ br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
for.cond.cleanup: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !86
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !87
- ret i32 0, !dbg !87
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
+ ret i32 0, !dbg !54
for.body: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !88
- call void @foo_rename(), !dbg !90
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !92
- %inc = add nsw i32 %i.0, 1, !dbg !92
- #dbg_value(i32 %inc, !76, !DIExpression(), !79)
- br label %for.cond, !dbg !93, !llvm.loop !94
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
+ call void @foo_rename(), !dbg !57
+ call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
+ %inc = add nsw i32 %i.0, 1, !dbg !59
+ #dbg_value(i32 %inc, !43, !DIExpression(), !46)
+ br label %for.cond, !dbg !60, !llvm.loop !61
}
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
+
+; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
+declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
+
; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #1 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #2 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
+attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
!llvm.dbg.cu = !{!2}
@@ -163,7 +106,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
-!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "5c9304100fda7763e5a474c768d3b005")
+!3 = !DIFile(filename: "test_rename.c", directory: "/home", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
!4 = !{!0}
!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
@@ -176,7 +119,7 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
!14 = !{!"clang version 20.0.0"}
!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
-!16 = !{i64 -2115950948644264162, i64 281718392333557, !"foo_rename"}
+!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
!19 = !DISubroutineType(types: !20)
@@ -187,72 +130,40 @@ attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memo
!24 = !DILocation(line: 4, column: 10, scope: !18)
!25 = !DILocation(line: 4, column: 12, scope: !18)
!26 = !DILocation(line: 4, column: 3, scope: !18)
-!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !30)
+!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
!28 = !DISubroutineType(types: !29)
!29 = !{null}
-!30 = !{!31}
-!31 = !DILocalVariable(name: "i", scope: !32, file: !3, line: 8, type: !6)
-!32 = distinct !DILexicalBlock(scope: !27, file: !3, line: 8, column: 3)
-!33 = !DILocation(line: 8, column: 12, scope: !32)
-!34 = !DILocation(line: 0, scope: !32)
-!35 = !DILocation(line: 8, column: 8, scope: !32)
-!36 = !DILocation(line: 8, scope: !32)
-!37 = !DILocation(line: 8, column: 19, scope: !38)
-!38 = distinct !DILexicalBlock(scope: !32, file: !3, line: 8, column: 3)
-!39 = !DILocation(line: 8, column: 21, scope: !38)
-!40 = !DILocation(line: 8, column: 3, scope: !32)
-!41 = !DILocation(line: 0, scope: !27)
-!42 = !DILocation(line: 17, column: 1, scope: !27)
-!43 = !DILocation(line: 9, column: 10, scope: !44)
-!44 = distinct !DILexicalBlock(scope: !45, file: !3, line: 9, column: 10)
-!45 = distinct !DILexicalBlock(scope: !38, file: !3, line: 8, column: 39)
-!46 = !{!47, !47, i64 0}
-!47 = !{!"int", !48, i64 0}
-!48 = !{!"omnipotent char", !49, i64 0}
-!49 = !{!"Simple C/C++ TBAA"}
-!50 = !DILocation(line: 9, column: 12, scope: !44)
-!51 = !DILocation(line: 9, column: 16, scope: !44)
-!52 = !DILocation(line: 9, column: 10, scope: !45)
-!53 = !DILocation(line: 10, column: 10, scope: !44)
-!54 = !DILocation(line: 10, column: 8, scope: !44)
-!55 = !DILocation(line: 11, column: 16, scope: !56)
-!56 = distinct !DILexicalBlock(scope: !44, file: !3, line: 11, column: 16)
-!57 = !DILocation(line: 11, column: 18, scope: !56)
-!58 = !DILocation(line: 11, column: 22, scope: !56)
-!59 = !DILocation(line: 11, column: 16, scope: !44)
-!60 = !DILocation(line: 12, column: 10, scope: !56)
-!61 = !DILocation(line: 12, column: 8, scope: !56)
-!62 = !DILocation(line: 14, column: 9, scope: !56)
-!63 = !DILocation(line: 15, column: 15, scope: !45)
-!64 = !DILocation(line: 15, column: 11, scope: !65)
-!65 = !DILexicalBlockFile(scope: !45, file: !3, discriminator: 455082079)
-!66 = !DILocation(line: 15, column: 8, scope: !45)
-!67 = !DILocation(line: 8, column: 35, scope: !38)
-!68 = !DILocation(line: 8, column: 3, scope: !38)
-!69 = distinct !{!69, !40, !70, !71}
-!70 = !DILocation(line: 16, column: 3, scope: !32)
-!71 = !{!"llvm.loop.mustprogress"}
-!72 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 19, type: !73, scopeLine: 19, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !75)
-!73 = !DISubroutineType(types: !74)
-!74 = !{!6}
-!75 = !{!76}
-!76 = !DILocalVariable(name: "i", scope: !77, file: !3, line: 20, type: !6)
-!77 = distinct !DILexicalBlock(scope: !72, file: !3, line: 20, column: 3)
-!78 = !DILocation(line: 20, column: 12, scope: !77)
-!79 = !DILocation(line: 0, scope: !77)
-!80 = !DILocation(line: 20, column: 8, scope: !77)
-!81 = !DILocation(line: 20, scope: !77)
-!82 = !DILocation(line: 20, column: 19, scope: !83)
-!83 = distinct !DILexicalBlock(scope: !77, file: !3, line: 20, column: 3)
-!84 = !DILocation(line: 20, column: 21, scope: !83)
-!85 = !DILocation(line: 20, column: 3, scope: !77)
-!86 = !DILocation(line: 0, scope: !72)
-!87 = !DILocation(line: 23, column: 1, scope: !72)
-!88 = !DILocation(line: 21, column: 7, scope: !89)
-!89 = distinct !DILexicalBlock(scope: !83, file: !3, line: 20, column: 40)
-!90 = !DILocation(line: 21, column: 7, scope: !91)
-!91 = !DILexicalBlockFile(scope: !89, file: !3, discriminator: 455082031)
-!92 = !DILocation(line: 20, column: 36, scope: !83)
-!93 = !DILocation(line: 20, column: 3, scope: !83)
-!94 = distinct !{!94, !85, !95, !71}
-!95 = !DILocation(line: 22, column: 3, scope: !77)
+!30 = !DILocation(line: 8, column: 15, scope: !27)
+!31 = !{!32, !32, i64 0}
+!32 = !{!"int", !33, i64 0}
+!33 = !{!"omnipotent char", !34, i64 0}
+!34 = !{!"Simple C/C++ TBAA"}
+!35 = !DILocation(line: 8, column: 11, scope: !36)
+!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
+!37 = !DILocation(line: 8, column: 8, scope: !27)
+!38 = !DILocation(line: 9, column: 1, scope: !27)
+!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
+!40 = !DISubroutineType(types: !41)
+!41 = !{!6}
+!42 = !{!43}
+!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
+!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
+!45 = !DILocation(line: 12, column: 12, scope: !44)
+!46 = !DILocation(line: 0, scope: !44)
+!47 = !DILocation(line: 12, column: 8, scope: !44)
+!48 = !DILocation(line: 12, scope: !44)
+!49 = !DILocation(line: 12, column: 19, scope: !50)
+!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
+!51 = !DILocation(line: 12, column: 21, scope: !50)
+!52 = !DILocation(line: 12, column: 3, scope: !44)
+!53 = !DILocation(line: 0, scope: !39)
+!54 = !DILocation(line: 15, column: 1, scope: !39)
+!55 = !DILocation(line: 13, column: 7, scope: !56)
+!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
+!57 = !DILocation(line: 13, column: 7, scope: !58)
+!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
+!59 = !DILocation(line: 12, column: 36, scope: !50)
+!60 = !DILocation(line: 12, column: 3, scope: !50)
+!61 = distinct !{!61, !52, !62, !63}
+!62 = !DILocation(line: 14, column: 3, scope: !44)
+!63 = !{!"llvm.loop.mustprogress"}
>From 91ce2b23236bde42930a3dbb05fb2531c10d90a7 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 13 Aug 2024 09:20:31 -0700
Subject: [PATCH 3/8] fix lint
---
llvm/include/llvm/ProfileData/SampleProfReader.h | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 00e4e7096ab7b4..907663fd50094d 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,7 +380,7 @@ class SampleProfileReader {
return sampleprof_error::success;
}
- /// Read sample profiles for the given functions. Currently it's only used
+ /// Read sample profiles for the given functions. Currently it's only used
/// for extended binary format to load the profiles on-demand.
virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) {
@@ -821,7 +821,7 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
/// Read the profiles on-demand for the given functions. This is used after
/// stale call graph matching finds new functions whose profiles aren't loaded
- /// at the beginning and we need to loaded the profiles explicitly for
+ /// at the beginning and we need to load the profiles explicitly for
/// potential matching.
std::error_code read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) override;
>From ab2f83da198013aa55e95c7312a65288dee4df18 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Fri, 16 Aug 2024 16:54:15 -0700
Subject: [PATCH 4/8] addressing comments
---
.../llvm/ProfileData/SampleProfReader.h | 20 ++-
.../Transforms/IPO/SampleProfileMatcher.cpp | 33 ++--
...eudo-probe-stale-profile-toplev-func-cp.ll | 147 ------------------
.../pseudo-probe-stale-profile-toplev-func.ll | 4 +-
4 files changed, 35 insertions(+), 169 deletions(-)
delete mode 100644 llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index 907663fd50094d..c86b97740e4f79 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,8 +380,17 @@ class SampleProfileReader {
return sampleprof_error::success;
}
- /// Read sample profiles for the given functions. Currently it's only used
- /// for extended binary format to load the profiles on-demand.
+ /// Read sample profiles for the given functions. Currently it's only used for
+ /// extended binary format to load the profiles on-demand.
+ std::error_code read(const DenseSet<StringRef> &FuncsToUse) {
+ if (std::error_code EC = read(FuncsToUse, Profiles))
+ return EC;
+ return sampleprof_error::success;
+ };
+
+ /// Read sample profiles for the given functions and write them to the given
+ /// profile map. Currently it's only used for extended binary format to load
+ /// the profiles on-demand.
virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) {
return sampleprof_error::not_implemented;
@@ -512,8 +521,8 @@ class SampleProfileReader {
void setModule(const Module *Mod) { M = Mod; }
void setFuncNameToProfNameMap(
- HashKeyMap<std::unordered_map, FunctionId, FunctionId> *FPMap) {
- FuncNameToProfNameMap = FPMap;
+ const HashKeyMap<std::unordered_map, FunctionId, FunctionId> &FPMap) {
+ FuncNameToProfNameMap = &FPMap;
}
protected:
@@ -547,7 +556,7 @@ class SampleProfileReader {
// A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
// which maps the function name to the matched profile name. This is used
// for sample loader to look up profile using the new name.
- HashKeyMap<std::unordered_map, FunctionId, FunctionId>
+ const HashKeyMap<std::unordered_map, FunctionId, FunctionId>
*FuncNameToProfNameMap = nullptr;
// A map from a function's context hash to its meta data section range, used
@@ -557,6 +566,7 @@ class SampleProfileReader {
std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+ /// Whether the profile has attribute metadata.
bool ProfileHasAttribute = false;
/// \brief Whether samples are collected based on pseudo probes.
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 574a157c636835..77cede8744707b 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -36,6 +36,12 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
cl::desc("The minimum number of call anchors required for a function to "
"run stale profile call graph matching."));
+static cl::opt<bool> ReadToplevProfileforCGMatching(
+ "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false),
+ cl::desc(
+ "Read top-level profiles that the sample reader initially skips for "
+ "the call-graph matching(only meaningful for extended binary format)"));
+
extern cl::opt<bool> SalvageStaleProfile;
extern cl::opt<bool> SalvageUnusedProfile;
extern cl::opt<bool> PersistProfileStaleness;
@@ -784,22 +790,19 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
// With extbinary profile format, initial profile loading only reads profile
// based on current function names in the module.
- // However, if a function is renamed, sample loader fails to load its original
+ // However, if a function is renamed, sample loader skips loading its original
// profile(which has a different name), we will miss this case. To address
// this, we load the top-level profile candidate explicitly for the matching.
- if (!FSFlattened) {
+ if (!FSFlattened && ReadToplevProfileforCGMatching) {
DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
- SampleProfileMap TopLevelProfile;
- Reader.read(TopLevelFunc, TopLevelProfile);
- assert(TopLevelProfile.size() <= 1 &&
- "More than one profile is found for top-level function");
- if (!TopLevelProfile.empty()) {
- LLVM_DEBUG(dbgs() << "Read top-level function " << ProfFunc
- << " for call-graph matching\n");
- auto &FS = TopLevelProfile.begin()->second;
- FSFlattened =
- &(FlattenedProfiles.create(FS.getContext()) = std::move(FS));
- }
+ if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles))
+ return false;
+ FSFlattened = getFlattenedSamplesFor(ProfFunc);
+ LLVM_DEBUG({
+ if (FSFlattened)
+ dbgs() << "Read top-level function " << ProfFunc
+ << " for call-graph matching\n";
+ });
}
if (!FSFlattened)
return false;
@@ -901,8 +904,8 @@ void SampleProfileMatcher::UpdateWithSalvagedProfiles() {
// based on current function names in the module, so we need to load top-level
// profiles for functions with different profile name explicitly after
// function-profile name map is established with stale profile matching.
- Reader.read(ProfileSalvagedFuncs, Reader.getProfiles());
- Reader.setFuncNameToProfNameMap(FuncNameToProfNameMap);
+ Reader.read(ProfileSalvagedFuncs);
+ Reader.setFuncNameToProfNameMap(*FuncNameToProfNameMap);
}
void SampleProfileMatcher::runOnModule() {
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
deleted file mode 100644
index 750bf03fa2d939..00000000000000
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func-cp.ll
+++ /dev/null
@@ -1,147 +0,0 @@
-; *** IR Dump Before SampleProfileLoaderPass on [module] ***
-; ModuleID = 'test_rename.c'
-source_filename = "test_rename.c"
-target datalayout = "e-m:e-p270:32:32-p271:32:32-p272:64:64-i64:64-i128:128-f80:128-n8:16:32:64-S128"
-target triple = "x86_64-unknown-linux-gnu"
-
- at x = dso_local global i32 0, align 4, !dbg !0
-
-; Function Attrs: noinline nounwind uwtable
-define dso_local i32 @bar(i32 noundef %x) #0 !dbg !18 {
-entry:
- #dbg_value(i32 %x, !22, !DIExpression(), !23)
- call void @llvm.pseudoprobe(i64 -2012135647395072713, i64 1, i32 0, i64 -1), !dbg !24
- %add = add nsw i32 %x, 1, !dbg !25
- ret i32 %add, !dbg !26
-}
-
-; Function Attrs: noinline nounwind uwtable
-define dso_local void @foo_rename() #0 !dbg !27 {
-entry:
- call void @llvm.pseudoprobe(i64 -2115950948644264162, i64 1, i32 0, i64 -1), !dbg !30
- %0 = load volatile i32, ptr @x, align 4, !dbg !30, !tbaa !31
- %call = call i32 @bar(i32 noundef %0), !dbg !35
- %1 = load volatile i32, ptr @x, align 4, !dbg !37, !tbaa !31
- %add = add nsw i32 %1, %call, !dbg !37
- store volatile i32 %add, ptr @x, align 4, !dbg !37, !tbaa !31
- ret void, !dbg !38
-}
-
-; Function Attrs: nounwind uwtable
-define dso_local i32 @main() #1 !dbg !39 {
-entry:
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 1, i32 0, i64 -1), !dbg !45
- #dbg_value(i32 0, !43, !DIExpression(), !46)
- br label %for.cond, !dbg !47
-
-for.cond: ; preds = %for.body, %entry
- %i.0 = phi i32 [ 0, %entry ], [ %inc, %for.body ], !dbg !48
- #dbg_value(i32 %i.0, !43, !DIExpression(), !46)
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 2, i32 0, i64 -1), !dbg !49
- %cmp = icmp slt i32 %i.0, 100000, !dbg !51
- br i1 %cmp, label %for.body, label %for.cond.cleanup, !dbg !52
-
-for.cond.cleanup: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 3, i32 0, i64 -1), !dbg !53
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 7, i32 0, i64 -1), !dbg !54
- ret i32 0, !dbg !54
-
-for.body: ; preds = %for.cond
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 4, i32 0, i64 -1), !dbg !55
- call void @foo_rename(), !dbg !57
- call void @llvm.pseudoprobe(i64 -2624081020897602054, i64 6, i32 0, i64 -1), !dbg !59
- %inc = add nsw i32 %i.0, 1, !dbg !59
- #dbg_value(i32 %inc, !43, !DIExpression(), !46)
- br label %for.cond, !dbg !60, !llvm.loop !61
-}
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.start.p0(i64 immarg, ptr nocapture) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite)
-declare void @llvm.lifetime.end.p0(i64 immarg, ptr nocapture) #2
-
-; Function Attrs: mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite)
-declare void @llvm.pseudoprobe(i64, i64, i32, i64) #3
-
-attributes #0 = { noinline nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #1 = { nounwind uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cmov,+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" "use-sample-profile" }
-attributes #2 = { mustprogress nocallback nofree nosync nounwind willreturn memory(argmem: readwrite) }
-attributes #3 = { mustprogress nocallback nofree nosync nounwind willreturn memory(inaccessiblemem: readwrite) }
-
-!llvm.dbg.cu = !{!2}
-!llvm.module.flags = !{!7, !8, !9, !10, !11, !12, !13}
-!llvm.ident = !{!14}
-!llvm.pseudo_probe_desc = !{!15, !16, !17}
-
-!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
-!1 = distinct !DIGlobalVariable(name: "x", scope: !2, file: !3, line: 1, type: !5, isLocal: false, isDefinition: true)
-!2 = distinct !DICompileUnit(language: DW_LANG_C11, file: !3, producer: "clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)", isOptimized: true, runtimeVersion: 0, emissionKind: FullDebug, globals: !4, splitDebugInlining: false, nameTableKind: None)
-!3 = !DIFile(filename: "test_rename.c", directory: "/home/wlei/local/llvm_test/rename/extbinary", checksumkind: CSK_MD5, checksum: "11a33a83e4d190ebda0792d0610f0c67")
-!4 = !{!0}
-!5 = !DIDerivedType(tag: DW_TAG_volatile_type, baseType: !6)
-!6 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
-!7 = !{i32 7, !"Dwarf Version", i32 5}
-!8 = !{i32 2, !"Debug Info Version", i32 3}
-!9 = !{i32 1, !"wchar_size", i32 4}
-!10 = !{i32 8, !"PIC Level", i32 2}
-!11 = !{i32 7, !"PIE Level", i32 2}
-!12 = !{i32 7, !"uwtable", i32 2}
-!13 = !{i32 7, !"debug-info-assignment-tracking", i1 true}
-!14 = !{!"clang version 20.0.0git (https://github.com/llvm/llvm-project.git 070702c9be2fb437b0765532c03e98c642951906)"}
-!15 = !{i64 -2012135647395072713, i64 4294967295, !"bar"}
-!16 = !{i64 -2115950948644264162, i64 281479271677951, !"foo_rename"}
-!17 = !{i64 -2624081020897602054, i64 281582264815352, !"main"}
-!18 = distinct !DISubprogram(name: "bar", scope: !3, file: !3, line: 3, type: !19, scopeLine: 3, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !21)
-!19 = !DISubroutineType(types: !20)
-!20 = !{!6, !6}
-!21 = !{!22}
-!22 = !DILocalVariable(name: "x", arg: 1, scope: !18, file: !3, line: 3, type: !6)
-!23 = !DILocation(line: 0, scope: !18)
-!24 = !DILocation(line: 4, column: 10, scope: !18)
-!25 = !DILocation(line: 4, column: 12, scope: !18)
-!26 = !DILocation(line: 4, column: 3, scope: !18)
-!27 = distinct !DISubprogram(name: "foo_rename", scope: !3, file: !3, line: 7, type: !28, scopeLine: 7, flags: DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2)
-!28 = !DISubroutineType(types: !29)
-!29 = !{null}
-!30 = !DILocation(line: 8, column: 15, scope: !27)
-!31 = !{!32, !32, i64 0}
-!32 = !{!"int", !33, i64 0}
-!33 = !{!"omnipotent char", !34, i64 0}
-!34 = !{!"Simple C/C++ TBAA"}
-!35 = !DILocation(line: 8, column: 11, scope: !36)
-!36 = !DILexicalBlockFile(scope: !27, file: !3, discriminator: 455082007)
-!37 = !DILocation(line: 8, column: 8, scope: !27)
-!38 = !DILocation(line: 9, column: 1, scope: !27)
-!39 = distinct !DISubprogram(name: "main", scope: !3, file: !3, line: 11, type: !40, scopeLine: 11, flags: DIFlagPrototyped | DIFlagAllCallsDescribed, spFlags: DISPFlagDefinition | DISPFlagOptimized, unit: !2, retainedNodes: !42)
-!40 = !DISubroutineType(types: !41)
-!41 = !{!6}
-!42 = !{!43}
-!43 = !DILocalVariable(name: "i", scope: !44, file: !3, line: 12, type: !6)
-!44 = distinct !DILexicalBlock(scope: !39, file: !3, line: 12, column: 3)
-!45 = !DILocation(line: 12, column: 12, scope: !44)
-!46 = !DILocation(line: 0, scope: !44)
-!47 = !DILocation(line: 12, column: 8, scope: !44)
-!48 = !DILocation(line: 12, scope: !44)
-!49 = !DILocation(line: 12, column: 19, scope: !50)
-!50 = distinct !DILexicalBlock(scope: !44, file: !3, line: 12, column: 3)
-!51 = !DILocation(line: 12, column: 21, scope: !50)
-!52 = !DILocation(line: 12, column: 3, scope: !44)
-!53 = !DILocation(line: 0, scope: !39)
-!54 = !DILocation(line: 15, column: 1, scope: !39)
-!55 = !DILocation(line: 13, column: 7, scope: !56)
-!56 = distinct !DILexicalBlock(scope: !50, file: !3, line: 12, column: 40)
-!57 = !DILocation(line: 13, column: 7, scope: !58)
-!58 = !DILexicalBlockFile(scope: !56, file: !3, discriminator: 455082031)
-!59 = !DILocation(line: 12, column: 36, scope: !50)
-!60 = !DILocation(line: 12, column: 3, scope: !50)
-!61 = distinct !{!61, !52, !62, !63}
-!62 = !DILocation(line: 14, column: 3, scope: !44)
-!63 = !{!"llvm.loop.mustprogress"}
-Function foo_rename is not in profile or profile symbol list.
-Run stale profile matching for main
-Run stale profile matching for bar
-(0/2) of functions' profile are invalid and (0/452891) of samples are discarded due to function hash mismatch.
-(0/2) of functions' profile are matched and (0/452891) of samples are reused by call graph matching.
-(1/1) of callsites' profile are invalid and (51/452891) of samples are discarded due to callsite location mismatch.
-(0/1) of callsites and (0/51) of samples are recovered by stale profile matching.
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index 356b16ca6ad059..7b3fe9e047bd20 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -1,8 +1,8 @@
; REQUIRES: x86_64-linux
; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
; CHECK-TEXT: Run stale profile matching for main
; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching
>From 259ab87f5a708cf1ee50e4b83dceb771fee791af Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 10:45:22 -0700
Subject: [PATCH 5/8] check whether a profile is already loaded
---
llvm/include/llvm/ProfileData/SampleProfReader.h | 15 +++++++++------
llvm/lib/ProfileData/SampleProfReader.cpp | 6 +++---
2 files changed, 12 insertions(+), 9 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index c86b97740e4f79..a93cf25e3f7f7a 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -380,13 +380,16 @@ class SampleProfileReader {
return sampleprof_error::success;
}
- /// Read sample profiles for the given functions. Currently it's only used for
- /// extended binary format to load the profiles on-demand.
+ /// Read sample profiles for the given functions.
std::error_code read(const DenseSet<StringRef> &FuncsToUse) {
- if (std::error_code EC = read(FuncsToUse, Profiles))
+ DenseSet<StringRef> S;
+ for (StringRef F : FuncsToUse)
+ if (Profiles.find(FunctionId(F)) == Profiles.end())
+ S.insert(F);
+ if (std::error_code EC = read(S, Profiles))
return EC;
return sampleprof_error::success;
- };
+ }
/// Read sample profiles for the given functions and write them to the given
/// profile map. Currently it's only used for extended binary format to load
@@ -394,7 +397,7 @@ class SampleProfileReader {
virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) {
return sampleprof_error::not_implemented;
- };
+ }
 /// The implementation to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
@@ -564,7 +567,7 @@ class SampleProfileReader {
std::unordered_map<uint64_t, std::pair<const uint8_t *, const uint8_t *>>
FuncMetadataIndex;
- std::pair<const uint8_t *, const uint8_t *> LBRProfileSecRange;
+ std::pair<const uint8_t *, const uint8_t *> ProfileSecRange;
/// Whether the profile has attribute metadata.
bool ProfileHasAttribute = false;
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 4c0a45bfb47cf8..71464e8dae65ce 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -731,7 +731,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readOneSection(
break;
}
case SecLBRProfile:
- LBRProfileSecRange = std::make_pair(Data, End);
+ ProfileSecRange = std::make_pair(Data, End);
if (std::error_code EC = readFuncProfiles())
return EC;
break;
@@ -801,8 +801,8 @@ bool SampleProfileReaderExtBinaryBase::useFuncOffsetList() const {
std::error_code
SampleProfileReaderExtBinaryBase::read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) {
- Data = LBRProfileSecRange.first;
- End = LBRProfileSecRange.second;
+ Data = ProfileSecRange.first;
+ End = ProfileSecRange.second;
if (std::error_code EC = readFuncProfiles(FuncsToUse, Profiles))
return EC;
End = Data;
>From 11b5a6602f81942d59f4357b05a6a755a6212f33 Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 11:29:54 -0700
Subject: [PATCH 6/8] load profiles into the sample reader's profile map
---
.../Transforms/IPO/SampleProfileMatcher.cpp | 41 ++++++++++---------
1 file changed, 21 insertions(+), 20 deletions(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 77cede8744707b..afd5933e39eb42 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -416,18 +416,19 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// callsites in one context may differ from those in another context. To get
// the maximum number of callsites, we merge the function profiles from all
// contexts, aka, the flattened profile to find profile anchors.
- const auto *FSFlattened = getFlattenedSamplesFor(F);
- if (SalvageUnusedProfile && !FSFlattened) {
+ const auto *FSForMatching = getFlattenedSamplesFor(F);
+ if (SalvageUnusedProfile && !FSForMatching) {
// Apply the matching in place to find the new function's matched profile.
- // TODO: For extended profile format, if a function profile is unused and
- // it's top-level, even if the profile is matched, it's not found in the
- // profile. This is because sample reader only read the used profile at the
- // beginning, we need to support loading the profile on-demand in future.
auto R = FuncToProfileNameMap.find(&F);
- if (R != FuncToProfileNameMap.end())
- FSFlattened = getFlattenedSamplesFor(R->second);
+ if (R != FuncToProfileNameMap.end()) {
+ FSForMatching = getFlattenedSamplesFor(R->second);
+ // Try to find the salvaged top-level profiles that are explicitly loaded
+ // for the matching, see "functionMatchesProfileHelper" for the details.
+ if (!FSForMatching)
+ FSForMatching = Reader.getSamplesFor(R->second.stringRef());
+ }
}
- if (!FSFlattened)
+ if (!FSForMatching)
return;
// Anchors for IR. It's a map from IR location to callee name, callee name is
@@ -438,7 +439,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// Anchors for profile. It's a map from callsite location to a set of callee
// name.
AnchorMap ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
+ findProfileAnchors(*FSForMatching, ProfileAnchors);
// Compute the callsite match states for profile staleness report.
if (ReportProfileStaleness || PersistProfileStaleness)
@@ -449,7 +450,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
// For probe-based profiles, run matching only when profile checksum is
// mismatched.
bool ChecksumMismatch = FunctionSamples::ProfileIsProbeBased &&
- !ProbeManager->profileIsValid(F, *FSFlattened);
+ !ProbeManager->profileIsValid(F, *FSForMatching);
bool RunCFGMatching =
!FunctionSamples::ProfileIsProbeBased || ChecksumMismatch;
bool RunCGMatching = SalvageUnusedProfile;
@@ -787,30 +788,30 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
// two sequences are.
float Similarity = 0.0;
- const auto *FSFlattened = getFlattenedSamplesFor(ProfFunc);
+ const auto *FSForMatching = getFlattenedSamplesFor(ProfFunc);
// With extbinary profile format, initial profile loading only reads profile
// based on current function names in the module.
// However, if a function is renamed, sample loader skips to load its original
// profile(which has a different name), we will miss this case. To address
// this, we load the top-level profile candidate explicitly for the matching.
- if (!FSFlattened && ReadToplevProfileforCGMatching) {
+ if (!FSForMatching && ReadToplevProfileforCGMatching) {
DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
- if (std::error_code EC = Reader.read(TopLevelFunc, FlattenedProfiles))
+ if (std::error_code EC = Reader.read(TopLevelFunc))
return false;
- FSFlattened = getFlattenedSamplesFor(ProfFunc);
+ FSForMatching = Reader.getSamplesFor(ProfFunc.stringRef());
LLVM_DEBUG({
- if (FSFlattened)
+ if (FSForMatching)
dbgs() << "Read top-level function " << ProfFunc
<< " for call-graph matching\n";
});
}
- if (!FSFlattened)
+ if (!FSForMatching)
return false;
// The check for similarity or checksum may not be reliable if the function is
// tiny, we use the number of basic block as a proxy for the function
// complexity and skip the matching if it's too small.
if (IRFunc.size() < MinFuncCountForCGMatching ||
- FSFlattened->getBodySamples().size() < MinFuncCountForCGMatching)
+ FSForMatching->getBodySamples().size() < MinFuncCountForCGMatching)
return false;
// For probe-based function, we first trust the checksum info. If the checksum
@@ -818,7 +819,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
if (FunctionSamples::ProfileIsProbeBased) {
const auto *FuncDesc = ProbeManager->getDesc(IRFunc);
if (FuncDesc &&
- !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSFlattened)) {
+ !ProbeManager->profileIsHashMismatched(*FuncDesc, *FSForMatching)) {
LLVM_DEBUG(dbgs() << "The checksums for " << IRFunc.getName()
<< "(IR) and " << ProfFunc << "(Profile) match.\n");
@@ -829,7 +830,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
AnchorMap IRAnchors;
findIRAnchors(IRFunc, IRAnchors);
AnchorMap ProfileAnchors;
- findProfileAnchors(*FSFlattened, ProfileAnchors);
+ findProfileAnchors(*FSForMatching, ProfileAnchors);
AnchorList FilteredIRAnchorsList;
AnchorList FilteredProfileAnchorList;
>From 60440849a6e1cdb5ff7ceae1aa5a32c068fae60d Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Mon, 19 Aug 2024 17:59:29 -0700
Subject: [PATCH 7/8] make read function private
---
.../llvm/ProfileData/SampleProfReader.h | 29 ++++++++++---------
.../Transforms/IPO/SampleProfileMatcher.cpp | 10 +++----
.../pseudo-probe-stale-profile-toplev-func.ll | 4 +--
3 files changed, 22 insertions(+), 21 deletions(-)
diff --git a/llvm/include/llvm/ProfileData/SampleProfReader.h b/llvm/include/llvm/ProfileData/SampleProfReader.h
index a93cf25e3f7f7a..6cab1195938888 100644
--- a/llvm/include/llvm/ProfileData/SampleProfReader.h
+++ b/llvm/include/llvm/ProfileData/SampleProfReader.h
@@ -391,14 +391,6 @@ class SampleProfileReader {
return sampleprof_error::success;
}
- /// Read sample profiles for the given functions and write them to the given
- /// profile map. Currently it's only used for extended binary format to load
- /// the profiles on-demand.
- virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
- SampleProfileMap &Profiles) {
- return sampleprof_error::not_implemented;
- }
-
/// The implementation to read sample profiles from the associated file.
virtual std::error_code readImpl() = 0;
@@ -554,6 +546,14 @@ class SampleProfileReader {
/// Compute summary for this profile.
void computeSummary();
+ /// Read sample profiles for the given functions and write them to the given
+ /// profile map. Currently it's only used for extended binary format to load
+ /// the profiles on-demand.
+ virtual std::error_code read(const DenseSet<StringRef> &FuncsToUse,
+ SampleProfileMap &Profiles) {
+ return sampleprof_error::not_implemented;
+ }
+
std::unique_ptr<SampleProfileReaderItaniumRemapper> Remapper;
// A map pointer to the FuncNameToProfNameMap in SampleProfileLoader,
@@ -832,18 +832,19 @@ class SampleProfileReaderExtBinaryBase : public SampleProfileReaderBinary {
/// the reader has been given a module.
bool collectFuncsFromModule() override;
+ std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
+ return std::move(ProfSymList);
+ };
+
+ void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
+
+private:
/// Read the profiles on-demand for the given functions. This is used after
/// stale call graph matching finds new functions whose profiles aren't loaded
/// at the beginning and we need to load the profiles explicitly for
/// potential matching.
std::error_code read(const DenseSet<StringRef> &FuncsToUse,
SampleProfileMap &Profiles) override;
-
- std::unique_ptr<ProfileSymbolList> getProfileSymbolList() override {
- return std::move(ProfSymList);
- };
-
- void setSkipFlatProf(bool Skip) override { SkipFlatProf = Skip; }
};
class SampleProfileReaderExtBinary : public SampleProfileReaderExtBinaryBase {
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index afd5933e39eb42..1c3d89bfc3b123 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -36,10 +36,10 @@ static cl::opt<unsigned> MinCallCountForCGMatching(
cl::desc("The minimum number of call anchors required for a function to "
"run stale profile call graph matching."));
-static cl::opt<bool> ReadToplevProfileforCGMatching(
- "read-toplev-profile-for-cg-matching", cl::Hidden, cl::init(false),
+static cl::opt<bool> LoadFuncProfileforCGMatching(
+ "load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
cl::desc(
- "Read top-level profiles that the sample reader initially skips for "
+ "Load top-level profiles that the sample reader initially skipped for "
"the call-graph matching(only meaningful for extended binary format)"));
extern cl::opt<bool> SalvageStaleProfile;
@@ -424,7 +424,7 @@ void SampleProfileMatcher::runOnFunction(Function &F) {
FSForMatching = getFlattenedSamplesFor(R->second);
// Try to find the salvaged top-level profiles that are explicitly loaded
// for the matching, see "functionMatchesProfileHelper" for the details.
- if (!FSForMatching)
+ if (!FSForMatching && LoadFuncProfileforCGMatching)
FSForMatching = Reader.getSamplesFor(R->second.stringRef());
}
}
@@ -794,7 +794,7 @@ bool SampleProfileMatcher::functionMatchesProfileHelper(
// However, if a function is renamed, sample loader skips to load its original
// profile(which has a different name), we will miss this case. To address
// this, we load the top-level profile candidate explicitly for the matching.
- if (!FSForMatching && ReadToplevProfileforCGMatching) {
+ if (!FSForMatching && LoadFuncProfileforCGMatching) {
DenseSet<StringRef> TopLevelFunc({ProfFunc.stringRef()});
if (std::error_code EC = Reader.read(TopLevelFunc))
return false;
diff --git a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
index 7b3fe9e047bd20..c839364f235536 100644
--- a/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
+++ b/llvm/test/Transforms/SampleProfile/pseudo-probe-stale-profile-toplev-func.ll
@@ -1,8 +1,8 @@
; REQUIRES: x86_64-linux
; REQUIRES: asserts
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/pseudo-probe-stale-profile-toplev-func.prof --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-TEXT
; RUN: llvm-profdata merge --sample %S/Inputs/pseudo-probe-stale-profile-toplev-func.prof -extbinary -o %t.extbinary
-; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --read-toplev-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.extbinary --salvage-stale-profile --salvage-unused-profile -report-profile-staleness -S --debug-only=sample-profile,sample-profile-matcher,sample-profile-impl -pass-remarks=inline --min-call-count-for-cg-matching=0 --min-func-count-for-cg-matching=0 --load-func-profile-for-cg-matching 2>&1 | FileCheck %s -check-prefix=CHECK-EXTBIN
; CHECK-TEXT: Run stale profile matching for main
; CHECK-TEXT-NOT: Read top-level function foo for call-graph matching
>From 6fbb401bb1546374eb2a3ffa6bca5df182f5fdbe Mon Sep 17 00:00:00 2001
From: wlei <wlei at fb.com>
Date: Tue, 27 Aug 2024 11:58:44 -0700
Subject: [PATCH 8/8] add space before (
---
llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
index 1c3d89bfc3b123..0c676e8fb95fdb 100644
--- a/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfileMatcher.cpp
@@ -40,7 +40,8 @@ static cl::opt<bool> LoadFuncProfileforCGMatching(
"load-func-profile-for-cg-matching", cl::Hidden, cl::init(false),
cl::desc(
"Load top-level profiles that the sample reader initially skipped for "
- "the call-graph matching(only meaningful for extended binary format)"));
+ "the call-graph matching (only meaningful for extended binary "
+ "format)"));
extern cl::opt<bool> SalvageStaleProfile;
extern cl::opt<bool> SalvageUnusedProfile;
More information about the llvm-commits
mailing list