[llvm] 7ca8030 - [CSSPGO] Enable loading MD5 CS profile.
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Wed Sep 1 09:19:59 PDT 2021
Author: Hongtao Yu
Date: 2021-09-01T09:19:47-07:00
New Revision: 7ca80300309a95a152a343c6f5b68c0d83e9fb28
URL: https://github.com/llvm/llvm-project/commit/7ca80300309a95a152a343c6f5b68c0d83e9fb28
DIFF: https://github.com/llvm/llvm-project/commit/7ca80300309a95a152a343c6f5b68c0d83e9fb28.diff
LOG: [CSSPGO] Enable loading MD5 CS profile.
Adding compiler support for MD5 CS profiles, based on the previous context split work D107299. An MD5 CS profile is about 40% smaller than the string-based extbinary profile. As a result, the compilation is 15% faster.
A few conversions from real names to MD5 names have been made on the sample loader and context tracker side to get it to work.
Reviewed By: wenlei, wmi
Differential Revision: https://reviews.llvm.org/D108342
Added:
Modified:
llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
llvm/lib/ProfileData/SampleProfReader.cpp
llvm/lib/Transforms/IPO/SampleContextTracker.cpp
llvm/lib/Transforms/IPO/SampleProfile.cpp
llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
llvm/test/Transforms/SampleProfile/csspgo-inline.ll
llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
llvm/tools/llvm-profgen/CSPreInliner.cpp
llvm/tools/llvm-profgen/ProfileGenerator.cpp
Removed:
################################################################################
diff --git a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
index 7007fe6f1ad89..6e45f8f6fb05d 100644
--- a/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
+++ b/llvm/include/llvm/Transforms/IPO/ProfiledCallGraph.h
@@ -56,7 +56,7 @@ class ProfiledCallGraph {
std::queue<ContextTrieNode *> Queue;
for (auto &Child : ContextTracker.getRootContext().getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
}
@@ -72,9 +72,10 @@ class ProfiledCallGraph {
// context-based one, which may in turn block context-based inlining.
for (auto &Child : Caller->getAllChildContext()) {
ContextTrieNode *Callee = &Child.second;
- addProfiledFunction(Callee->getFuncName());
+ addProfiledFunction(ContextTracker.getFuncNameFor(Callee));
Queue.push(Callee);
- addProfiledCall(Caller->getFuncName(), Callee->getFuncName());
+ addProfiledCall(ContextTracker.getFuncNameFor(Caller),
+ ContextTracker.getFuncNameFor(Callee));
}
}
}
diff --git a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
index 77ce3ca711c51..b1e100bd95848 100644
--- a/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
+++ b/llvm/include/llvm/Transforms/IPO/SampleContextTracker.h
@@ -110,7 +110,8 @@ class SampleContextTracker {
// deterministically.
using ContextSamplesTy = std::set<FunctionSamples *, ProfileComparer>;
- SampleContextTracker(SampleProfileMap &Profiles);
+ SampleContextTracker(SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap);
// Query context profile for a specific callee with given name at a given
// call-site. The full context is identified by location of call instruction.
FunctionSamples *getCalleeContextSamplesFor(const CallBase &Inst,
@@ -134,6 +135,8 @@ class SampleContextTracker {
FunctionSamples *getBaseSamplesFor(StringRef Name, bool MergeContext = true);
// Retrieve the context trie node for given profile context
ContextTrieNode *getContextFor(const SampleContext &Context);
+ // Get real function name for a given trie node.
+ StringRef getFuncNameFor(ContextTrieNode *Node) const;
// Mark a context profile as inlined when function is inlined.
// This makes sure that inlined context profile will be excluded in
// function's base profile.
@@ -163,6 +166,9 @@ class SampleContextTracker {
// Map from function name to context profiles (excluding base profile)
StringMap<ContextSamplesTy> FuncToCtxtProfiles;
+ // Map from function guid to real function names. Only used in md5 mode.
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap;
+
// Root node for context trie tree
ContextTrieNode RootContext;
};
diff --git a/llvm/lib/ProfileData/SampleProfReader.cpp b/llvm/lib/ProfileData/SampleProfReader.cpp
index 545414a1dfbd0..838b4038438eb 100644
--- a/llvm/lib/ProfileData/SampleProfReader.cpp
+++ b/llvm/lib/ProfileData/SampleProfReader.cpp
@@ -759,18 +759,7 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
- if (useMD5()) {
- for (auto Name : FuncsToUse) {
- auto GUID = std::to_string(MD5Hash(Name));
- auto iter = FuncOffsetTable.find(StringRef(GUID));
- if (iter == FuncOffsetTable.end())
- continue;
- const uint8_t *FuncProfileAddr = Start + iter->second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
- }
- } else if (ProfileIsCS) {
+ if (ProfileIsCS) {
// Compute the ordered set of names, so we can
// get all context profiles under a subtree by
// iterating through the ordered names.
@@ -779,13 +768,20 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
OrderedContexts.insert(Name.first);
}
+ DenseSet<uint64_t> FuncGuidsToUse;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse)
+ FuncGuidsToUse.insert(Function::getGUID(Name));
+ }
+
// For each function in current module, load all
// context profiles for the function.
for (auto NameOffset : FuncOffsetTable) {
SampleContext FContext = NameOffset.first;
auto FuncName = FContext.getName();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
+ if ((useMD5() && !FuncGuidsToUse.count(std::stoull(FuncName.data()))) ||
+ (!useMD5() && !FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName))))
continue;
// For each context profile we need, try to load
@@ -803,16 +799,29 @@ std::error_code SampleProfileReaderExtBinaryBase::readFuncProfiles() {
}
}
} else {
- for (auto NameOffset : FuncOffsetTable) {
- SampleContext FContext(NameOffset.first);
- auto FuncName = FContext.getName();
- if (!FuncsToUse.count(FuncName) &&
- (!Remapper || !Remapper->exist(FuncName)))
- continue;
- const uint8_t *FuncProfileAddr = Start + NameOffset.second;
- assert(FuncProfileAddr < End && "out of LBRProfile section");
- if (std::error_code EC = readFuncProfile(FuncProfileAddr))
- return EC;
+ if (useMD5()) {
+ for (auto Name : FuncsToUse) {
+ auto GUID = std::to_string(MD5Hash(Name));
+ auto iter = FuncOffsetTable.find(StringRef(GUID));
+ if (iter == FuncOffsetTable.end())
+ continue;
+ const uint8_t *FuncProfileAddr = Start + iter->second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
+ } else {
+ for (auto NameOffset : FuncOffsetTable) {
+ SampleContext FContext(NameOffset.first);
+ auto FuncName = FContext.getName();
+ if (!FuncsToUse.count(FuncName) &&
+ (!Remapper || !Remapper->exist(FuncName)))
+ continue;
+ const uint8_t *FuncProfileAddr = Start + NameOffset.second;
+ assert(FuncProfileAddr < End && "out of LBRProfile section");
+ if (std::error_code EC = readFuncProfile(FuncProfileAddr))
+ return EC;
+ }
}
}
Data = End;
diff --git a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
index 5137e61fb0653..ac62ca42a259e 100644
--- a/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
+++ b/llvm/lib/Transforms/IPO/SampleContextTracker.cpp
@@ -203,7 +203,10 @@ ContextTrieNode *ContextTrieNode::getOrCreateChildContext(
}
// Profiler tracker than manages profiles and its associated context
-SampleContextTracker::SampleContextTracker(SampleProfileMap &Profiles) {
+SampleContextTracker::SampleContextTracker(
+ SampleProfileMap &Profiles,
+ const DenseMap<uint64_t, StringRef> *GUIDToFuncNameMap)
+ : GUIDToFuncNameMap(GUIDToFuncNameMap) {
for (auto &FuncSample : Profiles) {
FunctionSamples *FSamples = &FuncSample.second;
SampleContext Context = FuncSample.first;
@@ -227,6 +230,10 @@ SampleContextTracker::getCalleeContextSamplesFor(const CallBase &Inst,
return nullptr;
CalleeName = FunctionSamples::getCanonicalFnName(CalleeName);
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ CalleeName = getRepInFormat(CalleeName, FunctionSamples::UseMD5, FGUID);
// For indirect call, CalleeName will be empty, in which case the context
// profile for callee with largest total samples will be returned.
@@ -313,6 +320,11 @@ FunctionSamples *SampleContextTracker::getBaseSamplesFor(const Function &Func,
FunctionSamples *SampleContextTracker::getBaseSamplesFor(StringRef Name,
bool MergeContext) {
LLVM_DEBUG(dbgs() << "Getting base profile for function: " << Name << "\n");
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::string FGUID;
+ Name = getRepInFormat(Name, FunctionSamples::UseMD5, FGUID);
+
// Base profile is top-level node (child of root node), so try to retrieve
// existing top-level node for given function first. If it exists, it could be
// that we've merged base profile before, or there's actually context-less
@@ -416,6 +428,13 @@ ContextTrieNode &SampleContextTracker::promoteMergeContextSamplesTree(
void SampleContextTracker::dump() { RootContext.dumpTree(); }
+StringRef SampleContextTracker::getFuncNameFor(ContextTrieNode *Node) const {
+ if (!FunctionSamples::UseMD5)
+ return Node->getFuncName();
+ assert(GUIDToFuncNameMap && "GUIDToFuncNameMap needs to be populated first");
+ return GUIDToFuncNameMap->lookup(std::stoull(Node->getFuncName().data()));
+}
+
ContextTrieNode *
SampleContextTracker::getContextFor(const SampleContext &Context) {
return getOrCreateContextPath(Context, false);
@@ -458,6 +477,17 @@ ContextTrieNode *SampleContextTracker::getContextFor(const DILocation *DIL) {
RootName = PrevDIL->getScope()->getSubprogram()->getName();
S.push_back(std::make_pair(LineLocation(0, 0), RootName));
+ // Convert real function names to MD5 names, if the input profile is
+ // MD5-based.
+ std::vector<std::string> MD5Names;
+ if (FunctionSamples::UseMD5) {
+ for (auto &Location : S) {
+ MD5Names.emplace_back();
+ getRepInFormat(Location.second, FunctionSamples::UseMD5, MD5Names.back());
+ Location.second = MD5Names.back();
+ }
+ }
+
ContextTrieNode *ContextNode = &RootContext;
int I = S.size();
while (--I >= 0 && ContextNode) {
diff --git a/llvm/lib/Transforms/IPO/SampleProfile.cpp b/llvm/lib/Transforms/IPO/SampleProfile.cpp
index 868a735347816..bb6c15b56a166 100644
--- a/llvm/lib/Transforms/IPO/SampleProfile.cpp
+++ b/llvm/lib/Transforms/IPO/SampleProfile.cpp
@@ -984,7 +984,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
Function *Func = SymbolMap.lookup(Name);
// Add to the import list only when it's defined out of module.
if (!Func || Func->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(Name));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeSample->getName()));
// Import hot CallTargets, which may not be available in IR because full
// profile annotation cannot be done until backend compilation in ThinLTO.
@@ -994,7 +994,7 @@ void SampleProfileLoader::findExternalInlineCandidate(
StringRef CalleeName = CalleeSample->getFuncName(TS.getKey());
const Function *Callee = SymbolMap.lookup(CalleeName);
if (!Callee || Callee->isDeclaration())
- InlinedGUIDs.insert(FunctionSamples::getGUID(CalleeName));
+ InlinedGUIDs.insert(FunctionSamples::getGUID(TS.getKey()));
}
// Import hot child context profile associted with callees. Note that this
@@ -1832,8 +1832,8 @@ bool SampleProfileLoader::doInitialization(Module &M,
UseIterativeBFIInference = true;
// Tracker for profiles under different context
- ContextTracker =
- std::make_unique<SampleContextTracker>(Reader->getProfiles());
+ ContextTracker = std::make_unique<SampleContextTracker>(
+ Reader->getProfiles(), &GUIDToFuncNameMap);
}
// Load pseudo probe descriptors for probe-based function samples.
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
index df180fbc31445..5665d7759efe2 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-import-list.ll
@@ -2,6 +2,8 @@
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%S/Inputs/csspgo-import-list.prof -S | FileCheck %s
; RUN: llvm-profdata merge --sample --extbinary %S/Inputs/csspgo-import-list.prof -o %t.prof
; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.prof -S | FileCheck %s
+; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/csspgo-import-list.prof -o %t.md5
+; RUN: opt < %s -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -sample-profile-file=%t.md5 -S | FileCheck %s
declare i32 @_Z5funcBi(i32 %x)
declare i32 @_Z5funcAi(i32 %x)
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
index c0b789a78b391..01a39e2123a7d 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline-icall.ll
@@ -2,6 +2,9 @@
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-ALL %s
; RUN: opt < %s -sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/indirect-call-csspgo.prof -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s
+; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/indirect-call-csspgo.prof -o %t.md5
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-icp-relative-hotness=1 -pass-remarks=sample-profile -sample-profile-inline-size=0 -S -o /dev/null 2>&1 | FileCheck -check-prefix=ICP-HOT %s
+
define void @test(void ()*) #0 !dbg !3 {
;; Add two direct call to force top-down order for sample profile loader
diff --git a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
index 0cdadf188c929..25bbf72f0a1f5 100644
--- a/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
+++ b/llvm/test/Transforms/SampleProfile/csspgo-inline.ll
@@ -6,7 +6,10 @@
; main:3 @ _Z5funcAi:1 @ _Z8funcLeafi
; _Z5funcBi:1 @ _Z8funcLeafi
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
-;
+
+; RUN: llvm-profdata merge --sample --extbinary --use-md5 %S/Inputs/profile-context-tracker.prof -o %t.md5
+; RUN: opt < %s -passes=sample-profile -sample-profile-file=%t.md5 -sample-profile-inline-size -sample-profile-prioritized-inline=0 -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-BASE
+
; With new FDO early inliner, callee entry count is used to drive inlining instead of callee total samples, so we get less inlining for given profile
; RUN: opt < %s -passes=sample-profile -sample-profile-file=%S/Inputs/profile-context-tracker.prof -sample-profile-inline-size -profile-sample-accurate -S -pass-remarks=inline -o /dev/null 2>&1 | FileCheck %s --check-prefix=INLINE-NEW
;
diff --git a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
index 0e9d94cf3df1c..7004ce53cc467 100644
--- a/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
+++ b/llvm/test/tools/llvm-profgen/noinline-cs-noprobe.test
@@ -9,6 +9,13 @@
; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.aggperfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t --profile-summary-cold-count=0
; RUN: FileCheck %s --input-file %t --check-prefix=CHECK-AGG
+; RUN: llvm-profgen --format=text --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t1 --profile-summary-cold-count=0
+; RUN: llvm-profgen --format=extbinary --perfscript=%S/Inputs/noinline-cs-noprobe.perfscript --binary=%S/Inputs/noinline-cs-noprobe.perfbin --output=%t2 --profile-summary-cold-count=0 -use-md5
+; RUN: llvm-profdata show --sample -show-sec-info-only %t2 | FileCheck %s --check-prefix=CHECK-MD5
+; RUN: llvm-profdata show --sample -detailed-summary --function=dummy %t1 > %t.summary
+; RUN: llvm-profdata show --sample -detailed-summary --function=dummy %t2 > %t2.summary
+; RUN: diff -b %t.summary %t2.summary
+
; CHECK-AGG:[main:1 @ foo]:108:0
; CHECK-AGG: 2: 6
@@ -74,6 +81,7 @@
; CHECK-UNWINDER: (5d7, 5e5): 2
; CHECK-UNWINDER: (5e9, 634): 3
+; CHECK-MD5: NameTableSection {{.*}} {fixlenmd5}
diff --git a/llvm/tools/llvm-profgen/CSPreInliner.cpp b/llvm/tools/llvm-profgen/CSPreInliner.cpp
index 7218d398d7561..a0544c0e6a9d0 100644
--- a/llvm/tools/llvm-profgen/CSPreInliner.cpp
+++ b/llvm/tools/llvm-profgen/CSPreInliner.cpp
@@ -42,9 +42,12 @@ static cl::opt<bool> SamplePreInlineReplay(
CSPreInliner::CSPreInliner(SampleProfileMap &Profiles, ProfiledBinary &Binary,
uint64_t HotThreshold, uint64_t ColdThreshold)
- : UseContextCost(UseContextCostForPreInliner), ContextTracker(Profiles),
- ProfileMap(Profiles), Binary(Binary), HotCountThreshold(HotThreshold),
- ColdCountThreshold(ColdThreshold) {}
+ : UseContextCost(UseContextCostForPreInliner),
+ // TODO: Pass in a guid-to-name map in order for
+ // ContextTracker.getFuncNameFor to work, if `Profiles` can have md5 codes
+ // as their profile context.
+ ContextTracker(Profiles, nullptr), ProfileMap(Profiles), Binary(Binary),
+ HotCountThreshold(HotThreshold), ColdCountThreshold(ColdThreshold) {}
std::vector<StringRef> CSPreInliner::buildTopDownOrder() {
std::vector<StringRef> Order;
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 6ea827cb1dab5..581e4dd6f0bf4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -27,6 +27,11 @@ static cl::opt<SampleProfileFormat> OutputFormat(
clEnumValN(SPF_GCC, "gcc",
"GCC encoding (only meaningful for -sample)")));
+cl::opt<bool> UseMD5(
+ "use-md5", cl::init(false), cl::Hidden,
+ cl::desc("Use md5 to represent function names in the output profile (only "
+ "meaningful for -extbinary)"));
+
static cl::opt<int32_t, true> RecursionCompression(
"compress-recursion",
cl::desc("Compressing recursion by deduplicating adjacent frame "
@@ -99,6 +104,15 @@ void ProfileGenerator::write() {
auto WriterOrErr = SampleProfileWriter::create(OutputFilename, OutputFormat);
if (std::error_code EC = WriterOrErr.getError())
exitWithError(EC, OutputFilename);
+
+ if (UseMD5) {
+ if (OutputFormat != SPF_Ext_Binary)
+ WithColor::warning() << "-use-md5 is ignored. Specify "
+ "--format=extbinary to enable it\n";
+ else
+ WriterOrErr.get()->setUseMD5();
+ }
+
write(std::move(WriterOrErr.get()), ProfileMap);
}
More information about the llvm-commits
mailing list