[llvm] 34e131b - [llvm-profgen] On-demand track optimized-away inlinees for preinliner.
Hongtao Yu via llvm-commits
llvm-commits at lists.llvm.org
Tue Feb 8 08:33:29 PST 2022
Author: Hongtao Yu
Date: 2022-02-08T08:33:23-08:00
New Revision: 34e131b0f253d9c2eda78378aaf77703bc2921f9
URL: https://github.com/llvm/llvm-project/commit/34e131b0f253d9c2eda78378aaf77703bc2921f9
DIFF: https://github.com/llvm/llvm-project/commit/34e131b0f253d9c2eda78378aaf77703bc2921f9.diff
LOG: [llvm-profgen] On-demand track optimized-away inlinees for preinliner.
Tracking optimized-away inlinees based on all probes in a binary is expensive in terms of memory usage. I'm making the tracking on-demand, based on profiled functions only. This saves about 10% memory overall for a medium-sized benchmark; a sketch of the pattern is included after the measurements below.
Before:
note: After parsePerfTraces
note: Thu Jan 27 18:42:09 2022
note: VM: 8.68 GB RSS: 8.39 GB
note: After computeSizeForProfiledFunctions
note: Thu Jan 27 18:42:41 2022
note: **VM: 10.63 GB RSS: 10.20 GB**
note: After generateProbeBasedProfile
note: Thu Jan 27 18:45:49 2022
note: VM: 25.00 GB RSS: 24.95 GB
note: After postProcessProfiles
note: Thu Jan 27 18:49:29 2022
note: VM: 26.34 GB RSS: 26.27 GB
After:
note: After parsePerfTraces
note: Fri Jan 28 12:04:49 2022
note: VM: 8.68 GB RSS: 7.65 GB
note: After computeSizeForProfiledFunctions
note: Fri Jan 28 12:05:26 2022
note: **VM: 8.68 GB RSS: 8.42 GB**
note: After generateProbeBasedProfile
note: Fri Jan 28 12:08:03 2022
note: VM: 22.93 GB RSS: 22.89 GB
note: After postProcessProfiles
note: Fri Jan 28 12:11:30 2022
note: VM: 24.27 GB RSS: 24.22 GB
This should be a no-diff change in terms of profile quality.
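For reference, here's a minimal sketch of the on-demand pattern this change adopts. Everything below (MockProfiledBinary, RangeCounterTy, the stubbed member functions) is a hypothetical, simplified stand-in for the llvm-profgen originals, not their real interfaces: deduplicate the functions that actually appear in the profiled ranges, compute inlined-context sizes once per function, then release the symbolizer.

#include <cstdint>
#include <map>
#include <unordered_set>
#include <utility>
#include <vector>

struct BinaryFunction {
  // A function body may be split into multiple [start, end) offset ranges.
  std::vector<std::pair<uint64_t, uint64_t>> Ranges;
};

struct FuncRange {
  BinaryFunction *Func = nullptr;
};

struct MockProfiledBinary {
  // Stubbed stand-ins for the ProfiledBinary queries the new code path uses.
  FuncRange *findFuncRangeForOffset(uint64_t /*Offset*/) { return nullptr; }
  void computeInlinedContextSizeForFunc(const BinaryFunction * /*Func*/) {}
  void flushSymbolizer() {} // Real version resets the symbolizer to free memory.
};

// Range counter keyed by a (start, end) offset pair, as in llvm-profgen.
using RangeCounterTy = std::map<std::pair<uint64_t, uint64_t>, uint64_t>;

void computeSizeForProfiledFunctions(MockProfiledBinary &Binary,
                                     const RangeCounterTy &RangeCounter) {
  // Dedupe on functions rather than raw ranges: only functions that actually
  // show up in the profile get tracked, not every probe in the binary.
  std::unordered_set<const BinaryFunction *> ProfiledFunctions;
  for (const auto &Item : RangeCounter)
    if (FuncRange *FR = Binary.findFuncRangeForOffset(Item.first.first))
      ProfiledFunctions.insert(FR->Func);

  // Compute sizes once per profiled function; in the real code this also
  // walks the function's probe inline tree for optimized-away inlinees.
  for (const BinaryFunction *Func : ProfiledFunctions)
    Binary.computeInlinedContextSizeForFunc(Func);

  // Symbolization is finished at this point, so its caches can be released.
  Binary.flushSymbolizer();
}

The key difference from the old code path is that probe inline trees are only walked for functions in ProfiledFunctions, rather than for every probe in the binary up front.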
Reviewed By: wenlei
Differential Revision: https://reviews.llvm.org/D118515
Added:
Modified:
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/tools/llvm-profgen/ProfiledBinary.cpp
llvm/tools/llvm-profgen/ProfiledBinary.h
Removed:
################################################################################
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 61d4626574a00..b0e72def46db6 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -589,27 +589,24 @@ void CSProfileGenerator::generateProfile() {
}
void CSProfileGenerator::computeSizeForProfiledFunctions() {
- // Hash map to deduplicate the function range and the item is a pair of
- // function start and end offset.
- std::unordered_map<uint64_t, uint64_t> AggregatedRanges;
+ std::unordered_set<const BinaryFunction *> ProfiledFunctions;
+
// Go through all the ranges in the CS counters, use the start of the range to
- // look up the function it belongs and record the function range.
+  // look up the function it belongs to and record the function.
for (const auto &CI : SampleCounters) {
for (const auto &Item : CI.second.RangeCounter) {
// FIXME: Filter the bogus crossing function range.
uint64_t StartOffset = Item.first.first;
- // Note that a function can be spilt into multiple ranges, so get all
- // ranges of the function.
- for (const auto &Range : Binary->getRangesForOffset(StartOffset))
- AggregatedRanges[Range.first] = Range.second;
+ if (FuncRange *FRange = Binary->findFuncRangeForOffset(StartOffset))
+ ProfiledFunctions.insert(FRange->Func);
}
}
- for (const auto &I : AggregatedRanges) {
- uint64_t StartOffset = I.first;
- uint64_t EndOffset = I.second;
- Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
- }
+ for (auto *Func : ProfiledFunctions)
+ Binary->computeInlinedContextSizeForFunc(Func);
+
+ // Flush the symbolizer to save memory.
+ Binary->flushSymbolizer();
}
void CSProfileGenerator::generateLineNumBasedProfile() {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index e477eb86f2652..3430b030c01a8 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -219,10 +219,6 @@ void ProfiledBinary::load() {
// Disassemble the text sections.
disassemble(Obj);
- // Track size for optimized inlinees when probe is available
- if (UsePseudoProbes && TrackFuncContextSize)
- FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder);
-
// Use function start and return address to infer prolog and epilog
ProEpilogTracker.inferPrologOffsets(StartOffset2FuncRangeMap);
ProEpilogTracker.inferEpilogOffsets(RetOffsets);
@@ -349,6 +345,17 @@ void ProfiledBinary::decodePseudoProbe(const ELFObjectFileBase *Obj) {
}
}
+  // Build TopLevelProbeFrameMap to track sizes for optimized-away inlinees
+  // when probes are available.
+ if (UsePseudoProbes && TrackFuncContextSize) {
+ for (const auto &Child : ProbeDecoder.getDummyInlineRoot().getChildren()) {
+ auto *Frame = Child.second.get();
+ StringRef FuncName =
+ ProbeDecoder.getFuncDescForGUID(Frame->Guid)->FuncName;
+ TopLevelProbeFrameMap[FuncName] = Frame;
+ }
+ }
+
if (ShowPseudoProbe)
ProbeDecoder.printGUID2FuncDescMap(outs());
}
@@ -747,6 +754,25 @@ void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
} while (IP.advance() && IP.Address < RangeEnd);
}
+void ProfiledBinary::computeInlinedContextSizeForFunc(
+ const BinaryFunction *Func) {
+  // Note that a function can be split into multiple ranges, so compute for all
+ // ranges of the function.
+ for (const auto &Range : Func->Ranges)
+ computeInlinedContextSizeForRange(Range.first, Range.second);
+
+  // Track optimized-away inlinees for a probed binary. A function inlined and
+  // then optimized away should still have its probes left over in place.
+ if (usePseudoProbes()) {
+ auto I = TopLevelProbeFrameMap.find(Func->FuncName);
+ if (I != TopLevelProbeFrameMap.end()) {
+ BinarySizeContextTracker::ProbeFrameStack ProbeContext;
+ FuncSizeTracker.trackInlineesOptimizedAway(ProbeDecoder, *I->second,
+ ProbeContext);
+ }
+ }
+}
+
InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
uint64_t Address, bool RoundToNext)
: Binary(Binary), Address(Address) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index d3d1c6f1fd248..33b0b81fb0468 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -166,14 +166,14 @@ class BinarySizeContextTracker {
// their remaining probes.
void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder);
- void dump() { RootContext.dumpTree(); }
-
-private:
using ProbeFrameStack = SmallVector<std::pair<StringRef, uint32_t>>;
void trackInlineesOptimizedAway(MCPseudoProbeDecoder &ProbeDecoder,
MCDecodedPseudoProbeInlineTree &ProbeNode,
ProbeFrameStack &Context);
+ void dump() { RootContext.dumpTree(); }
+
+private:
// Root node for context trie tree, note that this is a reverse context trie
// with callee as parent and caller as child. This way we can traverse from
// root to find the best/longest matching context if an exact match does not
@@ -256,6 +256,9 @@ class ProfiledBinary {
// Pseudo probe decoder
MCPseudoProbeDecoder ProbeDecoder;
+ // Function name to probe frame map for top-level outlined functions.
+ StringMap<MCDecodedPseudoProbeInlineTree *> TopLevelProbeFrameMap;
+
bool UsePseudoProbes = false;
bool UseFSDiscriminator = false;
@@ -477,6 +480,8 @@ class ProfiledBinary {
return Stack.back();
}
+ void flushSymbolizer() { Symbolizer.reset(); }
+
// Compare two addresses' inline context
bool inlineContextEqual(uint64_t Add1, uint64_t Add2);
@@ -491,6 +496,8 @@ class ProfiledBinary {
void computeInlinedContextSizeForRange(uint64_t StartOffset,
uint64_t EndOffset);
+ void computeInlinedContextSizeForFunc(const BinaryFunction *Func);
+
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
return ProbeDecoder.getCallProbeForAddr(Address);
}
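To make the decode-time side concrete, here is a hedged sketch of the two-phase design (again with hypothetical stand-in types, not the real MCPseudoProbeDecoder or BinarySizeContextTracker interfaces): the map of top-level probe frames is built once while decoding, and each profiled function consults it lazily.

#include <map>
#include <string>

struct ProbeInlineTree {}; // Stand-in for MCDecodedPseudoProbeInlineTree.

struct SizeTracker {
  // Stand-in for BinarySizeContextTracker::trackInlineesOptimizedAway.
  void trackInlineesOptimizedAway(ProbeInlineTree & /*Node*/) {}
};

// Phase 1, at probe-decode time: record one entry per top-level outlined
// function, i.e. per child of the decoder's dummy inline root.
std::map<std::string, ProbeInlineTree *> TopLevelProbeFrameMap;

// Phase 2, per profiled function only: look the function up by name and walk
// its probe inline tree, instead of walking every tree in the binary.
void trackOptimizedAwayInlinees(SizeTracker &Tracker,
                                const std::string &FuncName) {
  auto It = TopLevelProbeFrameMap.find(FuncName);
  if (It != TopLevelProbeFrameMap.end())
    Tracker.trackInlineesOptimizedAway(*It->second);
}

The map itself is cheap, one entry per top-level outlined function, while the expensive inline-tree walks are deferred until a function is actually seen in the profile.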