[llvm] ce40843 - [llvm-profgen][CSSPGO] On-demand function size computation for preinliner
via llvm-commits
llvm-commits at lists.llvm.org
Tue Sep 28 09:10:12 PDT 2021
Author: wlei
Date: 2021-09-28T09:09:38-07:00
New Revision: ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e
URL: https://github.com/llvm/llvm-project/commit/ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e
DIFF: https://github.com/llvm/llvm-project/commit/ce40843a3fe120621bb6e4aa07dc9cbf76b6aa0e.diff
LOG: [llvm-profgen][CSSPGO] On-demand function size computation for preinliner
Similar to https://reviews.llvm.org/D110465, we can compute function sizes on demand, restricted to the functions that are hit by samples.
Here we leverage the raw range samples' addresses to compute the set of sample-hit functions. `BinarySizeContextTracker` then only needs to work on those function ranges to compute sizes.
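To illustrate the idea, here is a minimal standalone sketch of the range-deduplication step. The types and names are simplified placeholders, not the actual llvm-profgen code (which works on SampleCounters and ProfiledBinary):

#include <cstdint>
#include <map>
#include <unordered_map>
#include <vector>

// Hypothetical simplified inputs: RangeStartOffsets holds the start offsets
// of the raw range samples; FuncStartToEnd maps each function's start offset
// to its end offset, kept sorted by start offset.
std::unordered_map<uint64_t, uint64_t>
collectSampledFuncRanges(const std::vector<uint64_t> &RangeStartOffsets,
                         const std::map<uint64_t, uint64_t> &FuncStartToEnd) {
  // Dedup on the function start offset, mirroring the FuncRanges hash map
  // in computeSizeForProfiledFunctions below.
  std::unordered_map<uint64_t, uint64_t> FuncRanges;
  for (uint64_t Start : RangeStartOffsets) {
    // Find the function whose start offset is the largest one <= Start.
    auto It = FuncStartToEnd.upper_bound(Start);
    if (It == FuncStartToEnd.begin())
      continue; // Sample falls before the first known function; skip it.
    --It;
    if (Start < It->second) // The sample lies inside [start, end).
      FuncRanges[It->first] = It->second;
  }
  return FuncRanges;
}

Only functions that actually appear as range-sample starts get recorded, so the size computation never touches the rest of the binary.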
Reviewed By: hoy
Differential Revision: https://reviews.llvm.org/D110466
Added:
Modified:
llvm/tools/llvm-profgen/ProfileGenerator.cpp
llvm/tools/llvm-profgen/ProfileGenerator.h
llvm/tools/llvm-profgen/ProfiledBinary.cpp
llvm/tools/llvm-profgen/ProfiledBinary.h
Removed:
################################################################################
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.cpp b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
index 77aff9d9967a7..05e8e4dc78a0d 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.cpp
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.cpp
@@ -425,6 +425,10 @@ FunctionSamples &CSProfileGenerator::getFunctionProfileForContext(
void CSProfileGenerator::generateProfile() {
FunctionSamples::ProfileIsCS = true;
+
+ if (Binary->getTrackFuncContextSize())
+ computeSizeForProfiledFunctions();
+
if (Binary->usePseudoProbes()) {
// Enable pseudo probe functionalities in SampleProf
FunctionSamples::ProfileIsProbeBased = true;
@@ -435,6 +439,29 @@ void CSProfileGenerator::generateProfile() {
postProcessProfiles();
}
+void CSProfileGenerator::computeSizeForProfiledFunctions() {
+ // Hash map to deduplicate function ranges. Each entry is a pair of the
+ // function's start and end offsets.
+ std::unordered_map<uint64_t, uint64_t> FuncRanges;
+ // Go through all the ranges in the CS counters, use the start of each range
+ // to look up the function it belongs to, and record that function's range.
+ for (const auto &CI : SampleCounters) {
+ for (auto Item : CI.second.RangeCounter) {
+ // FIXME: Filter out bogus ranges that cross function boundaries.
+ uint64_t RangeStartOffset = Item.first.first;
+ auto FuncRange = Binary->findFuncOffsetRange(RangeStartOffset);
+ if (FuncRange.second != 0)
+ FuncRanges[FuncRange.first] = FuncRange.second;
+ }
+ }
+
+ for (auto I : FuncRanges) {
+ uint64_t StartOffset = I.first;
+ uint64_t EndOffset = I.second;
+ Binary->computeInlinedContextSizeForRange(StartOffset, EndOffset);
+ }
+}
+
void CSProfileGenerator::generateLineNumBasedProfile() {
for (const auto &CI : SampleCounters) {
const StringBasedCtxKey *CtxKey =
diff --git a/llvm/tools/llvm-profgen/ProfileGenerator.h b/llvm/tools/llvm-profgen/ProfileGenerator.h
index 64f469be363f0..86e2d68f216e4 100644
--- a/llvm/tools/llvm-profgen/ProfileGenerator.h
+++ b/llvm/tools/llvm-profgen/ProfileGenerator.h
@@ -228,6 +228,9 @@ class CSProfileGenerator : public ProfileGeneratorBase {
FunctionSamples &
getFunctionProfileForContext(const SampleContextFrameVector &Context,
bool WasLeafInlined = false);
+ // For profiled functions only, compute their inline context function byte
+ // sizes on demand; these sizes are used by the pre-inliner.
+ void computeSizeForProfiledFunctions();
// Post-processing for profiles before writing out, such as merging and
// trimming cold profiles, and running the preinliner on profiles.
void postProcessProfiles();
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 1e8da15500059..2d1f68ff77ec5 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -293,10 +293,10 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
uint64_t SectionOffset = Section.getAddress() - getPreferredBaseAddress();
uint64_t SectSize = Section.getSize();
uint64_t StartOffset = Symbols[SI].Addr - getPreferredBaseAddress();
- uint64_t EndOffset = (SI + 1 < SE)
- ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
- : SectionOffset + SectSize;
- if (StartOffset >= EndOffset)
+ uint64_t NextStartOffset =
+ (SI + 1 < SE) ? Symbols[SI + 1].Addr - getPreferredBaseAddress()
+ : SectionOffset + SectSize;
+ if (StartOffset >= NextStartOffset)
return true;
StringRef SymbolName =
@@ -316,10 +316,11 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
};
uint64_t Offset = StartOffset;
+ uint64_t EndOffset = 0;
// Size of the consecutive invalid instruction range extending backwards
// from Offset - 1.
uint64_t InvalidInstLength = 0;
- while (Offset < EndOffset) {
+ while (Offset < NextStartOffset) {
MCInst Inst;
uint64_t Size;
// Disassemble an instruction.
@@ -353,32 +354,19 @@ bool ProfiledBinary::dissassembleSymbol(std::size_t SI, ArrayRef<uint8_t> Bytes,
if (Disassembled) {
const MCInstrDesc &MCDesc = MII->get(Inst.getOpcode());
- // Populate a vector of the symbolized callsite at this location
- // We don't need symbolized info for probe-based profile, just use an
- // empty stack as an entry to indicate a valid binary offset
- SampleContextFrameVector SymbolizedCallStack;
- if (TrackFuncContextSize) {
- InstructionPointer IP(this, Offset);
- // TODO: reallocation of Offset2LocStackMap will lead to dangling
- // strings We need ProfiledBinary to owned these string.
- Offset2LocStackMap[Offset] = symbolize(IP, true, UsePseudoProbes);
- SampleContextFrameVector &SymbolizedCallStack =
- Offset2LocStackMap[Offset];
- // Record instruction size for the corresponding context
- if (TrackFuncContextSize && !SymbolizedCallStack.empty())
- FuncSizeTracker.addInstructionForContext(Offset2LocStackMap[Offset],
- Size);
- }
+
// Record instruction size.
Offset2InstSizeMap[Offset] = Size;
// Populate address maps.
- CodeAddrs.push_back(Offset);
+ CodeAddrOffsets.push_back(Offset);
if (MCDesc.isCall())
CallAddrs.insert(Offset);
else if (MCDesc.isReturn())
RetAddrs.insert(Offset);
+ EndOffset = Offset;
+
if (InvalidInstLength) {
WarnInvalidInsts(Offset - InvalidInstLength, Offset - 1);
InvalidInstLength = 0;
@@ -560,6 +548,26 @@ SampleContextFrameVector ProfiledBinary::symbolize(const InstructionPointer &IP,
return CallStack;
}
+void ProfiledBinary::computeInlinedContextSizeForRange(uint64_t StartOffset,
+ uint64_t EndOffset) {
+ uint32_t Index = getIndexForOffset(StartOffset);
+ if (CodeAddrOffsets[Index] != StartOffset)
+ WithColor::warning() << "Invalid start instruction at "
+ << format("%8" PRIx64, StartOffset) << "\n";
+
+ uint64_t Offset = CodeAddrOffsets[Index];
+ while (Offset <= EndOffset) {
+ const SampleContextFrameVector &SymbolizedCallStack =
+ getFrameLocationStack(Offset, UsePseudoProbes);
+ uint64_t Size = Offset2InstSizeMap[Offset];
+
+ // Record instruction size for the corresponding context
+ FuncSizeTracker.addInstructionForContext(SymbolizedCallStack, Size);
+
+ Offset = CodeAddrOffsets[++Index];
+ }
+}
+
InstructionPointer::InstructionPointer(const ProfiledBinary *Binary,
uint64_t Address, bool RoundToNext)
: Binary(Binary), Address(Address) {
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index fd719ef443e64..64759cad8fa92 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -175,7 +175,7 @@ class ProfiledBinary {
// An array of offsets of all instructions, sorted in increasing order. The
// sorting is needed to quickly advance to the next forward/backward
// instruction.
- std::vector<uint64_t> CodeAddrs;
+ std::vector<uint64_t> CodeAddrOffsets;
// A set of call instruction offsets. Used by virtual unwinding.
std::unordered_set<uint64_t> CallAddrs;
// A set of return instruction offsets. Used by virtual unwinding.
@@ -231,7 +231,6 @@ class ProfiledBinary {
SampleContextFrameVector symbolize(const InstructionPointer &IP,
bool UseCanonicalFnName = false,
bool UseProbeDiscriminator = false);
-
/// Decode the interesting parts of the binary and build internal data
/// structures. On high level, the parts of interest are:
/// 1. Text sections, including the main code section and the PLT
@@ -289,18 +288,21 @@ class ProfiledBinary {
}
uint64_t getAddressforIndex(uint64_t Index) const {
- return offsetToVirtualAddr(CodeAddrs[Index]);
+ return offsetToVirtualAddr(CodeAddrOffsets[Index]);
}
bool usePseudoProbes() const { return UsePseudoProbes; }
- // Get the index in CodeAddrs for the address
+ // Get the index in CodeAddrOffsets for the address
// As we might get an address that is not a code address, this rounds up to
// the next valid code address using a lower_bound operation.
+ uint32_t getIndexForOffset(uint64_t Offset) const {
+ auto Low = llvm::lower_bound(CodeAddrOffsets, Offset);
+ return Low - CodeAddrOffsets.begin();
+ }
uint32_t getIndexForAddr(uint64_t Address) const {
uint64_t Offset = virtualAddrToOffset(Address);
- auto Low = llvm::lower_bound(CodeAddrs, Offset);
- return Low - CodeAddrs.begin();
+ return getIndexForOffset(Offset);
}
uint64_t getCallAddrFromFrameAddr(uint64_t FrameAddr) const {
@@ -356,6 +358,10 @@ class ProfiledBinary {
SampleContextFrameVector
getExpandedContext(const SmallVectorImpl<uint64_t> &Stack,
bool &WasLeafInlined);
+ // Go through the instructions in the given range and record each
+ // instruction's size for its inline context.
+ void computeInlinedContextSizeForRange(uint64_t StartOffset,
+ uint64_t EndOffset);
const MCDecodedPseudoProbe *getCallProbeForAddr(uint64_t Address) const {
return ProbeDecoder.getCallProbeForAddr(Address);
@@ -384,6 +390,8 @@ class ProfiledBinary {
return ProbeDecoder.getInlinerDescForProbe(Probe);
}
+ bool getTrackFuncContextSize() { return TrackFuncContextSize; }
+
bool getIsLoadedByMMap() { return IsLoadedByMMap; }
void setIsLoadedByMMap(bool Value) { IsLoadedByMMap = Value; }
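A side note on getIndexForOffset: because a raw sample offset may not land exactly on an instruction start, lower_bound rounds it up to the next valid code offset. A small self-contained example of that behavior (a free-standing sketch, not the member function above):

#include <algorithm>
#include <cassert>
#include <cstdint>
#include <vector>

// Same lower_bound logic as ProfiledBinary::getIndexForOffset, written as a
// free function over a sorted vector of instruction start offsets.
uint32_t getIndexForOffset(const std::vector<uint64_t> &CodeAddrOffsets,
                           uint64_t Offset) {
  auto Low = std::lower_bound(CodeAddrOffsets.begin(), CodeAddrOffsets.end(),
                              Offset);
  return Low - CodeAddrOffsets.begin();
}

int main() {
  // Sorted instruction start offsets, as produced by disassembly.
  std::vector<uint64_t> CodeAddrOffsets = {0x10, 0x14, 0x19, 0x20};
  assert(getIndexForOffset(CodeAddrOffsets, 0x14) == 1); // Exact hit.
  assert(getIndexForOffset(CodeAddrOffsets, 0x15) == 2); // Rounds up to 0x19.
  return 0;
}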