[llvm] 08e8bb6 - Revert "[CSSPGO][llvm-profgen] Aggregate samples on call frame trie to speed up profile generation"
via llvm-commits
llvm-commits at lists.llvm.org
Wed Feb 3 22:18:36 PST 2021
Author: wlei
Date: 2021-02-03T22:16:05-08:00
New Revision: 08e8bb60cf99fa86890603f99d63a6b854a8ade4
URL: https://github.com/llvm/llvm-project/commit/08e8bb60cf99fa86890603f99d63a6b854a8ade4
DIFF: https://github.com/llvm/llvm-project/commit/08e8bb60cf99fa86890603f99d63a6b854a8ade4.diff
LOG: Revert "[CSSPGO][llvm-profgen] Aggregate samples on call frame trie to speed up profile generation"
This reverts commit 1714ad2336293f351b15dd4b518f9e8618ec38f2.
Added:
Modified:
llvm/tools/llvm-profgen/PerfReader.cpp
llvm/tools/llvm-profgen/PerfReader.h
llvm/tools/llvm-profgen/ProfiledBinary.cpp
llvm/tools/llvm-profgen/ProfiledBinary.h
Removed:
################################################################################
diff --git a/llvm/tools/llvm-profgen/PerfReader.cpp b/llvm/tools/llvm-profgen/PerfReader.cpp
index 787bde28400f..d05c665f8583 100644
--- a/llvm/tools/llvm-profgen/PerfReader.cpp
+++ b/llvm/tools/llvm-profgen/PerfReader.cpp
@@ -28,12 +28,11 @@ void VirtualUnwinder::unwindCall(UnwindState &State) {
// 2nd frame is in prolog/epilog. In the future, we will switch to
// pro/epi tracker(Dwarf CFI) for the precise check.
uint64_t Source = State.getCurrentLBRSource();
- auto *ParentFrame = State.getParentFrame();
- if (ParentFrame == State.getDummyRootPtr() ||
- ParentFrame->Address != Source) {
- State.switchToFrame(Source);
+ auto Iter = State.CallStack.begin();
+ if (State.CallStack.size() == 1 || *(++Iter) != Source) {
+ State.CallStack.front() = Source;
} else {
- State.popFrame();
+ State.CallStack.pop_front();
}
State.InstPtr.update(Source);
}
@@ -42,29 +41,26 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
InstructionPointer &IP = State.InstPtr;
uint64_t Target = State.getCurrentLBRTarget();
uint64_t End = IP.Address;
- if (Binary->usePseudoProbes()) {
- // We don't need to top frame probe since it should be extracted
- // from the range.
+ if (State.getBinary()->usePseudoProbes()) {
// The outcome of the virtual unwinding with pseudo probes is a
// map from a context key to the address range being unwound.
// This means basically linear unwinding is not needed for pseudo
// probes. The range will be simply recorded here and will be
// converted to a list of pseudo probes to report in ProfileGenerator.
- State.getParentFrame()->recordRangeCount(Target, End, Repeat);
+ recordRangeCount(Target, End, State, Repeat);
} else {
// Unwind linear execution part
- uint64_t LeafAddr = State.CurrentLeafFrame->Address;
while (IP.Address >= Target) {
uint64_t PrevIP = IP.Address;
IP.backward();
// Break into segments for implicit call/return due to inlining
- bool SameInlinee = Binary->inlineContextEqual(PrevIP, IP.Address);
+ bool SameInlinee =
+ State.getBinary()->inlineContextEqual(PrevIP, IP.Address);
if (!SameInlinee || PrevIP == Target) {
- State.switchToFrame(LeafAddr);
- State.CurrentLeafFrame->recordRangeCount(PrevIP, End, Repeat);
+ recordRangeCount(PrevIP, End, State, Repeat);
End = IP.Address;
}
- LeafAddr = IP.Address;
+ State.CallStack.front() = IP.Address;
}
}
}
@@ -72,9 +68,9 @@ void VirtualUnwinder::unwindLinear(UnwindState &State, uint64_t Repeat) {
void VirtualUnwinder::unwindReturn(UnwindState &State) {
// Add extra frame as we unwind through the return
const LBREntry &LBR = State.getCurrentLBR();
- uint64_t CallAddr = Binary->getCallAddrFromFrameAddr(LBR.Target);
- State.switchToFrame(CallAddr);
- State.pushFrame(LBR.Source);
+ uint64_t CallAddr = State.getBinary()->getCallAddrFromFrameAddr(LBR.Target);
+ State.CallStack.front() = CallAddr;
+ State.CallStack.push_front(LBR.Source);
State.InstPtr.update(LBR.Source);
}
@@ -82,100 +78,79 @@ void VirtualUnwinder::unwindBranchWithinFrame(UnwindState &State) {
// TODO: Tolerate tail call for now, as we may see tail call from libraries.
// This is only for intra function branches, excluding tail calls.
uint64_t Source = State.getCurrentLBRSource();
- State.switchToFrame(Source);
+ State.CallStack.front() = Source;
State.InstPtr.update(Source);
}
-std::shared_ptr<StringBasedCtxKey> FrameStack::getContextKey() {
+SampleCounter &
+VirtualUnwinder::getOrCreateCounter(const ProfiledBinary *Binary,
+ std::list<uint64_t> &CallStack) {
+ if (Binary->usePseudoProbes()) {
+ return getOrCreateCounterForProbe(Binary, CallStack);
+ }
std::shared_ptr<StringBasedCtxKey> KeyStr =
std::make_shared<StringBasedCtxKey>();
- KeyStr->Context = Binary->getExpandedContextStr(Stack);
+ KeyStr->Context = Binary->getExpandedContextStr(CallStack);
KeyStr->genHashCode();
- return KeyStr;
+ auto Ret =
+ CtxCounterMap->emplace(Hashable<ContextKey>(KeyStr), SampleCounter());
+ return Ret.first->second;
}
-std::shared_ptr<ProbeBasedCtxKey> ProbeStack::getContextKey() {
+SampleCounter &
+VirtualUnwinder::getOrCreateCounterForProbe(const ProfiledBinary *Binary,
+ std::list<uint64_t> &CallStack) {
std::shared_ptr<ProbeBasedCtxKey> ProbeBasedKey =
std::make_shared<ProbeBasedCtxKey>();
- for (auto CallProbe : Stack) {
- ProbeBasedKey->Probes.emplace_back(CallProbe);
+ if (CallStack.size() > 1) {
+ // We don't need the top frame probe since it should be extracted
+ // from the range.
+ // The top of stack is an instruction from the function where
+ // the LBR address range physically resides. Strip it since
+ // the function is not a part of the call context. We also
+ // don't need its inline context since the probes being unwound
+ // come with an inline context all the way back to the uninlined
+ // function in their prefix tree.
+ auto Iter = CallStack.rbegin();
+ auto EndT = std::prev(CallStack.rend());
+ for (; Iter != EndT; Iter++) {
+ uint64_t Address = *Iter;
+ const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Address);
+ // We may not find a probe for a merged or external callsite.
+ // Callsite merging may cause the loss of original probe IDs.
+ // Cutting off the context from here since the inliner will
+ // not know how to consume a context with unknown callsites.
+ if (!CallProbe)
+ break;
+ ProbeBasedKey->Probes.emplace_back(CallProbe);
+ }
}
CSProfileGenerator::compressRecursionContext<const PseudoProbe *>(
ProbeBasedKey->Probes);
ProbeBasedKey->genHashCode();
- return ProbeBasedKey;
-}
-
-template <typename T>
-void VirtualUnwinder::collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur,
- T &Stack) {
- if (Cur->RangeSamples.empty() && Cur->BranchSamples.empty())
- return;
-
- std::shared_ptr<ContextKey> Key = Stack.getContextKey();
- auto Ret = CtxCounterMap->emplace(Hashable<ContextKey>(Key), SampleCounter());
- SampleCounter &SCounter = Ret.first->second;
- for (auto &Item : Cur->RangeSamples) {
- uint64_t StartOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
- uint64_t EndOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
- SCounter.recordRangeCount(StartOffset, EndOffset, std::get<2>(Item));
- }
-
- for (auto &Item : Cur->BranchSamples) {
- uint64_t SourceOffset = Binary->virtualAddrToOffset(std::get<0>(Item));
- uint64_t TargetOffset = Binary->virtualAddrToOffset(std::get<1>(Item));
- SCounter.recordBranchCount(SourceOffset, TargetOffset, std::get<2>(Item));
- }
-}
-
-template <typename T>
-void VirtualUnwinder::collectSamplesFromFrameTrie(
- UnwindState::ProfiledFrame *Cur, T &Stack) {
- if (!Cur->isDummyRoot()) {
- if (!Stack.pushFrame(Cur)) {
- // Process truncated context
- for (const auto &Item : Cur->Children) {
- // Start a new traversal ignoring its bottom context
- collectSamplesFromFrameTrie(Item.second.get());
- }
- return;
- }
- }
-
- collectSamplesFromFrame(Cur, Stack);
- // Process children frame
- for (const auto &Item : Cur->Children) {
- collectSamplesFromFrameTrie(Item.second.get(), Stack);
- }
- // Recover the call stack
- Stack.popFrame();
+ Hashable<ContextKey> ContextId(ProbeBasedKey);
+ auto Ret = CtxCounterMap->emplace(ContextId, SampleCounter());
+ return Ret.first->second;
}
-void VirtualUnwinder::collectSamplesFromFrameTrie(
- UnwindState::ProfiledFrame *Cur) {
- if (Binary->usePseudoProbes()) {
- ProbeStack Stack(Binary);
- collectSamplesFromFrameTrie<ProbeStack>(Cur, Stack);
- } else {
- FrameStack Stack(Binary);
- collectSamplesFromFrameTrie<FrameStack>(Cur, Stack);
- }
+void VirtualUnwinder::recordRangeCount(uint64_t Start, uint64_t End,
+ UnwindState &State, uint64_t Repeat) {
+ uint64_t StartOffset = State.getBinary()->virtualAddrToOffset(Start);
+ uint64_t EndOffset = State.getBinary()->virtualAddrToOffset(End);
+ SampleCounter &SCounter =
+ getOrCreateCounter(State.getBinary(), State.CallStack);
+ SCounter.recordRangeCount(StartOffset, EndOffset, Repeat);
}
void VirtualUnwinder::recordBranchCount(const LBREntry &Branch,
UnwindState &State, uint64_t Repeat) {
if (Branch.IsArtificial)
return;
-
- if (Binary->usePseudoProbes()) {
- // Same as recordRangeCount, We don't need to top frame probe since we will
- // extract it from branch's source address
- State.getParentFrame()->recordBranchCount(Branch.Source, Branch.Target,
- Repeat);
- } else {
- State.CurrentLeafFrame->recordBranchCount(Branch.Source, Branch.Target,
- Repeat);
- }
+ uint64_t SourceOffset = State.getBinary()->virtualAddrToOffset(Branch.Source);
+ uint64_t TargetOffset = State.getBinary()->virtualAddrToOffset(Branch.Target);
+ SampleCounter &SCounter =
+ getOrCreateCounter(State.getBinary(), State.CallStack);
+ SCounter.recordBranchCount(SourceOffset, TargetOffset, Repeat);
}
bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
@@ -224,8 +199,6 @@ bool VirtualUnwinder::unwind(const HybridSample *Sample, uint64_t Repeat) {
// Record `branch` with calling context after unwinding.
recordBranchCount(Branch, State, Repeat);
}
- // As samples are aggregated on trie, record them into counter map
- collectSamplesFromFrameTrie(State.getDummyRootPtr());
return true;
}
@@ -352,8 +325,7 @@ void PerfReader::printUnwinderOutput() {
void PerfReader::unwindSamples() {
for (const auto &Item : AggregatedSamples) {
const HybridSample *Sample = dyn_cast<HybridSample>(Item.first.getPtr());
- VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary],
- Sample->Binary);
+ VirtualUnwinder Unwinder(&BinarySampleCounters[Sample->Binary]);
Unwinder.unwind(Sample, Item.second);
}
@@ -362,7 +334,7 @@ void PerfReader::unwindSamples() {
}
bool PerfReader::extractLBRStack(TraceStream &TraceIt,
- SmallVectorImpl<LBREntry> &LBRStack,
+ SmallVector<LBREntry, 16> &LBRStack,
ProfiledBinary *Binary) {
// The raw format of LBR stack is like:
// 0x4005c8/0x4005dc/P/-/-/0 0x40062f/0x4005b0/P/-/-/0 ...
@@ -426,7 +398,7 @@ bool PerfReader::extractLBRStack(TraceStream &TraceIt,
}
bool PerfReader::extractCallstack(TraceStream &TraceIt,
- SmallVectorImpl<uint64_t> &CallStack) {
+ std::list<uint64_t> &CallStack) {
// The raw format of call stack is like:
// 4005dc # leaf frame
// 400634
diff --git a/llvm/tools/llvm-profgen/PerfReader.h b/llvm/tools/llvm-profgen/PerfReader.h
index 7eaa4b846259..66649a060bc3 100644
--- a/llvm/tools/llvm-profgen/PerfReader.h
+++ b/llvm/tools/llvm-profgen/PerfReader.h
@@ -133,7 +133,7 @@ struct HybridSample : public PerfSample {
// Profiled binary that current frame address belongs to
ProfiledBinary *Binary;
// Call stack recorded in FILO(leaf to root) order
- SmallVector<uint64_t, 16> CallStack;
+ std::list<uint64_t> CallStack;
// LBR stack recorded in FIFO order
SmallVector<LBREntry, 16> LBRStack;
@@ -147,7 +147,7 @@ struct HybridSample : public PerfSample {
const HybridSample *Other = dyn_cast<HybridSample>(K);
if (Other->Binary != Binary)
return false;
- const SmallVector<uint64_t, 16> &OtherCallStack = Other->CallStack;
+ const std::list<uint64_t> &OtherCallStack = Other->CallStack;
const SmallVector<LBREntry, 16> &OtherLBRStack = Other->LBRStack;
if (CallStack.size() != OtherCallStack.size() ||
@@ -193,40 +193,14 @@ using AggregatedCounter =
std::unordered_map<Hashable<PerfSample>, uint64_t,
Hashable<PerfSample>::Hash, Hashable<PerfSample>::Equal>;
-using SampleVector = SmallVector<std::tuple<uint64_t, uint64_t, uint64_t>, 16>;
// The state for the unwinder. It doesn't hold the data but only keeps the
// pointer/index of the data. While unwinding, the CallStack is changed
// dynamically and will be recorded as the context of the sample.
struct UnwindState {
// Profiled binary that current frame address belongs to
const ProfiledBinary *Binary;
- // Call stack trie node
- struct ProfiledFrame {
- const uint64_t Address = 0;
- ProfiledFrame *Parent;
- SampleVector RangeSamples;
- SampleVector BranchSamples;
- std::unordered_map<uint64_t, std::unique_ptr<ProfiledFrame>> Children;
-
- ProfiledFrame(uint64_t Addr = 0, ProfiledFrame *P = nullptr)
- : Address(Addr), Parent(P) {}
- ProfiledFrame *getOrCreateChildFrame(uint64_t Address) {
- assert(Address && "Address can't be zero!");
- auto Ret = Children.emplace(
- Address, std::make_unique<ProfiledFrame>(Address, this));
- return Ret.first->second.get();
- }
- void recordRangeCount(uint64_t Start, uint64_t End, uint64_t Count) {
- RangeSamples.emplace_back(std::make_tuple(Start, End, Count));
- }
- void recordBranchCount(uint64_t Source, uint64_t Target, uint64_t Count) {
- BranchSamples.emplace_back(std::make_tuple(Source, Target, Count));
- }
- bool isDummyRoot() { return Address == 0; }
- };
-
- ProfiledFrame DummyTrieRoot;
- ProfiledFrame *CurrentLeafFrame;
+ // TODO: switch to use trie for call stack
+ std::list<uint64_t> CallStack;
// Used to fall through the LBR stack
uint32_t LBRIndex = 0;
// Reference to HybridSample.LBRStack
@@ -234,20 +208,19 @@ struct UnwindState {
// Used to iterate the address range
InstructionPointer InstPtr;
UnwindState(const HybridSample *Sample)
- : Binary(Sample->Binary), LBRStack(Sample->LBRStack),
- InstPtr(Sample->Binary, Sample->CallStack.front()) {
- initFrameTrie(Sample->CallStack);
- }
+ : Binary(Sample->Binary), CallStack(Sample->CallStack),
+ LBRStack(Sample->LBRStack),
+ InstPtr(Sample->Binary, Sample->CallStack.front()) {}
bool validateInitialState() {
uint64_t LBRLeaf = LBRStack[LBRIndex].Target;
- uint64_t LeafAddr = CurrentLeafFrame->Address;
+ uint64_t StackLeaf = CallStack.front();
// When we take a stack sample, ideally the sampling distance between the
// leaf IP of stack and the last LBR target shouldn't be very large.
// Use a heuristic size (0x100) to filter out broken records.
- if (LeafAddr < LBRLeaf || LeafAddr >= LBRLeaf + 0x100) {
+ if (StackLeaf < LBRLeaf || StackLeaf >= LBRLeaf + 0x100) {
WithColor::warning() << "Bogus trace: stack tip = "
- << format("%#010x", LeafAddr)
+ << format("%#010x", StackLeaf)
<< ", LBR tip = " << format("%#010x\n", LBRLeaf);
return false;
}
@@ -255,40 +228,19 @@ struct UnwindState {
}
void checkStateConsistency() {
- assert(InstPtr.Address == CurrentLeafFrame->Address &&
+ assert(InstPtr.Address == CallStack.front() &&
"IP should align with context leaf");
}
+ std::string getExpandedContextStr() const {
+ return Binary->getExpandedContextStr(CallStack);
+ }
const ProfiledBinary *getBinary() const { return Binary; }
bool hasNextLBR() const { return LBRIndex < LBRStack.size(); }
uint64_t getCurrentLBRSource() const { return LBRStack[LBRIndex].Source; }
uint64_t getCurrentLBRTarget() const { return LBRStack[LBRIndex].Target; }
const LBREntry &getCurrentLBR() const { return LBRStack[LBRIndex]; }
void advanceLBR() { LBRIndex++; }
-
- ProfiledFrame *getParentFrame() { return CurrentLeafFrame->Parent; }
-
- void pushFrame(uint64_t Address) {
- CurrentLeafFrame = CurrentLeafFrame->getOrCreateChildFrame(Address);
- }
-
- void switchToFrame(uint64_t Address) {
- if (CurrentLeafFrame->Address == Address)
- return;
- CurrentLeafFrame = CurrentLeafFrame->Parent->getOrCreateChildFrame(Address);
- }
-
- void popFrame() { CurrentLeafFrame = CurrentLeafFrame->Parent; }
-
- void initFrameTrie(const SmallVectorImpl<uint64_t> &CallStack) {
- ProfiledFrame *Cur = &DummyTrieRoot;
- for (auto Address : reverse(CallStack)) {
- Cur = Cur->getOrCreateChildFrame(Address);
- }
- CurrentLeafFrame = Cur;
- }
-
- ProfiledFrame *getDummyRootPtr() { return &DummyTrieRoot; }
};
// Base class for sample counter key with context
@@ -378,56 +330,6 @@ using ContextSampleCounterMap =
std::unordered_map<Hashable<ContextKey>, SampleCounter,
Hashable<ContextKey>::Hash, Hashable<ContextKey>::Equal>;
-struct FrameStack {
- SmallVector<uint64_t, 16> Stack;
- const ProfiledBinary *Binary;
- FrameStack(const ProfiledBinary *B) : Binary(B) {}
- bool pushFrame(UnwindState::ProfiledFrame *Cur) {
- Stack.push_back(Cur->Address);
- return true;
- }
-
- void popFrame() {
- if (!Stack.empty())
- Stack.pop_back();
- }
- std::shared_ptr<StringBasedCtxKey> getContextKey();
-};
-
-struct ProbeStack {
- SmallVector<const PseudoProbe *, 16> Stack;
- const ProfiledBinary *Binary;
- ProbeStack(const ProfiledBinary *B) : Binary(B) {}
- bool pushFrame(UnwindState::ProfiledFrame *Cur) {
- const PseudoProbe *CallProbe = Binary->getCallProbeForAddr(Cur->Address);
- // We may not find a probe for a merged or external callsite.
- // Callsite merging may cause the loss of original probe IDs.
- // Cutting off the context from here since the inliner will
- // not know how to consume a context with unknown callsites.
- if (!CallProbe)
- return false;
- Stack.push_back(CallProbe);
- return true;
- }
-
- void popFrame() {
- if (!Stack.empty())
- Stack.pop_back();
- }
- // Use pseudo probe based context key to get the sample counter
- // A context stands for a call path from 'main' to an uninlined
- // callee with all inline frames recovered on that path. The probes
- // belonging to that call path is the probes either originated from
- // the callee or from any functions inlined into the callee. Since
- // pseudo probes are organized in a tri-tree style after decoded,
- // the tree path from the tri-tree root (which is the uninlined
- // callee) to the probe node forms an inline context.
- // Here we use a list of probe(pointer) as the context key to speed up
- // aggregation and the final context string will be generate in
- // ProfileGenerator
- std::shared_ptr<ProbeBasedCtxKey> getContextKey();
-};
-
/*
As in hybrid sample we have a group of LBRs and the most recent sampling call
stack, we can walk through those LBRs to infer more call stacks which would be
@@ -449,43 +351,47 @@ range as sample counter for further CS profile generation.
*/
class VirtualUnwinder {
public:
- VirtualUnwinder(ContextSampleCounterMap *Counter, const ProfiledBinary *B)
- : CtxCounterMap(Counter), Binary(B) {}
- bool unwind(const HybridSample *Sample, uint64_t Repeat);
+ VirtualUnwinder(ContextSampleCounterMap *Counter) : CtxCounterMap(Counter) {}
-private:
bool isCallState(UnwindState &State) const {
// The tail call frame is always missing here in stack sample, we will
// use a specific tail call tracker to infer it.
- return Binary->addressIsCall(State.getCurrentLBRSource());
+ return State.getBinary()->addressIsCall(State.getCurrentLBRSource());
}
bool isReturnState(UnwindState &State) const {
// Simply check addressIsReturn, as ret is always reliable, both for
// regular call and tail call.
- return Binary->addressIsReturn(State.getCurrentLBRSource());
+ return State.getBinary()->addressIsReturn(State.getCurrentLBRSource());
}
void unwindCall(UnwindState &State);
void unwindLinear(UnwindState &State, uint64_t Repeat);
void unwindReturn(UnwindState &State);
void unwindBranchWithinFrame(UnwindState &State);
-
- template <typename T>
- void collectSamplesFromFrame(UnwindState::ProfiledFrame *Cur, T &Stack);
- // Collect each samples on trie node by DFS traversal
- template <typename T>
- void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur, T &Stack);
- void collectSamplesFromFrameTrie(UnwindState::ProfiledFrame *Cur);
-
+ bool unwind(const HybridSample *Sample, uint64_t Repeat);
void recordRangeCount(uint64_t Start, uint64_t End, UnwindState &State,
uint64_t Repeat);
void recordBranchCount(const LBREntry &Branch, UnwindState &State,
uint64_t Repeat);
+ SampleCounter &getOrCreateCounter(const ProfiledBinary *Binary,
+ std::list<uint64_t> &CallStack);
+ // Use pseudo probe based context key to get the sample counter
+ // A context stands for a call path from 'main' to an uninlined
+ // callee with all inline frames recovered on that path. The probes
+ // belonging to that call path is the probes either originated from
+ // the callee or from any functions inlined into the callee. Since
+ // pseudo probes are organized in a tri-tree style after decoded,
+ // the tree path from the tri-tree root (which is the uninlined
+ // callee) to the probe node forms an inline context.
+ // Here we use a list of probe(pointer) as the context key to speed up
+ // aggregation and the final context string will be generated in
+ // ProfileGenerator
+ SampleCounter &getOrCreateCounterForProbe(const ProfiledBinary *Binary,
+ std::list<uint64_t> &CallStack);
+private:
ContextSampleCounterMap *CtxCounterMap;
- // Profiled binary that current frame address belongs to
- const ProfiledBinary *Binary;
};
// Filename to binary map
@@ -551,11 +457,10 @@ class PerfReader {
// Parse the hybrid sample including the call and LBR line
void parseHybridSample(TraceStream &TraceIt);
// Extract call stack from the perf trace lines
- bool extractCallstack(TraceStream &TraceIt,
- SmallVectorImpl<uint64_t> &CallStack);
+ bool extractCallstack(TraceStream &TraceIt, std::list<uint64_t> &CallStack);
// Extract LBR stack from one perf trace line
bool extractLBRStack(TraceStream &TraceIt,
- SmallVectorImpl<LBREntry> &LBRStack,
+ SmallVector<LBREntry, 16> &LBRStack,
ProfiledBinary *Binary);
void checkAndSetPerfType(cl::list<std::string> &PerfTraceFilenames);
// Post process the profile after trace aggregation, we will do simple range
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.cpp b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
index 2c6cedf57649..16ef04aba99e 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.cpp
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.cpp
@@ -126,13 +126,13 @@ bool ProfiledBinary::inlineContextEqual(uint64_t Address1,
Context2.begin(), Context2.begin() + Context2.size() - 1);
}
-std::string ProfiledBinary::getExpandedContextStr(
- const SmallVectorImpl<uint64_t> &Stack) const {
+std::string
+ProfiledBinary::getExpandedContextStr(const std::list<uint64_t> &Stack) const {
std::string ContextStr;
SmallVector<std::string, 16> ContextVec;
// Process from frame root to leaf
- for (auto Address : Stack) {
- uint64_t Offset = virtualAddrToOffset(Address);
+ for (auto Iter = Stack.rbegin(); Iter != Stack.rend(); Iter++) {
+ uint64_t Offset = virtualAddrToOffset(*Iter);
const FrameLocationStack &ExpandedContext = getFrameLocationStack(Offset);
for (const auto &Loc : ExpandedContext) {
ContextVec.push_back(getCallSite(Loc));
diff --git a/llvm/tools/llvm-profgen/ProfiledBinary.h b/llvm/tools/llvm-profgen/ProfiledBinary.h
index b99a7a69284c..552037b6c319 100644
--- a/llvm/tools/llvm-profgen/ProfiledBinary.h
+++ b/llvm/tools/llvm-profgen/ProfiledBinary.h
@@ -236,8 +236,7 @@ class ProfiledBinary {
// Get the context string of the current stack with inline context filled in.
// It will search the disassembling info stored in Offset2LocStackMap. This is
// used as the key of function sample map
- std::string
- getExpandedContextStr(const SmallVectorImpl<uint64_t> &Stack) const;
+ std::string getExpandedContextStr(const std::list<uint64_t> &stack) const;
const PseudoProbe *getCallProbeForAddr(uint64_t Address) const {
return ProbeDecoder.getCallProbeForAddr(Address);
More information about the llvm-commits
mailing list