[llvm] r315635 - [XRay][tools] Updated stacks tool with flamegraph output.
Evgenii Stepanov via llvm-commits
llvm-commits at lists.llvm.org
Thu Oct 12 16:23:17 PDT 2017
Please test your code with -Werror before committing.
/code/llvm-project/llvm/tools/llvm-xray/xray-stacks.cc:801:7: error:
default label in switch which covers all enumeration values
[-Werror,-Wcovered-switch-default]
default:
^
/code/llvm-project/llvm/tools/llvm-xray/xray-stacks.cc:816:7: error:
default label in switch which covers all enumeration values
[-Werror,-Wcovered-switch-default]
default:
^
2 errors generated.
On Thu, Oct 12, 2017 at 3:47 PM, Keith Wyss via llvm-commits
<llvm-commits at lists.llvm.org> wrote:
> Author: kpw
> Date: Thu Oct 12 15:47:42 2017
> New Revision: 315635
>
> URL: http://llvm.org/viewvc/llvm-project?rev=315635&view=rev
> Log:
> [XRay][tools] Updated stacks tool with flamegraph output.
>
> Summary:
> As the first step to allow analysis and visualization of xray collected data,
> allow using the llvm-xray stacks tool to emit a complete listing of stacks in
> the format consumable by a flamegraph tool.
>
> Possible follow up formats include chrome trace viewer format and sql load
> files.
>
> As a POC, I'm able to generate flamegraphs of an xray instrumented llc compiling
> hello world.
>
> Reviewers: dberris, pelikan
>
> Subscribers: llvm-commits
>
> Differential Revision: https://reviews.llvm.org/D38650
>
> Modified:
> llvm/trunk/tools/llvm-xray/xray-stacks.cc
>
> Modified: llvm/trunk/tools/llvm-xray/xray-stacks.cc
> URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-xray/xray-stacks.cc?rev=315635&r1=315634&r2=315635&view=diff
> ==============================================================================
> --- llvm/trunk/tools/llvm-xray/xray-stacks.cc (original)
> +++ llvm/trunk/tools/llvm-xray/xray-stacks.cc Thu Oct 12 15:47:42 2017
> @@ -66,8 +66,52 @@ static cl::opt<bool>
> cl::desc("Aggregate stack times across threads"),
> cl::sub(Stack), cl::init(false));
>
> -/// A helper struct to work with formatv and XRayRecords. Makes it easier to use
> -/// instrumentation map names or addresses in formatted output.
> +static cl::opt<bool>
> + DumpAllStacks("all-stacks",
> + cl::desc("Dump sum of timings for all stacks. "
> + "By default separates stacks per-thread."),
> + cl::sub(Stack), cl::init(false));
> +static cl::alias DumpAllStacksShort("all", cl::aliasopt(DumpAllStacks),
> + cl::desc("Alias for -all-stacks"),
> + cl::sub(Stack));
> +
> +// TODO(kpw): Add other interesting formats. Perhaps chrome trace viewer format
> +// possibly with aggregations or just a linear trace of timings.
> +enum StackOutputFormat { HUMAN, FLAMETOOL };
> +
> +static cl::opt<StackOutputFormat> StacksOutputFormat(
> + "stack-format",
> + cl::desc("The format that output stacks should be "
> + "output in. Only applies with all-stacks."),
> + cl::values(
> + clEnumValN(HUMAN, "human",
> + "Human readable output. Only valid without -all-stacks."),
> + clEnumValN(FLAMETOOL, "flame",
> + "Format consumable by Brendan Gregg's FlameGraph tool. "
> + "Only valid with -all-stacks.")),
> + cl::sub(Stack), cl::init(HUMAN));
> +
> +// Types of values for each stack in a CallTrie.
> +enum class AggregationType {
> + TOTAL_TIME, // The total time spent in a stack and its callees.
> + INVOCATION_COUNT // The number of times the stack was invoked.
> +};
> +
> +static cl::opt<AggregationType> RequestedAggregation(
> + "aggregation-type",
> + cl::desc("The type of aggregation to do on call stacks."),
> + cl::values(
> + clEnumValN(
> + AggregationType::TOTAL_TIME, "time",
> + "Capture the total time spent in an all invocations of a stack."),
> + clEnumValN(AggregationType::INVOCATION_COUNT, "count",
> + "Capture the number of times a stack was invoked. "
> + "In flamegraph mode, this count also includes invocations "
> + "of all callees.")),
> + cl::sub(Stack), cl::init(AggregationType::TOTAL_TIME));
> +
> +/// A helper struct to work with formatv and XRayRecords. Makes it easier to
> +/// use instrumentation map names or addresses in formatted output.
> struct format_xray_record : public FormatAdapter<XRayRecord> {
> explicit format_xray_record(XRayRecord record,
> const FuncIdConversionHelper &conv)
> @@ -274,10 +318,45 @@ TrieNode *mergeTrieNodes(const TrieNode
> return Node;
> }
>
> +template <AggregationType AggType>
> +std::size_t GetValueForStack(const TrieNode *Node);
> +
> +// When computing total time spent in a stack, we're adding the timings from
> +// its callees and the timings from when it was a leaf.
> +template <>
> +std::size_t
> +GetValueForStack<AggregationType::TOTAL_TIME>(const TrieNode *Node) {
> + auto TopSum = std::accumulate(Node->TerminalDurations.begin(),
> + Node->TerminalDurations.end(), 0uLL);
> + return std::accumulate(Node->IntermediateDurations.begin(),
> + Node->IntermediateDurations.end(), TopSum);
> +}
> +
> +// Calculates how many times a function was invoked.
> +// TODO: Hook up option to produce stacks
> +template <>
> +std::size_t
> +GetValueForStack<AggregationType::INVOCATION_COUNT>(const TrieNode *Node) {
> + return Node->TerminalDurations.size() + Node->IntermediateDurations.size();
> +}
> +
> +// Make sure there are implementations for each enum value.
> +template <AggregationType T> struct DependentFalseType : std::false_type {};
> +
> +template <AggregationType AggType>
> +std::size_t GetValueForStack(const TrieNode *Node) {
> + static_assert(DependentFalseType<AggType>::value,
> + "No implementation found for aggregation type provided.");
> + return 0;
> +}
> +
> class StackTrie {
> + // Avoid the magic number of 4 propagated through the code with an alias.
> + // We use this SmallVector to track the root nodes in a call graph.
> + using RootVector = SmallVector<TrieNode *, 4>;
>
> // We maintain pointers to the roots of the tries we see.
> - DenseMap<uint32_t, SmallVector<TrieNode *, 4>> Roots;
> + DenseMap<uint32_t, RootVector> Roots;
>
> // We make sure all the nodes are accounted for in this list.
> std::forward_list<TrieNode> NodeStore;
> @@ -439,11 +518,23 @@ public:
> }
> }
>
> + /// Prints timing sums for each stack in each threads.
> + template <AggregationType AggType>
> + void printAllPerThread(raw_ostream &OS, FuncIdConversionHelper &FN,
> + StackOutputFormat format) {
> + for (auto iter : Roots) {
> + uint32_t threadId = iter.first;
> + RootVector &perThreadRoots = iter.second;
> + bool reportThreadId = true;
> + printAll<AggType>(OS, FN, perThreadRoots, threadId, reportThreadId);
> + }
> + }
> +
> /// Prints top stacks from looking at all the leaves and ignoring thread IDs.
> /// Stacks that consist of the same function IDs but were called in different
> /// thread IDs are not considered unique in this printout.
> void printIgnoringThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
> - SmallVector<TrieNode *, 4> RootValues;
> + RootVector RootValues;
>
> // Function to pull the values out of a map iterator.
> using RootsType = decltype(Roots.begin())::value_type;
> @@ -459,30 +550,88 @@ public:
> print(OS, FN, RootValues);
> }
>
> - /// Merges the trie by thread id before printing top stacks.
> - void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
> - std::forward_list<TrieNode> AggregatedNodeStore;
> - SmallVector<TrieNode *, 4> RootValues;
> + /// Creates a merged list of Tries for unique stacks that disregards their
> + /// thread IDs.
> + RootVector mergeAcrossThreads(std::forward_list<TrieNode> &NodeStore) {
> + RootVector MergedByThreadRoots;
> for (auto MapIter : Roots) {
> const auto &RootNodeVector = MapIter.second;
> for (auto *Node : RootNodeVector) {
> - auto MaybeFoundIter = find_if(RootValues, [Node](TrieNode *elem) {
> - return Node->FuncId == elem->FuncId;
> - });
> - if (MaybeFoundIter == RootValues.end()) {
> - RootValues.push_back(Node);
> + auto MaybeFoundIter =
> + find_if(MergedByThreadRoots, [Node](TrieNode *elem) {
> + return Node->FuncId == elem->FuncId;
> + });
> + if (MaybeFoundIter == MergedByThreadRoots.end()) {
> + MergedByThreadRoots.push_back(Node);
> } else {
> - RootValues.push_back(mergeTrieNodes(**MaybeFoundIter, *Node, nullptr,
> - AggregatedNodeStore));
> - RootValues.erase(MaybeFoundIter);
> + MergedByThreadRoots.push_back(
> + mergeTrieNodes(**MaybeFoundIter, *Node, nullptr, NodeStore));
> + MergedByThreadRoots.erase(MaybeFoundIter);
> }
> }
> }
> - print(OS, FN, RootValues);
> + return MergedByThreadRoots;
> + }
> +
> + /// Print timing sums for all stacks merged by Thread ID.
> + template <AggregationType AggType>
> + void printAllAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN,
> + StackOutputFormat format) {
> + std::forward_list<TrieNode> AggregatedNodeStore;
> + RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
> + bool reportThreadId = false;
> + printAll<AggType>(OS, FN, MergedByThreadRoots,
> + /*threadId*/ 0, reportThreadId);
> + }
> +
> + /// Merges the trie by thread id before printing top stacks.
> + void printAggregatingThreads(raw_ostream &OS, FuncIdConversionHelper &FN) {
> + std::forward_list<TrieNode> AggregatedNodeStore;
> + RootVector MergedByThreadRoots = mergeAcrossThreads(AggregatedNodeStore);
> + print(OS, FN, MergedByThreadRoots);
> + }
> +
> + // TODO: Add a format option when more than one are supported.
> + template <AggregationType AggType>
> + void printAll(raw_ostream &OS, FuncIdConversionHelper &FN,
> + RootVector RootValues, uint32_t ThreadId, bool ReportThread) {
> + SmallVector<const TrieNode *, 16> S;
> + for (const auto *N : RootValues) {
> + S.clear();
> + S.push_back(N);
> + while (!S.empty()) {
> + auto *Top = S.pop_back_val();
> + printSingleStack<AggType>(OS, FN, ReportThread, ThreadId, Top);
> + for (const auto *C : Top->Callees)
> + S.push_back(C);
> + }
> + }
> + }
> +
> + /// Prints values for stacks in a format consumable for the flamegraph.pl
> + /// tool. This is a line based format that lists each level in the stack
> + /// hierarchy in a semicolon delimited form followed by a space and a numeric
> + /// value. If breaking down by thread, the thread ID will be added as the
> + /// root level of the stack.
> + template <AggregationType AggType>
> + void printSingleStack(raw_ostream &OS, FuncIdConversionHelper &Converter,
> + bool ReportThread, uint32_t ThreadId,
> + const TrieNode *Node) {
> + if (ReportThread)
> + OS << "thread_" << ThreadId << ";";
> + SmallVector<const TrieNode *, 5> lineage{};
> + lineage.push_back(Node);
> + while (lineage.back()->Parent != nullptr)
> + lineage.push_back(lineage.back()->Parent);
> + while (!lineage.empty()) {
> + OS << Converter.SymbolOrNumber(lineage.back()->FuncId) << ";";
> + lineage.pop_back();
> + }
> + OS << " " << GetValueForStack<AggType>(Node) << "\n";
> }
>
> void print(raw_ostream &OS, FuncIdConversionHelper &FN,
> - SmallVector<TrieNode *, 4> RootValues) {
> + RootVector RootValues) {
> // Go through each of the roots, and traverse the call stack, producing the
> // aggregates as you go along. Remember these aggregates and stacks, and
> // show summary statistics about:
> @@ -502,7 +651,7 @@ public:
> S.emplace_back(N);
>
> while (!S.empty()) {
> - auto Top = S.pop_back_val();
> + auto *Top = S.pop_back_val();
>
> // We only start printing the stack (by walking up the parent pointers)
> // when we get to a leaf function.
> @@ -587,6 +736,17 @@ static CommandRegistration Unused(&Stack
> "that aggregates threads."),
> std::make_error_code(std::errc::invalid_argument));
>
> + if (!DumpAllStacks && StacksOutputFormat != HUMAN)
> + return make_error<StringError>(
> + Twine("Can't specify a non-human format without -all-stacks."),
> + std::make_error_code(std::errc::invalid_argument));
> +
> + if (DumpAllStacks && StacksOutputFormat == HUMAN)
> + return make_error<StringError>(
> + Twine("You must specify a non-human format when reporting with "
> + "-all-stacks."),
> + std::make_error_code(std::errc::invalid_argument));
> +
> symbolize::LLVMSymbolizer::Options Opts(
> symbolize::FunctionNameKind::LinkageName, true, true, false, "");
> symbolize::LLVMSymbolizer Symbolizer(Opts);
> @@ -625,6 +785,44 @@ static CommandRegistration Unused(&Stack
> "No instrumented calls were accounted in the input file.",
> make_error_code(errc::result_out_of_range));
> }
> +
> + // Report the stacks in a long form mode for another tool to analyze.
> + if (DumpAllStacks) {
> + if (AggregateThreads) {
> + switch (RequestedAggregation) {
> + case AggregationType::TOTAL_TIME:
> + ST.printAllAggregatingThreads<AggregationType::TOTAL_TIME>(
> + outs(), FuncIdHelper, StacksOutputFormat);
> + break;
> + case AggregationType::INVOCATION_COUNT:
> + ST.printAllAggregatingThreads<AggregationType::INVOCATION_COUNT>(
> + outs(), FuncIdHelper, StacksOutputFormat);
> + break;
> + default:
> + return make_error<StringError>(
> + "Illegal value for aggregation-type.",
> + make_error_code(errc::result_out_of_range));
> + }
> + } else {
> + switch (RequestedAggregation) {
> + case AggregationType::TOTAL_TIME:
> + ST.printAllPerThread<AggregationType::TOTAL_TIME>(outs(), FuncIdHelper,
> + StacksOutputFormat);
> + break;
> + case AggregationType::INVOCATION_COUNT:
> + ST.printAllPerThread<AggregationType::INVOCATION_COUNT>(
> + outs(), FuncIdHelper, StacksOutputFormat);
> + break;
> + default:
> + return make_error<StringError>(
> + "Illegal value for aggregation-type.",
> + make_error_code(errc::result_out_of_range));
> + }
> + }
> + return Error::success();
> + }
> +
> + // We're only outputting top stacks.
> if (AggregateThreads) {
> ST.printAggregatingThreads(outs(), FuncIdHelper);
> } else if (SeparateThreadStacks) {
>
>
> _______________________________________________
> llvm-commits mailing list
> llvm-commits at lists.llvm.org
> http://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-commits
More information about the llvm-commits
mailing list