[llvm] r275921 - [llvm-profdata] Speed up merging by using a thread pool
Vedant Kumar via llvm-commits
llvm-commits at lists.llvm.org
Mon Jul 18 15:02:40 PDT 2016
Author: vedantk
Date: Mon Jul 18 17:02:39 2016
New Revision: 275921
URL: http://llvm.org/viewvc/llvm-project?rev=275921&view=rev
Log:
[llvm-profdata] Speed up merging by using a thread pool
Add a "-j" option to llvm-profdata to control the number of threads
used. Auto-detect NumThreads when it isn't specified, and avoid spawning
threads when they wouldn't be beneficial.
I tested this patch using a raw profile produced by clang (147MB). Here is the
time taken to merge 4 copies together on my laptop:
No thread pool: 112.87s user 5.92s system 97% cpu 2:01.08 total
With 2 threads: 134.99s user 26.54s system 164% cpu 1:33.31 total
Differential Revision: https://reviews.llvm.org/D22438
Modified:
llvm/trunk/docs/CommandGuide/llvm-profdata.rst
llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h
llvm/trunk/lib/ProfileData/InstrProfWriter.cpp
llvm/trunk/test/tools/llvm-profdata/multiple-inputs.test
llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp
llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
Modified: llvm/trunk/docs/CommandGuide/llvm-profdata.rst
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/docs/CommandGuide/llvm-profdata.rst?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/docs/CommandGuide/llvm-profdata.rst (original)
+++ llvm/trunk/docs/CommandGuide/llvm-profdata.rst Mon Jul 18 17:02:39 2016
@@ -106,6 +106,11 @@ OPTIONS
conjunction with -instr. Defaults to false, since it can inhibit compiler
optimization during PGO.
+.. option:: -num-threads=N, -j=N
+
+ Use N threads to perform profile merging. When N=0, llvm-profdata auto-detects
+ an appropriate number of threads to use. This is the default.
+
EXAMPLES
^^^^^^^^
Basic Usage
Modified: llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h (original)
+++ llvm/trunk/include/llvm/ProfileData/InstrProfWriter.h Mon Jul 18 17:02:39 2016
@@ -47,6 +47,8 @@ public:
/// for this function and the hash and number of counts match, each counter is
/// summed. Optionally scale counts by \p Weight.
Error addRecord(InstrProfRecord &&I, uint64_t Weight = 1);
+ /// Merge existing function counts from the given writer.
+ Error mergeRecordsFromWriter(InstrProfWriter &&IPW);
/// Write the profile to \c OS
void write(raw_fd_ostream &OS);
/// Write the profile in text format to \c OS
Modified: llvm/trunk/lib/ProfileData/InstrProfWriter.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/ProfileData/InstrProfWriter.cpp?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/lib/ProfileData/InstrProfWriter.cpp (original)
+++ llvm/trunk/lib/ProfileData/InstrProfWriter.cpp Mon Jul 18 17:02:39 2016
@@ -182,6 +182,14 @@ Error InstrProfWriter::addRecord(InstrPr
return Dest.takeError();
}
+Error InstrProfWriter::mergeRecordsFromWriter(InstrProfWriter &&IPW) { // Fold every record held by IPW into this writer.
+ for (auto &I : IPW.FunctionData)
+ for (auto &Func : I.getValue())
+ if (Error E = addRecord(std::move(Func.second), 1)) // Each record is moved out of IPW and added with weight 1.
+ return E; // Stop at the first failure; nothing here undoes records already added.
+ return Error::success();
+}
+
bool InstrProfWriter::shouldEncodeData(const ProfilingData &PD) {
if (!Sparse)
return true;
Modified: llvm/trunk/test/tools/llvm-profdata/multiple-inputs.test
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/test/tools/llvm-profdata/multiple-inputs.test?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/test/tools/llvm-profdata/multiple-inputs.test (original)
+++ llvm/trunk/test/tools/llvm-profdata/multiple-inputs.test Mon Jul 18 17:02:39 2016
@@ -51,3 +51,43 @@ DISJOINT-2: Block counts: [2, 3]
DISJOINT: Total functions: 2
DISJOINT: Maximum function count: 1
DISJOINT: Maximum internal block count: 3
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: -num-threads 2 -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: -j 3 -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO4
+FOO4: foo:
+FOO4: Counters: 3
+FOO4: Function count: 4
+FOO4: Block counts: [8, 12]
+FOO4: Total functions: 1
+FOO4: Maximum function count: 4
+FOO4: Maximum internal block count: 12
+
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext -j 2 -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext -j 3 -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
+RUN: llvm-profdata merge %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext %p/Inputs/foo3-1.proftext \
+RUN: %p/Inputs/foo3-1.proftext -j 1 -o %t
+RUN: llvm-profdata show %t -all-functions -counts | FileCheck %s --check-prefix=FOO5
+FOO5: foo:
+FOO5: Counters: 3
+FOO5: Function count: 5
+FOO5: Block counts: [10, 15]
+FOO5: Total functions: 1
+FOO5: Maximum function count: 5
+FOO5: Maximum internal block count: 15
Modified: llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp (original)
+++ llvm/trunk/tools/llvm-profdata/llvm-profdata.cpp Mon Jul 18 17:02:39 2016
@@ -29,6 +29,7 @@
#include "llvm/Support/Path.h"
#include "llvm/Support/PrettyStackTrace.h"
#include "llvm/Support/Signals.h"
+#include "llvm/Support/ThreadPool.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
@@ -117,9 +118,68 @@ struct WeightedFile {
};
typedef SmallVector<WeightedFile, 5> WeightedFileVector;
+/// Keep track of merged data and reported errors.
+struct WriterContext {
+ std::mutex Lock; // Held by loadInput for the whole time it reads or updates this context.
+ InstrProfWriter Writer; // Accumulates the records loaded or merged into this context.
+ Error Err; // Deferred hard error; mergeInstrProfile inspects it after loading and merging.
+ StringRef ErrWhence; // Filename of the last input loadInput began loading; passed along with Err when it is reported.
+ std::mutex &ErrLock; // Caller-owned lock taken around writer-error reporting in loadInput.
+ SmallSet<instrprof_error, 4> &WriterErrorCodes; // Caller-owned set of error codes already reported; updated under ErrLock.
+
+ WriterContext(bool IsSparse, std::mutex &ErrLock,
+ SmallSet<instrprof_error, 4> &WriterErrorCodes)
+ : Lock(), Writer(IsSparse), Err(Error::success()), ErrWhence(""),
+ ErrLock(ErrLock), WriterErrorCodes(WriterErrorCodes) {}
+};
+
+/// Load an input into a writer context. Hard errors are stored in WC->Err instead of being reported here.
+static void loadInput(const WeightedFile &Input, WriterContext *WC) {
+ std::unique_lock<std::mutex> CtxGuard{WC->Lock}; // Held until return, so loads into the same context are serialized.
+
+ // If there's a pending hard error, don't do more work.
+ if (WC->Err)
+ return;
+
+ WC->ErrWhence = Input.Filename; // Remember which input any error recorded below came from.
+
+ auto ReaderOrErr = InstrProfReader::create(Input.Filename);
+ if ((WC->Err = ReaderOrErr.takeError())) // Failure to create a reader becomes the context's deferred error.
+ return;
+
+ auto Reader = std::move(ReaderOrErr.get());
+ bool IsIRProfile = Reader->isIRLevelProfile();
+ if (WC->Writer.setIsIRLevelProfile(IsIRProfile)) { // A true result is treated as a profile-kind mismatch (see the message below).
+ WC->Err = make_error<StringError>(
+ "Merge IR generated profile with Clang generated profile.",
+ std::error_code());
+ return;
+ }
+
+ for (auto &I : *Reader) {
+ if (Error E = WC->Writer.addRecord(std::move(I), Input.Weight)) {
+ // Only show hint the first time an error occurs.
+ instrprof_error IPE = InstrProfError::take(std::move(E));
+ std::unique_lock<std::mutex> ErrGuard{WC->ErrLock}; // WriterErrorCodes is reached through a reference, so guard it with the matching ErrLock.
+ bool firstTime = WC->WriterErrorCodes.insert(IPE).second;
+ handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename, // No return/break follows: if the handler returns, loading moves on to the next record.
+ I.Name, firstTime);
+ }
+ }
+ if (Reader->hasError()) // A reader error seen after iteration becomes the context's deferred error.
+ WC->Err = Reader->getError();
+}
+
+/// Merge the \p Src writer context into \p Dst. Src's writer is passed as an rvalue; a merge failure is stored in Dst->Err.
+static void mergeWriterContexts(WriterContext *Dst, WriterContext *Src) {
+ if (Error E = Dst->Writer.mergeRecordsFromWriter(std::move(Src->Writer))) // NOTE(review): neither context's Lock is taken here, so two merges into the same Dst must not run concurrently.
+ Dst->Err = std::move(E); // NOTE(review): mergeInstrProfile queues the odd-count merge into Contexts[0] before Pool.wait(), alongside the I == 0 merge -- looks like a race on Contexts[0]->Writer; confirm.
+}
+
static void mergeInstrProfile(const WeightedFileVector &Inputs,
StringRef OutputFilename,
- ProfileFormat OutputFormat, bool OutputSparse) {
+ ProfileFormat OutputFormat, bool OutputSparse,
+ unsigned NumThreads) {
if (OutputFilename.compare("-") == 0)
exitWithError("Cannot write indexed profdata format to stdout.");
@@ -131,30 +191,57 @@ static void mergeInstrProfile(const Weig
if (EC)
exitWithErrorCode(EC, OutputFilename);
- InstrProfWriter Writer(OutputSparse);
+ std::mutex ErrorLock;
SmallSet<instrprof_error, 4> WriterErrorCodes;
- for (const auto &Input : Inputs) {
- auto ReaderOrErr = InstrProfReader::create(Input.Filename);
- if (Error E = ReaderOrErr.takeError())
- exitWithError(std::move(E), Input.Filename);
-
- auto Reader = std::move(ReaderOrErr.get());
- bool IsIRProfile = Reader->isIRLevelProfile();
- if (Writer.setIsIRLevelProfile(IsIRProfile))
- exitWithError("Merge IR generated profile with Clang generated profile.");
-
- for (auto &I : *Reader) {
- if (Error E = Writer.addRecord(std::move(I), Input.Weight)) {
- // Only show hint the first time an error occurs.
- instrprof_error IPE = InstrProfError::take(std::move(E));
- bool firstTime = WriterErrorCodes.insert(IPE).second;
- handleMergeWriterError(make_error<InstrProfError>(IPE), Input.Filename,
- I.Name, firstTime);
- }
+
+ // If NumThreads is not specified, auto-detect a good default.
+ if (NumThreads == 0)
+ NumThreads = std::max(1U, std::min(std::thread::hardware_concurrency(),
+ unsigned(Inputs.size() / 2)));
+
+ // Initialize the writer contexts.
+ SmallVector<std::unique_ptr<WriterContext>, 4> Contexts;
+ for (unsigned I = 0; I < NumThreads; ++I)
+ Contexts.emplace_back(llvm::make_unique<WriterContext>(
+ OutputSparse, ErrorLock, WriterErrorCodes));
+
+ if (NumThreads == 1) {
+ for (const auto &Input : Inputs)
+ loadInput(Input, Contexts[0].get());
+ } else {
+ ThreadPool Pool(NumThreads);
+
+ // Load the inputs in parallel (N/NumThreads serial steps).
+ unsigned Ctx = 0;
+ for (const auto &Input : Inputs) {
+ Pool.async(loadInput, Input, Contexts[Ctx].get());
+ Ctx = (Ctx + 1) % NumThreads;
}
- if (Reader->hasError())
- exitWithError(Reader->getError(), Input.Filename);
+ Pool.wait();
+
+ // Merge the writer contexts together (lg(NumThreads) serial steps).
+ unsigned Mid = Contexts.size() / 2;
+ unsigned End = Contexts.size();
+ assert(Mid > 0 && "Expected more than one context");
+ do {
+ for (unsigned I = 0; I < Mid; ++I)
+ Pool.async(mergeWriterContexts, Contexts[I].get(),
+ Contexts[I + Mid].get());
+ if (End & 1)
+ Pool.async(mergeWriterContexts, Contexts[0].get(),
+ Contexts[End - 1].get());
+ Pool.wait();
+ End = Mid;
+ Mid /= 2;
+ } while (Mid > 0);
}
+
+ // Handle deferred hard errors encountered during merging.
+ for (std::unique_ptr<WriterContext> &WC : Contexts)
+ if (WC->Err)
+ exitWithError(std::move(WC->Err), WC->ErrWhence);
+
+ InstrProfWriter &Writer = Contexts[0]->Writer;
if (OutputFormat == PF_Text)
Writer.writeText(Output);
else
@@ -288,6 +375,11 @@ static int merge_main(int argc, const ch
clEnumValEnd));
cl::opt<bool> OutputSparse("sparse", cl::init(false),
cl::desc("Generate a sparse profile (only meaningful for -instr)"));
+ cl::opt<unsigned> NumThreads(
+ "num-threads", cl::init(0),
+ cl::desc("Number of merge threads to use (default: autodetect)"));
+ cl::alias NumThreadsA("j", cl::desc("Alias for --num-threads"),
+ cl::aliasopt(NumThreads));
cl::ParseCommandLineOptions(argc, argv, "LLVM profile data merger\n");
@@ -314,7 +406,7 @@ static int merge_main(int argc, const ch
if (ProfileKind == instr)
mergeInstrProfile(WeightedInputs, OutputFilename, OutputFormat,
- OutputSparse);
+ OutputSparse, NumThreads);
else
mergeSampleProfile(WeightedInputs, OutputFilename, OutputFormat);
Modified: llvm/trunk/unittests/ProfileData/InstrProfTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/ProfileData/InstrProfTest.cpp?rev=275921&r1=275920&r2=275921&view=diff
==============================================================================
--- llvm/trunk/unittests/ProfileData/InstrProfTest.cpp (original)
+++ llvm/trunk/unittests/ProfileData/InstrProfTest.cpp Mon Jul 18 17:02:39 2016
@@ -204,6 +204,31 @@ TEST_F(InstrProfTest, get_profile_summar
delete PSFromMD;
}
+TEST_F(InstrProfTest, test_writer_merge) { // Checks that mergeRecordsFromWriter carries records from a second writer into the fixture's Writer.
+ InstrProfRecord Record1("func1", 0x1234, {42}); // One counter with value 42, added to the fixture writer.
+ NoError(Writer.addRecord(std::move(Record1)));
+
+ InstrProfWriter Writer2; // Separate writer holding a different function.
+ InstrProfRecord Record2("func2", 0x1234, {0, 0}); // Two counters, both zero.
+ NoError(Writer2.addRecord(std::move(Record2)));
+
+ NoError(Writer.mergeRecordsFromWriter(std::move(Writer2))); // Operation under test.
+
+ auto Profile = Writer.writeBuffer(); // Serialize the merged writer, then read it back through Reader.
+ readProfile(std::move(Profile));
+
+ Expected<InstrProfRecord> R = Reader->getInstrProfRecord("func1", 0x1234); // The record added directly must be unchanged.
+ ASSERT_TRUE(NoError(R.takeError()));
+ ASSERT_EQ(1U, R->Counts.size());
+ ASSERT_EQ(42U, R->Counts[0]);
+
+ R = Reader->getInstrProfRecord("func2", 0x1234); // The record that came from Writer2 must be present with both counters.
+ ASSERT_TRUE(NoError(R.takeError()));
+ ASSERT_EQ(2U, R->Counts.size());
+ ASSERT_EQ(0U, R->Counts[0]);
+ ASSERT_EQ(0U, R->Counts[1]);
+}
+
static const char callee1[] = "callee1";
static const char callee2[] = "callee2";
static const char callee3[] = "callee3";
More information about the llvm-commits
mailing list