[compiler-rt] [llvm] [InstrProf] Add debuginfod correlation support (PR #106606)
via llvm-commits
llvm-commits at lists.llvm.org
Thu Aug 29 11:28:42 PDT 2024
https://github.com/gulfemsavrun created https://github.com/llvm/llvm-project/pull/106606
This patch adds debuginfod support to llvm-profdata: it uses the build ID recorded in a raw profile to find the associated executable, then correlates the profile using the profile data and name sections in that executable.
>From 045d4d89e68663166de421fcc9eb88465e7d242e Mon Sep 17 00:00:00 2001
From: Gulfem Savrun Yeniceri <gulfem at google.com>
Date: Tue, 27 Aug 2024 15:24:13 -0700
Subject: [PATCH] [InstrProf] Add debuginfod correlation support
This patch adds debuginfod support into llvm-profdata to
find the associated executable by a build id in a raw
profile to correlate a profile by using profile data and
name sections in an executable.
---
.../profile/instrprof-debuginfod-correlate.c | 27 +++++++++++++
llvm/docs/CommandGuide/llvm-profdata.rst | 14 +++++++
.../llvm/ProfileData/InstrProfCorrelator.h | 6 ++-
.../llvm/ProfileData/InstrProfReader.h | 13 ++++++-
llvm/lib/ProfileData/InstrProfCorrelator.cpp | 37 +++++++++++++++---
llvm/lib/ProfileData/InstrProfReader.cpp | 38 +++++++++++++++----
llvm/tools/llvm-profdata/CMakeLists.txt | 4 ++
llvm/tools/llvm-profdata/llvm-profdata.cpp | 37 ++++++++++++++----
8 files changed, 153 insertions(+), 23 deletions(-)
create mode 100644 compiler-rt/test/profile/instrprof-debuginfod-correlate.c
diff --git a/compiler-rt/test/profile/instrprof-debuginfod-correlate.c b/compiler-rt/test/profile/instrprof-debuginfod-correlate.c
new file mode 100644
index 00000000000000..b139a351d3235d
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-debuginfod-correlate.c
@@ -0,0 +1,27 @@
+// REQUIRES: linux || windows
+// RUN: rm -rf %t
+
+// Default build with no profile correlation.
+// RUN: %clang_profgen -o %t.default.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.default.profraw %run %t.default.exe
+// RUN: llvm-profdata merge -o %t.default.profdata %t.default.profraw
+
+// Build with profile binary correlation and test llvm-profdata merge profile correlation with --binary-file option.
+// RUN: %clang_profgen -o %t.correlate.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping -mllvm -profile-correlate=binary %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
+// Strip above binary and run
+// RUN: llvm-strip %t.correlate.exe -o %t.stripped.exe
+// RUN: env LLVM_PROFILE_FILE=%t.correlate.profraw %run %t.stripped.exe
+// RUN: llvm-profdata merge -o %t.correlate-binary.profdata --binary-file=%t.correlate.exe %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-binary.profdata
+
+// Test llvm-profdata merge profile correlation with --debuginfod option.
+// RUN: mkdir -p %t/buildid/12345678
+// RUN: cp %t.correlate.exe %t/buildid/12345678/debuginfo
+// RUN: env DEBUGINFOD_CACHE_PATH=%t/debuginfod-cache DEBUGINFOD_URLS=file://%t llvm-profdata merge -o %t.correlate-debuginfod.profdata --debuginfod %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-debuginfod.profdata
+
+// Test llvm-profdata merge profile correlation with --debug-file-directory option.
+// RUN: mkdir -p %t/.build-id/12
+// RUN: cp %t.correlate.exe %t/.build-id/12/345678.debug
+// RUN: llvm-profdata merge -o %t.correlate-debug-file-dir.profdata --debug-file-directory %t %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-debug-file-dir.profdata
diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index acf016a6dbcd70..1941bdfede3734 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -204,6 +204,20 @@ OPTIONS
the raw profile. When ``-profile-correlate=binary`` was used for
instrumentation, use this option to correlate the raw profile.
+.. option:: --debuginfod
+
+ Use debuginfod to find the associated executables that contain profile data and
+ name sections for the raw profiles to correlate them.
+ When ``-profile-correlate=binary`` was used for instrumentation, this option can be
+ used for correlation.
+
+.. option:: --debug-file-directory=<dir>
+
+ Use provided local directories to search for executables that contain profile
+ data and name sections for the raw profiles to correlate them.
+ When ``-profile-correlate=binary`` was used for instrumentation, this option can be
+ used for correlation.
+
.. option:: --temporal-profile-trace-reservoir-size
The maximum number of temporal profile traces to be stored in the output
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index c07c67d287e2ce..c83ab4e2383953 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -13,6 +13,8 @@
#define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H
#include "llvm/ADT/DenseSet.h"
+#include "llvm/Debuginfod/BuildIDFetcher.h"
+#include "llvm/Object/BuildID.h"
#include "llvm/ProfileData/InstrProf.h"
#include "llvm/Support/Error.h"
#include "llvm/Support/MemoryBuffer.h"
@@ -36,7 +38,9 @@ class InstrProfCorrelator {
enum ProfCorrelatorKind { NONE, DEBUG_INFO, BINARY };
static llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
- get(StringRef Filename, ProfCorrelatorKind FileKind);
+ get(StringRef Filename, ProfCorrelatorKind FileKind,
+ const object::BuildIDFetcher *BIDFetcher = nullptr,
+ const std::optional<ArrayRef<llvm::object::BuildID>> BIs = std::nullopt);
/// Construct a ProfileData vector used to correlate raw instrumentation data
/// to their functions.
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 3b307d08359980..6407672b786f5d 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -200,11 +200,13 @@ class InstrProfReader {
static Expected<std::unique_ptr<InstrProfReader>>
create(const Twine &Path, vfs::FileSystem &FS,
const InstrProfCorrelator *Correlator = nullptr,
+ const object::BuildIDFetcher *BIDFetcher = nullptr,
std::function<void(Error)> Warn = nullptr);
static Expected<std::unique_ptr<InstrProfReader>>
create(std::unique_ptr<MemoryBuffer> Buffer,
const InstrProfCorrelator *Correlator = nullptr,
+ const object::BuildIDFetcher *BIDFetcher = nullptr,
std::function<void(Error)> Warn = nullptr);
/// \param Weight for raw profiles use this as the temporal profile trace
@@ -314,6 +316,11 @@ class RawInstrProfReader : public InstrProfReader {
/// If available, this hold the ProfileData array used to correlate raw
/// instrumentation data to their functions.
const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
+ /// Correlator that fetches debuginfo from debuginfod on the fly by build id.
+ std::unique_ptr<InstrProfCorrelator> DebugInfodCorrelator;
+ /// Fetcher that fetches debuginfo from debuginfod to correlate profiles with
+ /// binaries.
+ const object::BuildIDFetcher *BIDFetcher;
/// A list of timestamps paired with a function name reference.
std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
bool ShouldSwapBytes;
@@ -351,11 +358,13 @@ class RawInstrProfReader : public InstrProfReader {
public:
RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
const InstrProfCorrelator *Correlator,
+ const object::BuildIDFetcher *BIDFetcher,
std::function<void(Error)> Warn)
: DataBuffer(std::move(DataBuffer)),
Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
Correlator)),
- Warn(Warn) {}
+ BIDFetcher(BIDFetcher), Warn(Warn) {}
+
RawInstrProfReader(const RawInstrProfReader &) = delete;
RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
@@ -439,7 +448,7 @@ class RawInstrProfReader : public InstrProfReader {
void advanceData() {
// `CountersDelta` is a constant zero when using debug info correlation.
- if (!Correlator) {
+ if (!Correlator && !DebugInfodCorrelator) {
// The initial CountersDelta is the in-memory address difference between
// the data and counts sections:
// start(__llvm_prf_cnts) - start(__llvm_prf_data)
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 44e2aeb00d8cc8..9dbef8d653305a 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -91,7 +91,9 @@ InstrProfCorrelator::Context::get(std::unique_ptr<MemoryBuffer> Buffer,
}
llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
-InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
+InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind,
+ const object::BuildIDFetcher *BIDFetcher,
+ const std::optional<ArrayRef<object::BuildID>> BIs) {
if (FileKind == DEBUG_INFO) {
auto DsymObjectsOrErr =
object::MachOObjectFile::findDsymObjectMembers(Filename);
@@ -113,11 +115,36 @@ InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
return get(std::move(*BufferOrErr), FileKind);
}
if (FileKind == BINARY) {
- auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
- if (auto Err = BufferOrErr.takeError())
- return std::move(Err);
+ if (!Filename.empty()) {
+ auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
+ if (auto Err = BufferOrErr.takeError())
+ return std::move(Err);
+ return get(std::move(*BufferOrErr), FileKind);
+ } else if (BIDFetcher) {
+ if (BIs->size() > 1)
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "unsupported profile binary correlation when there are multiple "
+ "build IDs in a binary");
- return get(std::move(*BufferOrErr), FileKind);
+ std::optional<std::string> Path = BIDFetcher->fetch(BIs->front());
+ if (Path) {
+ auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(*Path));
+ if (auto Err = BufferOrErr.takeError())
+ return std::move(Err);
+ return get(std::move(*BufferOrErr), BINARY);
+ } else {
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "Missing build ID: " +
+ llvm::toHex(BIs->front(), /*LowerCase=*/true));
+ }
+ } else {
+ return make_error<InstrProfError>(
+ instrprof_error::unable_to_correlate_profile,
+ "unsupported profile binary correlation when provided with a file "
+ "name and build id fetcher");
+ }
}
return make_error<InstrProfError>(
instrprof_error::unable_to_correlate_profile,
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 6d078c58ac805d..34d67cc8bf9b2f 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -153,19 +153,19 @@ static void printBinaryIdsInternal(raw_ostream &OS,
Expected<std::unique_ptr<InstrProfReader>>
InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
const InstrProfCorrelator *Correlator,
+ const object::BuildIDFetcher *BIDFetcher,
std::function<void(Error)> Warn) {
// Set up the buffer to read.
auto BufferOrError = setupMemoryBuffer(Path, FS);
if (Error E = BufferOrError.takeError())
return std::move(E);
return InstrProfReader::create(std::move(BufferOrError.get()), Correlator,
- Warn);
+ BIDFetcher, Warn);
}
-Expected<std::unique_ptr<InstrProfReader>>
-InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
- const InstrProfCorrelator *Correlator,
- std::function<void(Error)> Warn) {
+Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(
+ std::unique_ptr<MemoryBuffer> Buffer, const InstrProfCorrelator *Correlator,
+ const object::BuildIDFetcher *BIDFetcher, std::function<void(Error)> Warn) {
if (Buffer->getBufferSize() == 0)
return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
@@ -174,9 +174,11 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
if (IndexedInstrProfReader::hasFormat(*Buffer))
Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
else if (RawInstrProfReader64::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
+ Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator,
+ BIDFetcher, Warn));
else if (RawInstrProfReader32::hasFormat(*Buffer))
- Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
+ Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator,
+ BIDFetcher, Warn));
else if (TextInstrProfReader::hasFormat(*Buffer))
Result.reset(new TextInstrProfReader(std::move(Buffer)));
else
@@ -633,6 +635,20 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
return error(instrprof_error::bad_header);
+ if (BIDFetcher) {
+ std::vector<object::BuildID> BinaryIDs;
+ if (Error E = readBinaryIds(BinaryIDs))
+ return E;
+ if (auto E = InstrProfCorrelator::get(
+ "", InstrProfCorrelator::ProfCorrelatorKind::BINARY,
+ BIDFetcher, BinaryIDs)
+ .moveInto(DebugInfodCorrelator)) {
+ return E;
+ }
+ if (auto Err = DebugInfodCorrelator->correlateProfileData(0))
+ return Err;
+ }
+
if (Correlator) {
// These sizes in the raw file are zero because we constructed them in the
// Correlator.
@@ -643,6 +659,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
DataEnd = Data + Correlator->getDataSize();
NamesStart = Correlator->getNamesPointer();
NamesEnd = NamesStart + Correlator->getNamesSize();
+ } else if (DebugInfodCorrelator) {
+ InstrProfCorrelatorImpl<IntPtrT> *DebugInfodCorrelatorImpl =
+ dyn_cast_or_null<InstrProfCorrelatorImpl<IntPtrT>>(
+ DebugInfodCorrelator.get());
+ Data = DebugInfodCorrelatorImpl->getDataPointer();
+ DataEnd = Data + DebugInfodCorrelatorImpl->getDataSize();
+ NamesStart = DebugInfodCorrelatorImpl->getNamesPointer();
+ NamesEnd = NamesStart + DebugInfodCorrelatorImpl->getNamesSize();
} else {
Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
Start + DataOffset);
diff --git a/llvm/tools/llvm-profdata/CMakeLists.txt b/llvm/tools/llvm-profdata/CMakeLists.txt
index 25cf143337ad4b..165be9a2ea31bd 100644
--- a/llvm/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/tools/llvm-profdata/CMakeLists.txt
@@ -12,3 +12,7 @@ add_llvm_tool(llvm-profdata
intrinsics_gen
GENERATE_DRIVER
)
+
+if(NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
+ target_link_libraries(llvm-profdata PRIVATE LLVMDebuginfod)
+endif()
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1f6c4c604d57b5..acab2b9249b585 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -13,6 +13,7 @@
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPClient.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/Object/Binary.h"
#include "llvm/ProfileData/InstrProfCorrelator.h"
@@ -130,6 +131,12 @@ cl::opt<std::string>
cl::desc("For merge, use the provided unstripped bianry to "
"correlate the raw profile."),
cl::sub(MergeSubcommand));
+cl::list<std::string> DebugFileDirectory(
+ "debug-file-directory",
+ cl::desc("Directories to search for object files by build ID"));
+cl::opt<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden,
+ cl::sub(MergeSubcommand),
+ cl::desc("Enable debuginfod"));
cl::opt<std::string> FuncNameFilter(
"function",
cl::desc("Only functions matching the filter are shown in the output. For "
@@ -652,7 +659,8 @@ static void overlapInput(const std::string &BaseFilename,
/// Load an input into a writer context.
static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
const InstrProfCorrelator *Correlator,
- const StringRef ProfiledBinary, WriterContext *WC) {
+ const StringRef ProfiledBinary, WriterContext *WC,
+ const object::BuildIDFetcher *BIDFetcher = nullptr) {
std::unique_lock<std::mutex> CtxGuard{WC->Lock};
// Copy the filename, because llvm::ThreadPool copied the input "const
@@ -730,8 +738,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
};
- auto ReaderOrErr =
- InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
+ auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
+ BIDFetcher, Warn);
if (Error E = ReaderOrErr.takeError()) {
// Skip the empty profiles by returning silently.
auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
@@ -914,9 +922,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
exitWithError("unknown format is specified");
// TODO: Maybe we should support correlation with mixture of different
- // correlation modes(w/wo debug-info/object correlation).
- if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
- exitWithError("Expected only one of -debug-info, -binary-file");
+ // correlation modes(w/wo debug-info/object correlation).
+ if (DebugInfoFilename.empty()) {
+ if (!BinaryFilename.empty() && DebugInfod)
+ exitWithError("Expected only one of -binary-file, -debuginfod");
+ } else if (!BinaryFilename.empty() || DebugInfod) {
+ exitWithError(
+ "Expected only one of -debug-info, -binary-file, -debuginfod");
+ }
std::string CorrelateFilename;
ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
if (!DebugInfoFilename.empty()) {
@@ -936,6 +949,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
exitWithError(std::move(Err), CorrelateFilename);
}
+ std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
+ if (DebugInfod) {
+ llvm::HTTPClient::initialize();
+ BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
+ } else if (!DebugFileDirectory.empty()) {
+ BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
+ }
+
std::mutex ErrorLock;
SmallSet<instrprof_error, 4> WriterErrorCodes;
@@ -954,7 +975,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
if (NumThreads == 1) {
for (const auto &Input : Inputs)
loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
- Contexts[0].get());
+ Contexts[0].get(), BIDFetcher.get());
} else {
DefaultThreadPool Pool(hardware_concurrency(NumThreads));
@@ -962,7 +983,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
unsigned Ctx = 0;
for (const auto &Input : Inputs) {
Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
- Contexts[Ctx].get());
+ Contexts[Ctx].get(), BIDFetcher.get());
Ctx = (Ctx + 1) % NumThreads;
}
Pool.wait();
More information about the llvm-commits
mailing list