[compiler-rt] [llvm] [InstrProf] Add debuginfod correlation support (PR #106606)

via llvm-commits llvm-commits at lists.llvm.org
Thu Aug 29 11:28:42 PDT 2024


https://github.com/gulfemsavrun created https://github.com/llvm/llvm-project/pull/106606

This patch adds debuginfod support to llvm-profdata. It uses the build ID recorded in a raw profile to find the associated executable, then correlates the profile using the profile data and name sections in that executable.

>From 045d4d89e68663166de421fcc9eb88465e7d242e Mon Sep 17 00:00:00 2001
From: Gulfem Savrun Yeniceri <gulfem at google.com>
Date: Tue, 27 Aug 2024 15:24:13 -0700
Subject: [PATCH] [InstrProf] Add debuginfod correlation support

This patch adds debuginfod support to llvm-profdata. It
uses the build ID in a raw profile to find the associated
executable, then correlates the profile using the profile
data and name sections in that executable.
---
 .../profile/instrprof-debuginfod-correlate.c  | 27 +++++++++++++
 llvm/docs/CommandGuide/llvm-profdata.rst      | 14 +++++++
 .../llvm/ProfileData/InstrProfCorrelator.h    |  6 ++-
 .../llvm/ProfileData/InstrProfReader.h        | 13 ++++++-
 llvm/lib/ProfileData/InstrProfCorrelator.cpp  | 37 +++++++++++++++---
 llvm/lib/ProfileData/InstrProfReader.cpp      | 38 +++++++++++++++----
 llvm/tools/llvm-profdata/CMakeLists.txt       |  4 ++
 llvm/tools/llvm-profdata/llvm-profdata.cpp    | 37 ++++++++++++++----
 8 files changed, 153 insertions(+), 23 deletions(-)
 create mode 100644 compiler-rt/test/profile/instrprof-debuginfod-correlate.c

diff --git a/compiler-rt/test/profile/instrprof-debuginfod-correlate.c b/compiler-rt/test/profile/instrprof-debuginfod-correlate.c
new file mode 100644
index 00000000000000..b139a351d3235d
--- /dev/null
+++ b/compiler-rt/test/profile/instrprof-debuginfod-correlate.c
@@ -0,0 +1,27 @@
+// REQUIRES: linux || windows
+// RUN: rm -rf %t
+
+// Default build with no profile correlation.
+// RUN: %clang_profgen -o %t.default.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
+// RUN: env LLVM_PROFILE_FILE=%t.default.profraw %run %t.default.exe
+// RUN: llvm-profdata merge -o %t.default.profdata %t.default.profraw
+
+// Build with profile binary correlation and test llvm-profdata merge profile correlation with --binary-file option.
+// RUN: %clang_profgen -o %t.correlate.exe -Wl,--build-id=0x12345678 -fprofile-instr-generate -fcoverage-mapping -mllvm -profile-correlate=binary %S/Inputs/instrprof-debug-info-correlate-main.cpp %S/Inputs/instrprof-debug-info-correlate-foo.cpp
+// Strip above binary and run
+// RUN: llvm-strip %t.correlate.exe -o %t.stripped.exe
+// RUN: env LLVM_PROFILE_FILE=%t.correlate.profraw %run %t.stripped.exe
+// RUN: llvm-profdata merge -o %t.correlate-binary.profdata --binary-file=%t.correlate.exe %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-binary.profdata
+
+// Test llvm-profdata merge profile correlation with --debuginfod option.
+// RUN: mkdir -p %t/buildid/12345678
+// RUN: cp %t.correlate.exe %t/buildid/12345678/debuginfo
+// RUN: env DEBUGINFOD_CACHE_PATH=%t/debuginfod-cache DEBUGINFOD_URLS=file://%t llvm-profdata merge -o %t.correlate-debuginfod.profdata --debuginfod %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-debuginfod.profdata
+
+// Test llvm-profdata merge profile correlation with --debug-file-directory option.
+// RUN: mkdir -p %t/.build-id/12
+// RUN: cp %t.correlate.exe %t/.build-id/12/345678.debug
+// RUN: llvm-profdata merge -o %t.correlate-debug-file-dir.profdata --debug-file-directory %t %t.correlate.profraw
+// RUN: diff %t.default.profdata %t.correlate-debug-file-dir.profdata
diff --git a/llvm/docs/CommandGuide/llvm-profdata.rst b/llvm/docs/CommandGuide/llvm-profdata.rst
index acf016a6dbcd70..1941bdfede3734 100644
--- a/llvm/docs/CommandGuide/llvm-profdata.rst
+++ b/llvm/docs/CommandGuide/llvm-profdata.rst
@@ -204,6 +204,20 @@ OPTIONS
  the raw profile. When ``-profile-correlate=binary`` was used for
  instrumentation, use this option to correlate the raw profile.
 
+.. option:: --debuginfod
+
+ Use debuginfod to find the associated executables that contain profile data and
+ name sections for the raw profiles to correlate them.
+ When ``-profile-correlate=binary`` was used for instrumentation, this option
+ can be used for correlation.
+
+.. option:: --debug-file-directory=<dir>
+
+ Use provided local directories to search for executables that contain profile
+ data and name sections for the raw profiles to correlate them.
+ When ``-profile-correlate=binary`` was used for instrumentation, this option
+ can be used for correlation.
+
 .. option:: --temporal-profile-trace-reservoir-size
 
  The maximum number of temporal profile traces to be stored in the output
diff --git a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
index c07c67d287e2ce..c83ab4e2383953 100644
--- a/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
+++ b/llvm/include/llvm/ProfileData/InstrProfCorrelator.h
@@ -13,6 +13,8 @@
 #define LLVM_PROFILEDATA_INSTRPROFCORRELATOR_H
 
 #include "llvm/ADT/DenseSet.h"
+#include "llvm/Debuginfod/BuildIDFetcher.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/ProfileData/InstrProf.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
@@ -36,7 +38,9 @@ class InstrProfCorrelator {
   enum ProfCorrelatorKind { NONE, DEBUG_INFO, BINARY };
 
   static llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
-  get(StringRef Filename, ProfCorrelatorKind FileKind);
+  get(StringRef Filename, ProfCorrelatorKind FileKind,
+      const object::BuildIDFetcher *BIDFetcher = nullptr,
+      const std::optional<ArrayRef<llvm::object::BuildID>> BIs = std::nullopt);
 
   /// Construct a ProfileData vector used to correlate raw instrumentation data
   /// to their functions.
diff --git a/llvm/include/llvm/ProfileData/InstrProfReader.h b/llvm/include/llvm/ProfileData/InstrProfReader.h
index 3b307d08359980..6407672b786f5d 100644
--- a/llvm/include/llvm/ProfileData/InstrProfReader.h
+++ b/llvm/include/llvm/ProfileData/InstrProfReader.h
@@ -200,11 +200,13 @@ class InstrProfReader {
   static Expected<std::unique_ptr<InstrProfReader>>
   create(const Twine &Path, vfs::FileSystem &FS,
          const InstrProfCorrelator *Correlator = nullptr,
+         const object::BuildIDFetcher *BIDFetcher = nullptr,
          std::function<void(Error)> Warn = nullptr);
 
   static Expected<std::unique_ptr<InstrProfReader>>
   create(std::unique_ptr<MemoryBuffer> Buffer,
          const InstrProfCorrelator *Correlator = nullptr,
+         const object::BuildIDFetcher *BIDFetcher = nullptr,
          std::function<void(Error)> Warn = nullptr);
 
   /// \param Weight for raw profiles use this as the temporal profile trace
@@ -314,6 +316,11 @@ class RawInstrProfReader : public InstrProfReader {
   /// If available, this hold the ProfileData array used to correlate raw
   /// instrumentation data to their functions.
   const InstrProfCorrelatorImpl<IntPtrT> *Correlator;
+  /// Correlator that fetches debuginfo from debuginfod on the fly by build id.
+  std::unique_ptr<InstrProfCorrelator> DebugInfodCorrelator;
+  /// Fetcher that fetches debuginfo from debuginfod to correlate profiles with
+  /// binaries.
+  const object::BuildIDFetcher *BIDFetcher;
   /// A list of timestamps paired with a function name reference.
   std::vector<std::pair<uint64_t, uint64_t>> TemporalProfTimestamps;
   bool ShouldSwapBytes;
@@ -351,11 +358,13 @@ class RawInstrProfReader : public InstrProfReader {
 public:
   RawInstrProfReader(std::unique_ptr<MemoryBuffer> DataBuffer,
                      const InstrProfCorrelator *Correlator,
+                     const object::BuildIDFetcher *BIDFetcher,
                      std::function<void(Error)> Warn)
       : DataBuffer(std::move(DataBuffer)),
         Correlator(dyn_cast_or_null<const InstrProfCorrelatorImpl<IntPtrT>>(
             Correlator)),
-        Warn(Warn) {}
+        BIDFetcher(BIDFetcher), Warn(Warn) {}
+
   RawInstrProfReader(const RawInstrProfReader &) = delete;
   RawInstrProfReader &operator=(const RawInstrProfReader &) = delete;
 
@@ -439,7 +448,7 @@ class RawInstrProfReader : public InstrProfReader {
 
   void advanceData() {
     // `CountersDelta` is a constant zero when using debug info correlation.
-    if (!Correlator) {
+    if (!Correlator && !DebugInfodCorrelator) {
       // The initial CountersDelta is the in-memory address difference between
       // the data and counts sections:
       // start(__llvm_prf_cnts) - start(__llvm_prf_data)
diff --git a/llvm/lib/ProfileData/InstrProfCorrelator.cpp b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
index 44e2aeb00d8cc8..9dbef8d653305a 100644
--- a/llvm/lib/ProfileData/InstrProfCorrelator.cpp
+++ b/llvm/lib/ProfileData/InstrProfCorrelator.cpp
@@ -91,7 +91,9 @@ InstrProfCorrelator::Context::get(std::unique_ptr<MemoryBuffer> Buffer,
 }
 
 llvm::Expected<std::unique_ptr<InstrProfCorrelator>>
-InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
+InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind,
+                         const object::BuildIDFetcher *BIDFetcher,
+                         const std::optional<ArrayRef<object::BuildID>> BIs) {
   if (FileKind == DEBUG_INFO) {
     auto DsymObjectsOrErr =
         object::MachOObjectFile::findDsymObjectMembers(Filename);
@@ -113,11 +115,36 @@ InstrProfCorrelator::get(StringRef Filename, ProfCorrelatorKind FileKind) {
     return get(std::move(*BufferOrErr), FileKind);
   }
   if (FileKind == BINARY) {
-    auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
-    if (auto Err = BufferOrErr.takeError())
-      return std::move(Err);
+    if (!Filename.empty()) {
+      auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(Filename));
+      if (auto Err = BufferOrErr.takeError())
+        return std::move(Err);
+      return get(std::move(*BufferOrErr), FileKind);
+    } else if (BIDFetcher) {
+      if (BIs->size() > 1)
+        return make_error<InstrProfError>(
+            instrprof_error::unable_to_correlate_profile,
+            "unsupported profile binary correlation when there are multiple "
+            "build IDs in a binary");
 
-    return get(std::move(*BufferOrErr), FileKind);
+      std::optional<std::string> Path = BIDFetcher->fetch(BIs->front());
+      if (Path) {
+        auto BufferOrErr = errorOrToExpected(MemoryBuffer::getFile(*Path));
+        if (auto Err = BufferOrErr.takeError())
+          return std::move(Err);
+        return get(std::move(*BufferOrErr), BINARY);
+      } else {
+        return make_error<InstrProfError>(
+            instrprof_error::unable_to_correlate_profile,
+            "Missing build ID: " +
+                llvm::toHex(BIs->front(), /*LowerCase=*/true));
+      }
+    } else {
+      return make_error<InstrProfError>(
+          instrprof_error::unable_to_correlate_profile,
+          "unsupported profile binary correlation when provided with a file "
+          "name and build id fetcher");
+    }
   }
   return make_error<InstrProfError>(
       instrprof_error::unable_to_correlate_profile,
diff --git a/llvm/lib/ProfileData/InstrProfReader.cpp b/llvm/lib/ProfileData/InstrProfReader.cpp
index 6d078c58ac805d..34d67cc8bf9b2f 100644
--- a/llvm/lib/ProfileData/InstrProfReader.cpp
+++ b/llvm/lib/ProfileData/InstrProfReader.cpp
@@ -153,19 +153,19 @@ static void printBinaryIdsInternal(raw_ostream &OS,
 Expected<std::unique_ptr<InstrProfReader>>
 InstrProfReader::create(const Twine &Path, vfs::FileSystem &FS,
                         const InstrProfCorrelator *Correlator,
+                        const object::BuildIDFetcher *BIDFetcher,
                         std::function<void(Error)> Warn) {
   // Set up the buffer to read.
   auto BufferOrError = setupMemoryBuffer(Path, FS);
   if (Error E = BufferOrError.takeError())
     return std::move(E);
   return InstrProfReader::create(std::move(BufferOrError.get()), Correlator,
-                                 Warn);
+                                 BIDFetcher, Warn);
 }
 
-Expected<std::unique_ptr<InstrProfReader>>
-InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
-                        const InstrProfCorrelator *Correlator,
-                        std::function<void(Error)> Warn) {
+Expected<std::unique_ptr<InstrProfReader>> InstrProfReader::create(
+    std::unique_ptr<MemoryBuffer> Buffer, const InstrProfCorrelator *Correlator,
+    const object::BuildIDFetcher *BIDFetcher, std::function<void(Error)> Warn) {
   if (Buffer->getBufferSize() == 0)
     return make_error<InstrProfError>(instrprof_error::empty_raw_profile);
 
@@ -174,9 +174,11 @@ InstrProfReader::create(std::unique_ptr<MemoryBuffer> Buffer,
   if (IndexedInstrProfReader::hasFormat(*Buffer))
     Result.reset(new IndexedInstrProfReader(std::move(Buffer)));
   else if (RawInstrProfReader64::hasFormat(*Buffer))
-    Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator, Warn));
+    Result.reset(new RawInstrProfReader64(std::move(Buffer), Correlator,
+                                          BIDFetcher, Warn));
   else if (RawInstrProfReader32::hasFormat(*Buffer))
-    Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator, Warn));
+    Result.reset(new RawInstrProfReader32(std::move(Buffer), Correlator,
+                                          BIDFetcher, Warn));
   else if (TextInstrProfReader::hasFormat(*Buffer))
     Result.reset(new TextInstrProfReader(std::move(Buffer)));
   else
@@ -633,6 +635,20 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
   if (Start + ValueDataOffset > DataBuffer->getBufferEnd())
     return error(instrprof_error::bad_header);
 
+  if (BIDFetcher) {
+    std::vector<object::BuildID> BinaryIDs;
+    if (Error E = readBinaryIds(BinaryIDs))
+      return E;
+    if (auto E = InstrProfCorrelator::get(
+                     "", InstrProfCorrelator::ProfCorrelatorKind::BINARY,
+                     BIDFetcher, BinaryIDs)
+                     .moveInto(DebugInfodCorrelator)) {
+      return E;
+    }
+    if (auto Err = DebugInfodCorrelator->correlateProfileData(0))
+      return Err;
+  }
+
   if (Correlator) {
     // These sizes in the raw file are zero because we constructed them in the
     // Correlator.
@@ -643,6 +659,14 @@ Error RawInstrProfReader<IntPtrT>::readHeader(
     DataEnd = Data + Correlator->getDataSize();
     NamesStart = Correlator->getNamesPointer();
     NamesEnd = NamesStart + Correlator->getNamesSize();
+  } else if (DebugInfodCorrelator) {
+    InstrProfCorrelatorImpl<IntPtrT> *DebugInfodCorrelatorImpl =
+        dyn_cast_or_null<InstrProfCorrelatorImpl<IntPtrT>>(
+            DebugInfodCorrelator.get());
+    Data = DebugInfodCorrelatorImpl->getDataPointer();
+    DataEnd = Data + DebugInfodCorrelatorImpl->getDataSize();
+    NamesStart = DebugInfodCorrelatorImpl->getNamesPointer();
+    NamesEnd = NamesStart + DebugInfodCorrelatorImpl->getNamesSize();
   } else {
     Data = reinterpret_cast<const RawInstrProf::ProfileData<IntPtrT> *>(
         Start + DataOffset);
diff --git a/llvm/tools/llvm-profdata/CMakeLists.txt b/llvm/tools/llvm-profdata/CMakeLists.txt
index 25cf143337ad4b..165be9a2ea31bd 100644
--- a/llvm/tools/llvm-profdata/CMakeLists.txt
+++ b/llvm/tools/llvm-profdata/CMakeLists.txt
@@ -12,3 +12,7 @@ add_llvm_tool(llvm-profdata
   intrinsics_gen
   GENERATE_DRIVER
   )
+
+if(NOT LLVM_TOOL_LLVM_DRIVER_BUILD)
+  target_link_libraries(llvm-profdata PRIVATE LLVMDebuginfod)
+endif()
diff --git a/llvm/tools/llvm-profdata/llvm-profdata.cpp b/llvm/tools/llvm-profdata/llvm-profdata.cpp
index 1f6c4c604d57b5..acab2b9249b585 100644
--- a/llvm/tools/llvm-profdata/llvm-profdata.cpp
+++ b/llvm/tools/llvm-profdata/llvm-profdata.cpp
@@ -13,6 +13,7 @@
 #include "llvm/ADT/SmallSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/Debuginfod/HTTPClient.h"
 #include "llvm/IR/LLVMContext.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/ProfileData/InstrProfCorrelator.h"
@@ -130,6 +131,12 @@ cl::opt<std::string>
                    cl::desc("For merge, use the provided unstripped bianry to "
                             "correlate the raw profile."),
                    cl::sub(MergeSubcommand));
+cl::list<std::string> DebugFileDirectory(
+    "debug-file-directory",
+    cl::desc("Directories to search for object files by build ID"));
+cl::opt<bool> DebugInfod("debuginfod", cl::init(false), cl::Hidden,
+                         cl::sub(MergeSubcommand),
+                         cl::desc("Enable debuginfod"));
 cl::opt<std::string> FuncNameFilter(
     "function",
     cl::desc("Only functions matching the filter are shown in the output. For "
@@ -652,7 +659,8 @@ static void overlapInput(const std::string &BaseFilename,
 /// Load an input into a writer context.
 static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
                       const InstrProfCorrelator *Correlator,
-                      const StringRef ProfiledBinary, WriterContext *WC) {
+                      const StringRef ProfiledBinary, WriterContext *WC,
+                      const object::BuildIDFetcher *BIDFetcher = nullptr) {
   std::unique_lock<std::mutex> CtxGuard{WC->Lock};
 
   // Copy the filename, because llvm::ThreadPool copied the input "const
@@ -730,8 +738,8 @@ static void loadInput(const WeightedFile &Input, SymbolRemapper *Remapper,
     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
     ReaderWarning = {make_error<InstrProfError>(ErrCode, Msg), Filename};
   };
-  auto ReaderOrErr =
-      InstrProfReader::create(Input.Filename, *FS, Correlator, Warn);
+  auto ReaderOrErr = InstrProfReader::create(Input.Filename, *FS, Correlator,
+                                             BIDFetcher, Warn);
   if (Error E = ReaderOrErr.takeError()) {
     // Skip the empty profiles by returning silently.
     auto [ErrCode, Msg] = InstrProfError::take(std::move(E));
@@ -914,9 +922,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
     exitWithError("unknown format is specified");
 
   // TODO: Maybe we should support correlation with mixture of different
-  // correlation modes(w/wo debug-info/object correlation).
-  if (!DebugInfoFilename.empty() && !BinaryFilename.empty())
-    exitWithError("Expected only one of -debug-info, -binary-file");
+  // correlation modes(w/wo debug-info/object correlation).
+  if (DebugInfoFilename.empty()) {
+    if (!BinaryFilename.empty() && DebugInfod)
+      exitWithError("Expected only one of -binary-file, -debuginfod");
+  } else if (!BinaryFilename.empty() || DebugInfod) {
+    exitWithError(
+        "Expected only one of -debug-info, -binary-file, -debuginfod");
+  }
   std::string CorrelateFilename;
   ProfCorrelatorKind CorrelateKind = ProfCorrelatorKind::NONE;
   if (!DebugInfoFilename.empty()) {
@@ -936,6 +949,14 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
       exitWithError(std::move(Err), CorrelateFilename);
   }
 
+  std::unique_ptr<object::BuildIDFetcher> BIDFetcher;
+  if (DebugInfod) {
+    llvm::HTTPClient::initialize();
+    BIDFetcher = std::make_unique<DebuginfodFetcher>(DebugFileDirectory);
+  } else if (!DebugFileDirectory.empty()) {
+    BIDFetcher = std::make_unique<object::BuildIDFetcher>(DebugFileDirectory);
+  }
+
   std::mutex ErrorLock;
   SmallSet<instrprof_error, 4> WriterErrorCodes;
 
@@ -954,7 +975,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
   if (NumThreads == 1) {
     for (const auto &Input : Inputs)
       loadInput(Input, Remapper, Correlator.get(), ProfiledBinary,
-                Contexts[0].get());
+                Contexts[0].get(), BIDFetcher.get());
   } else {
     DefaultThreadPool Pool(hardware_concurrency(NumThreads));
 
@@ -962,7 +983,7 @@ static void mergeInstrProfile(const WeightedFileVector &Inputs,
     unsigned Ctx = 0;
     for (const auto &Input : Inputs) {
       Pool.async(loadInput, Input, Remapper, Correlator.get(), ProfiledBinary,
-                 Contexts[Ctx].get());
+                 Contexts[Ctx].get(), BIDFetcher.get());
       Ctx = (Ctx + 1) % NumThreads;
     }
     Pool.wait();



More information about the llvm-commits mailing list