[llvm] 9812948 - [Object] Refactor build ID parsing into Object lib.

Daniel Thornburgh via llvm-commits llvm-commits at lists.llvm.org
Wed Apr 5 11:25:31 PDT 2023


Author: Daniel Thornburgh
Date: 2023-04-05T11:25:26-07:00
New Revision: 9812948d22328e6f810c7654b93b063ce97ecfec

URL: https://github.com/llvm/llvm-project/commit/9812948d22328e6f810c7654b93b063ce97ecfec
DIFF: https://github.com/llvm/llvm-project/commit/9812948d22328e6f810c7654b93b063ce97ecfec.diff

LOG: [Object] Refactor build ID parsing into Object lib.

This makes parsing for build IDs in the markup filter slightly more
permissive, in line with fromHex.

It also removes the distinction between missing build ID and empty build
ID; empty build IDs aren't a useful concept, since their purpose is to
uniquely identify a binary. This removes a layer of indirection wherever
build IDs are obtained.

Reviewed By: jhenderson

Differential Revision: https://reviews.llvm.org/D147485

Added: 
    

Modified: 
    llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
    llvm/include/llvm/Object/BuildID.h
    llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
    llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
    llvm/lib/Debuginfod/Debuginfod.cpp
    llvm/lib/Object/BuildID.cpp
    llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
    llvm/lib/ProfileData/RawMemProfReader.cpp
    llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
    llvm/tools/llvm-objdump/llvm-objdump.cpp
    llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
index 534255640075a..429f9cba8a583 100644
--- a/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
+++ b/llvm/include/llvm/DebugInfo/Symbolize/MarkupFilter.h
@@ -15,13 +15,12 @@
 #ifndef LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
 #define LLVM_DEBUGINFO_SYMBOLIZE_MARKUPFILTER_H
 
-#include "Markup.h"
-
-#include <map>
-
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/DebugInfo/Symbolize/Markup.h"
+#include "llvm/Object/BuildID.h"
 #include "llvm/Support/WithColor.h"
 #include "llvm/Support/raw_ostream.h"
+#include <map>
 
 namespace llvm {
 namespace symbolize {
@@ -116,7 +115,7 @@ class MarkupFilter {
   std::optional<uint64_t> parseAddr(StringRef Str) const;
   std::optional<uint64_t> parseModuleID(StringRef Str) const;
   std::optional<uint64_t> parseSize(StringRef Str) const;
-  std::optional<SmallVector<uint8_t>> parseBuildID(StringRef Str) const;
+  object::BuildID parseBuildID(StringRef Str) const;
   std::optional<std::string> parseMode(StringRef Str) const;
   std::optional<PCType> parsePCType(StringRef Str) const;
   std::optional<uint64_t> parseFrameNumber(StringRef Str) const;

diff  --git a/llvm/include/llvm/Object/BuildID.h b/llvm/include/llvm/Object/BuildID.h
index 91c247be2cf64..b20f32b4d133e 100644
--- a/llvm/include/llvm/Object/BuildID.h
+++ b/llvm/include/llvm/Object/BuildID.h
@@ -29,8 +29,11 @@ typedef ArrayRef<uint8_t> BuildIDRef;
 
 class ObjectFile;
 
+/// Parses a build ID from a hex string.
+BuildID parseBuildID(StringRef Str);
+
 /// Returns the build ID, if any, contained in the given object file.
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj);
+BuildIDRef getBuildID(const ObjectFile *Obj);
 
 /// BuildIDFetcher searches local cache directories for debug info.
 class BuildIDFetcher {

diff  --git a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
index 5e9d8ac538df2..48833a79fd456 100644
--- a/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/MarkupFilter.cpp
@@ -513,8 +513,9 @@ MarkupFilter::parseModule(const MarkupNode &Element) const {
   }
   if (!checkNumFields(Element, 4))
     return std::nullopt;
-  ASSIGN_OR_RETURN_NONE(SmallVector<uint8_t>, BuildID,
-                        parseBuildID(Element.Fields[3]));
+  SmallVector<uint8_t> BuildID = parseBuildID(Element.Fields[3]);
+  if (BuildID.empty())
+    return std::nullopt;
   return Module{ID, Name.str(), std::move(BuildID)};
 }
 
@@ -597,16 +598,11 @@ std::optional<uint64_t> MarkupFilter::parseFrameNumber(StringRef Str) const {
 }
 
 // Parse a build ID (%x in the spec).
-std::optional<SmallVector<uint8_t>>
-MarkupFilter::parseBuildID(StringRef Str) const {
-  std::string Bytes;
-  if (Str.empty() || Str.size() % 2 || !tryGetFromHex(Str, Bytes)) {
+object::BuildID MarkupFilter::parseBuildID(StringRef Str) const {
+  object::BuildID BID = llvm::object::parseBuildID(Str);
+  if (BID.empty())
     reportTypeError(Str, "build ID");
-    return std::nullopt;
-  }
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+  return BID;
 }
 
 // Parses the mode string for an mmap element.

diff  --git a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
index a92dfbcc5c64f..499ceae88d563 100644
--- a/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
+++ b/llvm/lib/DebugInfo/Symbolize/Symbolize.cpp
@@ -363,12 +363,10 @@ ObjectFile *LLVMSymbolizer::lookUpBuildIDObject(const std::string &Path,
                                                 const ELFObjectFileBase *Obj,
                                                 const std::string &ArchName) {
   auto BuildID = getBuildID(Obj);
-  if (!BuildID)
-    return nullptr;
-  if (BuildID->size() < 2)
+  if (BuildID.size() < 2)
     return nullptr;
   std::string DebugBinaryPath;
-  if (!getOrFindDebugBinary(*BuildID, DebugBinaryPath))
+  if (!getOrFindDebugBinary(BuildID, DebugBinaryPath))
     return nullptr;
   auto DbgObjOrErr = getOrCreateObject(DebugBinaryPath, ArchName);
   if (!DbgObjOrErr) {

diff  --git a/llvm/lib/Debuginfod/Debuginfod.cpp b/llvm/lib/Debuginfod/Debuginfod.cpp
index af84fe4176b75..394f2b29aee65 100644
--- a/llvm/lib/Debuginfod/Debuginfod.cpp
+++ b/llvm/lib/Debuginfod/Debuginfod.cpp
@@ -412,11 +412,11 @@ Error DebuginfodCollection::findBinaries(StringRef Path) {
         if (!Object)
           continue;
 
-        std::optional<BuildIDRef> ID = getBuildID(Object);
-        if (!ID)
+        BuildIDRef ID = getBuildID(Object);
+        if (ID.empty())
           continue;
 
-        std::string IDString = buildIDToString(*ID);
+        std::string IDString = buildIDToString(ID);
         if (Object->hasDebugInfo()) {
           std::lock_guard<sys::RWMutex> DebugBinariesGuard(DebugBinariesMutex);
           (void)DebugBinaries.try_emplace(IDString, std::move(FilePath));

diff  --git a/llvm/lib/Object/BuildID.cpp b/llvm/lib/Object/BuildID.cpp
index 795c22e769aa5..887798a26b90f 100644
--- a/llvm/lib/Object/BuildID.cpp
+++ b/llvm/lib/Object/BuildID.cpp
@@ -18,13 +18,12 @@
 #include "llvm/Support/FileSystem.h"
 #include "llvm/Support/Path.h"
 
-namespace llvm {
-namespace object {
+using namespace llvm;
+using namespace llvm::object;
 
 namespace {
 
-template <typename ELFT>
-std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
+template <typename ELFT> BuildIDRef getBuildID(const ELFFile<ELFT> &Obj) {
   auto PhdrsOrErr = Obj.program_headers();
   if (!PhdrsOrErr) {
     consumeError(PhdrsOrErr.takeError());
@@ -45,15 +44,24 @@ std::optional<BuildIDRef> getBuildID(const ELFFile<ELFT> &Obj) {
 
 } // namespace
 
-std::optional<BuildIDRef> getBuildID(const ObjectFile *Obj) {
+BuildID llvm::object::parseBuildID(StringRef Str) {
+  std::string Bytes;
+  if (!tryGetFromHex(Str, Bytes))
+    return {};
+  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
+                            Bytes.size());
+  return SmallVector<uint8_t>(BuildID.begin(), BuildID.end());
+}
+
+BuildIDRef llvm::object::getBuildID(const ObjectFile *Obj) {
   if (auto *O = dyn_cast<ELFObjectFile<ELF32LE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF32BE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF64LE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   if (auto *O = dyn_cast<ELFObjectFile<ELF64BE>>(Obj))
-    return getBuildID(O->getELFFile());
+    return ::getBuildID(O->getELFFile());
   return std::nullopt;
 }
 
@@ -88,6 +96,3 @@ std::optional<std::string> BuildIDFetcher::fetch(BuildIDRef BuildID) const {
   }
   return std::nullopt;
 }
-
-} // namespace object
-} // namespace llvm

diff  --git a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
index c83fb046c13b9..05737323314a8 100644
--- a/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
+++ b/llvm/lib/ProfileData/Coverage/CoverageMappingReader.cpp
@@ -955,7 +955,7 @@ static Expected<std::vector<SectionRef>> lookupSections(ObjectFile &OF,
 static Expected<std::unique_ptr<BinaryCoverageReader>>
 loadBinaryFormat(std::unique_ptr<Binary> Bin, StringRef Arch,
                  StringRef CompilationDir = "",
-                 std::optional<object::BuildIDRef> *BinaryID = nullptr) {
+                 object::BuildIDRef *BinaryID = nullptr) {
   std::unique_ptr<ObjectFile> OF;
   if (auto *Universal = dyn_cast<MachOUniversalBinary>(Bin.get())) {
     // If we have a universal binary, try to look up the object for the
@@ -1151,14 +1151,14 @@ BinaryCoverageReader::create(
     return std::move(Readers);
   }
 
-  std::optional<object::BuildIDRef> BinaryID;
+  object::BuildIDRef BinaryID;
   auto ReaderOrErr = loadBinaryFormat(std::move(Bin), Arch, CompilationDir,
                                       BinaryIDs ? &BinaryID : nullptr);
   if (!ReaderOrErr)
     return ReaderOrErr.takeError();
   Readers.push_back(std::move(ReaderOrErr.get()));
-  if (BinaryID)
-    BinaryIDs->push_back(*BinaryID);
+  if (!BinaryID.empty())
+    BinaryIDs->push_back(BinaryID);
   return std::move(Readers);
 }
 

diff  --git a/llvm/lib/ProfileData/RawMemProfReader.cpp b/llvm/lib/ProfileData/RawMemProfReader.cpp
index 0c14ab4e2031d..1c7323ea37630 100644
--- a/llvm/lib/ProfileData/RawMemProfReader.cpp
+++ b/llvm/lib/ProfileData/RawMemProfReader.cpp
@@ -337,12 +337,11 @@ Error RawMemProfReader::initialize(std::unique_ptr<MemoryBuffer> DataBuffer) {
 
 Error RawMemProfReader::setupForSymbolization() {
   auto *Object = cast<object::ObjectFile>(Binary.getBinary());
-  auto BuildIdOr = object::getBuildID(Object);
-  if (!BuildIdOr.has_value())
+  object::BuildIDRef BinaryId = object::getBuildID(Object);
+  if (BinaryId.empty())
     return make_error<StringError>(Twine("No build id found in binary ") +
                                        Binary.getBinary()->getFileName(),
                                    inconvertibleErrorCode());
-  llvm::ArrayRef<uint8_t> BinaryId = BuildIdOr.value();
 
   int NumMatched = 0;
   for (const auto &Entry : SegmentInfo) {

diff  --git a/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
index 13e1d7f786c48..c9aff841d3517 100644
--- a/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
+++ b/llvm/test/DebugInfo/symbolize-filter-markup-parse-fields.test
@@ -2,25 +2,25 @@ RUN: split-file %s %t
 RUN: llvm-symbolizer --filter-markup < %t/log 2> %t.err
 RUN: FileCheck %s -input-file=%t.err --match-full-lines
 
-CHECK-NOT: '0x4f'
-CHECK-NOT: '00'
-CHECK: error: expected address; found ''
-CHECK: error: expected address; found '42'
-CHECK: error: expected address; found '0xgg'
+CHECK-NOT: error: expected address; found '0x4f'
+CHECK-NOT: error: expected address; found '00'
+CHECK:     error: expected address; found ''
+CHECK:     error: expected address; found '42'
+CHECK:     error: expected address; found '0xgg'
 
-CHECK-NOT: '0'
-CHECK: error: expected module ID; found ''
-CHECK: error: expected module ID; found '-1'
-CHECK-NOT: '077'
-CHECK: error: expected module ID; found '079'
-CHECK-NOT: '0xff'
-CHECK: error: expected module ID; found '0xfg'
-CHECK: error: expected module ID; found '0x'
+CHECK-NOT: error: expected module ID; found '0'
+CHECK:     error: expected module ID; found ''
+CHECK:     error: expected module ID; found '-1'
+CHECK-NOT: error: expected module ID; found '077'
+CHECK:     error: expected module ID; found '079'
+CHECK-NOT: error: expected module ID; found '0xff'
+CHECK:     error: expected module ID; found '0xfg'
+CHECK:     error: expected module ID; found '0x'
 
-CHECK: error: expected build ID; found ''
-CHECK: error: expected build ID; found '0'
-CHECK-NOT: '0xff'
-CHECK: error: expected build ID; found 'fg'
+CHECK:     error: expected build ID; found ''
+CHECK-NOT: error: expected build ID; found '0'
+CHECK-NOT: error: expected build ID; found '0xff'
+CHECK:     error: expected build ID; found 'fg'
 
 ;--- log
 {{{mmap:0x4f:1:unknown}}}

diff  --git a/llvm/tools/llvm-objdump/llvm-objdump.cpp b/llvm/tools/llvm-objdump/llvm-objdump.cpp
index e3d14192c0625..5abcdcc788441 100644
--- a/llvm/tools/llvm-objdump/llvm-objdump.cpp
+++ b/llvm/tools/llvm-objdump/llvm-objdump.cpp
@@ -1285,10 +1285,10 @@ static void createFakeELFSections(ObjectFile &Obj) {
 // Build ID. Returns std::nullopt if nothing was found.
 static std::optional<OwningBinary<Binary>>
 fetchBinaryByBuildID(const ObjectFile &Obj) {
-  std::optional<object::BuildIDRef> BuildID = getBuildID(&Obj);
-  if (!BuildID)
+  object::BuildIDRef BuildID = getBuildID(&Obj);
+  if (BuildID.empty())
     return std::nullopt;
-  std::optional<std::string> Path = BIDFetcher->fetch(*BuildID);
+  std::optional<std::string> Path = BIDFetcher->fetch(BuildID);
   if (!Path)
     return std::nullopt;
   Expected<OwningBinary<Binary>> DebugBinary = createBinary(*Path);
@@ -2943,13 +2943,11 @@ static void parseIntArg(const llvm::opt::InputArgList &InputArgs, int ID,
 
 static object::BuildID parseBuildIDArg(const opt::Arg *A) {
   StringRef V(A->getValue());
-  std::string Bytes;
-  if (!tryGetFromHex(V, Bytes))
+  object::BuildID BID = parseBuildID(V);
+  if (BID.empty())
     reportCmdLineError(A->getSpelling() + ": expected a build ID, but got '" +
                        V + "'");
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return object::BuildID(BuildID.begin(), BuildID.end());
+  return BID;
 }
 
 void objdump::invalidArgValue(const opt::Arg *A) {

diff  --git a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
index dbec2a981b91b..419f998f0a8c7 100644
--- a/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
+++ b/llvm/tools/llvm-symbolizer/llvm-symbolizer.cpp
@@ -125,15 +125,6 @@ static void enableDebuginfod(LLVMSymbolizer &Symbolizer,
   HTTPClient::initialize();
 }
 
-static object::BuildID parseBuildID(StringRef Str) {
-  std::string Bytes;
-  if (!tryGetFromHex(Str, Bytes))
-    return {};
-  ArrayRef<uint8_t> BuildID(reinterpret_cast<const uint8_t *>(Bytes.data()),
-                            Bytes.size());
-  return object::BuildID(BuildID.begin(), BuildID.end());
-}
-
 static bool parseCommand(StringRef BinaryName, bool IsAddr2Line,
                          StringRef InputString, Command &Cmd,
                          std::string &ModuleName, object::BuildID &BuildID,


        


More information about the llvm-commits mailing list