[llvm] aeda128 - Add lookup functions for efficient lookups of addresses when using GsymReader classes.

Greg Clayton via llvm-commits llvm-commits at lists.llvm.org
Thu Dec 5 16:50:10 PST 2019


Author: Greg Clayton
Date: 2019-12-05T16:49:53-08:00
New Revision: aeda128a96c4ac9eecef7563f4cf07dfcd2af0db

URL: https://github.com/llvm/llvm-project/commit/aeda128a96c4ac9eecef7563f4cf07dfcd2af0db
DIFF: https://github.com/llvm/llvm-project/commit/aeda128a96c4ac9eecef7563f4cf07dfcd2af0db.diff

LOG: Add lookup functions for efficient lookups of addresses when using GsymReader classes.

Summary:
Lookup functions are designed to not fully decode a FunctionInfo, LineTable or InlineInfo; they decode only what is needed into a LookupResult object. This allows lookups to avoid costly memory allocations and avoid parsing large amounts of information once a suitable match is found.

LookupResult objects contain the address that was looked up, the concrete function address range, the name of the concrete function, and a list of source locations. One for each inline function, and one for the concrete function. This allows one address to turn into multiple frames and improves the signal you get when symbolicating addresses in GSYM files.

Reviewers: labath, aprantl

Subscribers: mgorny, hiraditya, llvm-commits, lldb-commits

Tags: #llvm

Differential Revision: https://reviews.llvm.org/D70993

Added: 
    llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
    llvm/lib/DebugInfo/GSYM/LookupResult.cpp

Modified: 
    llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
    llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
    llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
    llvm/include/llvm/DebugInfo/GSYM/LineTable.h
    llvm/include/llvm/DebugInfo/GSYM/Range.h
    llvm/lib/DebugInfo/GSYM/CMakeLists.txt
    llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
    llvm/lib/DebugInfo/GSYM/GsymReader.cpp
    llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
    llvm/lib/DebugInfo/GSYM/LineTable.cpp
    llvm/lib/DebugInfo/GSYM/Range.cpp
    llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp

Removed: 
    


################################################################################
diff  --git a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
index 63e18bb2ecd5..1f686666c05f 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/FunctionInfo.h
@@ -12,6 +12,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/DebugInfo/GSYM/StringTable.h"
 #include <tuple>
@@ -21,6 +22,7 @@ namespace llvm {
 class raw_ostream;
 namespace gsym {
 
+class GsymReader;
 /// Function information in GSYM files encodes information for one contiguous
 /// address range. If a function has discontiguous address ranges, they will
 /// need to be encoded using multiple FunctionInfo objects.
@@ -140,6 +142,33 @@ struct FunctionInfo {
   /// function info that was successfully written into the stream.
   llvm::Expected<uint64_t> encode(FileWriter &O) const;
 
+
+  /// Lookup an address within a FunctionInfo object's data stream.
+  ///
+  /// Instead of decoding an entire FunctionInfo object when doing lookups,
+  /// we can decode only the information we need from the FunctionInfo's data
+  /// for the specific address. The lookup result information is returned as
+  /// a LookupResult.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the FunctionInfo object starting at offset zero. The
+  /// data can contain more data than needed.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in information in the returned result.
+  ///
+  /// \param FuncAddr The function start address decoded from the GsymReader.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \returns A LookupResult or an error describing the issue that was
+  /// encountered during decoding. An error should only be returned if the
+  /// address is not contained in the FunctionInfo or if the data is corrupted.
+  static llvm::Expected<LookupResult> lookup(DataExtractor &Data,
+                                             const GsymReader &GR,
+                                             uint64_t FuncAddr,
+                                             uint64_t Addr);
+
   uint64_t startAddress() const { return Range.Start; }
   uint64_t endAddress() const { return Range.End; }
   uint64_t size() const { return Range.size(); }

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
index 113bcee9c9a3..5ba13f846798 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/GsymReader.h
@@ -1,9 +1,8 @@
 //===- GsymReader.h ---------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -94,28 +93,45 @@ class GsymReader {
 
   /// Get the full function info for an address.
   ///
+  /// This should be called when a client will store a copy of the complete
+  /// FunctionInfo for a given address. For one off lookups, use the lookup()
+  /// function below.
+  ///
+  /// Symbolication server processes might want to parse the entire function
+  /// info for a given address and cache it if the process stays around to
+  /// service many symbolication addresses, like for parsing profiling
+  /// information.
+  ///
   /// \param Addr A virtual address from the orignal object file to lookup.
+  ///
   /// \returns An expected FunctionInfo that contains the function info object
   /// or an error object that indicates reason for failing to lookup the
-  /// address,
+  /// address.
   llvm::Expected<FunctionInfo> getFunctionInfo(uint64_t Addr) const;
 
+  /// Lookup an address in a GSYM.
+  ///
+  /// Lookup just the information needed for a specific address \a Addr. This
+  /// function is faster than calling getFunctionInfo() as it will only return
+  /// information that pertains to \a Addr and allows the parsing to skip any
+  /// extra information encoded for other addresses. For example the line table
+  /// parsing can stop when a matching LineEntry has been found, and the
+  /// InlineInfo can stop parsing early once a match has been found and also
+  /// skip information that doesn't match. This avoids memory allocations and
+  /// is much faster for lookups.
+  ///
+  /// \param Addr A virtual address from the original object file to lookup.
+  /// \returns An expected LookupResult that contains only the information
+  /// needed for the current address, or an error object that indicates reason
+  /// for failing to lookup the address.
+  llvm::Expected<LookupResult> lookup(uint64_t Addr) const;
+
   /// Get a string from the string table.
   ///
   /// \param Offset The string table offset for the string to retrieve.
   /// \returns The string from the strin table.
   StringRef getString(uint32_t Offset) const { return StrTab[Offset]; }
 
-protected:
-  /// Gets an address from the address table.
-  ///
-  /// Addresses are stored as offsets frrom the gsym::Header::BaseAddress.
-  ///
-  /// \param Index A index into the address table.
-  /// \returns A resolved virtual address for adddress in the address table
-  /// or llvm::None if Index is out of bounds.
-  Optional<uint64_t> getAddress(size_t Index) const;
-
   /// Get the a file entry for the suppplied file index.
   ///
   /// Used to convert any file indexes in the FunctionInfo data back into
@@ -131,6 +147,16 @@ class GsymReader {
     return llvm::None;
   }
 
+protected:
+  /// Gets an address from the address table.
+  ///
+  /// Addresses are stored as offsets from the gsym::Header::BaseAddress.
+  ///
+  /// \param Index An index into the address table.
+  /// \returns A resolved virtual address for the address in the address table
+  /// or llvm::None if Index is out of bounds.
+  Optional<uint64_t> getAddress(size_t Index) const;
+
   /// Get an appropriate address info offsets array.
   ///
   /// The address table in the GSYM file is stored as array of 1, 2, 4 or 8

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
index 48fd9a7c1308..3b95e3e050bd 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/InlineInfo.h
@@ -10,6 +10,8 @@
 #define LLVM_DEBUGINFO_GSYM_INLINEINFO_H
 
 #include "llvm/ADT/Optional.h"
+#include "llvm/DebugInfo/GSYM/LineEntry.h"
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
 #include "llvm/Support/Error.h"
 #include <stdint.h>
@@ -21,6 +23,7 @@ class raw_ostream;
 
 namespace gsym {
 
+class GsymReader;
 /// Inline information stores the name of the inline function along with
 /// an array of address ranges. It also stores the call file and call line
 /// that called this inline function. This allows us to unwind inline call
@@ -74,6 +77,52 @@ struct InlineInfo {
 
   using InlineArray = std::vector<const InlineInfo *>;
 
+  /// Lookup a single address within the inline info data.
+  ///
+  /// Clients have the option to decode an entire InlineInfo object (using
+  /// InlineInfo::decode() ) or just find the matching inline info using this
+  /// function. The benefit of using this function is that only the information
+  /// needed for the lookup will be extracted, other info can be skipped and
+  /// parsing can stop as soon as the deepest match is found. This allows
+  /// symbolication tools to be fast and efficient and avoid allocation costs
+  /// when doing lookups.
+  ///
+  /// This function will augment the SourceLocations array \a SrcLocs with any
+  /// inline information that pertains to \a Addr. If no inline information
+  /// exists for \a Addr, then \a SrcLocs will be left untouched. If there is
+  /// inline information for \a Addr, then \a SrcLocs will be modified to
+  /// contain the deepest inline function's SourceLocation at index zero
+  /// in the array and proceed up to the concrete function source file and
+  /// line at the end of the array.
+  ///
+  /// \param GR The GSYM reader that contains the string and file table that
+  /// will be used to fill in the source locations.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the InlineInfo object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the inline
+  /// information. This will be the FunctionInfo's start address and will be
+  /// used to decode the correct addresses for the inline information.
+  ///
+  /// \param Addr The address to lookup.
+  ///
+  /// \param SrcLocs The inline source locations that matches \a Addr. This
+  ///                array must be initialized with the matching line entry
+  ///                from the line table upon entry. The name of the concrete
+  ///                function must be supplied since it will get pushed to
+  ///                the last SourceLocation entry and the inline information
+  ///                will fill in the source file and line from the inline
+  ///                information.
+  ///
+  /// \returns An error if the inline information is corrupt, or
+  ///          Error::success() for all other cases, even when no information
+  ///          is added to \a SrcLocs.
+  static llvm::Error lookup(const GsymReader &GR, DataExtractor &Data,
+                            uint64_t BaseAddr, uint64_t Addr,
+                            SourceLocations &SrcLocs);
+
   /// Lookup an address in the InlineInfo object
   ///
   /// This function is used to symbolicate an inline call stack and can

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
index 3cdbccb08ced..22668e39d94c 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/LineTable.h
@@ -119,8 +119,25 @@ class LineTable {
   typedef std::vector<gsym::LineEntry> Collection;
   Collection Lines; ///< All line entries in the line table.
 public:
-  static LineEntry lookup(DataExtractor &Data, uint64_t BaseAddr,
-                          uint64_t Addr);
+  /// Lookup a single address within a line table's data.
+  ///
+  /// Clients have the option to decode an entire line table using
+  /// LineTable::decode() or just find a single matching entry using this
+  /// function. The benefit of using this function is that parsed LineEntry
+  /// objects that do not match will not be stored in an array. This will avoid
+  /// memory allocation costs and parsing can stop once a match has been found.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the LineTable object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding the line table.
+  /// This will be the FunctionInfo's start address and will be used to
+  /// initialize the line table row prior to parsing any opcodes.
+  /// \param Addr The address to lookup.
+  /// \returns A LineEntry object if a match is found, error otherwise.
+  static Expected<LineEntry> lookup(DataExtractor &Data, uint64_t BaseAddr,
+                                    uint64_t Addr);
 
   /// Decode an LineTable object from a binary data stream.
   ///

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
new file mode 100644
index 000000000000..746fd36208e1
--- /dev/null
+++ b/llvm/include/llvm/DebugInfo/GSYM/LookupResult.h
@@ -0,0 +1,61 @@
+//===- LookupResult.h -------------------------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+#define LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H
+
+#include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/ADT/StringRef.h"
+#include <inttypes.h>
+#include <vector>
+
+namespace llvm {
+class raw_ostream;
+namespace gsym {
+struct FileEntry;
+
+struct SourceLocation {
+  StringRef Name; ///< Function or symbol name.
+  StringRef Dir; ///< Line entry source file directory path.
+  StringRef Base; ///< Line entry source file basename.
+  uint32_t Line = 0; ///< Source file line number.
+};
+
+/// Two source locations are equal when their name, directory, basename and
+/// line number all match.
+inline bool operator==(const SourceLocation &LHS, const SourceLocation &RHS) {
+  if (LHS.Line != RHS.Line)
+    return false;
+  if (LHS.Name != RHS.Name || LHS.Dir != RHS.Dir)
+    return false;
+  return LHS.Base == RHS.Base;
+}
+
+raw_ostream &operator<<(raw_ostream &OS, const SourceLocation &R);
+
+using SourceLocations = std::vector<SourceLocation>;
+
+
+struct LookupResult {
+  uint64_t LookupAddr = 0; ///< The address that this lookup pertains to.
+  AddressRange FuncRange; ///< The concrete function address range.
+  StringRef FuncName; ///< The concrete function name that contains LookupAddr.
+  /// The source locations that match this address. This information will only
+  /// be filled in if the FunctionInfo contains a line table. If an address is
+  /// for a concrete function with no inlined functions, this array will have
+  /// one entry. If an address points to an inline function, there will be one
+  /// SourceLocation for each inlined function with the last entry pointing to
+  /// the concrete function itself. This allows one address to generate
+  /// multiple locations and allows unwinding of inline call stacks. The
+  /// deepest inline function will appear at index zero in the source locations
+  /// array, and the concrete function will appear at the end of the array.
+  SourceLocations Locations;
+  std::string getSourceFile(uint32_t Index) const; ///< Locations[Index] path.
+};
+
+raw_ostream &operator<<(raw_ostream &OS, const LookupResult &R);
+
+} // namespace gsym
+} // namespace llvm
+
+#endif // #ifndef LLVM_DEBUGINFO_GSYM_LOOKUPRESULT_H

diff  --git a/llvm/include/llvm/DebugInfo/GSYM/Range.h b/llvm/include/llvm/DebugInfo/GSYM/Range.h
index 37cfec713f26..49e316eae3cf 100644
--- a/llvm/include/llvm/DebugInfo/GSYM/Range.h
+++ b/llvm/include/llvm/DebugInfo/GSYM/Range.h
@@ -61,6 +61,14 @@ struct AddressRange {
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an address range object in the specified data at the specified
+  /// offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  static void skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRange &R);
@@ -100,6 +108,16 @@ class AddressRanges {
   void decode(DataExtractor &Data, uint64_t BaseAddr, uint64_t &Offset);
   void encode(FileWriter &O, uint64_t BaseAddr) const;
   /// @}
+
+  /// Skip an encoded AddressRanges object in the specified data at the
+  /// specified offset.
+  ///
+  /// \param Data The binary stream to read the data from.
+  ///
+  /// \param Offset The byte offset within \a Data.
+  ///
+  /// \returns The number of address ranges that were skipped.
+  static uint64_t skip(DataExtractor &Data, uint64_t &Offset);
 };
 
 raw_ostream &operator<<(raw_ostream &OS, const AddressRanges &AR);

diff  --git a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
index 632ccff5d790..d0fb2caa8813 100644
--- a/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
+++ b/llvm/lib/DebugInfo/GSYM/CMakeLists.txt
@@ -6,6 +6,7 @@ add_llvm_component_library(LLVMDebugInfoGSYM
   GsymReader.cpp
   InlineInfo.cpp
   LineTable.cpp
+  LookupResult.cpp
   Range.cpp
 
   ADDITIONAL_HEADER_DIRS

diff  --git a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
index e234a13fe06b..26aab06108b6 100644
--- a/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/FunctionInfo.cpp
@@ -8,6 +8,7 @@
 
 #include "llvm/DebugInfo/GSYM/FunctionInfo.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/LineTable.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
@@ -145,3 +146,104 @@ llvm::Expected<uint64_t> FunctionInfo::encode(FileWriter &O) const {
   O.writeU32(0);
   return FuncInfoOffset;
 }
+
+
+/// Look up \a Addr in an encoded FunctionInfo without fully decoding it.
+///
+/// Reads the function size and name, then walks the info records that follow.
+/// The line table record is searched for the row matching \a Addr, and the
+/// inline info record (if any) is consulted afterwards to expand the result
+/// into one SourceLocation per inlined frame.
+///
+/// \param Data Encoded FunctionInfo bytes starting at offset zero.
+/// \param GR Reader used to resolve string table offsets and file indexes.
+/// \param FuncAddr Start address of the function described by \a Data.
+/// \param Addr The address to look up.
+/// \returns The LookupResult, or an error if the data is truncated or
+/// invalid, if \a Addr is at or beyond the end of the function's range, or
+/// if the line table or inline info lookup fails.
+llvm::Expected<LookupResult> FunctionInfo::lookup(DataExtractor &Data,
+                                                  const GsymReader &GR,
+                                                  uint64_t FuncAddr,
+                                                  uint64_t Addr) {
+  LookupResult LR;
+  LR.LookupAddr = Addr;
+  LR.FuncRange.Start = FuncAddr;
+  uint64_t Offset = 0;
+  LR.FuncRange.End = FuncAddr + Data.getU32(&Offset);
+  uint32_t NameOffset = Data.getU32(&Offset);
+  // The "lookup" function doesn't report errors as accurately as the "decode"
+  // function as it is meant to be fast. For more accurate errors we could call
+  // "decode".
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+                              "FunctionInfo data is truncated");
+  // This function will be called with the result of a binary search of the
+  // address table, we must still make sure the address does not fall into a
+  // gap between functions or after the last function.
+  if (Addr >= LR.FuncRange.End)
+    return createStringError(std::errc::io_error,
+        "address 0x%" PRIx64 " is not in GSYM", Addr);
+
+  if (NameOffset == 0)
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": invalid FunctionInfo Name value 0x00000000",
+        Offset - 4);
+  LR.FuncName = GR.getString(NameOffset);
+  bool Done = false;
+  Optional<LineEntry> LineEntry;
+  Optional<DataExtractor> InlineInfoData;
+  while (!Done) {
+    // Each record starts with a 32-bit type and a 32-bit payload length.
+    if (!Data.isValidOffsetForDataOfSize(Offset, 8))
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    // Named IT so the local does not shadow the InfoType enum used in the
+    // case labels below.
+    const uint32_t IT = Data.getU32(&Offset);
+    const uint32_t InfoLength = Data.getU32(&Offset);
+    const StringRef InfoBytes = Data.getData().substr(Offset, InfoLength);
+    if (InfoLength != InfoBytes.size())
+      return createStringError(std::errc::io_error,
+                               "FunctionInfo data is truncated");
+    DataExtractor InfoData(InfoBytes, Data.isLittleEndian(),
+                           Data.getAddressSize());
+    switch (IT) {
+      case InfoType::EndOfList:
+        Done = true;
+        break;
+
+      case InfoType::LineTableInfo:
+        if (auto ExpectedLE = LineTable::lookup(InfoData, FuncAddr, Addr))
+          LineEntry = ExpectedLE.get();
+        else
+          return ExpectedLE.takeError();
+        break;
+
+      case InfoType::InlineInfo:
+        // We will parse the inline info after our line table, but only if
+        // we have a line entry.
+        InlineInfoData = InfoData;
+        break;
+
+      default:
+        // Unknown record types are skipped using their length.
+        break;
+    }
+    Offset += InfoLength;
+  }
+
+  if (!LineEntry) {
+    // We don't have a valid line entry for our address, fill in our source
+    // location as best we can and return.
+    SourceLocation SrcLoc;
+    SrcLoc.Name = LR.FuncName;
+    LR.Locations.push_back(SrcLoc);
+    return LR;
+  }
+
+  Optional<FileEntry> LineEntryFile = GR.getFile(LineEntry->File);
+  if (!LineEntryFile)
+    return createStringError(std::errc::invalid_argument,
+                              "failed to extract file[%" PRIu32 "]",
+                              LineEntry->File);
+
+  SourceLocation SrcLoc;
+  SrcLoc.Name = LR.FuncName;
+  SrcLoc.Dir = GR.getString(LineEntryFile->Dir);
+  SrcLoc.Base = GR.getString(LineEntryFile->Base);
+  SrcLoc.Line = LineEntry->Line;
+  LR.Locations.push_back(SrcLoc);
+  // If we don't have inline information, we are done.
+  if (!InlineInfoData)
+    return LR;
+  // We have inline information. Try to augment the lookup result with this
+  // data.
+  llvm::Error Err = InlineInfo::lookup(GR, *InlineInfoData, FuncAddr, Addr,
+                                       LR.Locations);
+  if (Err)
+    return std::move(Err);
+  return LR;
+}

diff  --git a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
index 1b448cf80b70..b4f3f2052ae7 100644
--- a/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
+++ b/llvm/lib/DebugInfo/GSYM/GsymReader.cpp
@@ -1,9 +1,8 @@
 //===- GsymReader.cpp -----------------------------------------------------===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
@@ -263,3 +262,18 @@ llvm::Expected<FunctionInfo> GsymReader::getFunctionInfo(uint64_t Addr) const {
                            "failed to extract address[%" PRIu64 "]",
                            *AddressIndex);
 }
+
+/// Find the address table entry for \a Addr and run FunctionInfo::lookup()
+/// on that entry's encoded function info, using the entry's resolved address
+/// as the function start address.
+llvm::Expected<LookupResult> GsymReader::lookup(uint64_t Addr) const {
+  Expected<uint64_t> ExpectedIdx = getAddressIndex(Addr);
+  if (!ExpectedIdx)
+    return ExpectedIdx.takeError();
+  // Address info offsets size should have been checked in parse().
+  assert(*ExpectedIdx < AddrInfoOffsets.size());
+  auto FuncInfoOffset = AddrInfoOffsets[*ExpectedIdx];
+  DataExtractor FuncInfoData(MemBuffer->getBuffer().substr(FuncInfoOffset),
+                             Endian, 4);
+  Optional<uint64_t> FuncStartAddr = getAddress(*ExpectedIdx);
+  if (!FuncStartAddr)
+    return createStringError(std::errc::invalid_argument,
+                             "failed to extract address[%" PRIu64 "]",
+                             *ExpectedIdx);
+  return FunctionInfo::lookup(FuncInfoData, *this, *FuncStartAddr, Addr);
+}

diff  --git a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
index 32ed2c709575..1b8c974fdcd2 100644
--- a/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
+++ b/llvm/lib/DebugInfo/GSYM/InlineInfo.cpp
@@ -1,14 +1,14 @@
 //===- InlineInfo.cpp -------------------------------------------*- C++ -*-===//
 //
-//                     The LLVM Compiler Infrastructure
-//
-// This file is distributed under the University of Illinois Open Source
-// License. See LICENSE.TXT for details.
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/GSYM/FileEntry.h"
 #include "llvm/DebugInfo/GSYM/FileWriter.h"
+#include "llvm/DebugInfo/GSYM/GsymReader.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
 #include "llvm/Support/DataExtractor.h"
 #include <algorithm>
@@ -60,6 +60,108 @@ llvm::Optional<InlineInfo::InlineArray> InlineInfo::getInlineStack(uint64_t Addr
   return llvm::None;
 }
 
+/// Advance \a Offset past one encoded InlineInfo object and its children.
+///
+/// InlineInfo::lookup() uses this for objects whose address ranges do not
+/// contain the address being looked up. Nothing is decoded into InlineInfo
+/// objects, so no child arrays are allocated.
+///
+/// \param Data The binary stream to read the data from.
+///
+/// \param Offset The byte offset within \a Data; updated as data is consumed.
+///
+/// \param SkippedRanges True if the caller has already consumed this object's
+/// address ranges.
+///
+/// \returns False if the address ranges were consumed here and there were
+/// none, true once an InlineInfo object has been skipped.
+static bool skip(DataExtractor &Data, uint64_t &Offset, bool SkippedRanges) {
+  if (!SkippedRanges && AddressRanges::skip(Data, Offset) == 0)
+    return false;
+  const bool HasChildren = Data.getU8(&Offset) != 0;
+  Data.getU32(&Offset);     // Inline.Name is not needed.
+  Data.getULEB128(&Offset); // Inline.CallFile is not needed.
+  Data.getULEB128(&Offset); // Inline.CallLine is not needed.
+  if (!HasChildren)
+    return true;
+  // Consume children until one reports that it had no address ranges.
+  bool SkippedChild;
+  do {
+    SkippedChild = skip(Data, Offset, false /* SkippedRanges */);
+  } while (SkippedChild);
+  return true;
+}
+
+/// A lookup helper function.
+///
+/// Used during the InlineInfo::lookup() call to parse an InlineInfo object
+/// only if it contains the address, skipping non-matching objects and never
+/// appending to InlineInfo::Children, which avoids allocations.
+///
+/// \param GR Reader used to resolve file entries and strings.
+/// \param Data The binary stream to read the data from.
+/// \param Offset The byte offset within \a Data; advanced as data is read.
+/// \param BaseAddr The address that range offsets are relative to.
+/// \param Addr The address to look up.
+/// \param SrcLocs Must be non-empty; matching locations are appended.
+/// \param Err Set if a call file index can't be resolved via \a GR.
+/// \returns True if ranges were empty or a location was added, else false.
+
+static bool lookup(const GsymReader &GR, DataExtractor &Data, uint64_t &Offset,
+                   uint64_t BaseAddr, uint64_t Addr, SourceLocations &SrcLocs,
+                   llvm::Error &Err) {
+  InlineInfo Inline;
+  Inline.Ranges.decode(Data, BaseAddr, Offset);
+  if (Inline.Ranges.empty())
+    return true;
+  // Check if the address is contained within the inline information, and if
+  // not, quickly skip this InlineInfo object and all its children.
+  if (!Inline.Ranges.contains(Addr)) {
+    skip(Data, Offset, true /* SkippedRanges */);
+    return false;
+  }
+
+  // The address range is contained within this InlineInfo, add the source
+  // location for this InlineInfo and any children that contain the address.
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  Inline.Name = Data.getU32(&Offset);
+  Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+  Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+  if (HasChildren) {
+    // Child address ranges are encoded relative to the first address in the
+    // parent InlineInfo object.
+    const auto ChildBaseAddr = Inline.Ranges[0].Start;
+    bool Done = false;
+    while (!Done)
+      Done = lookup(GR, Data, Offset, ChildBaseAddr, Addr, SrcLocs, Err);
+  }
+
+  Optional<FileEntry> CallFile = GR.getFile(Inline.CallFile);
+  if (!CallFile) {
+    Err = createStringError(std::errc::invalid_argument,
+                            "failed to extract file[%" PRIu32 "]",
+                            Inline.CallFile);
+    return false;
+  }
+
+  SourceLocation SrcLoc;
+  SrcLoc.Name = SrcLocs.back().Name;
+  SrcLoc.Dir = GR.getString(CallFile->Dir);
+  SrcLoc.Base = GR.getString(CallFile->Base);
+  SrcLoc.Line = Inline.CallLine;
+  SrcLocs.back().Name = GR.getString(Inline.Name);
+  SrcLocs.push_back(SrcLoc);
+  return true;
+}
+
+/// Public entry point for inline lookups: runs the recursive helper on the
+/// top-level InlineInfo at the start of \a Data and returns whatever error
+/// state the helper left behind.
+llvm::Error InlineInfo::lookup(const GsymReader &GR, DataExtractor &Data,
+                               uint64_t BaseAddr, uint64_t Addr,
+                               SourceLocations &SrcLocs) {
+  llvm::Error Result = Error::success();
+  // The top-level InlineInfo is encoded at offset zero. The helper's boolean
+  // return value only drives iteration over siblings, so it is unused here.
+  uint64_t Cursor = 0;
+  ::lookup(GR, Data, Cursor, BaseAddr, Addr, SrcLocs, Result);
+  return Result;
+}
+
 /// Decode an InlineInfo in Data at the specified offset.
 ///
 /// A local helper function to decode InlineInfo objects. This function is

diff  --git a/llvm/lib/DebugInfo/GSYM/LineTable.cpp b/llvm/lib/DebugInfo/GSYM/LineTable.cpp
index 824c0041be9f..a49a3ba9bf2a 100644
--- a/llvm/lib/DebugInfo/GSYM/LineTable.cpp
+++ b/llvm/lib/DebugInfo/GSYM/LineTable.cpp
@@ -262,8 +262,8 @@ llvm::Expected<LineTable> LineTable::decode(DataExtractor &Data,
 // Parse the line table on the fly and find the row we are looking for.
 // We will need to determine if we need to cache the line table by calling
 // LineTable::parseAllEntries(...) or just call this function each time.
-// There is a CPU vs memory tradeoff we will need to determine.
-LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
+// There is a CPU vs memory tradeoff we will need to determine.
+Expected<LineEntry> LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Addr) {
   LineEntry Result;
   llvm::Error Err = parse(Data, BaseAddr,
                           [Addr, &Result](const LineEntry &Row) -> bool {
@@ -277,7 +277,13 @@ LineEntry LineTable::lookup(DataExtractor &Data, uint64_t BaseAddr, uint64_t Add
     }
     return true; // Keep parsing till we find the right row.
   });
-  return Result;
+  if (Err)
+    return std::move(Err);
+  if (Result.isValid())
+    return Result;
+  return createStringError(std::errc::invalid_argument,
+                           "address 0x%" PRIx64 " is not in the line table",
+                           Addr);
 }
 
 raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LineTable &LT) {

diff  --git a/llvm/lib/DebugInfo/GSYM/LookupResult.cpp b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
new file mode 100644
index 000000000000..7b7ee8c3d799
--- /dev/null
+++ b/llvm/lib/DebugInfo/GSYM/LookupResult.cpp
@@ -0,0 +1,68 @@
+//===- LookupResult.cpp -------------------------------------------------*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/DebugInfo/GSYM/LookupResult.h"
+#include "llvm/ADT/SmallString.h"
+#include "llvm/Support/Format.h"
+#include "llvm/Support/Path.h"
+#include "llvm/Support/raw_ostream.h"
+
+using namespace llvm;
+using namespace gsym;
+
+std::string LookupResult::getSourceFile(uint32_t Index) const {
+  // Stays empty when Index is out of range or the entry has no file parts.
+  std::string Path;
+  if (Index >= Locations.size())
+    return Path;
+  const auto &Loc = Locations[Index];
+  if (Loc.Dir.empty() || Loc.Base.empty()) {
+    // At most one component is set, so that component alone is the path.
+    Path = Loc.Dir.empty() ? Loc.Base : Loc.Dir;
+    return Path;
+  }
+  // Both components are set: join them with llvm::sys::path::append.
+  llvm::SmallString<64> Joined;
+  llvm::sys::path::append(Joined, Loc.Dir, Loc.Base);
+  Path.assign(Joined.begin(), Joined.end());
+  return Path;
+}
+
+/// Prints "<Name> @ <Dir><sep><Base>:<Line>"; Dir and <sep> only if Dir is set.
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const SourceLocation &SL) {
+  OS << SL.Name << " @ ";
+  if (!SL.Dir.empty()) {
+    // '\' only when Dir has a backslash and no '/'; && and ! (not and/not)
+    // because MSVC's default mode does not accept the alternative tokens.
+    const bool UseBackslash = SL.Dir.contains('\\') && !SL.Dir.contains('/');
+    OS << SL.Dir << (UseBackslash ? '\\' : '/');
+  }
+  if (SL.Base.empty())
+    OS << "<invalid-file>";
+  else
+    OS << SL.Base;
+  OS << ':' << SL.Line;
+  return OS;
+}
+
+raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const LookupResult &LR) {
+  // One location per line; every line after the first follows OS.indent(20).
+  OS << HEX64(LR.LookupAddr) << ": ";
+  const auto Count = LR.Locations.size();
+  size_t Idx = 0;
+  for (const auto &Loc : LR.Locations) {
+    if (Idx != 0)
+      (OS << '\n').indent(20);
+    OS << Loc;
+    // Every entry except the last one gets the " [inlined]" suffix.
+    if (++Idx != Count)
+      OS << " [inlined]";
+  }
+  OS << '\n';
+  return OS;
+}

diff  --git a/llvm/lib/DebugInfo/GSYM/Range.cpp b/llvm/lib/DebugInfo/GSYM/Range.cpp
index 19ab700fdd57..f78101e49bf8 100644
--- a/llvm/lib/DebugInfo/GSYM/Range.cpp
+++ b/llvm/lib/DebugInfo/GSYM/Range.cpp
@@ -100,3 +100,15 @@ void AddressRanges::decode(DataExtractor &Data, uint64_t BaseAddr,
   for (auto &Range : Ranges)
     Range.decode(Data, BaseAddr, Offset);
 }
+
+void AddressRange::skip(DataExtractor &Data, uint64_t &Offset) {
+  for (int Field = 0; Field < 2; ++Field) // two ULEB128 values per range
+    (void)Data.getULEB128(&Offset);
+}
+
+uint64_t AddressRanges::skip(DataExtractor &Data, uint64_t &Offset) {
+  const uint64_t Count = Data.getULEB128(&Offset); // leading range count
+  for (uint64_t Left = Count; Left != 0; --Left)
+    AddressRange::skip(Data, Offset); // advance Offset past one range
+  return Count;
+}

diff  --git a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
index 9fd8edf6a107..bee1d4091cd4 100644
--- a/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
+++ b/llvm/unittests/DebugInfo/GSYM/GSYMTest.cpp
@@ -20,8 +20,10 @@
 #include "llvm/DebugInfo/GSYM/StringTable.h"
 #include "llvm/Support/DataExtractor.h"
 #include "llvm/Support/Endian.h"
+#include "llvm/Testing/Support/Error.h"
 
 #include "gtest/gtest.h"
+#include "gmock/gmock.h"
 #include <string>
 
 using namespace llvm;
@@ -1302,3 +1304,100 @@ TEST(GSYMTest, TestGsymReader) {
                             "address 0x1030 not in GSYM");
   }
 }
+
+TEST(GSYMTest, TestGsymLookups) {
+  // Test creating a GSYM file with a function that has inline information.
+  // Verify that lookups work correctly. Lookups do not decode the entire
+  // FunctionInfo or InlineInfo, they only extract information needed for the
+  // lookup to happen which avoids allocations which can slow down
+  // symbolication.
+  GsymCreator GC;
+  FunctionInfo FI(0x1000, 0x100, GC.insertString("main"));
+  const auto ByteOrder = support::endian::system_endianness();
+  FI.OptLineTable = LineTable();
+  const uint32_t MainFileIndex = GC.insertFile("/tmp/main.c");
+  const uint32_t FooFileIndex = GC.insertFile("/tmp/foo.h");
+  FI.OptLineTable->push(LineEntry(0x1000, MainFileIndex, 5));
+  FI.OptLineTable->push(LineEntry(0x1010, FooFileIndex, 10));
+  FI.OptLineTable->push(LineEntry(0x1012, FooFileIndex, 20));
+  FI.OptLineTable->push(LineEntry(0x1014, FooFileIndex, 11));
+  FI.OptLineTable->push(LineEntry(0x1016, FooFileIndex, 30));
+  FI.OptLineTable->push(LineEntry(0x1018, FooFileIndex, 12));
+  FI.OptLineTable->push(LineEntry(0x1020, MainFileIndex, 8));
+  FI.Inline = InlineInfo(); // top-level inline entry, filled in as "inline1"
+
+  FI.Inline->Name = GC.insertString("inline1");
+  FI.Inline->CallFile = MainFileIndex;
+  FI.Inline->CallLine = 6;
+  FI.Inline->Ranges.insert(AddressRange(0x1010, 0x1020));
+  InlineInfo Inline2;
+  Inline2.Name = GC.insertString("inline2");
+  Inline2.CallFile = FooFileIndex;
+  Inline2.CallLine = 33;
+  Inline2.Ranges.insert(AddressRange(0x1012, 0x1014));
+  FI.Inline->Children.emplace_back(Inline2); // inline2 is a child of inline1
+  InlineInfo Inline3;
+  Inline3.Name = GC.insertString("inline3");
+  Inline3.CallFile = FooFileIndex;
+  Inline3.CallLine = 35;
+  Inline3.Ranges.insert(AddressRange(0x1016, 0x1018));
+  FI.Inline->Children.emplace_back(Inline3); // inline3 is a child of inline1
+  GC.addFunctionInfo(std::move(FI));
+  Error FinalizeErr = GC.finalize(llvm::nulls());
+  ASSERT_FALSE(FinalizeErr);
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  llvm::Error Err = GC.encode(FW);
+  ASSERT_FALSE((bool)Err);
+  Expected<GsymReader> GR = GsymReader::copyBuffer(OutStrm.str());
+  ASSERT_TRUE(bool(GR));
+
+  // Verify lookups; expected frames are listed innermost first, main last.
+  auto LR = GR->lookup(0x1000);
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5}));
+  LR = GR->lookup(0x100F); // last address before inline1's range begins
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 5}));
+
+  LR = GR->lookup(0x1010); // first address inside inline1
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 10},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1012); // first address inside inline2
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline2", "/tmp", "foo.h", 20},
+                         SourceLocation{"inline1", "/tmp", "foo.h", 33},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1014); // end of inline2's range: inline1 and main only
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 11},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1016); // first address inside inline3
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline3", "/tmp", "foo.h", 30},
+                         SourceLocation{"inline1", "/tmp", "foo.h", 35},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1018); // end of inline3's range: inline1 and main only
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"inline1", "/tmp", "foo.h", 12},
+                         SourceLocation{"main", "/tmp", "main.c", 6}));
+
+  LR = GR->lookup(0x1020); // end of inline1's range: only main is expected
+  ASSERT_THAT_EXPECTED(LR, Succeeded());
+  EXPECT_THAT(LR->Locations,
+    testing::ElementsAre(SourceLocation{"main", "/tmp", "main.c", 8}));
+}


        


More information about the llvm-commits mailing list