[llvm] r370936 - Add encode and decode methods to InlineInfo and document encoding format to the GSYM file format.

Greg Clayton via llvm-commits llvm-commits at lists.llvm.org
Wed Sep 4 10:32:51 PDT 2019


Author: gclayton
Date: Wed Sep  4 10:32:51 2019
New Revision: 370936

URL: http://llvm.org/viewvc/llvm-project?rev=370936&view=rev
Log:
Add encode and decode methods to InlineInfo and document encoding format to the GSYM file format.

This patch adds the ability to encode and decode InlineInfo objects and adds test coverage. Error handling is introduced in the encoding and decoding which will be used from here on out for remaining patches.

Differential Revision: https://reviews.llvm.org/D66600



Modified:
    llvm/trunk/include/llvm/DebugInfo/GSYM/InlineInfo.h
    llvm/trunk/include/llvm/DebugInfo/GSYM/Range.h
    llvm/trunk/lib/DebugInfo/GSYM/InlineInfo.cpp
    llvm/trunk/lib/DebugInfo/GSYM/Range.cpp
    llvm/trunk/unittests/DebugInfo/GSYM/GSYMTest.cpp

Modified: llvm/trunk/include/llvm/DebugInfo/GSYM/InlineInfo.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/GSYM/InlineInfo.h?rev=370936&r1=370935&r2=370936&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/GSYM/InlineInfo.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/GSYM/InlineInfo.h Wed Sep  4 10:32:51 2019
@@ -11,6 +11,7 @@
 
 #include "llvm/ADT/Optional.h"
 #include "llvm/DebugInfo/GSYM/Range.h"
+#include "llvm/Support/Error.h"
 #include <stdint.h>
 #include <vector>
 
@@ -30,6 +31,30 @@ namespace gsym {
 /// Any clients that encode information will need to ensure the ranges are
 /// all contined correctly or lookups could fail. Add ranges in these objects
 /// must be contained in the top level FunctionInfo address ranges as well.
+///
+/// ENCODING
+///
+/// When saved to disk, the inline info encodes all ranges to be relative to
+/// a parent address range. This will be the FunctionInfo's start address if
+/// the InlineInfo is directly contained in a FunctionInfo, or the start
+/// address of the containing parent InlineInfo's first "Ranges" member. This
+/// allows address ranges to be efficiently encoded using ULEB128 encodings as
+/// we encode the offset and size of each range instead of full addresses. This
+/// also makes any encoded addresses easy to relocate as we just need to
+/// relocate the FunctionInfo's start address.
+///
+/// - The AddressRanges member "Ranges" is encoded using an appropriate base
+///   address as described above.
+/// - UINT8 boolean value that specifies if the InlineInfo object has children.
+/// - UINT32 string table offset that points to the name of the inline
+///   function.
+/// - ULEB128 integer that specifies the file of the call site that called
+///   this function.
+/// - ULEB128 integer that specifies the source line of the call site that
+///   called this function.
+/// - if this object has children, encode each child InlineInfo using the
+///   first address range's start address as the base address.
+///
 struct InlineInfo {
 
   uint32_t Name; ///< String table offset in the string table.
@@ -61,6 +86,37 @@ struct InlineInfo {
   /// \returns optional vector of InlineInfo objects that describe the
   /// inline call stack for a given address, false otherwise.
   llvm::Optional<InlineArray> getInlineStack(uint64_t Addr) const;
+
+  /// Decode an InlineInfo object from a binary data stream.
+  ///
+  /// \param Data The binary stream to read the data from. This object must
+  /// have the data for the InlineInfo object starting at offset zero. The data
+  /// can contain more data than needed.
+  ///
+  /// \param BaseAddr The base address to use when decoding all address ranges.
+  /// This will be the FunctionInfo's start address if this object is directly
+  /// contained in a FunctionInfo object, or the start address of the first
+  /// address range in an InlineInfo object if this object is a child of
+  /// another InlineInfo object.
+  /// \returns An InlineInfo or an error describing the issue that was
+  /// encountered during decoding.
+  static llvm::Expected<InlineInfo> decode(DataExtractor &Data,
+                                           uint64_t BaseAddr);
+
+  /// Encode this InlineInfo object into FileWriter stream.
+  ///
+  /// \param O The binary stream to write the data to at the current file
+  /// position.
+  ///
+  /// \param BaseAddr The base address to use when encoding all address ranges.
+  /// This will be the FunctionInfo's start address if this object is directly
+  /// contained in a FunctionInfo object, or the start address of the first
+  /// address range in an InlineInfo object if this object is a child of
+  /// another InlineInfo object.
+  ///
+  /// \returns An error object that indicates success or failure of the
+  /// encoding process.
+  llvm::Error encode(FileWriter &O, uint64_t BaseAddr) const;
 };
 
 inline bool operator==(const InlineInfo &LHS, const InlineInfo &RHS) {

Modified: llvm/trunk/include/llvm/DebugInfo/GSYM/Range.h
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/include/llvm/DebugInfo/GSYM/Range.h?rev=370936&r1=370935&r2=370936&view=diff
==============================================================================
--- llvm/trunk/include/llvm/DebugInfo/GSYM/Range.h (original)
+++ llvm/trunk/include/llvm/DebugInfo/GSYM/Range.h Wed Sep  4 10:32:51 2019
@@ -80,6 +80,7 @@ public:
   void clear() { Ranges.clear(); }
   bool empty() const { return Ranges.empty(); }
   bool contains(uint64_t Addr) const;
+  bool contains(AddressRange Range) const;
   void insert(AddressRange Range);
   size_t size() const { return Ranges.size(); }
   bool operator==(const AddressRanges &RHS) const {

Modified: llvm/trunk/lib/DebugInfo/GSYM/InlineInfo.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/GSYM/InlineInfo.cpp?rev=370936&r1=370935&r2=370936&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/GSYM/InlineInfo.cpp (original)
+++ llvm/trunk/lib/DebugInfo/GSYM/InlineInfo.cpp Wed Sep  4 10:32:51 2019
@@ -8,7 +8,9 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/DebugInfo/GSYM/FileEntry.h"
+#include "llvm/DebugInfo/GSYM/FileWriter.h"
 #include "llvm/DebugInfo/GSYM/InlineInfo.h"
+#include "llvm/Support/DataExtractor.h"
 #include <algorithm>
 #include <inttypes.h>
 
@@ -57,3 +59,102 @@ llvm::Optional<InlineInfo::InlineArray>
     return Result;
   return llvm::None;
 }
+
+/// Decode an InlineInfo in Data at the specified offset.
+///
+/// A local helper function to decode InlineInfo objects. This function is
+/// called recursively when parsing child InlineInfo objects.
+///
+/// \param Data The data extractor to decode from.
+/// \param Offset The offset within \a Data to decode from. It is passed by
+/// reference and is advanced as data is extracted.
+/// \param BaseAddr The base address to use when decoding address ranges.
+/// \returns An InlineInfo or an error describing the issue that was
+/// encountered during decoding.
+static llvm::Expected<InlineInfo> decode(DataExtractor &Data, uint64_t &Offset,
+                                         uint64_t BaseAddr) {
+  InlineInfo Inline;
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": missing InlineInfo address ranges data", Offset);
+  Inline.Ranges.decode(Data, BaseAddr, Offset);
+  if (Inline.Ranges.empty())
+    return Inline;
+  if (!Data.isValidOffsetForDataOfSize(Offset, 1))
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": missing InlineInfo uint8_t indicating children",
+        Offset);
+  bool HasChildren = Data.getU8(&Offset) != 0;
+  if (!Data.isValidOffsetForDataOfSize(Offset, 4))
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": missing InlineInfo uint32_t for name", Offset);
+  Inline.Name = Data.getU32(&Offset);
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call file", Offset);
+  Inline.CallFile = (uint32_t)Data.getULEB128(&Offset);
+  if (!Data.isValidOffset(Offset))
+    return createStringError(std::errc::io_error,
+        "0x%8.8" PRIx64 ": missing ULEB128 for InlineInfo call line", Offset);
+  Inline.CallLine = (uint32_t)Data.getULEB128(&Offset);
+  if (HasChildren) {
+    // Child address ranges are encoded relative to the first address in the
+    // parent InlineInfo object.
+    const auto ChildBaseAddr = Inline.Ranges[0].Start;
+    while (true) {
+      llvm::Expected<InlineInfo> Child = decode(Data, Offset, ChildBaseAddr);
+      if (!Child)
+        return Child.takeError();
+      // InlineInfo with empty Ranges terminates a child sibling chain.
+      if (Child.get().Ranges.empty())
+        break;
+      Inline.Children.emplace_back(std::move(*Child));
+    }
+  }
+  return Inline;
+}
+
+llvm::Expected<InlineInfo> InlineInfo::decode(DataExtractor &Data,
+                                              uint64_t BaseAddr) {
+  uint64_t Offset = 0;
+  return ::decode(Data, Offset, BaseAddr);
+}
+
+llvm::Error InlineInfo::encode(FileWriter &O, uint64_t BaseAddr) const {
+  // Users must verify the InlineInfo is valid prior to calling this function.
+  // We don't want to emit any InlineInfo objects if they are not valid since
+  // it will waste space in the GSYM file.
+  if (!isValid())
+    return createStringError(std::errc::invalid_argument,
+                             "attempted to encode invalid InlineInfo object");
+  Ranges.encode(O, BaseAddr);
+  bool HasChildren = !Children.empty();
+  O.writeU8(HasChildren);
+  O.writeU32(Name);
+  O.writeULEB(CallFile);
+  O.writeULEB(CallLine);
+  if (HasChildren) {
+    // Child address ranges are encoded as relative to the first
+    // address in the Ranges for this object. This keeps the offsets
+    // small and allows for efficient encoding using ULEB offsets.
+    const uint64_t ChildBaseAddr = Ranges[0].Start;
+    for (const auto &Child : Children) {
+      // Make sure all child address ranges are contained in the parent address
+      // ranges.
+      for (const auto &ChildRange: Child.Ranges) {
+        if (!Ranges.contains(ChildRange))
+          return createStringError(std::errc::invalid_argument,
+                                   "child range not contained in parent");
+      }
+      llvm::Error Err = Child.encode(O, ChildBaseAddr);
+      if (Err)
+        return Err;
+    }
+
+    // Terminate child sibling chain by emitting a zero. This zero will cause
+    // the decode() helper above to produce an InlineInfo with empty Ranges,
+    // which stops the decoding of child InlineInfo objects that are siblings.
+    O.writeULEB(0);
+  }
+  return Error::success();
+}

Modified: llvm/trunk/lib/DebugInfo/GSYM/Range.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/lib/DebugInfo/GSYM/Range.cpp?rev=370936&r1=370935&r2=370936&view=diff
==============================================================================
--- llvm/trunk/lib/DebugInfo/GSYM/Range.cpp (original)
+++ llvm/trunk/lib/DebugInfo/GSYM/Range.cpp Wed Sep  4 10:32:51 2019
@@ -42,6 +42,17 @@ bool AddressRanges::contains(uint64_t Ad
   return It != Ranges.begin() && Addr < It[-1].End;
 }
 
+bool AddressRanges::contains(AddressRange Range) const {
+  if (Range.size() == 0)
+    return false;
+  auto It = std::partition_point(
+      Ranges.begin(), Ranges.end(),
+      [=](const AddressRange &R) { return R.Start <= Range.Start; });
+  if (It == Ranges.begin())
+    return false;
+  return Range.End <= It[-1].End;
+}
+
 raw_ostream &llvm::gsym::operator<<(raw_ostream &OS, const AddressRange &R) {
   return OS << '[' << HEX64(R.Start) << " - " << HEX64(R.End) << ")";
 }

Modified: llvm/trunk/unittests/DebugInfo/GSYM/GSYMTest.cpp
URL: http://llvm.org/viewvc/llvm-project/llvm/trunk/unittests/DebugInfo/GSYM/GSYMTest.cpp?rev=370936&r1=370935&r2=370936&view=diff
==============================================================================
--- llvm/trunk/unittests/DebugInfo/GSYM/GSYMTest.cpp (original)
+++ llvm/trunk/unittests/DebugInfo/GSYM/GSYMTest.cpp Wed Sep  4 10:32:51 2019
@@ -134,6 +134,67 @@ TEST(GSYMTest, TestFunctionInfo) {
   EXPECT_LT(FIWithLines, FIWithLinesWithHigherAddress);
 }
 
+void checkError(ArrayRef<std::string> ExpectedMsgs, Error Err) {
+  ASSERT_TRUE(Err.operator bool());
+  size_t WhichMsg = 0;
+  Error Remaining =
+      handleErrors(std::move(Err), [&](const ErrorInfoBase &Actual) {
+        ASSERT_LT(WhichMsg, ExpectedMsgs.size());
+        // Each error that is handled must carry the next expected message,
+        // in the order given by ExpectedMsgs.
+        EXPECT_EQ(Actual.message(), ExpectedMsgs[WhichMsg++]);
+      });
+  EXPECT_EQ(WhichMsg, ExpectedMsgs.size());
+  EXPECT_FALSE(Remaining);
+}
+
+void checkError(std::string ExpectedMsg, Error Err) {
+  checkError(ArrayRef<std::string>{ExpectedMsg}, std::move(Err));
+}
+
+static void TestInlineInfoEncodeDecode(llvm::support::endianness ByteOrder,
+                                       const InlineInfo &Inline) {
+  // Test encoding and decoding InlineInfo objects
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  const uint64_t BaseAddr = Inline.Ranges[0].Start;
+  llvm::Error Err = Inline.encode(FW, BaseAddr);
+  ASSERT_FALSE(Err);
+  std::string Bytes(OutStrm.str());
+  uint8_t AddressSize = 4;
+  DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
+  llvm::Expected<InlineInfo> Decoded = InlineInfo::decode(Data, BaseAddr);
+  // Make sure decoding succeeded.
+  ASSERT_TRUE((bool)Decoded);
+  // Make sure decoded object is the same as the one we encoded.
+  EXPECT_EQ(Inline, Decoded.get());
+}
+
+static void TestInlineInfoDecodeError(llvm::support::endianness ByteOrder,
+                                      std::string Bytes,
+                                      const uint64_t BaseAddr,
+                                      std::string ExpectedErrorMsg) {
+  uint8_t AddressSize = 4;
+  DataExtractor Data(Bytes, ByteOrder == llvm::support::little, AddressSize);
+  llvm::Expected<InlineInfo> Decoded = InlineInfo::decode(Data, BaseAddr);
+  // Make sure decoding fails.
+  ASSERT_FALSE((bool)Decoded);
+  // Make sure the error message matches the expected one.
+  checkError(ExpectedErrorMsg, Decoded.takeError());
+}
+
+static void TestInlineInfoEncodeError(llvm::support::endianness ByteOrder,
+                                      const InlineInfo &Inline,
+                                      std::string ExpectedErrorMsg) {
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  const uint64_t BaseAddr = Inline.Ranges.empty() ? 0 : Inline.Ranges[0].Start;
+  llvm::Error Err = Inline.encode(FW, BaseAddr);
+  checkError(ExpectedErrorMsg, std::move(Err));
+}
+
 TEST(GSYMTest, TestInlineInfo) {
   // Test InlineInfo structs.
   InlineInfo II;
@@ -226,6 +287,69 @@ TEST(GSYMTest, TestInlineInfo) {
   ASSERT_EQ(InlineInfos->size(), 2u);
   ASSERT_EQ(*InlineInfos->at(0), Inline1Sub2);
   ASSERT_EQ(*InlineInfos->at(1), Inline1);
+
+  // Test encoding and decoding InlineInfo objects
+  TestInlineInfoEncodeDecode(llvm::support::little, Root);
+  TestInlineInfoEncodeDecode(llvm::support::big, Root);
+}
+
+TEST(GSYMTest, TestInlineInfoEncodeErrors) {
+  // Test InlineInfo encoding errors.
+
+  // Test that we get an error when trying to encode an InlineInfo object
+  // that has no ranges.
+  InlineInfo Empty;
+  std::string EmptyErr("attempted to encode invalid InlineInfo object");
+  TestInlineInfoEncodeError(llvm::support::little, Empty, EmptyErr);
+  TestInlineInfoEncodeError(llvm::support::big, Empty, EmptyErr);
+
+  // Verify that we get an error trying to encode an InlineInfo object that has
+  // a child InlineInfo that has no ranges.
+  InlineInfo ContainsEmpty;
+  ContainsEmpty.Ranges.insert({0x100,200});
+  ContainsEmpty.Children.push_back(Empty);
+  TestInlineInfoEncodeError(llvm::support::little, ContainsEmpty, EmptyErr);
+  TestInlineInfoEncodeError(llvm::support::big, ContainsEmpty, EmptyErr);
+
+  // Verify that we get an error trying to encode an InlineInfo object that has
+  // a child whose address range is not contained in the parent address range.
+  InlineInfo ChildNotContained;
+  std::string ChildNotContainedErr("child range not contained in parent");
+  ChildNotContained.Ranges.insert({0x100,200});
+  InlineInfo ChildNotContainedChild;
+  ChildNotContainedChild.Ranges.insert({0x200,300});
+  ChildNotContained.Children.push_back(ChildNotContainedChild);
+  TestInlineInfoEncodeError(llvm::support::little, ChildNotContained,
+                            ChildNotContainedErr);
+  TestInlineInfoEncodeError(llvm::support::big, ChildNotContained,
+                            ChildNotContainedErr);
+
+}
+
+TEST(GSYMTest, TestInlineInfoDecodeErrors) {
+  // Test decoding InlineInfo objects that ensure we report an appropriate
+  // error message.
+  const llvm::support::endianness ByteOrder = llvm::support::little;
+  SmallString<512> Str;
+  raw_svector_ostream OutStrm(Str);
+  FileWriter FW(OutStrm, ByteOrder);
+  const uint64_t BaseAddr = 0x100;
+  TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
+      "0x00000000: missing InlineInfo address ranges data");
+  AddressRanges Ranges;
+  Ranges.insert({BaseAddr, BaseAddr+0x100});
+  Ranges.encode(FW, BaseAddr);
+  TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
+      "0x00000004: missing InlineInfo uint8_t indicating children");
+  FW.writeU8(0);
+  TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
+      "0x00000005: missing InlineInfo uint32_t for name");
+  FW.writeU32(0);
+  TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
+      "0x00000009: missing ULEB128 for InlineInfo call file");
+  FW.writeU8(0);
+  TestInlineInfoDecodeError(ByteOrder, OutStrm.str(), BaseAddr,
+      "0x0000000a: missing ULEB128 for InlineInfo call line");
 }
 
 TEST(GSYMTest, TestLineEntry) {
@@ -334,6 +458,18 @@ TEST(GSYMTest, TestRanges) {
   EXPECT_FALSE(Ranges.contains(0x5000 + 1));
   EXPECT_FALSE(Ranges.contains(UINT64_MAX));
 
+  EXPECT_FALSE(Ranges.contains(AddressRange()));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x1000-1, 0x1000)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x1000)));
+  EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x1000+1)));
+  EXPECT_TRUE(Ranges.contains(AddressRange(0x1000, 0x2000)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x1000, 0x2001)));
+  EXPECT_TRUE(Ranges.contains(AddressRange(0x2000, 0x3000)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x2000, 0x3001)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x3000, 0x3001)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x1500, 0x4500)));
+  EXPECT_FALSE(Ranges.contains(AddressRange(0x5000, 0x5001)));
+
   // Verify that intersecting ranges get combined
   Ranges.clear();
   Ranges.insert(AddressRange(0x1100, 0x1F00));




More information about the llvm-commits mailing list