[lld] 6836a47 - [DebugInfo] Add error checking around data extraction in DWARFAbbreviationDeclaration::extract

Alex Langford via llvm-commits llvm-commits at lists.llvm.org
Tue Jun 6 10:51:35 PDT 2023


Author: Alex Langford
Date: 2023-06-06T10:50:54-07:00
New Revision: 6836a47b7e6b57927664ec6ec750ae37bb951129

URL: https://github.com/llvm/llvm-project/commit/6836a47b7e6b57927664ec6ec750ae37bb951129
DIFF: https://github.com/llvm/llvm-project/commit/6836a47b7e6b57927664ec6ec750ae37bb951129.diff

LOG: [DebugInfo] Add error checking around data extraction in DWARFAbbreviationDeclaration::extract

In trying to hoist errors further up this callstack, I discovered that
if the data in the debug_abbrev section is invalid entirely, the code
that parses the debug_abbrev section may do strange or unpredictable
things. The underlying issue is that DataExtractor will return a value
of 0 when it encounters an error in extracting a LEB128 value. It's thus
difficult to determine if there was an error just by looking at the
return value. This patch aims to bail at the first sight of an error in
the debug_abbrev parsing code.

Differential Revision: https://reviews.llvm.org/D151755

Added: 
    

Modified: 
    lld/test/MachO/stabs-dwarf5.s
    lld/test/MachO/stabs-icf.s
    lld/test/MachO/stabs.s
    llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
    llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp

Removed: 
    


################################################################################
diff  --git a/lld/test/MachO/stabs-dwarf5.s b/lld/test/MachO/stabs-dwarf5.s
index 61a0986e67d50..169f17987aab4 100644
--- a/lld/test/MachO/stabs-dwarf5.s
+++ b/lld/test/MachO/stabs-dwarf5.s
@@ -51,6 +51,8 @@ Lsection_abbrev:
   .byte  114                     ## DW_AT_str_offsets_base
   .byte  23                      ## DW_FORM_sec_offset
   .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
 .section  __DWARF,__debug_info,regular,debug
 .set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
   .long  Lset0

diff  --git a/lld/test/MachO/stabs-icf.s b/lld/test/MachO/stabs-icf.s
index 129fc86663df2..99d0871ce4d2c 100644
--- a/lld/test/MachO/stabs-icf.s
+++ b/lld/test/MachO/stabs-icf.s
@@ -64,6 +64,8 @@ Lsection_abbrev:
   .byte  18                      ## DW_AT_high_pc
   .byte  6                       ## DW_FORM_data4
   .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
 .section  __DWARF,__debug_info,regular,debug
 .set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
   .long  Lset0

diff  --git a/lld/test/MachO/stabs.s b/lld/test/MachO/stabs.s
index aa96f7c147a7e..145e7960b6657 100644
--- a/lld/test/MachO/stabs.s
+++ b/lld/test/MachO/stabs.s
@@ -205,6 +205,8 @@ Lsection_abbrev:
   .byte  18                      ## DW_AT_high_pc
   .byte  6                       ## DW_FORM_data4
   .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
 .section  __DWARF,__debug_info,regular,debug
 .set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
   .long  Lset0
@@ -254,6 +256,8 @@ Lsection_abbrev:
   .byte  18                      ## DW_AT_high_pc
   .byte  6                       ## DW_FORM_data4
   .byte  0                       ## EOM(1)
+  .byte  0                       ## EOM(2)
+  .byte  0                       ## EOM(3)
 .section  __DWARF,__debug_info,regular,debug
 .set Lset0, Ldebug_info_end0-Ldebug_info_start0 ## Length of Unit
   .long  Lset0

diff  --git a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
index e131c42375ab5..11de45db7a4e2 100644
--- a/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
+++ b/llvm/lib/DebugInfo/DWARF/DWARFAbbreviationDeclaration.cpp
@@ -38,18 +38,28 @@ llvm::Expected<DWARFAbbreviationDeclaration::ExtractState>
 DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
   clear();
   const uint64_t Offset = *OffsetPtr;
-  Code = Data.getULEB128(OffsetPtr);
+  Error Err = Error::success();
+  Code = Data.getULEB128(OffsetPtr, &Err);
+  if (Err)
+    return Err;
+
   if (Code == 0)
     return ExtractState::Complete;
 
   CodeByteSize = *OffsetPtr - Offset;
-  Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr));
+  Tag = static_cast<llvm::dwarf::Tag>(Data.getULEB128(OffsetPtr, &Err));
+  if (Err)
+    return Err;
+
   if (Tag == DW_TAG_null) {
     clear();
     return make_error<llvm::object::GenericBinaryError>(
         "abbreviation declaration requires a non-null tag");
   }
-  uint8_t ChildrenByte = Data.getU8(OffsetPtr);
+  uint8_t ChildrenByte = Data.getU8(OffsetPtr, &Err);
+  if (Err)
+    return Err;
+
   HasChildren = (ChildrenByte == DW_CHILDREN_yes);
   // Assign a value to our optional FixedAttributeSize member variable. If
   // this member variable still has a value after the while loop below, then
@@ -58,8 +68,13 @@ DWARFAbbreviationDeclaration::extract(DataExtractor Data, uint64_t *OffsetPtr) {
 
   // Read all of the abbreviation attributes and forms.
   while (Data.isValidOffset(*OffsetPtr)) {
-    auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr));
-    auto F = static_cast<Form>(Data.getULEB128(OffsetPtr));
+    auto A = static_cast<Attribute>(Data.getULEB128(OffsetPtr, &Err));
+    if (Err)
+      return Err;
+
+    auto F = static_cast<Form>(Data.getULEB128(OffsetPtr, &Err));
+    if (Err)
+      return Err;
 
     // We successfully reached the end of this abbreviation declaration
     // since both attribute and form are zero. There may be more abbreviation

diff  --git a/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp b/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
index ad61ccbc22c95..62627ae6956ff 100644
--- a/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
+++ b/llvm/unittests/DebugInfo/DWARF/DWARFDebugAbbrevTest.cpp
@@ -42,6 +42,16 @@ void writeAbbreviationDeclarations(raw_ostream &OS, uint32_t FirstCode,
   encodeULEB128(0, OS);
 }
 
+void writeMalformedULEB128Value(raw_ostream &OS) {
+  OS << static_cast<uint8_t>(0x80);
+}
+
+void writeULEB128LargerThan64Bits(raw_ostream &OS) {
+  static constexpr llvm::StringRef LargeULEB128 =
+      "\x80\x80\x80\x80\x80\x80\x80\x80\x80\x80\x01";
+  OS << LargeULEB128;
+}
+
 TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccess) {
   SmallString<64> RawData;
   raw_svector_ostream OS(RawData);
@@ -104,3 +114,168 @@ TEST(DWARFDebugAbbrevTest, DWARFAbbrevDeclSetExtractSuccessOutOfOrder) {
   EXPECT_FALSE(Abbrev1->hasChildren());
   EXPECT_EQ(Abbrev1->getNumAttributes(), 1u);
 }
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetCodeExtractionError) {
+  SmallString<64> RawData;
+
+  // Check for malformed ULEB128.
+  {
+    raw_svector_ostream OS(RawData);
+    writeMalformedULEB128Value(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
+    EXPECT_EQ(Offset, 0u);
+  }
+
+  RawData.clear();
+  // Check for ULEB128 too large to fit into a uin64_t.
+  {
+    raw_svector_ostream OS(RawData);
+    writeULEB128LargerThan64Bits(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_FALSE(AbbrevSet.extract(Data, &Offset));
+    EXPECT_EQ(Offset, 0u);
+  }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetTagExtractionError) {
+  SmallString<64> RawData;
+  const uint32_t Code = 1;
+
+  // Check for malformed ULEB128.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    writeMalformedULEB128Value(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // Only the code was extracted correctly.
+    EXPECT_EQ(Offset, 1u);
+  }
+
+  RawData.clear();
+  // Check for ULEB128 too large to fit into a uint64_t.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    writeULEB128LargerThan64Bits(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // Only the code was extracted correctly.
+    EXPECT_EQ(Offset, 1u);
+  }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviatioDeclSetChildExtractionError) {
+  SmallString<64> RawData;
+  const uint32_t Code = 1;
+  const dwarf::Tag Tag = DW_TAG_compile_unit;
+
+  // We want to make sure that we fail if we reach the end of the stream before
+  // reading the 'children' byte.
+  raw_svector_ostream OS(RawData);
+  encodeULEB128(Code, OS);
+  encodeULEB128(Tag, OS);
+
+  uint64_t Offset = 0;
+  DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+  DWARFAbbreviationDeclarationSet AbbrevSet;
+  EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+  // The code and the tag were extracted correctly.
+  EXPECT_EQ(Offset, 2u);
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetAttributeExtractionError) {
+  SmallString<64> RawData;
+  const uint32_t Code = 1;
+  const dwarf::Tag Tag = DW_TAG_compile_unit;
+  const uint8_t Children = DW_CHILDREN_yes;
+
+  // Check for malformed ULEB128.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    encodeULEB128(Tag, OS);
+    OS << Children;
+    writeMalformedULEB128Value(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // The code, tag, and child byte were extracted correctly.
+    EXPECT_EQ(Offset, 3u);
+  }
+
+  RawData.clear();
+  // Check for ULEB128 too large to fit into a uint64_t.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    encodeULEB128(Tag, OS);
+    OS << Children;
+    writeULEB128LargerThan64Bits(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // The code, tag, and child byte were extracted correctly.
+    EXPECT_EQ(Offset, 3u);
+  }
+}
+
+TEST(DWARFDebugAbbrevTest, DWARFAbbreviationDeclSetFormExtractionError) {
+  SmallString<64> RawData;
+  const uint32_t Code = 1;
+  const dwarf::Tag Tag = DW_TAG_compile_unit;
+  const uint8_t Children = DW_CHILDREN_yes;
+  const dwarf::Attribute Attr = DW_AT_name;
+
+  // Check for malformed ULEB128.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    encodeULEB128(Tag, OS);
+    OS << Children;
+    encodeULEB128(Attr, OS);
+    writeMalformedULEB128Value(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // The code, tag, child byte, and first attribute were extracted correctly.
+    EXPECT_EQ(Offset, 4u);
+  }
+
+  RawData.clear();
+  // Check for ULEB128 too large to fit into a uint64_t.
+  {
+    raw_svector_ostream OS(RawData);
+    encodeULEB128(Code, OS);
+    encodeULEB128(Tag, OS);
+    OS << Children;
+    encodeULEB128(Attr, OS);
+    writeULEB128LargerThan64Bits(OS);
+
+    uint64_t Offset = 0;
+    DataExtractor Data(RawData, sys::IsLittleEndianHost, sizeof(uint64_t));
+    DWARFAbbreviationDeclarationSet AbbrevSet;
+    EXPECT_TRUE(AbbrevSet.extract(Data, &Offset));
+    // The code, tag, child byte, and first attribute were extracted correctly.
+    EXPECT_EQ(Offset, 4u);
+  }
+}


        


More information about the llvm-commits mailing list